xref: /netbsd-src/sys/net/if_ipsec.c (revision 87d689fb734c654d2486f87f7be32f1b53ecdbec)
1 /*	$NetBSD: if_ipsec.c,v 1.1 2018/01/10 10:56:30 knakahara Exp $  */
2 
3 /*
4  * Copyright (c) 2017 Internet Initiative Japan Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: if_ipsec.c,v 1.1 2018/01/10 10:56:30 knakahara Exp $");
31 
32 #ifdef _KERNEL_OPT
33 #include "opt_inet.h"
34 #endif
35 
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/mbuf.h>
40 #include <sys/socket.h>
41 #include <sys/sockio.h>
42 #include <sys/errno.h>
43 #include <sys/ioctl.h>
44 #include <sys/time.h>
45 #include <sys/syslog.h>
46 #include <sys/cpu.h>
47 #include <sys/kmem.h>
48 #include <sys/mutex.h>
49 #include <sys/pserialize.h>
50 #include <sys/psref.h>
51 
52 #include <net/if.h>
53 #include <net/if_types.h>
54 #include <net/route.h>
55 #include <net/bpf.h>
56 #include <net/pfkeyv2.h>
57 
58 #include <netinet/in.h>
59 #include <netinet/in_systm.h>
60 #include <netinet/ip.h>
61 #ifdef	INET
62 #include <netinet/in_var.h>
63 #endif	/* INET */
64 
65 #ifdef INET6
66 #include <netinet6/in6_var.h>
67 #include <netinet/ip6.h>
68 #include <netinet6/ip6_var.h>
69 #endif /* INET6 */
70 
71 #include <netinet/ip_encap.h>
72 
73 #include <net/if_ipsec.h>
74 
75 #include <net/raw_cb.h>
76 #include <net/pfkeyv2.h>
77 
78 #include <netipsec/key.h>
79 #include <netipsec/ipsec.h>
80 #include <netipsec/ipsecif.h>
81 
/*
 * Forward declarations for the file-local helpers.
 */

/* percpu_foreach() callbacks for the per-CPU struct ipsec_ro */
static void if_ipsec_ro_init_pc(void *, void *, struct cpu_info *);
static void if_ipsec_ro_fini_pc(void *, void *, struct cpu_info *);

/* create/destroy hooks handed to the "ipsec" interface cloner */
static int if_ipsec_clone_create(struct if_clone *, int);
static int if_ipsec_clone_destroy(struct ifnet *);

/* packet output/input helpers */
static inline int if_ipsec_out_direct(struct ipsec_variant *, struct mbuf *, int);
static inline void if_ipsec_in_enqueue(struct mbuf *, int, struct ifnet *);

/* tunnel (outer address pair) configuration */
static int if_ipsec_encap_attach(struct ipsec_variant *);
static int if_ipsec_encap_detach(struct ipsec_variant *);
static int if_ipsec_set_tunnel(struct ifnet *,
    struct sockaddr *, struct sockaddr *);
static void if_ipsec_delete_tunnel(struct ifnet *);
static int if_ipsec_ensure_flags(struct ifnet *, short);
static void if_ipsec_attach0(struct ipsec_softc *);

/* variant replacement, used by the set/delete/ensure_flags paths */
static int if_ipsec_update_variant(struct ipsec_softc *,
    struct ipsec_variant *, struct ipsec_variant *);

/* sadb_msg */
static inline void if_ipsec_add_mbuf(struct mbuf *, void *, size_t);
static inline void if_ipsec_add_pad(struct mbuf *, size_t);
static inline size_t if_ipsec_set_sadb_addr(struct sadb_address *,
    struct sockaddr *, int, uint16_t);
static inline size_t if_ipsec_set_sadb_src(struct sadb_address *,
    struct sockaddr *, int);
static inline size_t if_ipsec_set_sadb_dst(struct sadb_address *,
    struct sockaddr *, int);
static inline size_t if_ipsec_set_sadb_x_policy(struct sadb_x_policy *,
    struct sadb_x_ipsecrequest *, uint16_t, uint8_t, uint32_t, uint8_t);
static inline void if_ipsec_set_sadb_msg(struct sadb_msg *, uint16_t, uint8_t);
static inline void if_ipsec_set_sadb_msg_add(struct sadb_msg *, uint16_t);
static inline void if_ipsec_set_sadb_msg_del(struct sadb_msg *, uint16_t);
/* SPD */
static int if_ipsec_share_sp(struct ipsec_variant *);
static int if_ipsec_unshare_sp(struct ipsec_variant *);
static inline struct secpolicy *if_ipsec_add_sp0(struct sockaddr *,
    in_port_t, struct sockaddr *, in_port_t, int, int, int, u_int);
static inline int if_ipsec_del_sp0(struct secpolicy *);
static int if_ipsec_add_sp(struct ipsec_variant *,
    struct sockaddr *, in_port_t, struct sockaddr *, in_port_t);
static void if_ipsec_del_sp(struct ipsec_variant *);
static int if_ipsec_replace_sp(struct ipsec_softc *, struct ipsec_variant *,
    struct ipsec_variant *);
127 
static int if_ipsec_set_addr_port(struct sockaddr *, struct sockaddr *,
    in_port_t);
/*
 * Convenience wrappers around if_ipsec_set_addr_port(): hand the outer
 * source (resp. destination) address and port stored in the variant "var"
 * to it, with "target" as the first argument.  The expansion evaluates to
 * the int returned by if_ipsec_set_addr_port(); callers in this file treat
 * a non-zero value as an error.
 */
#define IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, target)			\
	if_ipsec_set_addr_port(target, (var)->iv_psrc, (var)->iv_sport)
#define IF_IPSEC_GATHER_PDST_ADDR_PORT(var, target)			\
	if_ipsec_set_addr_port(target, (var)->iv_pdst, (var)->iv_dport)
134 
135 /*
136  * ipsec global variable definitions
137  */
138 
/*
 * List of every ipsec(4) softc, protected by "lock".
 * This list is used in ioctl context only.
 */
LIST_HEAD(ipsec_sclist, ipsec_softc);
static struct {
	struct ipsec_sclist list;
	kmutex_t lock;
} ipsec_softcs __cacheline_aligned;

/* Both are created once in ipsecifattach(). */
pserialize_t ipsec_psz __read_mostly;
/* psref class used for the iv_psref target of every ipsec_variant. */
struct psref_class *iv_psref_class __read_mostly;

/* Cloner for "ipsecN" interfaces; attached in ipsecifattach(). */
struct if_clone ipsec_cloner =
    IF_CLONE_INITIALIZER("ipsec", if_ipsec_clone_create, if_ipsec_clone_destroy);
/* Nesting limit handed to if_tunnel_check_nesting() on output. */
static int max_ipsec_nesting = MAX_IPSEC_NEST;
152 
153 /* ARGSUSED */
154 void
155 ipsecifattach(int count)
156 {
157 
158 	mutex_init(&ipsec_softcs.lock, MUTEX_DEFAULT, IPL_NONE);
159 	LIST_INIT(&ipsec_softcs.list);
160 
161 	ipsec_psz = pserialize_create();
162 	iv_psref_class = psref_class_create("ipsecvar", IPL_SOFTNET);
163 
164 	if_clone_attach(&ipsec_cloner);
165 }
166 
167 static int
168 if_ipsec_clone_create(struct if_clone *ifc, int unit)
169 {
170 	struct ipsec_softc *sc;
171 	struct ipsec_variant *var;
172 
173 	sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
174 
175 	if_initname(&sc->ipsec_if, ifc->ifc_name, unit);
176 
177 	if_ipsec_attach0(sc);
178 
179 	var = kmem_zalloc(sizeof(*var), KM_SLEEP);
180 	var->iv_softc = sc;
181 	psref_target_init(&var->iv_psref, iv_psref_class);
182 
183 	sc->ipsec_var = var;
184 	mutex_init(&sc->ipsec_lock, MUTEX_DEFAULT, IPL_NONE);
185 	sc->ipsec_ro_percpu = percpu_alloc(sizeof(struct ipsec_ro));
186 	percpu_foreach(sc->ipsec_ro_percpu, if_ipsec_ro_init_pc, NULL);
187 
188 	mutex_enter(&ipsec_softcs.lock);
189 	LIST_INSERT_HEAD(&ipsec_softcs.list, sc, ipsec_list);
190 	mutex_exit(&ipsec_softcs.lock);
191 	return 0;
192 }
193 
194 static void
195 if_ipsec_attach0(struct ipsec_softc *sc)
196 {
197 
198 	sc->ipsec_if.if_addrlen = 0;
199 	sc->ipsec_if.if_mtu    = IPSEC_MTU;
200 	sc->ipsec_if.if_flags  = IFF_POINTOPOINT | IFF_MULTICAST;
201 	/* set ipsec(4) specific default flags. */
202 	sc->ipsec_if.if_flags  |= IFF_FWD_IPV6;
203 	sc->ipsec_if.if_extflags = IFEF_NO_LINK_STATE_CHANGE | IFEF_MPSAFE;
204 	sc->ipsec_if.if_ioctl  = if_ipsec_ioctl;
205 	sc->ipsec_if.if_output = if_ipsec_output;
206 	sc->ipsec_if.if_type   = IFT_IPSEC;
207 	sc->ipsec_if.if_dlt    = DLT_NULL;
208 	sc->ipsec_if.if_softc  = sc;
209 	IFQ_SET_READY(&sc->ipsec_if.if_snd);
210 	if_initialize(&sc->ipsec_if);
211 	if_alloc_sadl(&sc->ipsec_if);
212 	bpf_attach(&sc->ipsec_if, DLT_NULL, sizeof(u_int));
213 	if_register(&sc->ipsec_if);
214 }
215 
216 static void
217 if_ipsec_ro_init_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
218 {
219 	struct ipsec_ro *iro = p;
220 
221 	mutex_init(&iro->ir_lock, MUTEX_DEFAULT, IPL_NONE);
222 }
223 
224 static void
225 if_ipsec_ro_fini_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
226 {
227 	struct ipsec_ro *iro = p;
228 
229 	rtcache_free(&iro->ir_ro);
230 
231 	mutex_destroy(&iro->ir_lock);
232 }
233 
234 static int
235 if_ipsec_clone_destroy(struct ifnet *ifp)
236 {
237 	struct ipsec_softc *sc = ifp->if_softc;
238 	struct ipsec_variant *var;
239 	int bound;
240 
241 	mutex_enter(&ipsec_softcs.lock);
242 	LIST_REMOVE(sc, ipsec_list);
243 	mutex_exit(&ipsec_softcs.lock);
244 
245 	bound = curlwp_bind();
246 	if_ipsec_delete_tunnel(&sc->ipsec_if);
247 	curlwp_bindx(bound);
248 
249 	bpf_detach(ifp);
250 	if_detach(ifp);
251 
252 	percpu_foreach(sc->ipsec_ro_percpu, if_ipsec_ro_fini_pc, NULL);
253 	percpu_free(sc->ipsec_ro_percpu, sizeof(struct ipsec_ro));
254 
255 	mutex_destroy(&sc->ipsec_lock);
256 
257 	var = sc->ipsec_var;
258 	kmem_free(var, sizeof(*var));
259 	kmem_free(sc, sizeof(*sc));
260 
261 	return 0;
262 }
263 
264 static inline bool
265 if_ipsec_nat_t(struct ipsec_softc *sc)
266 {
267 
268 	return (sc->ipsec_if.if_flags & IFF_NAT_T) != 0;
269 }
270 
271 static inline bool
272 if_ipsec_fwd_ipv6(struct ipsec_softc *sc)
273 {
274 
275 	return (sc->ipsec_if.if_flags & IFF_FWD_IPV6) != 0;
276 }
277 
278 int
279 if_ipsec_encap_func(struct mbuf *m, int off, int proto, void *arg)
280 {
281 	struct ip ip;
282 	struct ipsec_softc *sc;
283 	struct ipsec_variant *var = NULL;
284 	struct psref psref;
285 	int ret = 0;
286 
287 	sc = arg;
288 	KASSERT(sc != NULL);
289 
290 	if ((sc->ipsec_if.if_flags & IFF_UP) == 0)
291 		goto out;
292 
293 	var = if_ipsec_getref_variant(sc, &psref);
294 	if (if_ipsec_variant_is_unconfigured(var))
295 		goto out;
296 
297 	switch (proto) {
298 	case IPPROTO_IPV4:
299 	case IPPROTO_IPV6:
300 		break;
301 	default:
302 		goto out;
303 	}
304 
305 	if (m->m_pkthdr.len < sizeof(ip))
306 		goto out;
307 
308 	m_copydata(m, 0, sizeof(ip), &ip);
309 	switch (ip.ip_v) {
310 #ifdef INET
311 	case IPVERSION:
312 		if (var->iv_psrc->sa_family != AF_INET ||
313 		    var->iv_pdst->sa_family != AF_INET)
314 			goto out;
315 		ret = ipsecif4_encap_func(m, &ip, var);
316 		break;
317 #endif
318 	default:
319 		goto out;
320 	}
321 
322 out:
323 	if (var != NULL)
324 		if_ipsec_putref_variant(var, &psref);
325 	return ret;
326 }
327 
328 /*
329  * ipsec(4) I/F may cause infinite recursion calls when misconfigured.
330  * We'll prevent this by introducing upper limit.
331  */
332 static int
333 if_ipsec_check_nesting(struct ifnet *ifp, struct mbuf *m)
334 {
335 
336 	return if_tunnel_check_nesting(ifp, m, max_ipsec_nesting);
337 }
338 
339 int
340 if_ipsec_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
341     const struct rtentry *rt)
342 {
343 	struct ipsec_softc *sc = ifp->if_softc;
344 	struct ipsec_variant *var;
345 	struct psref psref;
346 	int error;
347 	int bound;
348 
349 	IFQ_CLASSIFY(&ifp->if_snd, m, dst->sa_family);
350 
351 	error = if_ipsec_check_nesting(ifp, m);
352 	if (error) {
353 		m_freem(m);
354 		goto noref_end;
355 	}
356 
357 	if ((ifp->if_flags & IFF_UP) == 0) {
358 		m_freem(m);
359 		error = ENETDOWN;
360 		goto noref_end;
361 	}
362 
363 
364 	bound = curlwp_bind();
365 	var = if_ipsec_getref_variant(sc, &psref);
366 	if (if_ipsec_variant_is_unconfigured(var)) {
367 		m_freem(m);
368 		error = ENETDOWN;
369 		goto end;
370 	}
371 
372 	m->m_flags &= ~(M_BCAST|M_MCAST);
373 
374 	/* use DLT_NULL encapsulation here to pass inner af type */
375 	M_PREPEND(m, sizeof(int), M_DONTWAIT);
376 	if (!m) {
377 		error = ENOBUFS;
378 		goto end;
379 	}
380 	*mtod(m, int *) = dst->sa_family;
381 
382 #if INET6
383 	/* drop IPv6 packet if IFF_FWD_IPV6 is not set */
384 	if (dst->sa_family == AF_INET6 &&
385 	    !if_ipsec_fwd_ipv6(sc)) {
386 		/*
387 		 * IPv6 packet is not allowed to forward,that is not error.
388 		 */
389 		error = 0;
390 		IF_DROP(&ifp->if_snd);
391 		m_freem(m);
392 		goto end;
393 	}
394 #endif
395 
396 	error = if_ipsec_out_direct(var, m, dst->sa_family);
397 
398 end:
399 	if_ipsec_putref_variant(var, &psref);
400 	curlwp_bindx(bound);
401 noref_end:
402 	if (error)
403 		ifp->if_oerrors++;
404 
405 	return error;
406 }
407 
408 static inline int
409 if_ipsec_out_direct(struct ipsec_variant *var, struct mbuf *m, int family)
410 {
411 	struct ifnet *ifp = &var->iv_softc->ipsec_if;
412 	int error;
413 	int len;
414 
415 	KASSERT(if_ipsec_heldref_variant(var));
416 	KASSERT(var->iv_output != NULL);
417 
418 	len = m->m_pkthdr.len;
419 
420 	/* input DLT_NULL frame to BPF */
421 	bpf_mtap(ifp, m);
422 
423 	/* grab and chop off inner af type */
424 	/* XXX need pullup? */
425 	m_adj(m, sizeof(int));
426 
427 	error = var->iv_output(var, family, m);
428 	if (error)
429 		return error;
430 
431 	ifp->if_opackets++;
432 	ifp->if_obytes += len;
433 
434 	return 0;
435 }
436 
437 void
438 if_ipsec_input(struct mbuf *m, int af, struct ifnet *ifp)
439 {
440 
441 	KASSERT(ifp != NULL);
442 
443 	m_set_rcvif(m, ifp);
444 
445 	bpf_mtap_af(ifp, af, m);
446 
447 	if_ipsec_in_enqueue(m, af, ifp);
448 
449 	return;
450 }
451 
452 static inline void
453 if_ipsec_in_enqueue(struct mbuf *m, int af, struct ifnet *ifp)
454 {
455 	pktqueue_t *pktq;
456 	int pktlen;
457 
458 	/*
459 	 * Put the packet to the network layer input queue according to the
460 	 * specified address family.
461 	 */
462 	switch (af) {
463 #ifdef INET
464 	case AF_INET:
465 		pktq = ip_pktq;
466 		break;
467 #endif
468 #ifdef INET6
469 	case AF_INET6:
470 		pktq = ip6_pktq;
471 		break;
472 #endif
473 	default:
474 		ifp->if_ierrors++;
475 		m_freem(m);
476 		return;
477 	}
478 
479 #if 1
480 	const u_int h = curcpu()->ci_index;
481 #else
482 	const uint32_t h = pktq_rps_hash(m);
483 #endif
484 	pktlen = m->m_pkthdr.len;
485 	if (__predict_true(pktq_enqueue(pktq, m, h))) {
486 		ifp->if_ibytes += pktlen;
487 		ifp->if_ipackets++;
488 	} else {
489 		m_freem(m);
490 	}
491 
492 	return;
493 }
494 
/*
 * Check that sa_len of "addr" matches the sockaddr structure of its
 * address family.
 * Returns 0 when it does, EINVAL when the length is wrong and
 * EAFNOSUPPORT for any family that is not compiled in.
 */
static inline int
if_ipsec_check_salen(struct sockaddr *addr)
{

#ifdef INET
	if (addr->sa_family == AF_INET) {
		return (addr->sa_len == sizeof(struct sockaddr_in)) ?
		    0 : EINVAL;
	}
#endif /* INET */
#ifdef INET6
	if (addr->sa_family == AF_INET6) {
		return (addr->sa_len == sizeof(struct sockaddr_in6)) ?
		    0 : EINVAL;
	}
#endif /* INET6 */

	return EAFNOSUPPORT;
}
518 
519 /* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
520 int
521 if_ipsec_ioctl(struct ifnet *ifp, u_long cmd, void *data)
522 {
523 	struct ipsec_softc *sc  = ifp->if_softc;
524 	struct ipsec_variant *var = NULL;
525 	struct ifreq     *ifr = (struct ifreq*)data;
526 	struct ifaddr    *ifa = (struct ifaddr*)data;
527 	int error = 0, size;
528 	struct sockaddr *dst, *src;
529 	u_long mtu;
530 	short oflags = ifp->if_flags;
531 	int bound;
532 	struct psref psref;
533 
534 	switch (cmd) {
535 	case SIOCINITIFADDR:
536 		ifp->if_flags |= IFF_UP;
537 		ifa->ifa_rtrequest = p2p_rtrequest;
538 		break;
539 
540 	case SIOCSIFDSTADDR:
541 		break;
542 
543 	case SIOCADDMULTI:
544 	case SIOCDELMULTI:
545 		switch (ifr->ifr_addr.sa_family) {
546 #ifdef INET
547 		case AF_INET:	/* IP supports Multicast */
548 			break;
549 #endif /* INET */
550 #ifdef INET6
551 		case AF_INET6:	/* IP6 supports Multicast */
552 			break;
553 #endif /* INET6 */
554 		default:  /* Other protocols doesn't support Multicast */
555 			error = EAFNOSUPPORT;
556 			break;
557 		}
558 		break;
559 
560 	case SIOCSIFMTU:
561 		mtu = ifr->ifr_mtu;
562 		if (mtu < IPSEC_MTU_MIN || mtu > IPSEC_MTU_MAX)
563 			return EINVAL;
564 		else if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
565 			error = 0;
566 		break;
567 
568 #ifdef INET
569 	case SIOCSIFPHYADDR:
570 #endif
571 #ifdef INET6
572 	case SIOCSIFPHYADDR_IN6:
573 #endif /* INET6 */
574 	case SIOCSLIFPHYADDR:
575 		switch (cmd) {
576 #ifdef INET
577 		case SIOCSIFPHYADDR:
578 			src = (struct sockaddr *)
579 				&(((struct in_aliasreq *)data)->ifra_addr);
580 			dst = (struct sockaddr *)
581 				&(((struct in_aliasreq *)data)->ifra_dstaddr);
582 			break;
583 #endif /* INET */
584 #ifdef INET6
585 		case SIOCSIFPHYADDR_IN6:
586 			src = (struct sockaddr *)
587 				&(((struct in6_aliasreq *)data)->ifra_addr);
588 			dst = (struct sockaddr *)
589 				&(((struct in6_aliasreq *)data)->ifra_dstaddr);
590 			break;
591 #endif /* INET6 */
592 		case SIOCSLIFPHYADDR:
593 			src = (struct sockaddr *)
594 				&(((struct if_laddrreq *)data)->addr);
595 			dst = (struct sockaddr *)
596 				&(((struct if_laddrreq *)data)->dstaddr);
597 			break;
598 		default:
599 			return EINVAL;
600 		}
601 
602 		/* sa_family must be equal */
603 		if (src->sa_family != dst->sa_family)
604 			return EINVAL;
605 
606 		error = if_ipsec_check_salen(src);
607 		if (error)
608 			return error;
609 		error = if_ipsec_check_salen(dst);
610 		if (error)
611 			return error;
612 
613 		/* check sa_family looks sane for the cmd */
614 		switch (cmd) {
615 #ifdef INET
616 		case SIOCSIFPHYADDR:
617 			if (src->sa_family == AF_INET)
618 				break;
619 			return EAFNOSUPPORT;
620 #endif /* INET */
621 #ifdef INET6
622 		case SIOCSIFPHYADDR_IN6:
623 			if (src->sa_family == AF_INET6)
624 				break;
625 			return EAFNOSUPPORT;
626 #endif /* INET6 */
627 		case SIOCSLIFPHYADDR:
628 			/* checks done in the above */
629 			break;
630 		}
631 		/*
632 		 * calls if_ipsec_getref_variant() for other softcs to check
633 		 * address pair duplicattion
634 		 */
635 		bound = curlwp_bind();
636 		error = if_ipsec_set_tunnel(&sc->ipsec_if, src, dst);
637 		if (error)
638 			goto bad;
639 		break;
640 
641 	case SIOCDIFPHYADDR:
642 		bound = curlwp_bind();
643 		if_ipsec_delete_tunnel(&sc->ipsec_if);
644 		curlwp_bindx(bound);
645 		break;
646 
647 	case SIOCGIFPSRCADDR:
648 #ifdef INET6
649 	case SIOCGIFPSRCADDR_IN6:
650 #endif /* INET6 */
651 		bound = curlwp_bind();
652 		var = if_ipsec_getref_variant(sc, &psref);
653 		if (var->iv_psrc == NULL) {
654 			error = EADDRNOTAVAIL;
655 			goto bad;
656 		}
657 		src = var->iv_psrc;
658 		switch (cmd) {
659 #ifdef INET
660 		case SIOCGIFPSRCADDR:
661 			dst = &ifr->ifr_addr;
662 			size = sizeof(ifr->ifr_addr);
663 			break;
664 #endif /* INET */
665 #ifdef INET6
666 		case SIOCGIFPSRCADDR_IN6:
667 			dst = (struct sockaddr *)
668 				&(((struct in6_ifreq *)data)->ifr_addr);
669 			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
670 			break;
671 #endif /* INET6 */
672 		default:
673 			error = EADDRNOTAVAIL;
674 			goto bad;
675 		}
676 		if (src->sa_len > size) {
677 			error = EINVAL;
678 			goto bad;
679 		}
680 		error = IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, dst);
681 		if (error)
682 			goto bad;
683 		if_ipsec_putref_variant(var, &psref);
684 		curlwp_bindx(bound);
685 		break;
686 
687 	case SIOCGIFPDSTADDR:
688 #ifdef INET6
689 	case SIOCGIFPDSTADDR_IN6:
690 #endif /* INET6 */
691 		bound = curlwp_bind();
692 		var = if_ipsec_getref_variant(sc, &psref);
693 		if (var->iv_pdst == NULL) {
694 			error = EADDRNOTAVAIL;
695 			goto bad;
696 		}
697 		src = var->iv_pdst;
698 		switch (cmd) {
699 #ifdef INET
700 		case SIOCGIFPDSTADDR:
701 			dst = &ifr->ifr_addr;
702 			size = sizeof(ifr->ifr_addr);
703 			break;
704 #endif /* INET */
705 #ifdef INET6
706 		case SIOCGIFPDSTADDR_IN6:
707 			dst = (struct sockaddr *)
708 				&(((struct in6_ifreq *)data)->ifr_addr);
709 			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
710 			break;
711 #endif /* INET6 */
712 		default:
713 			error = EADDRNOTAVAIL;
714 			goto bad;
715 		}
716 		if (src->sa_len > size) {
717 			error = EINVAL;
718 			goto bad;
719 		}
720 		error = IF_IPSEC_GATHER_PDST_ADDR_PORT(var, dst);
721 		if (error)
722 			goto bad;
723 		if_ipsec_putref_variant(var, &psref);
724 		curlwp_bindx(bound);
725 		break;
726 
727 	case SIOCGLIFPHYADDR:
728 		bound = curlwp_bind();
729 		var = if_ipsec_getref_variant(sc, &psref);
730 		if (if_ipsec_variant_is_unconfigured(var)) {
731 			error = EADDRNOTAVAIL;
732 			goto bad;
733 		}
734 
735 		/* copy src */
736 		src = var->iv_psrc;
737 		dst = (struct sockaddr *)
738 			&(((struct if_laddrreq *)data)->addr);
739 		size = sizeof(((struct if_laddrreq *)data)->addr);
740 		if (src->sa_len > size) {
741 			error = EINVAL;
742 			goto bad;
743 		}
744 		error = IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, dst);
745 		if (error)
746 			goto bad;
747 
748 		/* copy dst */
749 		src = var->iv_pdst;
750 		dst = (struct sockaddr *)
751 			&(((struct if_laddrreq *)data)->dstaddr);
752 		size = sizeof(((struct if_laddrreq *)data)->dstaddr);
753 		if (src->sa_len > size) {
754 			error = EINVAL;
755 			goto bad;
756 		}
757 		error = IF_IPSEC_GATHER_PDST_ADDR_PORT(var, dst);
758 		if (error)
759 			goto bad;
760 		if_ipsec_putref_variant(var, &psref);
761 		curlwp_bindx(bound);
762 		break;
763 
764 	default:
765 		error = ifioctl_common(ifp, cmd, data);
766 		if (!error) {
767 			bound = curlwp_bind();
768 			error = if_ipsec_ensure_flags(&sc->ipsec_if, oflags);
769 			if (error)
770 				goto bad;
771 		}
772 		break;
773 	}
774 	return error;
775 
776 bad:
777 	if (var != NULL)
778 		if_ipsec_putref_variant(var, &psref);
779 	curlwp_bindx(bound);
780 
781 	return error;
782 }
783 
/*
 * Pair of per-address-family handlers operating on a variant; the entry
 * is selected by the family of the variant's outer source address.
 */
struct encap_funcs {
	int (*ef_inet)(struct ipsec_variant *var);	/* AF_INET */
	int (*ef_inet6)(struct ipsec_variant *var);	/* AF_INET6 */
};
788 
789 static struct encap_funcs ipsec_encap_attach = {
790 	.ef_inet = ipsecif4_attach,
791 	.ef_inet6 = &ipsecif6_attach,
792 };
793 
794 static struct encap_funcs ipsec_encap_detach = {
795 	.ef_inet = ipsecif4_detach,
796 	.ef_inet6 = &ipsecif6_detach,
797 };
798 
799 static int
800 if_ipsec_encap_common(struct ipsec_variant *var, struct encap_funcs *funcs)
801 {
802 	int error;
803 
804 	KASSERT(var != NULL);
805 	KASSERT(if_ipsec_variant_is_configured(var));
806 
807 	switch (var->iv_psrc->sa_family) {
808 #ifdef INET
809 	case AF_INET:
810 		error = (funcs->ef_inet)(var);
811 		break;
812 #endif /* INET */
813 #ifdef INET6
814 	case AF_INET6:
815 		error = (funcs->ef_inet6)(var);
816 		break;
817 #endif /* INET6 */
818 	default:
819 		error = EINVAL;
820 		break;
821 	}
822 
823 	return error;
824 }
825 
826 static int
827 if_ipsec_encap_attach(struct ipsec_variant *var)
828 {
829 
830 	return if_ipsec_encap_common(var, &ipsec_encap_attach);
831 }
832 
833 static int
834 if_ipsec_encap_detach(struct ipsec_variant *var)
835 {
836 
837 	return if_ipsec_encap_common(var, &ipsec_encap_detach);
838 }
839 
/*
 * Validate and set ipsec(4) I/F configurations.
 *     (1) validate
 *         (1-1) Check whether the argument src and dst address pair changes
 *               the configuration compared to the current address pair.
 *         (1-2) Check whether any other ipsec(4) I/F already uses the same
 *               src and dst address pair, except for NAT-T shared tunnels.
 *     (2) set
 *         (2-1) Create the variant for the new configuration.
 *         (2-2) Create a temporary "null" variant, used to avoid accessing
 *               a dangling variant while SPs are deleted and added.
 *         (2-3) Swap the variant, including its SPs.
 *         (2-4) Clean up the previous configuration.
 *
 * Returns 0 on success (including the "nothing changed" case), EINVAL for
 * an unsupported address family, EADDRNOTAVAIL when another interface
 * already uses the pair, or the error reported by encap_lock_enter(),
 * if_ipsec_encap_attach() or if_ipsec_update_variant().
 *
 * Ownership: on success the duplicated addresses belong to the new
 * variant and the old variant with its addresses is freed here.  On every
 * failure (and in the "nothing changed" case) everything allocated in
 * this function is freed again at "out".
 */
static int
if_ipsec_set_tunnel(struct ifnet *ifp,
    struct sockaddr *src, struct sockaddr *dst)
{
	struct ipsec_softc *sc = ifp->if_softc;
	struct ipsec_softc *sc2;
	struct ipsec_variant *ovar, *nvar, *nullvar;
	struct sockaddr *osrc, *odst;
	struct sockaddr *nsrc, *ndst;
	in_port_t nsport = 0, ndport = 0;
	int error;

	/* Nothing has been allocated yet, so a plain return is fine here. */
	error = encap_lock_enter();
	if (error)
		return error;

	/* Allocate everything up front, before sc->ipsec_lock is taken. */
	nsrc = sockaddr_dup(src, M_WAITOK);
	ndst = sockaddr_dup(dst, M_WAITOK);
	nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
	nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);

	mutex_enter(&sc->ipsec_lock);

	ovar = sc->ipsec_var;

	/*
	 * Remember the requested ports (in host byte order) separately and
	 * clear them in the duplicated addresses.
	 */
	switch(nsrc->sa_family) {
#ifdef INET
	case AF_INET:
		nsport = ntohs(satosin(src)->sin_port);
		/*
		 * Avoid confusing the SP when NAT-T is disabled,
		 * e.g.
		 *     expected: 10.0.1.2[any] 10.0.1.1[any] 4(ipv4)
		 *     confused: 10.0.1.2[600] 10.0.1.1[600] 4(ipv4)
		 */
		satosin(nsrc)->sin_port = 0;
		ndport = ntohs(satosin(dst)->sin_port);
		satosin(ndst)->sin_port = 0;
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		nsport = ntohs(satosin6(src)->sin6_port);
		satosin6(nsrc)->sin6_port = 0;
		ndport = ntohs(satosin6(dst)->sin6_port);
		satosin6(ndst)->sin6_port = 0;
		break;
#endif /* INET6 */
	default:
		log(LOG_DEBUG,
		    "%s: Invalid address family: %d.\n",
		    __func__, src->sa_family);
		error = EINVAL;
		goto out;
	}

	/*
	 * (1-1) Check whether the argument src and dst address pair changes
	 *       the configuration compared to the current address pair.
	 */
	if ((ovar->iv_pdst && sockaddr_cmp(ovar->iv_pdst, dst) == 0) &&
	    (ovar->iv_psrc && sockaddr_cmp(ovar->iv_psrc, src) == 0) &&
	    (ovar->iv_sport == nsport && ovar->iv_dport == ndport)) {
		/* address and port pair not changed. */
		error = 0;
		goto out;
	}

	/*
	 * (1-2) Check whether any other ipsec(4) I/F already uses the same
	 *       src and dst address pair, except for NAT-T shared tunnels.
	 */
	mutex_enter(&ipsec_softcs.lock);
	LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
		struct ipsec_variant *var2;
		struct psref psref;

		if (sc2 == sc)
			continue;
		/* The reference on var2 is dropped on every way out below. */
		var2 = if_ipsec_getref_variant(sc2, &psref);
		if (if_ipsec_variant_is_unconfigured(var2)) {
			if_ipsec_putref_variant(var2, &psref);
			continue;
		}
		if (if_ipsec_nat_t(sc) || if_ipsec_nat_t(sc2)) {
			if_ipsec_putref_variant(var2, &psref);
			continue; /* NAT-T shared tunnel */
		}
		if (sockaddr_cmp(var2->iv_pdst, dst) == 0 &&
		    sockaddr_cmp(var2->iv_psrc, src) == 0) {
			if_ipsec_putref_variant(var2, &psref);
			mutex_exit(&ipsec_softcs.lock);
			error = EADDRNOTAVAIL;
			goto out;
		}

		if_ipsec_putref_variant(var2, &psref);
		/* XXX both end must be valid? (I mean, not 0.0.0.0) */
	}
	mutex_exit(&ipsec_softcs.lock);


	/* Keep the old addresses so they can be freed in (2-4). */
	osrc = ovar->iv_psrc;
	odst = ovar->iv_pdst;

	/*
	 * (2-1) Create ipsec_variant for new configuration.
	 */
	if_ipsec_copy_variant(nvar, ovar);
	nvar->iv_psrc = nsrc;
	nvar->iv_pdst = ndst;
	nvar->iv_sport = nsport;
	nvar->iv_dport = ndport;
	nvar->iv_encap_cookie4 = NULL;
	nvar->iv_encap_cookie6 = NULL;
	psref_target_init(&nvar->iv_psref, iv_psref_class);
	error = if_ipsec_encap_attach(nvar);
	if (error)
		goto out;

	/*
	 * (2-2) Create temporary "null" variant.
	 */
	if_ipsec_copy_variant(nullvar, ovar);
	if_ipsec_clear_config(nullvar);
	psref_target_init(&nullvar->iv_psref, iv_psref_class);
	membar_producer();
	/*
	 * (2-3) Swap the variant, including its SPs.
	 */
	error = if_ipsec_update_variant(sc, nvar, nullvar);
	if (error) {
		/* Undo the attach from (2-1) before bailing out. */
		if_ipsec_encap_detach(nvar);
		goto out;
	}

	mutex_exit(&sc->ipsec_lock);

	/*
	 * (2-4) Clean up the previous configuration.
	 */
	if (if_ipsec_variant_is_configured(ovar))
		if_ipsec_encap_detach(ovar);
	encap_lock_exit();

	/* The old variant is unconfigured if no tunnel was set before. */
	if (osrc != NULL)
		sockaddr_free(osrc);
	if (odst != NULL)
		sockaddr_free(odst);
	kmem_free(ovar, sizeof(*ovar));
	kmem_free(nullvar, sizeof(*nullvar));

	return 0;

out:
	/*
	 * Common exit for failures and for the "nothing changed" case:
	 * release both locks and free everything allocated above.
	 */
	mutex_exit(&sc->ipsec_lock);
	encap_lock_exit();

	sockaddr_free(nsrc);
	sockaddr_free(ndst);
	kmem_free(nvar, sizeof(*nvar));
	kmem_free(nullvar, sizeof(*nullvar));

	return error;
}
1021 
1022 /*
1023  * Validate and delete ipsec(4) I/F configurations.
1024  *     (1) validate
1025  *         (1-1) Check current src and dst address pair are null,
1026  *               which means the ipsec(4) I/F is already done deletetunnel.
1027  *     (2) delete
1028  *         (2-1) Create variant for deleted status.
1029  *         (2-2) Create temporary "null" variant used to avoid to access
1030  *               dangling variant while SPs are deleted and added.
1031  *               NOTE:
1032  *               The contents of temporary "null" variant equal to the variant
1033  *               of (2-1), however two psref_target_destroy() synchronization
1034  *               points are necessary to avoid to access dangling variant
1035  *               while SPs are deleted and added. To implement that simply,
1036  *               we use the same manner as if_ipsec_set_tunnel(), that is,
1037  *               create extra "null" variant and use it temporarily.
1038  *         (2-3) Swap variant include its SPs.
1039  *         (2-4) Cleanup last configurations.
1040  */
1041 static void
1042 if_ipsec_delete_tunnel(struct ifnet *ifp)
1043 {
1044 	struct ipsec_softc *sc = ifp->if_softc;
1045 	struct ipsec_variant *ovar, *nvar, *nullvar;
1046 	struct sockaddr *osrc, *odst;
1047 	int error;
1048 
1049 	error = encap_lock_enter();
1050 	if (error)
1051 		return;
1052 
1053 	nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
1054 	nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
1055 
1056 	mutex_enter(&sc->ipsec_lock);
1057 
1058 	ovar = sc->ipsec_var;
1059 	osrc = ovar->iv_psrc;
1060 	odst = ovar->iv_pdst;
1061 	/*
1062 	 * (1-1) Check current src and dst address pair are null,
1063 	 *       which means the ipsec(4) I/F is already done deletetunnel.
1064 	 */
1065 	if (osrc == NULL || odst == NULL) {
1066 		/* address pair not changed. */
1067 		mutex_exit(&sc->ipsec_lock);
1068 		encap_lock_exit();
1069 		kmem_free(nvar, sizeof(*nvar));
1070 		return;
1071 	}
1072 
1073 	/*
1074 	 * (2-1) Create variant for deleted status.
1075 	 */
1076 	if_ipsec_copy_variant(nvar, ovar);
1077 	if_ipsec_clear_config(nvar);
1078 	psref_target_init(&nvar->iv_psref, iv_psref_class);
1079 
1080 	/*
1081 	 * (2-2) Create temporary "null" variant used to avoid to access
1082 	 *       dangling variant while SPs are deleted and added.
1083 	 */
1084 	if_ipsec_copy_variant(nullvar, ovar);
1085 	if_ipsec_clear_config(nullvar);
1086 	psref_target_init(&nullvar->iv_psref, iv_psref_class);
1087 	membar_producer();
1088 	/*
1089 	 * (2-3) Swap variant include its SPs.
1090 	 */
1091 	/* if_ipsec_update_variant() does not fail when delete SP only. */
1092 	(void)if_ipsec_update_variant(sc, nvar, nullvar);
1093 
1094 	mutex_exit(&sc->ipsec_lock);
1095 
1096 	/*
1097 	 * (2-4) Cleanup last configurations.
1098 	 */
1099 	if (if_ipsec_variant_is_configured(ovar))
1100 		if_ipsec_encap_detach(ovar);
1101 	encap_lock_exit();
1102 
1103 	sockaddr_free(osrc);
1104 	sockaddr_free(odst);
1105 	kmem_free(ovar, sizeof(*ovar));
1106 	kmem_free(nullvar, sizeof(*nullvar));
1107 }
1108 
1109 /*
1110  * Check IFF_NAT_T and IFF_FWD_IPV6 flags, therefore update SPs if needed.
1111  *     (1) check
1112  *         (1-1) Check flags are changed.
1113  *         (1-2) Check current src and dst address pair. If they are null,
1114  *               that means the ipsec(4) I/F is deletetunnel'ed, so it is
1115  *               not needed to update.
1116  *     (2) update
1117  *         (2-1) Create variant for new SPs.
1118  *         (2-2) Create temporary "null" variant used to avoid to access
1119  *               dangling variant while SPs are deleted and added.
1120  *               NOTE:
1121  *               There is the same problem as if_ipsec_delete_tunnel().
1122  *         (2-3) Swap variant include its SPs.
1123  *         (2-4) Cleanup unused configurations.
1124  *               NOTE: use the same encap_cookies.
1125  */
1126 static int
1127 if_ipsec_ensure_flags(struct ifnet *ifp, short oflags)
1128 {
1129 	struct ipsec_softc *sc = ifp->if_softc;
1130 	struct ipsec_variant *ovar, *nvar, *nullvar;
1131 	int error;
1132 
1133 	/*
1134 	 * (1) Check flags are changed.
1135 	 */
1136 	if ((oflags & (IFF_NAT_T|IFF_FWD_IPV6)) ==
1137 	    (ifp->if_flags & (IFF_NAT_T|IFF_FWD_IPV6)))
1138 		return 0; /* flags not changed. */
1139 
1140 	error = encap_lock_enter();
1141 	if (error)
1142 		return error;
1143 
1144 	nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
1145 	nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
1146 
1147 	mutex_enter(&sc->ipsec_lock);
1148 
1149 	ovar = sc->ipsec_var;
1150 	/*
1151 	 * (1-2) Check current src and dst address pair.
1152 	 */
1153 	if (if_ipsec_variant_is_unconfigured(ovar)) {
1154 		/* nothing to do */
1155 		mutex_exit(&sc->ipsec_lock);
1156 		return 0;
1157 	}
1158 
1159 	/*
1160 	 * (2-1) Create variant for new SPs.
1161 	 */
1162 	if_ipsec_copy_variant(nvar, ovar);
1163 	psref_target_init(&nvar->iv_psref, iv_psref_class);
1164 	/*
1165 	 * (2-2) Create temporary "null" variant used to avoid to access
1166 	 *       dangling variant while SPs are deleted and added.
1167 	 */
1168 	if_ipsec_copy_variant(nullvar, ovar);
1169 	if_ipsec_clear_config(nullvar);
1170 	psref_target_init(&nullvar->iv_psref, iv_psref_class);
1171 	membar_producer();
1172 	/*
1173 	 * (2-3) Swap variant include its SPs.
1174 	 */
1175 	error = if_ipsec_update_variant(sc, nvar, nullvar);
1176 
1177 	mutex_exit(&sc->ipsec_lock);
1178 	encap_lock_exit();
1179 
1180 	/*
1181 	 * (2-4) Cleanup unused configurations.
1182 	 */
1183 	if (!error)
1184 		kmem_free(ovar, sizeof(*ovar));
1185 	else
1186 		kmem_free(nvar, sizeof(*ovar));
1187 	kmem_free(nullvar, sizeof(*nullvar));
1188 
1189 	return error;
1190 }
1191 
1192 /*
1193  * SPD management
1194  */
1195 
1196 /*
1197  * Share SP set with other NAT-T ipsec(4) I/F(s).
1198  *     Return 1, when "var" shares SP set.
1199  *     Return 0, when "var" cannot share SP set.
1200  *
1201  * NOTE:
1202  * if_ipsec_share_sp() and if_ipsec_unshare_sp() would require global lock
1203  * to exclude other ipsec(4) I/Fs set_tunnel/delete_tunnel. E.g. when ipsec0
1204  * and ipsec1 can share SP set, running ipsec0's set_tunnel and ipsec1's
1205  * set_tunnel causes race.
1206  * Currently, (fortunately) encap_lock works as this global lock.
1207  */
1208 static int
1209 if_ipsec_share_sp(struct ipsec_variant *var)
1210 {
1211 	struct ipsec_softc *sc = var->iv_softc;
1212 	struct ipsec_softc *sc2;
1213 	struct ipsec_variant *var2;
1214 	struct psref psref;
1215 
1216 	KASSERT(encap_lock_held());
1217 	KASSERT(var->iv_pdst != NULL && var->iv_pdst != NULL);
1218 
1219 	mutex_enter(&ipsec_softcs.lock);
1220 	LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
1221 		if (sc2 == sc)
1222 			continue;
1223 		var2 = if_ipsec_getref_variant(sc2, &psref);
1224 		if (if_ipsec_variant_is_unconfigured(var2)) {
1225 			if_ipsec_putref_variant(var2, &psref);
1226 			continue;
1227 		}
1228 		if (sockaddr_cmp(var2->iv_pdst, var->iv_pdst) != 0 ||
1229 		    sockaddr_cmp(var2->iv_psrc, var->iv_psrc) != 0) {
1230 			if_ipsec_putref_variant(var2, &psref);
1231 			continue;
1232 		}
1233 
1234 		break;
1235 	}
1236 	mutex_exit(&ipsec_softcs.lock);
1237 	if (sc2 == NULL)
1238 		return 0; /* not shared */
1239 
1240 	IV_SP_IN(var) = IV_SP_IN(var2);
1241 	IV_SP_IN6(var) = IV_SP_IN6(var2);
1242 	IV_SP_OUT(var) = IV_SP_OUT(var2);
1243 	IV_SP_OUT6(var) = IV_SP_OUT6(var2);
1244 
1245 	if_ipsec_putref_variant(var2, &psref);
1246 	return 1; /* shared */
1247 }
1248 
1249 /*
1250  * Unshare SP set with other NAT-T ipsec(4) I/F(s).
1251  *     Return 1, when "var" shared SP set, and then unshare them.
1252  *     Return 0, when "var" did not share SP set.
1253  *
1254  * NOTE:
1255  * See if_ipsec_share_sp()'s note.
1256  */
1257 static int
1258 if_ipsec_unshare_sp(struct ipsec_variant *var)
1259 {
1260 	struct ipsec_softc *sc = var->iv_softc;
1261 	struct ipsec_softc *sc2;
1262 	struct ipsec_variant *var2;
1263 	struct psref psref;
1264 
1265 	KASSERT(encap_lock_held());
1266 
1267 	if (!var->iv_pdst || !var->iv_psrc)
1268 		return 0;
1269 
1270 	mutex_enter(&ipsec_softcs.lock);
1271 	LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
1272 		if (sc2 == sc)
1273 			continue;
1274 		var2 = if_ipsec_getref_variant(sc2, &psref);
1275 		if (!var2->iv_pdst || !var2->iv_psrc) {
1276 			if_ipsec_putref_variant(var2, &psref);
1277 			continue;
1278 		}
1279 		if (sockaddr_cmp(var2->iv_pdst, var->iv_pdst) != 0 ||
1280 		    sockaddr_cmp(var2->iv_psrc, var->iv_psrc) != 0) {
1281 			if_ipsec_putref_variant(var2, &psref);
1282 			continue;
1283 		}
1284 
1285 		break;
1286 	}
1287 	mutex_exit(&ipsec_softcs.lock);
1288 	if (sc2 == NULL)
1289 		return 0; /* not shared */
1290 
1291 	IV_SP_IN(var) = NULL;
1292 	IV_SP_IN6(var) = NULL;
1293 	IV_SP_OUT(var) = NULL;
1294 	IV_SP_OUT6(var) = NULL;
1295 	if_ipsec_putref_variant(var2, &psref);
1296 	return 1; /* shared */
1297 }
1298 
1299 static inline void
1300 if_ipsec_add_mbuf(struct mbuf *m0, void *data, size_t len)
1301 {
1302 	struct mbuf *m;
1303 
1304 	MGET(m, M_WAITOK | M_ZERO, MT_DATA);
1305 	m->m_len = PFKEY_ALIGN8(len);
1306 	m_copyback(m, 0, len, data);
1307 	m_cat(m0, m);
1308 }
1309 
1310 static inline void
1311 if_ipsec_add_pad(struct mbuf *m0, size_t len)
1312 {
1313 	struct mbuf *m;
1314 
1315 	if (len == 0)
1316 		return;
1317 
1318 	MGET(m, M_WAITOK | M_ZERO, MT_DATA);
1319 	m->m_len = len;
1320 	m_cat(m0, m);
1321 }
1322 
1323 static inline size_t
1324 if_ipsec_set_sadb_addr(struct sadb_address *saaddr, struct sockaddr *addr,
1325     int proto, uint16_t exttype)
1326 {
1327 	size_t size;
1328 
1329 	KASSERT(saaddr != NULL);
1330 	KASSERT(addr != NULL);
1331 
1332 	size = sizeof(*saaddr) + PFKEY_ALIGN8(addr->sa_len);
1333 	saaddr->sadb_address_len = PFKEY_UNIT64(size);
1334 	saaddr->sadb_address_exttype = exttype;
1335 	saaddr->sadb_address_proto = proto;
1336 	switch (addr->sa_family) {
1337 #ifdef INET
1338 	case AF_INET:
1339 		saaddr->sadb_address_prefixlen = sizeof(struct in_addr) << 3;
1340 		break;
1341 #endif /* INET */
1342 #ifdef INET6
1343 	case AF_INET6:
1344 		saaddr->sadb_address_prefixlen = sizeof(struct in6_addr) << 3;
1345 		break;
1346 #endif /* INET6 */
1347 	default:
1348 		log(LOG_DEBUG,
1349 		    "%s: Invalid address family: %d.\n",
1350 		    __func__, addr->sa_family);
1351 		break;
1352 	}
1353 	saaddr->sadb_address_reserved = 0;
1354 
1355 	return size;
1356 }
1357 
1358 static inline size_t
1359 if_ipsec_set_sadb_src(struct sadb_address *sasrc, struct sockaddr *src,
1360     int proto)
1361 {
1362 
1363 	return if_ipsec_set_sadb_addr(sasrc, src, proto,
1364 	    SADB_EXT_ADDRESS_SRC);
1365 }
1366 
1367 static inline size_t
1368 if_ipsec_set_sadb_dst(struct sadb_address *sadst, struct sockaddr *dst,
1369     int proto)
1370 {
1371 
1372 	return if_ipsec_set_sadb_addr(sadst, dst, proto,
1373 	    SADB_EXT_ADDRESS_DST);
1374 }
1375 
1376 static inline size_t
1377 if_ipsec_set_sadb_x_policy(struct sadb_x_policy *xpl,
1378     struct sadb_x_ipsecrequest *xisr, uint16_t policy, uint8_t dir, uint32_t id,
1379     uint8_t level)
1380 {
1381 	size_t size;
1382 
1383 	KASSERT(policy != IPSEC_POLICY_IPSEC || xisr != NULL);
1384 
1385 	size = sizeof(*xpl);
1386 	if (policy == IPSEC_POLICY_IPSEC) {
1387 		size += PFKEY_ALIGN8(sizeof(*xisr));
1388 	}
1389 	xpl->sadb_x_policy_len = PFKEY_UNIT64(size);
1390 	xpl->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
1391 	xpl->sadb_x_policy_type = policy;
1392 	xpl->sadb_x_policy_dir = dir;
1393 	xpl->sadb_x_policy_reserved = 0;
1394 	xpl->sadb_x_policy_id = id;
1395 	xpl->sadb_x_policy_reserved2 = 0;
1396 
1397 	if (policy == IPSEC_POLICY_IPSEC) {
1398 		xisr->sadb_x_ipsecrequest_len = PFKEY_ALIGN8(sizeof(*xisr));
1399 		xisr->sadb_x_ipsecrequest_proto = IPPROTO_ESP;
1400 		xisr->sadb_x_ipsecrequest_mode = IPSEC_MODE_TRANSPORT;
1401 		xisr->sadb_x_ipsecrequest_level = level;
1402 		xisr->sadb_x_ipsecrequest_reqid = key_newreqid();
1403 	}
1404 
1405 	return size;
1406 }
1407 
1408 static inline void
1409 if_ipsec_set_sadb_msg(struct sadb_msg *msg, uint16_t extlen, uint8_t msgtype)
1410 {
1411 
1412 	KASSERT(msg != NULL);
1413 
1414 	msg->sadb_msg_version = PF_KEY_V2;
1415 	msg->sadb_msg_type = msgtype;
1416 	msg->sadb_msg_errno = 0;
1417 	msg->sadb_msg_satype = SADB_SATYPE_UNSPEC;
1418 	msg->sadb_msg_len = PFKEY_UNIT64(sizeof(*msg)) + extlen;
1419 	msg->sadb_msg_reserved = 0;
1420 	msg->sadb_msg_seq = 0; /* XXXX */
1421 	msg->sadb_msg_pid = 0; /* XXXX */
1422 }
1423 
1424 static inline void
1425 if_ipsec_set_sadb_msg_add(struct sadb_msg *msg, uint16_t extlen)
1426 {
1427 
1428 	if_ipsec_set_sadb_msg(msg, extlen, SADB_X_SPDADD);
1429 }
1430 
1431 static inline void
1432 if_ipsec_set_sadb_msg_del(struct sadb_msg *msg, uint16_t extlen)
1433 {
1434 
1435 	if_ipsec_set_sadb_msg(msg, extlen, SADB_X_SPDDELETE2);
1436 }
1437 
1438 static int
1439 if_ipsec_set_addr_port(struct sockaddr *addrport, struct sockaddr *addr,
1440     in_port_t port)
1441 {
1442 	int error = 0;
1443 
1444 	sockaddr_copy(addrport, addr->sa_len, addr);
1445 
1446 	switch (addr->sa_family) {
1447 #ifdef INET
1448 	case AF_INET: {
1449 		struct sockaddr_in *sin = satosin(addrport);
1450 		sin->sin_port = htons(port);
1451 		break;
1452 	}
1453 #endif /* INET */
1454 #ifdef INET6
1455 	case AF_INET6: {
1456 		struct sockaddr_in6 *sin6 = satosin6(addrport);
1457 		sin6->sin6_port = htons(port);
1458 		break;
1459 	}
1460 #endif /* INET6 */
1461 	default:
1462 		log(LOG_DEBUG,
1463 		    "%s: Invalid address family: %d.\n",
1464 		    __func__, addr->sa_family);
1465 		error = EINVAL;
1466 	}
1467 
1468 	return error;
1469 }
1470 
1471 static struct secpolicy *
1472 if_ipsec_add_sp0(struct sockaddr *src, in_port_t sport,
1473     struct sockaddr *dst, in_port_t dport,
1474     int dir, int proto, int level, u_int policy)
1475 {
1476 	struct sadb_msg msg;
1477 	struct sadb_address xsrc, xdst;
1478 	struct sadb_x_policy xpl;
1479 	struct sadb_x_ipsecrequest xisr;
1480 	size_t size;
1481 	size_t padlen;
1482 	uint16_t ext_msg_len = 0;
1483 	struct mbuf *m;
1484 
1485 	memset(&msg, 0, sizeof(msg));
1486 	memset(&xsrc, 0, sizeof(xsrc));
1487 	memset(&xdst, 0, sizeof(xdst));
1488 	memset(&xpl, 0, sizeof(xpl));
1489 	memset(&xisr, 0, sizeof(xisr));
1490 
1491 	MGETHDR(m, M_WAITOK, MT_DATA);
1492 
1493 	size = if_ipsec_set_sadb_src(&xsrc, src, proto);
1494 	ext_msg_len += PFKEY_UNIT64(size);
1495 	size = if_ipsec_set_sadb_dst(&xdst, dst, proto);
1496 	ext_msg_len += PFKEY_UNIT64(size);
1497 	size = if_ipsec_set_sadb_x_policy(&xpl, &xisr, policy, dir, 0, level);
1498 	ext_msg_len += PFKEY_UNIT64(size);
1499 	if_ipsec_set_sadb_msg_add(&msg, ext_msg_len);
1500 
1501 	/* build PF_KEY message */
1502 
1503 	m->m_len = sizeof(msg);
1504 	m_copyback(m, 0, sizeof(msg), &msg);
1505 
1506 	if_ipsec_add_mbuf(m, &xsrc, sizeof(xsrc));
1507 	if (sport == 0) {
1508 		if_ipsec_add_mbuf(m, src, src->sa_len);
1509 	} else {
1510 		struct sockaddr addrport;
1511 
1512 		if_ipsec_set_addr_port(&addrport, src, sport);
1513 		if_ipsec_add_mbuf(m, &addrport, addrport.sa_len);
1514 	}
1515 	padlen = PFKEY_UNUNIT64(xsrc.sadb_address_len)
1516 		- (sizeof(xsrc) + PFKEY_ALIGN8(src->sa_len));
1517 	if_ipsec_add_pad(m, padlen);
1518 
1519 	if_ipsec_add_mbuf(m, &xdst, sizeof(xdst));
1520 	if (dport == 0) {
1521 		if_ipsec_add_mbuf(m, dst, dst->sa_len);
1522 	} else {
1523 		struct sockaddr addrport;
1524 
1525 		if_ipsec_set_addr_port(&addrport, dst, dport);
1526 		if_ipsec_add_mbuf(m, &addrport, addrport.sa_len);
1527 	}
1528 	padlen = PFKEY_UNUNIT64(xdst.sadb_address_len)
1529 		- (sizeof(xdst) + PFKEY_ALIGN8(dst->sa_len));
1530 	if_ipsec_add_pad(m, padlen);
1531 
1532 	if_ipsec_add_mbuf(m, &xpl, sizeof(xpl));
1533 	if (policy == IPSEC_POLICY_IPSEC)
1534 		if_ipsec_add_mbuf(m, &xisr, sizeof(xisr));
1535 
1536 	/* key_kpi_spdadd() has already done KEY_SP_REF(). */
1537 	return key_kpi_spdadd(m);
1538 }
1539 
1540 static int
1541 if_ipsec_add_sp(struct ipsec_variant *var,
1542     struct sockaddr *src, in_port_t sport,
1543     struct sockaddr *dst, in_port_t dport)
1544 {
1545 	struct ipsec_softc *sc = var->iv_softc;
1546 	int level;
1547 	u_int v6policy;
1548 
1549 	/*
1550 	 * must delete sp before add it.
1551 	 */
1552 	KASSERT(IV_SP_IN(var) == NULL);
1553 	KASSERT(IV_SP_OUT(var) == NULL);
1554 	KASSERT(IV_SP_IN6(var) == NULL);
1555 	KASSERT(IV_SP_OUT6(var) == NULL);
1556 
1557 	/*
1558 	 * can be shared?
1559 	 */
1560 	if (if_ipsec_share_sp(var))
1561 		return 0;
1562 
1563 	if (if_ipsec_nat_t(sc))
1564 		level = IPSEC_LEVEL_REQUIRE;
1565 	else
1566 		level = IPSEC_LEVEL_UNIQUE;
1567 
1568 	if (if_ipsec_fwd_ipv6(sc))
1569 		v6policy = IPSEC_POLICY_IPSEC;
1570 	else
1571 		v6policy = IPSEC_POLICY_DISCARD;
1572 
1573 	IV_SP_IN(var) = if_ipsec_add_sp0(dst, dport, src, sport,
1574 	    IPSEC_DIR_INBOUND, IPPROTO_IPIP, level, IPSEC_POLICY_IPSEC);
1575 	if (IV_SP_IN(var) == NULL)
1576 		goto fail;
1577 	IV_SP_OUT(var) = if_ipsec_add_sp0(src, sport, dst, dport,
1578 	    IPSEC_DIR_OUTBOUND, IPPROTO_IPIP, level, IPSEC_POLICY_IPSEC);
1579 	if (IV_SP_OUT(var) == NULL)
1580 		goto fail;
1581 	IV_SP_IN6(var) = if_ipsec_add_sp0(dst, dport, src, sport,
1582 	    IPSEC_DIR_INBOUND, IPPROTO_IPV6, level, v6policy);
1583 	if (IV_SP_IN6(var) == NULL)
1584 		goto fail;
1585 	IV_SP_OUT6(var) = if_ipsec_add_sp0(src, sport, dst, dport,
1586 	    IPSEC_DIR_OUTBOUND, IPPROTO_IPV6, level, v6policy);
1587 	if (IV_SP_OUT6(var) == NULL)
1588 		goto fail;
1589 
1590 	return 0;
1591 
1592 fail:
1593 	if (IV_SP_IN6(var) != NULL) {
1594 		if_ipsec_del_sp0(IV_SP_IN6(var));
1595 		IV_SP_IN6(var) = NULL;
1596 	}
1597 	if (IV_SP_OUT(var) != NULL) {
1598 		if_ipsec_del_sp0(IV_SP_OUT(var));
1599 		IV_SP_OUT(var) = NULL;
1600 	}
1601 	if (IV_SP_IN(var) != NULL) {
1602 		if_ipsec_del_sp0(IV_SP_IN(var));
1603 		IV_SP_IN(var) = NULL;
1604 	}
1605 
1606 	return EEXIST;
1607 }
1608 
1609 static int
1610 if_ipsec_del_sp0(struct secpolicy *sp)
1611 {
1612 	struct sadb_msg msg;
1613 	struct sadb_x_policy xpl;
1614 	size_t size;
1615 	uint16_t ext_msg_len = 0;
1616 	int error;
1617 	struct mbuf *m;
1618 
1619 	if (sp == NULL)
1620 		return 0;
1621 
1622 	memset(&msg, 0, sizeof(msg));
1623 	memset(&xpl, 0, sizeof(xpl));
1624 
1625 	MGETHDR(m, M_WAITOK, MT_DATA);
1626 
1627 	size = if_ipsec_set_sadb_x_policy(&xpl, NULL, 0, 0, sp->id, 0);
1628 	ext_msg_len += PFKEY_UNIT64(size);
1629 
1630 	if_ipsec_set_sadb_msg_del(&msg, ext_msg_len);
1631 
1632 	m->m_len = sizeof(msg);
1633 	m_copyback(m, 0, sizeof(msg), &msg);
1634 
1635 	if_ipsec_add_mbuf(m, &xpl, sizeof(xpl));
1636 
1637 	/*  unreference correspond to key_kpi_spdadd(). */
1638 	KEY_SP_UNREF(&sp);
1639 	error = key_kpi_spddelete2(m);
1640 	if (error != 0) {
1641 		log(LOG_ERR, "%s: cannot delete SP(ID=%u) (error=%d).\n",
1642 		    __func__, sp->id, error);
1643 	}
1644 	return error;
1645 }
1646 
1647 static void
1648 if_ipsec_del_sp(struct ipsec_variant *var)
1649 {
1650 
1651 	/* are the SPs shared? */
1652 	if (if_ipsec_unshare_sp(var))
1653 		return;
1654 
1655 	(void)if_ipsec_del_sp0(IV_SP_OUT(var));
1656 	(void)if_ipsec_del_sp0(IV_SP_IN(var));
1657 	(void)if_ipsec_del_sp0(IV_SP_OUT6(var));
1658 	(void)if_ipsec_del_sp0(IV_SP_IN6(var));
1659 	IV_SP_IN(var) = NULL;
1660 	IV_SP_IN6(var) = NULL;
1661 	IV_SP_OUT(var) = NULL;
1662 	IV_SP_OUT6(var) = NULL;
1663 }
1664 
1665 static int
1666 if_ipsec_replace_sp(struct ipsec_softc *sc, struct ipsec_variant *ovar,
1667     struct ipsec_variant *nvar)
1668 {
1669 	in_port_t src_port = 0;
1670 	in_port_t dst_port = 0;
1671 	struct sockaddr *src;
1672 	struct sockaddr *dst;
1673 	int error = 0;
1674 
1675 	KASSERT(mutex_owned(&sc->ipsec_lock));
1676 
1677 	if_ipsec_del_sp(ovar);
1678 
1679 	src = nvar->iv_psrc;
1680 	dst = nvar->iv_pdst;
1681 	if (if_ipsec_nat_t(sc)) {
1682 		/* NAT-T enabled */
1683 		src_port = nvar->iv_sport;
1684 		dst_port = nvar->iv_dport;
1685 	}
1686 	if (src && dst)
1687 		error = if_ipsec_add_sp(nvar, src, src_port, dst, dst_port);
1688 
1689 	return error;
1690 }
1691 
1692 /*
1693  * ipsec_variant and its SPs update API.
1694  *
1695  * Assumption:
1696  * reader side dereferences sc->ipsec_var in reader critical section only,
1697  * that is, all of reader sides do not reader the sc->ipsec_var after
1698  * pserialize_perform().
1699  */
1700 static int
1701 if_ipsec_update_variant(struct ipsec_softc *sc, struct ipsec_variant *nvar,
1702     struct ipsec_variant *nullvar)
1703 {
1704 	struct ifnet *ifp = &sc->ipsec_if;
1705 	struct ipsec_variant *ovar = sc->ipsec_var;
1706 	int error;
1707 
1708 	KASSERT(mutex_owned(&sc->ipsec_lock));
1709 
1710 	/*
1711 	 * To keep consistency between ipsec(4) I/F settings and SPs,
1712 	 * we stop packet processing while replacing SPs, that is, we set
1713 	 * "null" config variant to sc->ipsec_var.
1714 	 */
1715 	sc->ipsec_var = nullvar;
1716 	pserialize_perform(ipsec_psz);
1717 	psref_target_destroy(&ovar->iv_psref, iv_psref_class);
1718 
1719 	error = if_ipsec_replace_sp(sc, ovar, nvar);
1720 	if (!error)
1721 		sc->ipsec_var = nvar;
1722 	else {
1723 		sc->ipsec_var = ovar; /* rollback */
1724 		psref_target_init(&ovar->iv_psref, iv_psref_class);
1725 	}
1726 
1727 	pserialize_perform(ipsec_psz);
1728 	psref_target_destroy(&nullvar->iv_psref, iv_psref_class);
1729 
1730 	if (if_ipsec_variant_is_configured(sc->ipsec_var))
1731 		ifp->if_flags |= IFF_RUNNING;
1732 	else
1733 		ifp->if_flags &= ~IFF_RUNNING;
1734 
1735 	return error;
1736 }
1737