xref: /netbsd-src/sys/net/if_ipsec.c (revision c9496f6b604074a9451a67df576a5b423068e71e)
1 /*	$NetBSD: if_ipsec.c,v 1.3 2018/01/31 07:33:18 mrg Exp $  */
2 
3 /*
4  * Copyright (c) 2017 Internet Initiative Japan Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: if_ipsec.c,v 1.3 2018/01/31 07:33:18 mrg Exp $");
31 
32 #ifdef _KERNEL_OPT
33 #include "opt_inet.h"
34 #endif
35 
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/mbuf.h>
40 #include <sys/socket.h>
41 #include <sys/sockio.h>
42 #include <sys/errno.h>
43 #include <sys/ioctl.h>
44 #include <sys/time.h>
45 #include <sys/syslog.h>
46 #include <sys/cpu.h>
47 #include <sys/kmem.h>
48 #include <sys/mutex.h>
49 #include <sys/pserialize.h>
50 #include <sys/psref.h>
51 
52 #include <net/if.h>
53 #include <net/if_types.h>
54 #include <net/route.h>
55 #include <net/bpf.h>
56 #include <net/pfkeyv2.h>
57 
58 #include <netinet/in.h>
59 #include <netinet/in_systm.h>
60 #include <netinet/ip.h>
61 #ifdef	INET
62 #include <netinet/in_var.h>
63 #endif	/* INET */
64 
65 #ifdef INET6
66 #include <netinet6/in6_var.h>
67 #include <netinet/ip6.h>
68 #include <netinet6/ip6_var.h>
69 #endif /* INET6 */
70 
71 #include <netinet/ip_encap.h>
72 
73 #include <net/if_ipsec.h>
74 
75 #include <net/raw_cb.h>
76 #include <net/pfkeyv2.h>
77 
78 #include <netipsec/key.h>
79 #include <netipsec/ipsec.h>
80 #include <netipsec/ipsecif.h>
81 
82 static void if_ipsec_ro_init_pc(void *, void *, struct cpu_info *);
83 static void if_ipsec_ro_fini_pc(void *, void *, struct cpu_info *);
84 
85 static int if_ipsec_clone_create(struct if_clone *, int);
86 static int if_ipsec_clone_destroy(struct ifnet *);
87 
88 static inline int if_ipsec_out_direct(struct ipsec_variant *, struct mbuf *, int);
89 static inline void if_ipsec_in_enqueue(struct mbuf *, int, struct ifnet *);
90 
91 static int if_ipsec_encap_attach(struct ipsec_variant *);
92 static int if_ipsec_encap_detach(struct ipsec_variant *);
93 static int if_ipsec_set_tunnel(struct ifnet *,
94     struct sockaddr *, struct sockaddr *);
95 static void if_ipsec_delete_tunnel(struct ifnet *);
96 static int if_ipsec_ensure_flags(struct ifnet *, short);
97 static void if_ipsec_attach0(struct ipsec_softc *);
98 
99 static int if_ipsec_update_variant(struct ipsec_softc *,
100     struct ipsec_variant *, struct ipsec_variant *);
101 
102 /* sadb_msg */
103 static inline void if_ipsec_add_mbuf(struct mbuf *, void *, size_t);
104 static inline void if_ipsec_add_pad(struct mbuf *, size_t);
105 static inline size_t if_ipsec_set_sadb_addr(struct sadb_address *,
106     struct sockaddr *, int, uint16_t);
107 static inline size_t if_ipsec_set_sadb_src(struct sadb_address *,
108     struct sockaddr *, int);
109 static inline size_t if_ipsec_set_sadb_dst(struct sadb_address *,
110     struct sockaddr *, int);
111 static inline size_t if_ipsec_set_sadb_x_policy(struct sadb_x_policy *,
112     struct sadb_x_ipsecrequest *, uint16_t, uint8_t, uint32_t, uint8_t);
113 static inline void if_ipsec_set_sadb_msg(struct sadb_msg *, uint16_t, uint8_t);
114 static inline void if_ipsec_set_sadb_msg_add(struct sadb_msg *, uint16_t);
115 static inline void if_ipsec_set_sadb_msg_del(struct sadb_msg *, uint16_t);
116 /* SPD */
117 static int if_ipsec_share_sp(struct ipsec_variant *);
118 static int if_ipsec_unshare_sp(struct ipsec_variant *);
119 static inline struct secpolicy *if_ipsec_add_sp0(struct sockaddr *,
120     in_port_t, struct sockaddr *, in_port_t, int, int, int, u_int);
121 static inline int if_ipsec_del_sp0(struct secpolicy *);
122 static int if_ipsec_add_sp(struct ipsec_variant *,
123     struct sockaddr *, in_port_t, struct sockaddr *, in_port_t);
124 static void if_ipsec_del_sp(struct ipsec_variant *);
125 static int if_ipsec_replace_sp(struct ipsec_softc *, struct ipsec_variant *,
126     struct ipsec_variant *);
127 
128 static int if_ipsec_set_addr_port(struct sockaddr *, struct sockaddr *,
129     in_port_t);
/*
 * Hand the outer source (resp. destination) address and port of the
 * ipsec_variant `var' to if_ipsec_set_addr_port(), with `target' as its
 * first argument.  Each macro evaluates to that function's int result.
 * Both arguments are parenthesized so that any expression can be passed.
 */
#define IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, target)			\
	if_ipsec_set_addr_port((target), (var)->iv_psrc, (var)->iv_sport)
#define IF_IPSEC_GATHER_PDST_ADDR_PORT(var, target)			\
	if_ipsec_set_addr_port((target), (var)->iv_pdst, (var)->iv_dport)
134 
135 /*
136  * ipsec global variable definitions
137  */
138 
139 /* This list is used in ioctl context only. */
/*
 * Every ipsec_softc is linked here by if_ipsec_clone_create() and unlinked
 * by if_ipsec_clone_destroy().  "lock" is held around each list access,
 * including the duplicated address pair scan of if_ipsec_set_tunnel().
 */
140 LIST_HEAD(ipsec_sclist, ipsec_softc);
141 static struct {
142 	struct ipsec_sclist list;
143 	kmutex_t lock;
144 } ipsec_softcs __cacheline_aligned;
145 
/* Created by pserialize_create() in ipsecifattach(). */
146 pserialize_t ipsec_psz __read_mostly;
/* psref class "ipsecvar"; each variant's iv_psref is initialized with it. */
147 struct psref_class *iv_psref_class __read_mostly;
148 
/* Cloner for "ipsec" interfaces, attached by ipsecifattach(). */
149 struct if_clone ipsec_cloner =
150     IF_CLONE_INITIALIZER("ipsec", if_ipsec_clone_create, if_ipsec_clone_destroy);
/* Limit passed to if_tunnel_check_nesting() by if_ipsec_check_nesting(). */
151 static int max_ipsec_nesting = MAX_IPSEC_NEST;
152 
153 /* ARGSUSED */
154 void
155 ipsecifattach(int count)
156 {
157 
158 	mutex_init(&ipsec_softcs.lock, MUTEX_DEFAULT, IPL_NONE);
159 	LIST_INIT(&ipsec_softcs.list);
160 
161 	ipsec_psz = pserialize_create();
162 	iv_psref_class = psref_class_create("ipsecvar", IPL_SOFTNET);
163 
164 	if_clone_attach(&ipsec_cloner);
165 }
166 
167 static int
168 if_ipsec_clone_create(struct if_clone *ifc, int unit)
169 {
170 	struct ipsec_softc *sc;
171 	struct ipsec_variant *var;
172 
173 	sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
174 
175 	if_initname(&sc->ipsec_if, ifc->ifc_name, unit);
176 
177 	if_ipsec_attach0(sc);
178 
179 	var = kmem_zalloc(sizeof(*var), KM_SLEEP);
180 	var->iv_softc = sc;
181 	psref_target_init(&var->iv_psref, iv_psref_class);
182 
183 	sc->ipsec_var = var;
184 	mutex_init(&sc->ipsec_lock, MUTEX_DEFAULT, IPL_NONE);
185 	sc->ipsec_ro_percpu = percpu_alloc(sizeof(struct ipsec_ro));
186 	percpu_foreach(sc->ipsec_ro_percpu, if_ipsec_ro_init_pc, NULL);
187 
188 	mutex_enter(&ipsec_softcs.lock);
189 	LIST_INSERT_HEAD(&ipsec_softcs.list, sc, ipsec_list);
190 	mutex_exit(&ipsec_softcs.lock);
191 	return 0;
192 }
193 
194 static void
195 if_ipsec_attach0(struct ipsec_softc *sc)
196 {
197 
198 	sc->ipsec_if.if_addrlen = 0;
199 	sc->ipsec_if.if_mtu    = IPSEC_MTU;
200 	sc->ipsec_if.if_flags  = IFF_POINTOPOINT | IFF_MULTICAST;
201 	/* set ipsec(4) specific default flags. */
202 	sc->ipsec_if.if_flags  |= IFF_FWD_IPV6;
203 	sc->ipsec_if.if_extflags = IFEF_NO_LINK_STATE_CHANGE | IFEF_MPSAFE;
204 	sc->ipsec_if.if_ioctl  = if_ipsec_ioctl;
205 	sc->ipsec_if.if_output = if_ipsec_output;
206 	sc->ipsec_if.if_type   = IFT_IPSEC;
207 	sc->ipsec_if.if_dlt    = DLT_NULL;
208 	sc->ipsec_if.if_softc  = sc;
209 	IFQ_SET_READY(&sc->ipsec_if.if_snd);
210 	if_initialize(&sc->ipsec_if);
211 	if_alloc_sadl(&sc->ipsec_if);
212 	bpf_attach(&sc->ipsec_if, DLT_NULL, sizeof(u_int));
213 	if_register(&sc->ipsec_if);
214 }
215 
216 static void
217 if_ipsec_ro_init_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
218 {
219 	struct ipsec_ro *iro = p;
220 
221 	mutex_init(&iro->ir_lock, MUTEX_DEFAULT, IPL_NONE);
222 }
223 
224 static void
225 if_ipsec_ro_fini_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
226 {
227 	struct ipsec_ro *iro = p;
228 
229 	rtcache_free(&iro->ir_ro);
230 
231 	mutex_destroy(&iro->ir_lock);
232 }
233 
234 static int
235 if_ipsec_clone_destroy(struct ifnet *ifp)
236 {
237 	struct ipsec_softc *sc = ifp->if_softc;
238 	struct ipsec_variant *var;
239 	int bound;
240 
241 	mutex_enter(&ipsec_softcs.lock);
242 	LIST_REMOVE(sc, ipsec_list);
243 	mutex_exit(&ipsec_softcs.lock);
244 
245 	bound = curlwp_bind();
246 	if_ipsec_delete_tunnel(&sc->ipsec_if);
247 	curlwp_bindx(bound);
248 
249 	bpf_detach(ifp);
250 	if_detach(ifp);
251 
252 	percpu_foreach(sc->ipsec_ro_percpu, if_ipsec_ro_fini_pc, NULL);
253 	percpu_free(sc->ipsec_ro_percpu, sizeof(struct ipsec_ro));
254 
255 	mutex_destroy(&sc->ipsec_lock);
256 
257 	var = sc->ipsec_var;
258 	kmem_free(var, sizeof(*var));
259 	kmem_free(sc, sizeof(*sc));
260 
261 	return 0;
262 }
263 
264 static inline bool
265 if_ipsec_nat_t(struct ipsec_softc *sc)
266 {
267 
268 	return (sc->ipsec_if.if_flags & IFF_NAT_T) != 0;
269 }
270 
271 static inline bool
272 if_ipsec_fwd_ipv6(struct ipsec_softc *sc)
273 {
274 
275 	return (sc->ipsec_if.if_flags & IFF_FWD_IPV6) != 0;
276 }
277 
278 int
279 if_ipsec_encap_func(struct mbuf *m, int off, int proto, void *arg)
280 {
281 	struct ip ip;
282 	struct ipsec_softc *sc;
283 	struct ipsec_variant *var = NULL;
284 	struct psref psref;
285 	int ret = 0;
286 
287 	sc = arg;
288 	KASSERT(sc != NULL);
289 
290 	if ((sc->ipsec_if.if_flags & IFF_UP) == 0)
291 		goto out;
292 
293 	var = if_ipsec_getref_variant(sc, &psref);
294 	if (if_ipsec_variant_is_unconfigured(var))
295 		goto out;
296 
297 	switch (proto) {
298 	case IPPROTO_IPV4:
299 	case IPPROTO_IPV6:
300 		break;
301 	default:
302 		goto out;
303 	}
304 
305 	if (m->m_pkthdr.len < sizeof(ip))
306 		goto out;
307 
308 	m_copydata(m, 0, sizeof(ip), &ip);
309 	switch (ip.ip_v) {
310 #ifdef INET
311 	case IPVERSION:
312 		if (var->iv_psrc->sa_family != AF_INET ||
313 		    var->iv_pdst->sa_family != AF_INET)
314 			goto out;
315 		ret = ipsecif4_encap_func(m, &ip, var);
316 		break;
317 #endif
318 	default:
319 		goto out;
320 	}
321 
322 out:
323 	if (var != NULL)
324 		if_ipsec_putref_variant(var, &psref);
325 	return ret;
326 }
327 
328 /*
329  * ipsec(4) I/F may cause infinite recursion calls when misconfigured.
330  * We'll prevent this by introducing upper limit.
331  */
332 static int
333 if_ipsec_check_nesting(struct ifnet *ifp, struct mbuf *m)
334 {
335 
336 	return if_tunnel_check_nesting(ifp, m, max_ipsec_nesting);
337 }
338 
339 int
340 if_ipsec_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
341     const struct rtentry *rt)
342 {
343 	struct ipsec_softc *sc = ifp->if_softc;
344 	struct ipsec_variant *var;
345 	struct psref psref;
346 	int error;
347 	int bound;
348 
349 	IFQ_CLASSIFY(&ifp->if_snd, m, dst->sa_family);
350 
351 	error = if_ipsec_check_nesting(ifp, m);
352 	if (error) {
353 		m_freem(m);
354 		goto noref_end;
355 	}
356 
357 	if ((ifp->if_flags & IFF_UP) == 0) {
358 		m_freem(m);
359 		error = ENETDOWN;
360 		goto noref_end;
361 	}
362 
363 
364 	bound = curlwp_bind();
365 	var = if_ipsec_getref_variant(sc, &psref);
366 	if (if_ipsec_variant_is_unconfigured(var)) {
367 		m_freem(m);
368 		error = ENETDOWN;
369 		goto end;
370 	}
371 
372 	m->m_flags &= ~(M_BCAST|M_MCAST);
373 
374 	/* use DLT_NULL encapsulation here to pass inner af type */
375 	M_PREPEND(m, sizeof(int), M_DONTWAIT);
376 	if (!m) {
377 		error = ENOBUFS;
378 		goto end;
379 	}
380 	*mtod(m, int *) = dst->sa_family;
381 
382 #if INET6
383 	/* drop IPv6 packet if IFF_FWD_IPV6 is not set */
384 	if (dst->sa_family == AF_INET6 &&
385 	    !if_ipsec_fwd_ipv6(sc)) {
386 		/*
387 		 * IPv6 packet is not allowed to forward,that is not error.
388 		 */
389 		error = 0;
390 		IF_DROP(&ifp->if_snd);
391 		m_freem(m);
392 		goto end;
393 	}
394 #endif
395 
396 	error = if_ipsec_out_direct(var, m, dst->sa_family);
397 
398 end:
399 	if_ipsec_putref_variant(var, &psref);
400 	curlwp_bindx(bound);
401 noref_end:
402 	if (error)
403 		ifp->if_oerrors++;
404 
405 	return error;
406 }
407 
408 static inline int
409 if_ipsec_out_direct(struct ipsec_variant *var, struct mbuf *m, int family)
410 {
411 	struct ifnet *ifp = &var->iv_softc->ipsec_if;
412 	int error;
413 	int len;
414 
415 	KASSERT(if_ipsec_heldref_variant(var));
416 	KASSERT(var->iv_output != NULL);
417 
418 	len = m->m_pkthdr.len;
419 
420 	/* input DLT_NULL frame to BPF */
421 	bpf_mtap(ifp, m);
422 
423 	/* grab and chop off inner af type */
424 	/* XXX need pullup? */
425 	m_adj(m, sizeof(int));
426 
427 	error = var->iv_output(var, family, m);
428 	if (error)
429 		return error;
430 
431 	ifp->if_opackets++;
432 	ifp->if_obytes += len;
433 
434 	return 0;
435 }
436 
437 void
438 if_ipsec_input(struct mbuf *m, int af, struct ifnet *ifp)
439 {
440 
441 	KASSERT(ifp != NULL);
442 
443 	m_set_rcvif(m, ifp);
444 
445 	bpf_mtap_af(ifp, af, m);
446 
447 	if_ipsec_in_enqueue(m, af, ifp);
448 
449 	return;
450 }
451 
452 static inline void
453 if_ipsec_in_enqueue(struct mbuf *m, int af, struct ifnet *ifp)
454 {
455 	pktqueue_t *pktq;
456 	int pktlen;
457 
458 	/*
459 	 * Put the packet to the network layer input queue according to the
460 	 * specified address family.
461 	 */
462 	switch (af) {
463 #ifdef INET
464 	case AF_INET:
465 		pktq = ip_pktq;
466 		break;
467 #endif
468 #ifdef INET6
469 	case AF_INET6:
470 		pktq = ip6_pktq;
471 		break;
472 #endif
473 	default:
474 		ifp->if_ierrors++;
475 		m_freem(m);
476 		return;
477 	}
478 
479 #if 1
480 	const u_int h = curcpu()->ci_index;
481 #else
482 	const uint32_t h = pktq_rps_hash(m);
483 #endif
484 	pktlen = m->m_pkthdr.len;
485 	if (__predict_true(pktq_enqueue(pktq, m, h))) {
486 		ifp->if_ibytes += pktlen;
487 		ifp->if_ipackets++;
488 	} else {
489 		m_freem(m);
490 	}
491 
492 	return;
493 }
494 
495 static inline int
496 if_ipsec_check_salen(struct sockaddr *addr)
497 {
498 
499 	switch (addr->sa_family) {
500 #ifdef INET
501 	case AF_INET:
502 		if (addr->sa_len != sizeof(struct sockaddr_in))
503 			return EINVAL;
504 		break;
505 #endif /* INET */
506 #ifdef INET6
507 	case AF_INET6:
508 		if (addr->sa_len != sizeof(struct sockaddr_in6))
509 			return EINVAL;
510 		break;
511 #endif /* INET6 */
512 	default:
513 		return EAFNOSUPPORT;
514 	}
515 
516 	return 0;
517 }
518 
519 /* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
520 int
521 if_ipsec_ioctl(struct ifnet *ifp, u_long cmd, void *data)
522 {
523 	struct ipsec_softc *sc  = ifp->if_softc;
524 	struct ipsec_variant *var = NULL;
525 	struct ifreq     *ifr = (struct ifreq*)data;
526 	struct ifaddr    *ifa = (struct ifaddr*)data;
527 	int error = 0, size;
528 	struct sockaddr *dst, *src;
529 	u_long mtu;
530 	short oflags = ifp->if_flags;
531 	int bound;
532 	struct psref psref;
533 
534 	switch (cmd) {
535 	case SIOCINITIFADDR:
536 		ifp->if_flags |= IFF_UP;
537 		ifa->ifa_rtrequest = p2p_rtrequest;
538 		break;
539 
540 	case SIOCSIFDSTADDR:
541 		break;
542 
543 	case SIOCADDMULTI:
544 	case SIOCDELMULTI:
545 		switch (ifr->ifr_addr.sa_family) {
546 #ifdef INET
547 		case AF_INET:	/* IP supports Multicast */
548 			break;
549 #endif /* INET */
550 #ifdef INET6
551 		case AF_INET6:	/* IP6 supports Multicast */
552 			break;
553 #endif /* INET6 */
554 		default:  /* Other protocols doesn't support Multicast */
555 			error = EAFNOSUPPORT;
556 			break;
557 		}
558 		break;
559 
560 	case SIOCSIFMTU:
561 		mtu = ifr->ifr_mtu;
562 		if (mtu < IPSEC_MTU_MIN || mtu > IPSEC_MTU_MAX)
563 			return EINVAL;
564 		else if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
565 			error = 0;
566 		break;
567 
568 #ifdef INET
569 	case SIOCSIFPHYADDR:
570 #endif
571 #ifdef INET6
572 	case SIOCSIFPHYADDR_IN6:
573 #endif /* INET6 */
574 	case SIOCSLIFPHYADDR:
575 		switch (cmd) {
576 #ifdef INET
577 		case SIOCSIFPHYADDR:
578 			src = (struct sockaddr *)
579 				&(((struct in_aliasreq *)data)->ifra_addr);
580 			dst = (struct sockaddr *)
581 				&(((struct in_aliasreq *)data)->ifra_dstaddr);
582 			break;
583 #endif /* INET */
584 #ifdef INET6
585 		case SIOCSIFPHYADDR_IN6:
586 			src = (struct sockaddr *)
587 				&(((struct in6_aliasreq *)data)->ifra_addr);
588 			dst = (struct sockaddr *)
589 				&(((struct in6_aliasreq *)data)->ifra_dstaddr);
590 			break;
591 #endif /* INET6 */
592 		case SIOCSLIFPHYADDR:
593 			src = (struct sockaddr *)
594 				&(((struct if_laddrreq *)data)->addr);
595 			dst = (struct sockaddr *)
596 				&(((struct if_laddrreq *)data)->dstaddr);
597 			break;
598 		default:
599 			return EINVAL;
600 		}
601 
602 		/* sa_family must be equal */
603 		if (src->sa_family != dst->sa_family)
604 			return EINVAL;
605 
606 		error = if_ipsec_check_salen(src);
607 		if (error)
608 			return error;
609 		error = if_ipsec_check_salen(dst);
610 		if (error)
611 			return error;
612 
613 		/* check sa_family looks sane for the cmd */
614 		switch (cmd) {
615 #ifdef INET
616 		case SIOCSIFPHYADDR:
617 			if (src->sa_family == AF_INET)
618 				break;
619 			return EAFNOSUPPORT;
620 #endif /* INET */
621 #ifdef INET6
622 		case SIOCSIFPHYADDR_IN6:
623 			if (src->sa_family == AF_INET6)
624 				break;
625 			return EAFNOSUPPORT;
626 #endif /* INET6 */
627 		case SIOCSLIFPHYADDR:
628 			/* checks done in the above */
629 			break;
630 		}
631 		/*
632 		 * calls if_ipsec_getref_variant() for other softcs to check
633 		 * address pair duplicattion
634 		 */
635 		bound = curlwp_bind();
636 		error = if_ipsec_set_tunnel(&sc->ipsec_if, src, dst);
637 		if (error)
638 			goto bad;
639 		break;
640 
641 	case SIOCDIFPHYADDR:
642 		bound = curlwp_bind();
643 		if_ipsec_delete_tunnel(&sc->ipsec_if);
644 		curlwp_bindx(bound);
645 		break;
646 
647 	case SIOCGIFPSRCADDR:
648 #ifdef INET6
649 	case SIOCGIFPSRCADDR_IN6:
650 #endif /* INET6 */
651 		bound = curlwp_bind();
652 		var = if_ipsec_getref_variant(sc, &psref);
653 		if (var->iv_psrc == NULL) {
654 			error = EADDRNOTAVAIL;
655 			goto bad;
656 		}
657 		src = var->iv_psrc;
658 		switch (cmd) {
659 #ifdef INET
660 		case SIOCGIFPSRCADDR:
661 			dst = &ifr->ifr_addr;
662 			size = sizeof(ifr->ifr_addr);
663 			break;
664 #endif /* INET */
665 #ifdef INET6
666 		case SIOCGIFPSRCADDR_IN6:
667 			dst = (struct sockaddr *)
668 				&(((struct in6_ifreq *)data)->ifr_addr);
669 			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
670 			break;
671 #endif /* INET6 */
672 		default:
673 			error = EADDRNOTAVAIL;
674 			goto bad;
675 		}
676 		if (src->sa_len > size) {
677 			error = EINVAL;
678 			goto bad;
679 		}
680 		error = IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, dst);
681 		if (error)
682 			goto bad;
683 		if_ipsec_putref_variant(var, &psref);
684 		curlwp_bindx(bound);
685 		break;
686 
687 	case SIOCGIFPDSTADDR:
688 #ifdef INET6
689 	case SIOCGIFPDSTADDR_IN6:
690 #endif /* INET6 */
691 		bound = curlwp_bind();
692 		var = if_ipsec_getref_variant(sc, &psref);
693 		if (var->iv_pdst == NULL) {
694 			error = EADDRNOTAVAIL;
695 			goto bad;
696 		}
697 		src = var->iv_pdst;
698 		switch (cmd) {
699 #ifdef INET
700 		case SIOCGIFPDSTADDR:
701 			dst = &ifr->ifr_addr;
702 			size = sizeof(ifr->ifr_addr);
703 			break;
704 #endif /* INET */
705 #ifdef INET6
706 		case SIOCGIFPDSTADDR_IN6:
707 			dst = (struct sockaddr *)
708 				&(((struct in6_ifreq *)data)->ifr_addr);
709 			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
710 			break;
711 #endif /* INET6 */
712 		default:
713 			error = EADDRNOTAVAIL;
714 			goto bad;
715 		}
716 		if (src->sa_len > size) {
717 			error = EINVAL;
718 			goto bad;
719 		}
720 		error = IF_IPSEC_GATHER_PDST_ADDR_PORT(var, dst);
721 		if (error)
722 			goto bad;
723 		if_ipsec_putref_variant(var, &psref);
724 		curlwp_bindx(bound);
725 		break;
726 
727 	case SIOCGLIFPHYADDR:
728 		bound = curlwp_bind();
729 		var = if_ipsec_getref_variant(sc, &psref);
730 		if (if_ipsec_variant_is_unconfigured(var)) {
731 			error = EADDRNOTAVAIL;
732 			goto bad;
733 		}
734 
735 		/* copy src */
736 		src = var->iv_psrc;
737 		dst = (struct sockaddr *)
738 			&(((struct if_laddrreq *)data)->addr);
739 		size = sizeof(((struct if_laddrreq *)data)->addr);
740 		if (src->sa_len > size) {
741 			error = EINVAL;
742 			goto bad;
743 		}
744 		error = IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, dst);
745 		if (error)
746 			goto bad;
747 
748 		/* copy dst */
749 		src = var->iv_pdst;
750 		dst = (struct sockaddr *)
751 			&(((struct if_laddrreq *)data)->dstaddr);
752 		size = sizeof(((struct if_laddrreq *)data)->dstaddr);
753 		if (src->sa_len > size) {
754 			error = EINVAL;
755 			goto bad;
756 		}
757 		error = IF_IPSEC_GATHER_PDST_ADDR_PORT(var, dst);
758 		if (error)
759 			goto bad;
760 		if_ipsec_putref_variant(var, &psref);
761 		curlwp_bindx(bound);
762 		break;
763 
764 	default:
765 		error = ifioctl_common(ifp, cmd, data);
766 		if (!error) {
767 			bound = curlwp_bind();
768 			error = if_ipsec_ensure_flags(&sc->ipsec_if, oflags);
769 			if (error)
770 				goto bad;
771 		}
772 		break;
773 	}
774 	return error;
775 
776 bad:
777 	if (var != NULL)
778 		if_ipsec_putref_variant(var, &psref);
779 	curlwp_bindx(bound);
780 
781 	return error;
782 }
783 
/*
 * One hook per compiled-in outer address family.  Two instances exist:
 * one holding the attach routines and one holding the detach routines;
 * if_ipsec_encap_common() picks the member that matches the family of the
 * variant's outer source address.
 */
struct encap_funcs {
#ifdef INET
	int (*ef_inet)(struct ipsec_variant *);
#endif
#ifdef INET6
	int (*ef_inet6)(struct ipsec_variant *);
#endif
};

/* Hooks used by if_ipsec_encap_attach(). */
static struct encap_funcs ipsec_encap_attach = {
#ifdef INET
	.ef_inet = ipsecif4_attach,
#endif
#ifdef INET6
	.ef_inet6 = ipsecif6_attach,
#endif
};

/* Hooks used by if_ipsec_encap_detach(). */
static struct encap_funcs ipsec_encap_detach = {
#ifdef INET
	.ef_inet = ipsecif4_detach,
#endif
#ifdef INET6
	.ef_inet6 = ipsecif6_detach,
#endif
};
810 
811 static int
812 if_ipsec_encap_common(struct ipsec_variant *var, struct encap_funcs *funcs)
813 {
814 	int error;
815 
816 	KASSERT(var != NULL);
817 	KASSERT(if_ipsec_variant_is_configured(var));
818 
819 	switch (var->iv_psrc->sa_family) {
820 #ifdef INET
821 	case AF_INET:
822 		error = (funcs->ef_inet)(var);
823 		break;
824 #endif /* INET */
825 #ifdef INET6
826 	case AF_INET6:
827 		error = (funcs->ef_inet6)(var);
828 		break;
829 #endif /* INET6 */
830 	default:
831 		error = EINVAL;
832 		break;
833 	}
834 
835 	return error;
836 }
837 
838 static int
839 if_ipsec_encap_attach(struct ipsec_variant *var)
840 {
841 
842 	return if_ipsec_encap_common(var, &ipsec_encap_attach);
843 }
844 
845 static int
846 if_ipsec_encap_detach(struct ipsec_variant *var)
847 {
848 
849 	return if_ipsec_encap_common(var, &ipsec_encap_detach);
850 }
851 
852 /*
853  * Validate and set ipsec(4) I/F configurations.
854  *     (1) validate
855  *         (1-1) Check the argument src and dst address pair will change
856  *               configuration from current src and dst address pair.
857  *         (1-2) Check any ipsec(4) I/F uses duplicated src and dst address pair
858  *               with argument src and dst address pair, except for NAT-T shared
859  *               tunnels.
860  *     (2) set
861  *         (2-1) Create variant for new configuration.
862  *         (2-2) Create temporary "null" variant used to avoid to access
863  *               dangling variant while SPs are deleted and added.
864  *         (2-3) Swap variant include its SPs.
865  *         (2-4) Cleanup last configurations.
866  */
867 static int
868 if_ipsec_set_tunnel(struct ifnet *ifp,
869     struct sockaddr *src, struct sockaddr *dst)
870 {
871 	struct ipsec_softc *sc = ifp->if_softc;
872 	struct ipsec_softc *sc2;
873 	struct ipsec_variant *ovar, *nvar, *nullvar;
874 	struct sockaddr *osrc, *odst;
875 	struct sockaddr *nsrc, *ndst;
876 	in_port_t nsport = 0, ndport = 0;
877 	int error;
878 
879 	error = encap_lock_enter();
880 	if (error)
881 		return error;
882 
883 	nsrc = sockaddr_dup(src, M_WAITOK);
884 	ndst = sockaddr_dup(dst, M_WAITOK);
885 	nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
886 	nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
887 
888 	mutex_enter(&sc->ipsec_lock);
889 
890 	ovar = sc->ipsec_var;
891 
892 	switch(nsrc->sa_family) {
893 #ifdef INET
894 	case AF_INET:
895 		nsport = ntohs(satosin(src)->sin_port);
896 		/*
897 		 * avoid confuse SP when NAT-T disabled,
898 		 * e.g.
899 		 *     expected: 10.0.1.2[any] 10.0.1.1[any] 4(ipv4)
900 		 *     confuse : 10.0.1.2[600] 10.0.1.1[600] 4(ipv4)
901 		 */
902 		satosin(nsrc)->sin_port = 0;
903 		ndport = ntohs(satosin(dst)->sin_port);
904 		satosin(ndst)->sin_port = 0;
905 		break;
906 #endif /* INET */
907 #ifdef INET6
908 	case AF_INET6:
909 		nsport = ntohs(satosin6(src)->sin6_port);
910 		satosin6(nsrc)->sin6_port = 0;
911 		ndport = ntohs(satosin6(dst)->sin6_port);
912 		satosin6(ndst)->sin6_port = 0;
913 		break;
914 #endif /* INET6 */
915 	default:
916 		log(LOG_DEBUG,
917 		    "%s: Invalid address family: %d.\n",
918 		    __func__, src->sa_family);
919 		error = EINVAL;
920 		goto out;
921 	}
922 
923 	/*
924 	 * (1-1) Check the argument src and dst address pair will change
925 	 *       configuration from current src and dst address pair.
926 	 */
927 	if ((ovar->iv_pdst && sockaddr_cmp(ovar->iv_pdst, dst) == 0) &&
928 	    (ovar->iv_psrc && sockaddr_cmp(ovar->iv_psrc, src) == 0) &&
929 	    (ovar->iv_sport == nsport && ovar->iv_dport == ndport)) {
930 		/* address and port pair not changed. */
931 		error = 0;
932 		goto out;
933 	}
934 
935 	/*
936 	 * (1-2) Check any ipsec(4) I/F uses duplicated src and dst address pair
937 	 *       with argument src and dst address pair, except for NAT-T shared
938 	 *       tunnels.
939 	 */
940 	mutex_enter(&ipsec_softcs.lock);
941 	LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
942 		struct ipsec_variant *var2;
943 		struct psref psref;
944 
945 		if (sc2 == sc)
946 			continue;
947 		var2 = if_ipsec_getref_variant(sc2, &psref);
948 		if (if_ipsec_variant_is_unconfigured(var2)) {
949 			if_ipsec_putref_variant(var2, &psref);
950 			continue;
951 		}
952 		if (if_ipsec_nat_t(sc) || if_ipsec_nat_t(sc2)) {
953 			if_ipsec_putref_variant(var2, &psref);
954 			continue; /* NAT-T shared tunnel */
955 		}
956 		if (sockaddr_cmp(var2->iv_pdst, dst) == 0 &&
957 		    sockaddr_cmp(var2->iv_psrc, src) == 0) {
958 			if_ipsec_putref_variant(var2, &psref);
959 			mutex_exit(&ipsec_softcs.lock);
960 			error = EADDRNOTAVAIL;
961 			goto out;
962 		}
963 
964 		if_ipsec_putref_variant(var2, &psref);
965 		/* XXX both end must be valid? (I mean, not 0.0.0.0) */
966 	}
967 	mutex_exit(&ipsec_softcs.lock);
968 
969 
970 	osrc = ovar->iv_psrc;
971 	odst = ovar->iv_pdst;
972 
973 	/*
974 	 * (2-1) Create ipsec_variant for new configuration.
975 	 */
976 	if_ipsec_copy_variant(nvar, ovar);
977 	nvar->iv_psrc = nsrc;
978 	nvar->iv_pdst = ndst;
979 	nvar->iv_sport = nsport;
980 	nvar->iv_dport = ndport;
981 	nvar->iv_encap_cookie4 = NULL;
982 	nvar->iv_encap_cookie6 = NULL;
983 	psref_target_init(&nvar->iv_psref, iv_psref_class);
984 	error = if_ipsec_encap_attach(nvar);
985 	if (error)
986 		goto out;
987 
988 	/*
989 	 * (2-2) Create temporary "null" variant.
990 	 */
991 	if_ipsec_copy_variant(nullvar, ovar);
992 	if_ipsec_clear_config(nullvar);
993 	psref_target_init(&nullvar->iv_psref, iv_psref_class);
994 	membar_producer();
995 	/*
996 	 * (2-3) Swap variant include its SPs.
997 	 */
998 	error = if_ipsec_update_variant(sc, nvar, nullvar);
999 	if (error) {
1000 		if_ipsec_encap_detach(nvar);
1001 		goto out;
1002 	}
1003 
1004 	mutex_exit(&sc->ipsec_lock);
1005 
1006 	/*
1007 	 * (2-4) Cleanup last configurations.
1008 	 */
1009 	if (if_ipsec_variant_is_configured(ovar))
1010 		if_ipsec_encap_detach(ovar);
1011 	encap_lock_exit();
1012 
1013 	if (osrc != NULL)
1014 		sockaddr_free(osrc);
1015 	if (odst != NULL)
1016 		sockaddr_free(odst);
1017 	kmem_free(ovar, sizeof(*ovar));
1018 	kmem_free(nullvar, sizeof(*nullvar));
1019 
1020 	return 0;
1021 
1022 out:
1023 	mutex_exit(&sc->ipsec_lock);
1024 	encap_lock_exit();
1025 
1026 	sockaddr_free(nsrc);
1027 	sockaddr_free(ndst);
1028 	kmem_free(nvar, sizeof(*nvar));
1029 	kmem_free(nullvar, sizeof(*nullvar));
1030 
1031 	return error;
1032 }
1033 
1034 /*
1035  * Validate and delete ipsec(4) I/F configurations.
1036  *     (1) validate
1037  *         (1-1) Check current src and dst address pair are null,
1038  *               which means the ipsec(4) I/F is already done deletetunnel.
1039  *     (2) delete
1040  *         (2-1) Create variant for deleted status.
1041  *         (2-2) Create temporary "null" variant used to avoid to access
1042  *               dangling variant while SPs are deleted and added.
1043  *               NOTE:
1044  *               The contents of temporary "null" variant equal to the variant
1045  *               of (2-1), however two psref_target_destroy() synchronization
1046  *               points are necessary to avoid to access dangling variant
1047  *               while SPs are deleted and added. To implement that simply,
1048  *               we use the same manner as if_ipsec_set_tunnel(), that is,
1049  *               create extra "null" variant and use it temporarily.
1050  *         (2-3) Swap variant include its SPs.
1051  *         (2-4) Cleanup last configurations.
1052  */
1053 static void
1054 if_ipsec_delete_tunnel(struct ifnet *ifp)
1055 {
1056 	struct ipsec_softc *sc = ifp->if_softc;
1057 	struct ipsec_variant *ovar, *nvar, *nullvar;
1058 	struct sockaddr *osrc, *odst;
1059 	int error;
1060 
1061 	error = encap_lock_enter();
1062 	if (error)
1063 		return;
1064 
1065 	nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
1066 	nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
1067 
1068 	mutex_enter(&sc->ipsec_lock);
1069 
1070 	ovar = sc->ipsec_var;
1071 	osrc = ovar->iv_psrc;
1072 	odst = ovar->iv_pdst;
1073 	/*
1074 	 * (1-1) Check current src and dst address pair are null,
1075 	 *       which means the ipsec(4) I/F is already done deletetunnel.
1076 	 */
1077 	if (osrc == NULL || odst == NULL) {
1078 		/* address pair not changed. */
1079 		mutex_exit(&sc->ipsec_lock);
1080 		encap_lock_exit();
1081 		kmem_free(nvar, sizeof(*nvar));
1082 		return;
1083 	}
1084 
1085 	/*
1086 	 * (2-1) Create variant for deleted status.
1087 	 */
1088 	if_ipsec_copy_variant(nvar, ovar);
1089 	if_ipsec_clear_config(nvar);
1090 	psref_target_init(&nvar->iv_psref, iv_psref_class);
1091 
1092 	/*
1093 	 * (2-2) Create temporary "null" variant used to avoid to access
1094 	 *       dangling variant while SPs are deleted and added.
1095 	 */
1096 	if_ipsec_copy_variant(nullvar, ovar);
1097 	if_ipsec_clear_config(nullvar);
1098 	psref_target_init(&nullvar->iv_psref, iv_psref_class);
1099 	membar_producer();
1100 	/*
1101 	 * (2-3) Swap variant include its SPs.
1102 	 */
1103 	/* if_ipsec_update_variant() does not fail when delete SP only. */
1104 	(void)if_ipsec_update_variant(sc, nvar, nullvar);
1105 
1106 	mutex_exit(&sc->ipsec_lock);
1107 
1108 	/*
1109 	 * (2-4) Cleanup last configurations.
1110 	 */
1111 	if (if_ipsec_variant_is_configured(ovar))
1112 		if_ipsec_encap_detach(ovar);
1113 	encap_lock_exit();
1114 
1115 	sockaddr_free(osrc);
1116 	sockaddr_free(odst);
1117 	kmem_free(ovar, sizeof(*ovar));
1118 	kmem_free(nullvar, sizeof(*nullvar));
1119 }
1120 
1121 /*
1122  * Check IFF_NAT_T and IFF_FWD_IPV6 flags, therefore update SPs if needed.
1123  *     (1) check
1124  *         (1-1) Check flags are changed.
1125  *         (1-2) Check current src and dst address pair. If they are null,
1126  *               that means the ipsec(4) I/F is deletetunnel'ed, so it is
1127  *               not needed to update.
1128  *     (2) update
1129  *         (2-1) Create variant for new SPs.
1130  *         (2-2) Create temporary "null" variant used to avoid to access
1131  *               dangling variant while SPs are deleted and added.
1132  *               NOTE:
1133  *               There is the same problem as if_ipsec_delete_tunnel().
1134  *         (2-3) Swap variant include its SPs.
1135  *         (2-4) Cleanup unused configurations.
1136  *               NOTE: use the same encap_cookies.
1137  */
1138 static int
1139 if_ipsec_ensure_flags(struct ifnet *ifp, short oflags)
1140 {
1141 	struct ipsec_softc *sc = ifp->if_softc;
1142 	struct ipsec_variant *ovar, *nvar, *nullvar;
1143 	int error;
1144 
1145 	/*
1146 	 * (1) Check flags are changed.
1147 	 */
1148 	if ((oflags & (IFF_NAT_T|IFF_FWD_IPV6)) ==
1149 	    (ifp->if_flags & (IFF_NAT_T|IFF_FWD_IPV6)))
1150 		return 0; /* flags not changed. */
1151 
1152 	error = encap_lock_enter();
1153 	if (error)
1154 		return error;
1155 
1156 	nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
1157 	nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
1158 
1159 	mutex_enter(&sc->ipsec_lock);
1160 
1161 	ovar = sc->ipsec_var;
1162 	/*
1163 	 * (1-2) Check current src and dst address pair.
1164 	 */
1165 	if (if_ipsec_variant_is_unconfigured(ovar)) {
1166 		/* nothing to do */
1167 		mutex_exit(&sc->ipsec_lock);
1168 		return 0;
1169 	}
1170 
1171 	/*
1172 	 * (2-1) Create variant for new SPs.
1173 	 */
1174 	if_ipsec_copy_variant(nvar, ovar);
1175 	psref_target_init(&nvar->iv_psref, iv_psref_class);
1176 	/*
1177 	 * (2-2) Create temporary "null" variant used to avoid to access
1178 	 *       dangling variant while SPs are deleted and added.
1179 	 */
1180 	if_ipsec_copy_variant(nullvar, ovar);
1181 	if_ipsec_clear_config(nullvar);
1182 	psref_target_init(&nullvar->iv_psref, iv_psref_class);
1183 	membar_producer();
1184 	/*
1185 	 * (2-3) Swap variant include its SPs.
1186 	 */
1187 	error = if_ipsec_update_variant(sc, nvar, nullvar);
1188 
1189 	mutex_exit(&sc->ipsec_lock);
1190 	encap_lock_exit();
1191 
1192 	/*
1193 	 * (2-4) Cleanup unused configurations.
1194 	 */
1195 	if (!error)
1196 		kmem_free(ovar, sizeof(*ovar));
1197 	else
1198 		kmem_free(nvar, sizeof(*ovar));
1199 	kmem_free(nullvar, sizeof(*nullvar));
1200 
1201 	return error;
1202 }
1203 
1204 /*
1205  * SPD management
1206  */
1207 
1208 /*
1209  * Share SP set with other NAT-T ipsec(4) I/F(s).
1210  *     Return 1, when "var" shares SP set.
1211  *     Return 0, when "var" cannot share SP set.
1212  *
1213  * NOTE:
1214  * if_ipsec_share_sp() and if_ipsec_unshare_sp() would require global lock
1215  * to exclude other ipsec(4) I/Fs set_tunnel/delete_tunnel. E.g. when ipsec0
1216  * and ipsec1 can share SP set, running ipsec0's set_tunnel and ipsec1's
1217  * set_tunnel causes race.
1218  * Currently, (fortunately) encap_lock works as this global lock.
1219  */
1220 static int
1221 if_ipsec_share_sp(struct ipsec_variant *var)
1222 {
1223 	struct ipsec_softc *sc = var->iv_softc;
1224 	struct ipsec_softc *sc2;
1225 	struct ipsec_variant *var2;
1226 	struct psref psref;
1227 
1228 	KASSERT(encap_lock_held());
1229 	KASSERT(var->iv_psrc != NULL && var->iv_pdst != NULL);
1230 
1231 	mutex_enter(&ipsec_softcs.lock);
1232 	LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
1233 		if (sc2 == sc)
1234 			continue;
1235 		var2 = if_ipsec_getref_variant(sc2, &psref);
1236 		if (if_ipsec_variant_is_unconfigured(var2)) {
1237 			if_ipsec_putref_variant(var2, &psref);
1238 			continue;
1239 		}
1240 		if (sockaddr_cmp(var2->iv_pdst, var->iv_pdst) != 0 ||
1241 		    sockaddr_cmp(var2->iv_psrc, var->iv_psrc) != 0) {
1242 			if_ipsec_putref_variant(var2, &psref);
1243 			continue;
1244 		}
1245 
1246 		break;
1247 	}
1248 	mutex_exit(&ipsec_softcs.lock);
1249 	if (sc2 == NULL)
1250 		return 0; /* not shared */
1251 
1252 	IV_SP_IN(var) = IV_SP_IN(var2);
1253 	IV_SP_IN6(var) = IV_SP_IN6(var2);
1254 	IV_SP_OUT(var) = IV_SP_OUT(var2);
1255 	IV_SP_OUT6(var) = IV_SP_OUT6(var2);
1256 
1257 	if_ipsec_putref_variant(var2, &psref);
1258 	return 1; /* shared */
1259 }
1260 
1261 /*
1262  * Unshare SP set with other NAT-T ipsec(4) I/F(s).
1263  *     Return 1, when "var" shared SP set, and then unshare them.
1264  *     Return 0, when "var" did not share SP set.
1265  *
1266  * NOTE:
1267  * See if_ipsec_share_sp()'s note.
1268  */
1269 static int
1270 if_ipsec_unshare_sp(struct ipsec_variant *var)
1271 {
1272 	struct ipsec_softc *sc = var->iv_softc;
1273 	struct ipsec_softc *sc2;
1274 	struct ipsec_variant *var2;
1275 	struct psref psref;
1276 
1277 	KASSERT(encap_lock_held());
1278 
1279 	if (!var->iv_pdst || !var->iv_psrc)
1280 		return 0;
1281 
1282 	mutex_enter(&ipsec_softcs.lock);
1283 	LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
1284 		if (sc2 == sc)
1285 			continue;
1286 		var2 = if_ipsec_getref_variant(sc2, &psref);
1287 		if (!var2->iv_pdst || !var2->iv_psrc) {
1288 			if_ipsec_putref_variant(var2, &psref);
1289 			continue;
1290 		}
1291 		if (sockaddr_cmp(var2->iv_pdst, var->iv_pdst) != 0 ||
1292 		    sockaddr_cmp(var2->iv_psrc, var->iv_psrc) != 0) {
1293 			if_ipsec_putref_variant(var2, &psref);
1294 			continue;
1295 		}
1296 
1297 		break;
1298 	}
1299 	mutex_exit(&ipsec_softcs.lock);
1300 	if (sc2 == NULL)
1301 		return 0; /* not shared */
1302 
1303 	IV_SP_IN(var) = NULL;
1304 	IV_SP_IN6(var) = NULL;
1305 	IV_SP_OUT(var) = NULL;
1306 	IV_SP_OUT6(var) = NULL;
1307 	if_ipsec_putref_variant(var2, &psref);
1308 	return 1; /* shared */
1309 }
1310 
1311 static inline void
1312 if_ipsec_add_mbuf(struct mbuf *m0, void *data, size_t len)
1313 {
1314 	struct mbuf *m;
1315 
1316 	MGET(m, M_WAITOK | M_ZERO, MT_DATA);
1317 	m->m_len = PFKEY_ALIGN8(len);
1318 	m_copyback(m, 0, len, data);
1319 	m_cat(m0, m);
1320 }
1321 
1322 static inline void
1323 if_ipsec_add_pad(struct mbuf *m0, size_t len)
1324 {
1325 	struct mbuf *m;
1326 
1327 	if (len == 0)
1328 		return;
1329 
1330 	MGET(m, M_WAITOK | M_ZERO, MT_DATA);
1331 	m->m_len = len;
1332 	m_cat(m0, m);
1333 }
1334 
1335 static inline size_t
1336 if_ipsec_set_sadb_addr(struct sadb_address *saaddr, struct sockaddr *addr,
1337     int proto, uint16_t exttype)
1338 {
1339 	size_t size;
1340 
1341 	KASSERT(saaddr != NULL);
1342 	KASSERT(addr != NULL);
1343 
1344 	size = sizeof(*saaddr) + PFKEY_ALIGN8(addr->sa_len);
1345 	saaddr->sadb_address_len = PFKEY_UNIT64(size);
1346 	saaddr->sadb_address_exttype = exttype;
1347 	saaddr->sadb_address_proto = proto;
1348 	switch (addr->sa_family) {
1349 #ifdef INET
1350 	case AF_INET:
1351 		saaddr->sadb_address_prefixlen = sizeof(struct in_addr) << 3;
1352 		break;
1353 #endif /* INET */
1354 #ifdef INET6
1355 	case AF_INET6:
1356 		saaddr->sadb_address_prefixlen = sizeof(struct in6_addr) << 3;
1357 		break;
1358 #endif /* INET6 */
1359 	default:
1360 		log(LOG_DEBUG,
1361 		    "%s: Invalid address family: %d.\n",
1362 		    __func__, addr->sa_family);
1363 		break;
1364 	}
1365 	saaddr->sadb_address_reserved = 0;
1366 
1367 	return size;
1368 }
1369 
1370 static inline size_t
1371 if_ipsec_set_sadb_src(struct sadb_address *sasrc, struct sockaddr *src,
1372     int proto)
1373 {
1374 
1375 	return if_ipsec_set_sadb_addr(sasrc, src, proto,
1376 	    SADB_EXT_ADDRESS_SRC);
1377 }
1378 
1379 static inline size_t
1380 if_ipsec_set_sadb_dst(struct sadb_address *sadst, struct sockaddr *dst,
1381     int proto)
1382 {
1383 
1384 	return if_ipsec_set_sadb_addr(sadst, dst, proto,
1385 	    SADB_EXT_ADDRESS_DST);
1386 }
1387 
1388 static inline size_t
1389 if_ipsec_set_sadb_x_policy(struct sadb_x_policy *xpl,
1390     struct sadb_x_ipsecrequest *xisr, uint16_t policy, uint8_t dir, uint32_t id,
1391     uint8_t level)
1392 {
1393 	size_t size;
1394 
1395 	KASSERT(policy != IPSEC_POLICY_IPSEC || xisr != NULL);
1396 
1397 	size = sizeof(*xpl);
1398 	if (policy == IPSEC_POLICY_IPSEC) {
1399 		size += PFKEY_ALIGN8(sizeof(*xisr));
1400 	}
1401 	xpl->sadb_x_policy_len = PFKEY_UNIT64(size);
1402 	xpl->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
1403 	xpl->sadb_x_policy_type = policy;
1404 	xpl->sadb_x_policy_dir = dir;
1405 	xpl->sadb_x_policy_reserved = 0;
1406 	xpl->sadb_x_policy_id = id;
1407 	xpl->sadb_x_policy_reserved2 = 0;
1408 
1409 	if (policy == IPSEC_POLICY_IPSEC) {
1410 		xisr->sadb_x_ipsecrequest_len = PFKEY_ALIGN8(sizeof(*xisr));
1411 		xisr->sadb_x_ipsecrequest_proto = IPPROTO_ESP;
1412 		xisr->sadb_x_ipsecrequest_mode = IPSEC_MODE_TRANSPORT;
1413 		xisr->sadb_x_ipsecrequest_level = level;
1414 		xisr->sadb_x_ipsecrequest_reqid = key_newreqid();
1415 	}
1416 
1417 	return size;
1418 }
1419 
1420 static inline void
1421 if_ipsec_set_sadb_msg(struct sadb_msg *msg, uint16_t extlen, uint8_t msgtype)
1422 {
1423 
1424 	KASSERT(msg != NULL);
1425 
1426 	msg->sadb_msg_version = PF_KEY_V2;
1427 	msg->sadb_msg_type = msgtype;
1428 	msg->sadb_msg_errno = 0;
1429 	msg->sadb_msg_satype = SADB_SATYPE_UNSPEC;
1430 	msg->sadb_msg_len = PFKEY_UNIT64(sizeof(*msg)) + extlen;
1431 	msg->sadb_msg_reserved = 0;
1432 	msg->sadb_msg_seq = 0; /* XXXX */
1433 	msg->sadb_msg_pid = 0; /* XXXX */
1434 }
1435 
1436 static inline void
1437 if_ipsec_set_sadb_msg_add(struct sadb_msg *msg, uint16_t extlen)
1438 {
1439 
1440 	if_ipsec_set_sadb_msg(msg, extlen, SADB_X_SPDADD);
1441 }
1442 
1443 static inline void
1444 if_ipsec_set_sadb_msg_del(struct sadb_msg *msg, uint16_t extlen)
1445 {
1446 
1447 	if_ipsec_set_sadb_msg(msg, extlen, SADB_X_SPDDELETE2);
1448 }
1449 
1450 static int
1451 if_ipsec_set_addr_port(struct sockaddr *addrport, struct sockaddr *addr,
1452     in_port_t port)
1453 {
1454 	int error = 0;
1455 
1456 	sockaddr_copy(addrport, addr->sa_len, addr);
1457 
1458 	switch (addr->sa_family) {
1459 #ifdef INET
1460 	case AF_INET: {
1461 		struct sockaddr_in *sin = satosin(addrport);
1462 		sin->sin_port = htons(port);
1463 		break;
1464 	}
1465 #endif /* INET */
1466 #ifdef INET6
1467 	case AF_INET6: {
1468 		struct sockaddr_in6 *sin6 = satosin6(addrport);
1469 		sin6->sin6_port = htons(port);
1470 		break;
1471 	}
1472 #endif /* INET6 */
1473 	default:
1474 		log(LOG_DEBUG,
1475 		    "%s: Invalid address family: %d.\n",
1476 		    __func__, addr->sa_family);
1477 		error = EINVAL;
1478 	}
1479 
1480 	return error;
1481 }
1482 
1483 static struct secpolicy *
1484 if_ipsec_add_sp0(struct sockaddr *src, in_port_t sport,
1485     struct sockaddr *dst, in_port_t dport,
1486     int dir, int proto, int level, u_int policy)
1487 {
1488 	struct sadb_msg msg;
1489 	struct sadb_address xsrc, xdst;
1490 	struct sadb_x_policy xpl;
1491 	struct sadb_x_ipsecrequest xisr;
1492 	size_t size;
1493 	size_t padlen;
1494 	uint16_t ext_msg_len = 0;
1495 	struct mbuf *m;
1496 
1497 	memset(&msg, 0, sizeof(msg));
1498 	memset(&xsrc, 0, sizeof(xsrc));
1499 	memset(&xdst, 0, sizeof(xdst));
1500 	memset(&xpl, 0, sizeof(xpl));
1501 	memset(&xisr, 0, sizeof(xisr));
1502 
1503 	MGETHDR(m, M_WAITOK, MT_DATA);
1504 
1505 	size = if_ipsec_set_sadb_src(&xsrc, src, proto);
1506 	ext_msg_len += PFKEY_UNIT64(size);
1507 	size = if_ipsec_set_sadb_dst(&xdst, dst, proto);
1508 	ext_msg_len += PFKEY_UNIT64(size);
1509 	size = if_ipsec_set_sadb_x_policy(&xpl, &xisr, policy, dir, 0, level);
1510 	ext_msg_len += PFKEY_UNIT64(size);
1511 	if_ipsec_set_sadb_msg_add(&msg, ext_msg_len);
1512 
1513 	/* build PF_KEY message */
1514 
1515 	m->m_len = sizeof(msg);
1516 	m_copyback(m, 0, sizeof(msg), &msg);
1517 
1518 	if_ipsec_add_mbuf(m, &xsrc, sizeof(xsrc));
1519 	if (sport == 0) {
1520 		if_ipsec_add_mbuf(m, src, src->sa_len);
1521 	} else {
1522 		struct sockaddr addrport;
1523 
1524 		if_ipsec_set_addr_port(&addrport, src, sport);
1525 		if_ipsec_add_mbuf(m, &addrport, addrport.sa_len);
1526 	}
1527 	padlen = PFKEY_UNUNIT64(xsrc.sadb_address_len)
1528 		- (sizeof(xsrc) + PFKEY_ALIGN8(src->sa_len));
1529 	if_ipsec_add_pad(m, padlen);
1530 
1531 	if_ipsec_add_mbuf(m, &xdst, sizeof(xdst));
1532 	if (dport == 0) {
1533 		if_ipsec_add_mbuf(m, dst, dst->sa_len);
1534 	} else {
1535 		struct sockaddr addrport;
1536 
1537 		if_ipsec_set_addr_port(&addrport, dst, dport);
1538 		if_ipsec_add_mbuf(m, &addrport, addrport.sa_len);
1539 	}
1540 	padlen = PFKEY_UNUNIT64(xdst.sadb_address_len)
1541 		- (sizeof(xdst) + PFKEY_ALIGN8(dst->sa_len));
1542 	if_ipsec_add_pad(m, padlen);
1543 
1544 	if_ipsec_add_mbuf(m, &xpl, sizeof(xpl));
1545 	if (policy == IPSEC_POLICY_IPSEC)
1546 		if_ipsec_add_mbuf(m, &xisr, sizeof(xisr));
1547 
1548 	/* key_kpi_spdadd() has already done KEY_SP_REF(). */
1549 	return key_kpi_spdadd(m);
1550 }
1551 
1552 static int
1553 if_ipsec_add_sp(struct ipsec_variant *var,
1554     struct sockaddr *src, in_port_t sport,
1555     struct sockaddr *dst, in_port_t dport)
1556 {
1557 	struct ipsec_softc *sc = var->iv_softc;
1558 	int level;
1559 	u_int v6policy;
1560 
1561 	/*
1562 	 * must delete sp before add it.
1563 	 */
1564 	KASSERT(IV_SP_IN(var) == NULL);
1565 	KASSERT(IV_SP_OUT(var) == NULL);
1566 	KASSERT(IV_SP_IN6(var) == NULL);
1567 	KASSERT(IV_SP_OUT6(var) == NULL);
1568 
1569 	/*
1570 	 * can be shared?
1571 	 */
1572 	if (if_ipsec_share_sp(var))
1573 		return 0;
1574 
1575 	if (if_ipsec_nat_t(sc))
1576 		level = IPSEC_LEVEL_REQUIRE;
1577 	else
1578 		level = IPSEC_LEVEL_UNIQUE;
1579 
1580 	if (if_ipsec_fwd_ipv6(sc))
1581 		v6policy = IPSEC_POLICY_IPSEC;
1582 	else
1583 		v6policy = IPSEC_POLICY_DISCARD;
1584 
1585 	IV_SP_IN(var) = if_ipsec_add_sp0(dst, dport, src, sport,
1586 	    IPSEC_DIR_INBOUND, IPPROTO_IPIP, level, IPSEC_POLICY_IPSEC);
1587 	if (IV_SP_IN(var) == NULL)
1588 		goto fail;
1589 	IV_SP_OUT(var) = if_ipsec_add_sp0(src, sport, dst, dport,
1590 	    IPSEC_DIR_OUTBOUND, IPPROTO_IPIP, level, IPSEC_POLICY_IPSEC);
1591 	if (IV_SP_OUT(var) == NULL)
1592 		goto fail;
1593 	IV_SP_IN6(var) = if_ipsec_add_sp0(dst, dport, src, sport,
1594 	    IPSEC_DIR_INBOUND, IPPROTO_IPV6, level, v6policy);
1595 	if (IV_SP_IN6(var) == NULL)
1596 		goto fail;
1597 	IV_SP_OUT6(var) = if_ipsec_add_sp0(src, sport, dst, dport,
1598 	    IPSEC_DIR_OUTBOUND, IPPROTO_IPV6, level, v6policy);
1599 	if (IV_SP_OUT6(var) == NULL)
1600 		goto fail;
1601 
1602 	return 0;
1603 
1604 fail:
1605 	if (IV_SP_IN6(var) != NULL) {
1606 		if_ipsec_del_sp0(IV_SP_IN6(var));
1607 		IV_SP_IN6(var) = NULL;
1608 	}
1609 	if (IV_SP_OUT(var) != NULL) {
1610 		if_ipsec_del_sp0(IV_SP_OUT(var));
1611 		IV_SP_OUT(var) = NULL;
1612 	}
1613 	if (IV_SP_IN(var) != NULL) {
1614 		if_ipsec_del_sp0(IV_SP_IN(var));
1615 		IV_SP_IN(var) = NULL;
1616 	}
1617 
1618 	return EEXIST;
1619 }
1620 
1621 static int
1622 if_ipsec_del_sp0(struct secpolicy *sp)
1623 {
1624 	struct sadb_msg msg;
1625 	struct sadb_x_policy xpl;
1626 	size_t size;
1627 	uint16_t ext_msg_len = 0;
1628 	int error;
1629 	struct mbuf *m;
1630 
1631 	if (sp == NULL)
1632 		return 0;
1633 
1634 	memset(&msg, 0, sizeof(msg));
1635 	memset(&xpl, 0, sizeof(xpl));
1636 
1637 	MGETHDR(m, M_WAITOK, MT_DATA);
1638 
1639 	size = if_ipsec_set_sadb_x_policy(&xpl, NULL, 0, 0, sp->id, 0);
1640 	ext_msg_len += PFKEY_UNIT64(size);
1641 
1642 	if_ipsec_set_sadb_msg_del(&msg, ext_msg_len);
1643 
1644 	m->m_len = sizeof(msg);
1645 	m_copyback(m, 0, sizeof(msg), &msg);
1646 
1647 	if_ipsec_add_mbuf(m, &xpl, sizeof(xpl));
1648 
1649 	/*  unreference correspond to key_kpi_spdadd(). */
1650 	KEY_SP_UNREF(&sp);
1651 	error = key_kpi_spddelete2(m);
1652 	if (error != 0) {
1653 		log(LOG_ERR, "%s: cannot delete SP(ID=%u) (error=%d).\n",
1654 		    __func__, sp->id, error);
1655 	}
1656 	return error;
1657 }
1658 
1659 static void
1660 if_ipsec_del_sp(struct ipsec_variant *var)
1661 {
1662 
1663 	/* are the SPs shared? */
1664 	if (if_ipsec_unshare_sp(var))
1665 		return;
1666 
1667 	(void)if_ipsec_del_sp0(IV_SP_OUT(var));
1668 	(void)if_ipsec_del_sp0(IV_SP_IN(var));
1669 	(void)if_ipsec_del_sp0(IV_SP_OUT6(var));
1670 	(void)if_ipsec_del_sp0(IV_SP_IN6(var));
1671 	IV_SP_IN(var) = NULL;
1672 	IV_SP_IN6(var) = NULL;
1673 	IV_SP_OUT(var) = NULL;
1674 	IV_SP_OUT6(var) = NULL;
1675 }
1676 
1677 static int
1678 if_ipsec_replace_sp(struct ipsec_softc *sc, struct ipsec_variant *ovar,
1679     struct ipsec_variant *nvar)
1680 {
1681 	in_port_t src_port = 0;
1682 	in_port_t dst_port = 0;
1683 	struct sockaddr *src;
1684 	struct sockaddr *dst;
1685 	int error = 0;
1686 
1687 	KASSERT(mutex_owned(&sc->ipsec_lock));
1688 
1689 	if_ipsec_del_sp(ovar);
1690 
1691 	src = nvar->iv_psrc;
1692 	dst = nvar->iv_pdst;
1693 	if (if_ipsec_nat_t(sc)) {
1694 		/* NAT-T enabled */
1695 		src_port = nvar->iv_sport;
1696 		dst_port = nvar->iv_dport;
1697 	}
1698 	if (src && dst)
1699 		error = if_ipsec_add_sp(nvar, src, src_port, dst, dst_port);
1700 
1701 	return error;
1702 }
1703 
1704 /*
1705  * ipsec_variant and its SPs update API.
1706  *
1707  * Assumption:
1708  * reader side dereferences sc->ipsec_var in reader critical section only,
1709  * that is, all of reader sides do not reader the sc->ipsec_var after
1710  * pserialize_perform().
1711  */
1712 static int
1713 if_ipsec_update_variant(struct ipsec_softc *sc, struct ipsec_variant *nvar,
1714     struct ipsec_variant *nullvar)
1715 {
1716 	struct ifnet *ifp = &sc->ipsec_if;
1717 	struct ipsec_variant *ovar = sc->ipsec_var;
1718 	int error;
1719 
1720 	KASSERT(mutex_owned(&sc->ipsec_lock));
1721 
1722 	/*
1723 	 * To keep consistency between ipsec(4) I/F settings and SPs,
1724 	 * we stop packet processing while replacing SPs, that is, we set
1725 	 * "null" config variant to sc->ipsec_var.
1726 	 */
1727 	sc->ipsec_var = nullvar;
1728 	pserialize_perform(ipsec_psz);
1729 	psref_target_destroy(&ovar->iv_psref, iv_psref_class);
1730 
1731 	error = if_ipsec_replace_sp(sc, ovar, nvar);
1732 	if (!error)
1733 		sc->ipsec_var = nvar;
1734 	else {
1735 		sc->ipsec_var = ovar; /* rollback */
1736 		psref_target_init(&ovar->iv_psref, iv_psref_class);
1737 	}
1738 
1739 	pserialize_perform(ipsec_psz);
1740 	psref_target_destroy(&nullvar->iv_psref, iv_psref_class);
1741 
1742 	if (if_ipsec_variant_is_configured(sc->ipsec_var))
1743 		ifp->if_flags |= IFF_RUNNING;
1744 	else
1745 		ifp->if_flags &= ~IFF_RUNNING;
1746 
1747 	return error;
1748 }
1749