xref: /netbsd-src/sys/net/if_ipsec.c (revision 1cd43426d582b6650b153797f2db305dcd93c554)
1 /*	$NetBSD: if_ipsec.c,v 1.36 2024/02/10 18:43:53 andvar Exp $  */
2 
3 /*
4  * Copyright (c) 2017 Internet Initiative Japan Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: if_ipsec.c,v 1.36 2024/02/10 18:43:53 andvar Exp $");
31 
32 #ifdef _KERNEL_OPT
33 #include "opt_inet.h"
34 #endif
35 
36 #include <sys/param.h>
37 #include <sys/atomic.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/mbuf.h>
41 #include <sys/socket.h>
42 #include <sys/sockio.h>
43 #include <sys/errno.h>
44 #include <sys/ioctl.h>
45 #include <sys/time.h>
46 #include <sys/syslog.h>
47 #include <sys/cpu.h>
48 #include <sys/kmem.h>
49 #include <sys/mutex.h>
50 #include <sys/pserialize.h>
51 #include <sys/psref.h>
52 #include <sys/sysctl.h>
53 
54 #include <net/if.h>
55 #include <net/if_types.h>
56 #include <net/route.h>
57 #include <net/bpf.h>
58 #include <net/pfkeyv2.h>
59 
60 #include <netinet/in.h>
61 #include <netinet/in_systm.h>
62 #include <netinet/ip.h>
63 #ifdef	INET
64 #include <netinet/in_var.h>
65 #endif	/* INET */
66 
67 #ifdef INET6
68 #include <netinet6/in6_var.h>
69 #include <netinet/ip6.h>
70 #include <netinet6/ip6_var.h>
71 #endif /* INET6 */
72 
73 #include <netinet/ip_encap.h>
74 
75 #include <net/if_ipsec.h>
76 
77 #include <net/raw_cb.h>
78 #include <net/pfkeyv2.h>
79 
80 #include <netipsec/key.h>
81 #include <netipsec/keydb.h> /* for union sockaddr_union */
82 #include <netipsec/ipsec.h>
83 #include <netipsec/ipsecif.h>
84 
85 static int if_ipsec_clone_create(struct if_clone *, int);
86 static int if_ipsec_clone_destroy(struct ifnet *);
87 
88 static inline int if_ipsec_out_direct(struct ipsec_variant *, struct mbuf *, int);
89 static inline void if_ipsec_in_enqueue(struct mbuf *, int, struct ifnet *);
90 
91 static int if_ipsec_encap_attach(struct ipsec_variant *);
92 static int if_ipsec_encap_detach(struct ipsec_variant *);
93 static int if_ipsec_set_tunnel(struct ifnet *,
94     struct sockaddr *, struct sockaddr *);
95 static void if_ipsec_delete_tunnel(struct ifnet *);
96 static int if_ipsec_ensure_flags(struct ifnet *, u_short);
97 static void if_ipsec_attach0(struct ipsec_softc *);
98 
99 static int if_ipsec_update_variant(struct ipsec_softc *,
100     struct ipsec_variant *, struct ipsec_variant *);
101 
102 /* sadb_msg */
103 static inline void if_ipsec_add_mbuf(struct mbuf *, void *, size_t);
104 static inline void if_ipsec_add_pad(struct mbuf *, size_t);
105 static inline size_t if_ipsec_set_sadb_addr(struct sadb_address *,
106     struct sockaddr *, int, uint16_t);
107 static inline size_t if_ipsec_set_sadb_src(struct sadb_address *,
108     struct sockaddr *, int);
109 static inline size_t if_ipsec_set_sadb_dst(struct sadb_address *,
110     struct sockaddr *, int);
111 static inline size_t if_ipsec_set_sadb_x_policy(struct sadb_x_policy *,
112     struct sadb_x_ipsecrequest *, uint16_t, uint8_t, uint32_t, uint8_t,
113     struct sockaddr *, struct sockaddr *, uint16_t);
114 static inline void if_ipsec_set_sadb_msg(struct sadb_msg *, uint16_t, uint8_t);
115 static inline void if_ipsec_set_sadb_msg_add(struct sadb_msg *, uint16_t);
116 static inline void if_ipsec_set_sadb_msg_del(struct sadb_msg *, uint16_t);
117 /* SPD */
118 static int if_ipsec_share_sp(struct ipsec_variant *);
119 static int if_ipsec_unshare_sp(struct ipsec_variant *);
120 static inline struct secpolicy *if_ipsec_add_sp0(struct sockaddr *,
121     in_port_t, struct sockaddr *, in_port_t, int, int, int, u_int, uint16_t);
122 static inline int if_ipsec_del_sp0(struct secpolicy *);
123 static int if_ipsec_add_sp(struct ipsec_variant *,
124     struct sockaddr *, in_port_t, struct sockaddr *, in_port_t);
125 static void if_ipsec_del_sp(struct ipsec_variant *);
126 static int if_ipsec_replace_sp(struct ipsec_softc *, struct ipsec_variant *,
127     struct ipsec_variant *);
128 
129 static int if_ipsec_set_addr_port(struct sockaddr *, struct sockaddr *,
130     in_port_t);
/*
 * Fill the sockaddr "sa" from variant "v" through if_ipsec_set_addr_port():
 * the PSRC form passes iv_psrc/iv_sport, the PDST form iv_pdst/iv_dport.
 * Each macro evaluates to if_ipsec_set_addr_port()'s return value.
 */
#define IF_IPSEC_GATHER_PSRC_ADDR_PORT(v, sa)				\
	if_ipsec_set_addr_port((sa), (v)->iv_psrc, (v)->iv_sport)
#define IF_IPSEC_GATHER_PDST_ADDR_PORT(v, sa)				\
	if_ipsec_set_addr_port((sa), (v)->iv_pdst, (v)->iv_dport)
135 
136 /*
137  * ipsec global variable definitions
138  */
139 
140 /* This list is used in ioctl context only. */
141 static struct {
142 	LIST_HEAD(ipsec_sclist, ipsec_softc) list;
143 	bool use_fixed_reqid;
144 #define REQID_BASE_DEFAULT	0x2000
145 #define REQID_LAST_DEFAULT	0x2fff
146 	u_int16_t reqid_base;
147 	u_int16_t reqid_last;
148 	kmutex_t lock;
149 } ipsec_softcs __cacheline_aligned = {
150 	.use_fixed_reqid = false,
151 	.reqid_base = REQID_BASE_DEFAULT,
152 	.reqid_last = REQID_LAST_DEFAULT,
153 };
154 
155 struct psref_class *iv_psref_class __read_mostly;
156 
157 struct if_clone ipsec_cloner =
158     IF_CLONE_INITIALIZER("ipsec", if_ipsec_clone_create, if_ipsec_clone_destroy);
159 static int max_ipsec_nesting = MAX_IPSEC_NEST;
160 
161 static struct sysctllog *if_ipsec_sysctl;
162 
163 static pktq_rps_hash_func_t if_ipsec_pktq_rps_hash_p;
164 
165 enum {
166 	REQID_INDEX_IPV4IN = 0,
167 	REQID_INDEX_IPV4OUT,
168 	REQID_INDEX_IPV6IN,
169 	REQID_INDEX_IPV6OUT,
170 	REQID_INDEX_NUM,
171 };
172 
173 #ifdef INET6
174 static int
sysctl_if_ipsec_pmtu_global(SYSCTLFN_ARGS)175 sysctl_if_ipsec_pmtu_global(SYSCTLFN_ARGS)
176 {
177 	int error, pmtu;
178 	struct sysctlnode node = *rnode;
179 
180 	pmtu = ip6_ipsec_pmtu;
181 	node.sysctl_data = &pmtu;
182 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
183 	if (error || newp == NULL)
184 		return error;
185 
186 	switch (pmtu) {
187 	case IPSEC_PMTU_MINMTU:
188 	case IPSEC_PMTU_OUTERMTU:
189 		ip6_ipsec_pmtu = pmtu;
190 		break;
191 	default:
192 		return EINVAL;
193 	}
194 
195 	return 0;
196 }
197 
198 static int
sysctl_if_ipsec_pmtu_perif(SYSCTLFN_ARGS)199 sysctl_if_ipsec_pmtu_perif(SYSCTLFN_ARGS)
200 {
201 	int error, pmtu;
202 	struct sysctlnode node = *rnode;
203 	struct ipsec_softc *sc = (struct ipsec_softc *)node.sysctl_data;
204 
205 	pmtu = sc->ipsec_pmtu;
206 	node.sysctl_data = &pmtu;
207 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
208 	if (error || newp == NULL)
209 		return error;
210 
211 	switch (pmtu) {
212 	case IPSEC_PMTU_SYSDEFAULT:
213 	case IPSEC_PMTU_MINMTU:
214 	case IPSEC_PMTU_OUTERMTU:
215 		sc->ipsec_pmtu = pmtu;
216 		break;
217 	default:
218 		return EINVAL;
219 	}
220 
221 	return 0;
222 }
223 #endif
224 
225 static int
sysctl_if_ipsec_use_fixed_reqid(SYSCTLFN_ARGS)226 sysctl_if_ipsec_use_fixed_reqid(SYSCTLFN_ARGS)
227 {
228 	bool fixed;
229 	int error;
230 	struct sysctlnode node = *rnode;
231 
232 	mutex_enter(&ipsec_softcs.lock);
233 	fixed = ipsec_softcs.use_fixed_reqid;
234 	node.sysctl_data = &fixed;
235 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
236 	if (error || newp == NULL) {
237 		mutex_exit(&ipsec_softcs.lock);
238 		return error;
239 	}
240 
241 	if (!LIST_EMPTY(&ipsec_softcs.list)) {
242 		mutex_exit(&ipsec_softcs.lock);
243 		return EBUSY;
244 	}
245 	ipsec_softcs.use_fixed_reqid = fixed;
246 	mutex_exit(&ipsec_softcs.lock);
247 
248 	return 0;
249 }
250 
251 static int
sysctl_if_ipsec_reqid_base(SYSCTLFN_ARGS)252 sysctl_if_ipsec_reqid_base(SYSCTLFN_ARGS)
253 {
254 	int base;
255 	int error;
256 	struct sysctlnode node = *rnode;
257 
258 	mutex_enter(&ipsec_softcs.lock);
259 	base = ipsec_softcs.reqid_base;
260 	node.sysctl_data = &base;
261 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
262 	if (error || newp == NULL) {
263 		mutex_exit(&ipsec_softcs.lock);
264 		return error;
265 	}
266 
267 	if (!LIST_EMPTY(&ipsec_softcs.list)) {
268 		mutex_exit(&ipsec_softcs.lock);
269 		return EBUSY;
270 	}
271 	ipsec_softcs.reqid_base = base;
272 	mutex_exit(&ipsec_softcs.lock);
273 
274 	return 0;
275 }
276 
277 static int
sysctl_if_ipsec_reqid_last(SYSCTLFN_ARGS)278 sysctl_if_ipsec_reqid_last(SYSCTLFN_ARGS)
279 {
280 	int last;
281 	int error;
282 	struct sysctlnode node = *rnode;
283 
284 	mutex_enter(&ipsec_softcs.lock);
285 	last = ipsec_softcs.reqid_last;
286 	node.sysctl_data = &last;
287 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
288 	if (error || newp == NULL) {
289 		mutex_exit(&ipsec_softcs.lock);
290 		return error;
291 	}
292 
293 	if (!LIST_EMPTY(&ipsec_softcs.list)) {
294 		mutex_exit(&ipsec_softcs.lock);
295 		return EBUSY;
296 	}
297 	ipsec_softcs.reqid_last = last;
298 	mutex_exit(&ipsec_softcs.lock);
299 
300 	return 0;
301 }
302 
303 static void
if_ipsec_sysctl_setup(void)304 if_ipsec_sysctl_setup(void)
305 {
306 	const struct sysctlnode *node = NULL;
307 
308 	if_ipsec_sysctl = NULL;
309 
310 #ifdef INET6
311 	/*
312 	 * Previously create "net.inet6.ip6" entry to avoid sysctl_createv error.
313 	 */
314 	sysctl_createv(NULL, 0, NULL, NULL,
315 		       CTLFLAG_PERMANENT,
316 		       CTLTYPE_NODE, "inet6",
317 		       SYSCTL_DESCR("PF_INET6 related settings"),
318 		       NULL, 0, NULL, 0,
319 		       CTL_NET, PF_INET6, CTL_EOL);
320 	sysctl_createv(NULL, 0, NULL, NULL,
321 		       CTLFLAG_PERMANENT,
322 		       CTLTYPE_NODE, "ip6",
323 		       SYSCTL_DESCR("IPv6 related settings"),
324 		       NULL, 0, NULL, 0,
325 		       CTL_NET, PF_INET6, IPPROTO_IPV6, CTL_EOL);
326 
327 	sysctl_createv(&if_ipsec_sysctl, 0, NULL, NULL,
328 		       CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
329 		       CTLTYPE_INT, "ipsecifhlim",
330 		       SYSCTL_DESCR("Default hop limit for a ipsec tunnel datagram"),
331 		       NULL, 0, &ip6_ipsec_hlim, 0,
332 		       CTL_NET, PF_INET6, IPPROTO_IPV6,
333 		       IPV6CTL_IPSEC_HLIM, CTL_EOL);
334 
335 	sysctl_createv(&if_ipsec_sysctl, 0, NULL, NULL,
336 		       CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
337 		       CTLTYPE_INT, "ipsecifpmtu",
338 		       SYSCTL_DESCR("Default Path MTU setting for ipsec tunnels"),
339 		       sysctl_if_ipsec_pmtu_global, 0, NULL, 0,
340 		       CTL_NET, PF_INET6, IPPROTO_IPV6,
341 		       IPV6CTL_IPSEC_PMTU, CTL_EOL);
342 #endif
343 
344 	sysctl_createv(&if_ipsec_sysctl, 0, NULL, &node,
345 	    CTLFLAG_PERMANENT,
346 	    CTLTYPE_NODE, "ipsecif",
347 	    SYSCTL_DESCR("ipsecif global control"),
348 	    NULL, 0, NULL, 0,
349 	    CTL_NET, CTL_CREATE, CTL_EOL);
350 
351 	sysctl_createv(&if_ipsec_sysctl, 0, &node, NULL,
352 	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
353 	    CTLTYPE_STRING, "rps_hash",
354 	    SYSCTL_DESCR("Interface rps hash function control"),
355 	    sysctl_pktq_rps_hash_handler, 0, (void *)&if_ipsec_pktq_rps_hash_p,
356 	    PKTQ_RPS_HASH_NAME_LEN,
357 	    CTL_CREATE, CTL_EOL);
358 
359 	sysctl_createv(&if_ipsec_sysctl, 0, &node, NULL,
360 	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
361 	    CTLTYPE_BOOL, "use_fixed_reqid",
362 	    SYSCTL_DESCR("use fixed reqid for SP"),
363 	    sysctl_if_ipsec_use_fixed_reqid, 0, NULL, 0,
364 	    CTL_CREATE, CTL_EOL);
365 	sysctl_createv(&if_ipsec_sysctl, 0, &node, NULL,
366 	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
367 	    CTLTYPE_INT, "reqid_base",
368 	    SYSCTL_DESCR("base value of fixed reqid"),
369 	    sysctl_if_ipsec_reqid_base, 0, NULL, 0,
370 	    CTL_CREATE, CTL_EOL);
371 	sysctl_createv(&if_ipsec_sysctl, 0, &node, NULL,
372 	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
373 	    CTLTYPE_INT, "reqid_last",
374 	    SYSCTL_DESCR("last value of fixed reqid"),
375 	    sysctl_if_ipsec_reqid_last, 0, NULL, 0,
376 	    CTL_CREATE, CTL_EOL);
377 
378 }
379 
/*
 * Create the per-interface "pmtu" sysctl node under
 * net.interfaces.<ifname> and set the interface's PMTU policy to
 * IPSEC_PMTU_SYSDEFAULT.  Does nothing when INET6 is not configured.
 *
 * clog: sysctl log that records the created nodes.
 * sc:   softc of the interface; also stored as the node's sysctl_data.
 *
 * Failure to create any of the nodes is logged and otherwise ignored.
 */
static void
if_ipsec_perif_sysctl_setup(struct sysctllog **clog, struct ipsec_softc *sc)
{
#ifdef INET6
	const struct sysctlnode *cnode, *rnode = NULL;
	struct ifnet *ifp = &sc->ipsec_if;
	const char *ifname = ifp->if_xname;
	int rv;

	/*
	 * Set the default before the node becomes visible, so that the
	 * handler never reports (or has a write clobbered by) a late
	 * initialization.
	 */
	sc->ipsec_pmtu = IPSEC_PMTU_SYSDEFAULT;

	/*
	 * Already created in sysctl_sndq_setup().
	 */
	rv = sysctl_createv(clog, 0, NULL, &rnode,
			    CTLFLAG_PERMANENT,
			    CTLTYPE_NODE, "interfaces",
			    SYSCTL_DESCR("Per-interface controls"),
			    NULL, 0, NULL, 0,
			    CTL_NET, CTL_CREATE, CTL_EOL);
	if (rv != 0 || rnode == NULL)
		goto fail;

	rv = sysctl_createv(clog, 0, &rnode, &rnode,
			    CTLFLAG_PERMANENT,
			    CTLTYPE_NODE, ifname,
			    SYSCTL_DESCR("Interface controls"),
			    NULL, 0, NULL, 0,
			    CTL_CREATE, CTL_EOL);
	if (rv != 0 || rnode == NULL)
		goto fail;

	rv = sysctl_createv(clog, 0, &rnode, &cnode,
			    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
			    CTLTYPE_INT, "pmtu",
			    SYSCTL_DESCR("Path MTU setting for this ipsec tunnel"),
			    sysctl_if_ipsec_pmtu_perif, 0, (void *)sc, 0,
			    CTL_CREATE, CTL_EOL);
	if (rv != 0)
		goto fail;

	return;

fail:
	/* Never continue with a parent pointer that was not filled in. */
	log(LOG_WARNING, "%s: could not attach sysctl node pmtu\n", ifname);
#endif
}
417 
418 /* ARGSUSED */
419 void
ipsecifattach(int count)420 ipsecifattach(int count)
421 {
422 
423 	mutex_init(&ipsec_softcs.lock, MUTEX_DEFAULT, IPL_NONE);
424 	LIST_INIT(&ipsec_softcs.list);
425 
426 	iv_psref_class = psref_class_create("ipsecvar", IPL_SOFTNET);
427 
428 	if_ipsec_pktq_rps_hash_p = pktq_rps_hash_default;
429 	if_ipsec_sysctl_setup();
430 
431 	if_clone_attach(&ipsec_cloner);
432 }
433 
434 static int
if_ipsec_clone_create(struct if_clone * ifc,int unit)435 if_ipsec_clone_create(struct if_clone *ifc, int unit)
436 {
437 	struct ipsec_softc *sc;
438 	struct ipsec_variant *var;
439 	struct ifnet *ifp;
440 
441 	sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
442 
443 	if_initname(&sc->ipsec_if, ifc->ifc_name, unit);
444 
445 	if_ipsec_attach0(sc);
446 
447 	ifp = &sc->ipsec_if;
448 	if_ipsec_perif_sysctl_setup(&ifp->if_sysctl_log, sc);
449 
450 	var = kmem_zalloc(sizeof(*var), KM_SLEEP);
451 	var->iv_softc = sc;
452 	psref_target_init(&var->iv_psref, iv_psref_class);
453 
454 	sc->ipsec_var = var;
455 	mutex_init(&sc->ipsec_lock, MUTEX_DEFAULT, IPL_NONE);
456 	sc->ipsec_psz = pserialize_create();
457 	sc->ipsec_ro_percpu = if_tunnel_alloc_ro_percpu();
458 
459 	mutex_enter(&ipsec_softcs.lock);
460 	LIST_INSERT_HEAD(&ipsec_softcs.list, sc, ipsec_list);
461 	mutex_exit(&ipsec_softcs.lock);
462 	return 0;
463 }
464 
465 static void
if_ipsec_attach0(struct ipsec_softc * sc)466 if_ipsec_attach0(struct ipsec_softc *sc)
467 {
468 
469 	sc->ipsec_if.if_addrlen = 0;
470 	sc->ipsec_if.if_mtu    = IPSEC_MTU;
471 	sc->ipsec_if.if_flags  = IFF_POINTOPOINT | IFF_MULTICAST;
472 	/* set ipsec(4) specific default flags. */
473 	sc->ipsec_if.if_flags  |= IFF_FWD_IPV6;
474 	sc->ipsec_if.if_extflags = IFEF_MPSAFE;
475 	sc->ipsec_if.if_ioctl  = if_ipsec_ioctl;
476 	sc->ipsec_if.if_output = if_ipsec_output;
477 	sc->ipsec_if.if_type   = IFT_IPSEC;
478 	sc->ipsec_if.if_dlt    = DLT_NULL;
479 	sc->ipsec_if.if_softc  = sc;
480 	IFQ_SET_READY(&sc->ipsec_if.if_snd);
481 	if_initialize(&sc->ipsec_if);
482 	sc->ipsec_if.if_link_state = LINK_STATE_DOWN;
483 	if_alloc_sadl(&sc->ipsec_if);
484 	bpf_attach(&sc->ipsec_if, DLT_NULL, sizeof(u_int));
485 	if_register(&sc->ipsec_if);
486 }
487 
488 static int
if_ipsec_clone_destroy(struct ifnet * ifp)489 if_ipsec_clone_destroy(struct ifnet *ifp)
490 {
491 	struct ipsec_softc *sc = ifp->if_softc;
492 	struct ipsec_variant *var;
493 	int bound;
494 
495 	mutex_enter(&ipsec_softcs.lock);
496 	LIST_REMOVE(sc, ipsec_list);
497 	mutex_exit(&ipsec_softcs.lock);
498 
499 	bound = curlwp_bind();
500 	if_ipsec_delete_tunnel(&sc->ipsec_if);
501 	curlwp_bindx(bound);
502 
503 	bpf_detach(ifp);
504 	if_detach(ifp);
505 
506 	if_tunnel_free_ro_percpu(sc->ipsec_ro_percpu);
507 
508 	pserialize_destroy(sc->ipsec_psz);
509 	mutex_destroy(&sc->ipsec_lock);
510 
511 	var = sc->ipsec_var;
512 	kmem_free(var, sizeof(*var));
513 	kmem_free(sc, sizeof(*sc));
514 
515 	return 0;
516 }
517 
518 static inline bool
if_ipsec_nat_t(struct ipsec_softc * sc)519 if_ipsec_nat_t(struct ipsec_softc *sc)
520 {
521 
522 	return (sc->ipsec_if.if_flags & IFF_NAT_T) != 0;
523 }
524 
525 static inline bool
if_ipsec_fwd_ipv6(struct ipsec_softc * sc)526 if_ipsec_fwd_ipv6(struct ipsec_softc *sc)
527 {
528 
529 	return (sc->ipsec_if.if_flags & IFF_FWD_IPV6) != 0;
530 }
531 
532 int
if_ipsec_encap_func(struct mbuf * m,int off,int proto,void * arg)533 if_ipsec_encap_func(struct mbuf *m, int off, int proto, void *arg)
534 {
535 	uint8_t v;
536 	struct ipsec_softc *sc;
537 	struct ipsec_variant *var = NULL;
538 	struct psref psref;
539 	int ret = 0;
540 
541 	sc = arg;
542 	KASSERT(sc != NULL);
543 
544 	if ((sc->ipsec_if.if_flags & IFF_UP) == 0)
545 		goto out;
546 
547 	var = if_ipsec_getref_variant(sc, &psref);
548 	if (if_ipsec_variant_is_unconfigured(var))
549 		goto out;
550 
551 	switch (proto) {
552 	case IPPROTO_IPV4:
553 	case IPPROTO_IPV6:
554 		break;
555 	default:
556 		goto out;
557 	}
558 
559 	m_copydata(m, 0, sizeof(v), &v);
560 	v = (v >> 4) & 0xff;  /* Get the IP version number. */
561 
562 	switch (v) {
563 #ifdef INET
564 	case IPVERSION: {
565 		struct ip ip;
566 
567 		if (m->m_pkthdr.len < sizeof(ip))
568 			goto out;
569 
570 		m_copydata(m, 0, sizeof(ip), &ip);
571 		if (var->iv_psrc->sa_family != AF_INET ||
572 		    var->iv_pdst->sa_family != AF_INET)
573 			goto out;
574 		ret = ipsecif4_encap_func(m, &ip, var);
575 		break;
576 	}
577 #endif
578 #ifdef INET6
579 	case (IPV6_VERSION >> 4): {
580 		struct ip6_hdr ip6;
581 
582 		if (m->m_pkthdr.len < sizeof(ip6))
583 			goto out;
584 
585 		m_copydata(m, 0, sizeof(ip6), &ip6);
586 		if (var->iv_psrc->sa_family != AF_INET6 ||
587 		    var->iv_pdst->sa_family != AF_INET6)
588 			goto out;
589 		ret = ipsecif6_encap_func(m, &ip6, var);
590 		break;
591 	}
592 #endif
593 	default:
594 		goto out;
595 	}
596 
597 out:
598 	if (var != NULL)
599 		if_ipsec_putref_variant(var, &psref);
600 	return ret;
601 }
602 
603 /*
604  * ipsec(4) I/F may cause infinite recursion calls when misconfigured.
605  * We'll prevent this by introducing upper limit.
606  */
607 static int
if_ipsec_check_nesting(struct ifnet * ifp,struct mbuf * m)608 if_ipsec_check_nesting(struct ifnet *ifp, struct mbuf *m)
609 {
610 
611 	return if_tunnel_check_nesting(ifp, m, max_ipsec_nesting);
612 }
613 
614 int
if_ipsec_output(struct ifnet * ifp,struct mbuf * m,const struct sockaddr * dst,const struct rtentry * rt)615 if_ipsec_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
616     const struct rtentry *rt)
617 {
618 	struct ipsec_softc *sc = ifp->if_softc;
619 	struct ipsec_variant *var;
620 	struct psref psref;
621 	int error;
622 	int bound;
623 
624 	IFQ_CLASSIFY(&ifp->if_snd, m, dst->sa_family);
625 
626 	error = if_ipsec_check_nesting(ifp, m);
627 	if (error) {
628 		m_freem(m);
629 		goto noref_end;
630 	}
631 
632 	if ((ifp->if_flags & IFF_UP) == 0) {
633 		m_freem(m);
634 		error = ENETDOWN;
635 		goto noref_end;
636 	}
637 
638 
639 	bound = curlwp_bind();
640 	var = if_ipsec_getref_variant(sc, &psref);
641 	if (if_ipsec_variant_is_unconfigured(var)) {
642 		m_freem(m);
643 		error = ENETDOWN;
644 		goto end;
645 	}
646 
647 	m->m_flags &= ~(M_BCAST|M_MCAST);
648 
649 	/* use DLT_NULL encapsulation here to pass inner af type */
650 	M_PREPEND(m, sizeof(int), M_DONTWAIT);
651 	if (!m) {
652 		error = ENOBUFS;
653 		goto end;
654 	}
655 	*mtod(m, int *) = dst->sa_family;
656 
657 #if INET6
658 	/* drop IPv6 packet if IFF_FWD_IPV6 is not set */
659 	if (dst->sa_family == AF_INET6 &&
660 	    !if_ipsec_fwd_ipv6(sc)) {
661 		/*
662 		 * IPv6 packet is not allowed to forward,that is not error.
663 		 */
664 		error = 0;
665 		IF_DROP(&ifp->if_snd);
666 		m_freem(m);
667 		goto end;
668 	}
669 #endif
670 
671 	error = if_ipsec_out_direct(var, m, dst->sa_family);
672 
673 end:
674 	if_ipsec_putref_variant(var, &psref);
675 	curlwp_bindx(bound);
676 noref_end:
677 	if (error)
678 		if_statinc(ifp, if_oerrors);
679 
680 	return error;
681 }
682 
683 static inline int
if_ipsec_out_direct(struct ipsec_variant * var,struct mbuf * m,int family)684 if_ipsec_out_direct(struct ipsec_variant *var, struct mbuf *m, int family)
685 {
686 	struct ifnet *ifp = &var->iv_softc->ipsec_if;
687 	int error;
688 	int len;
689 
690 	KASSERT(if_ipsec_heldref_variant(var));
691 	KASSERT(var->iv_output != NULL);
692 
693 	len = m->m_pkthdr.len;
694 
695 	/* input DLT_NULL frame to BPF */
696 	bpf_mtap(ifp, m, BPF_D_OUT);
697 
698 	/* grab and chop off inner af type */
699 	/* XXX need pullup? */
700 	m_adj(m, sizeof(int));
701 
702 	error = var->iv_output(var, family, m);
703 	if (error)
704 		return error;
705 
706 	if_statadd2(ifp, if_opackets, 1, if_obytes, len);
707 
708 	return 0;
709 }
710 
711 void
if_ipsec_input(struct mbuf * m,int af,struct ifnet * ifp)712 if_ipsec_input(struct mbuf *m, int af, struct ifnet *ifp)
713 {
714 
715 	KASSERT(ifp != NULL);
716 
717 	m_set_rcvif(m, ifp);
718 
719 	bpf_mtap_af(ifp, af, m, BPF_D_IN);
720 
721 	if_ipsec_in_enqueue(m, af, ifp);
722 
723 	return;
724 }
725 
726 static inline void
if_ipsec_in_enqueue(struct mbuf * m,int af,struct ifnet * ifp)727 if_ipsec_in_enqueue(struct mbuf *m, int af, struct ifnet *ifp)
728 {
729 	pktqueue_t *pktq;
730 	int pktlen;
731 
732 	/*
733 	 * Put the packet to the network layer input queue according to the
734 	 * specified address family.
735 	 */
736 	switch (af) {
737 #ifdef INET
738 	case AF_INET:
739 		pktq = ip_pktq;
740 		break;
741 #endif
742 #ifdef INET6
743 	case AF_INET6:
744 		pktq = ip6_pktq;
745 		break;
746 #endif
747 	default:
748 		if_statinc(ifp, if_ierrors);
749 		m_freem(m);
750 		return;
751 	}
752 
753 	const uint32_t h = pktq_rps_hash(&if_ipsec_pktq_rps_hash_p, m);
754 	pktlen = m->m_pkthdr.len;
755 	if (__predict_true(pktq_enqueue(pktq, m, h))) {
756 		if_statadd2(ifp, if_ibytes, pktlen, if_ipackets, 1);
757 	} else {
758 		if_statinc(ifp, if_iqdrops);
759 		m_freem(m);
760 	}
761 
762 	return;
763 }
764 
765 static inline int
if_ipsec_check_salen(struct sockaddr * addr)766 if_ipsec_check_salen(struct sockaddr *addr)
767 {
768 
769 	switch (addr->sa_family) {
770 #ifdef INET
771 	case AF_INET:
772 		if (addr->sa_len != sizeof(struct sockaddr_in))
773 			return EINVAL;
774 		break;
775 #endif /* INET */
776 #ifdef INET6
777 	case AF_INET6:
778 		if (addr->sa_len != sizeof(struct sockaddr_in6))
779 			return EINVAL;
780 		break;
781 #endif /* INET6 */
782 	default:
783 		return EAFNOSUPPORT;
784 	}
785 
786 	return 0;
787 }
788 
789 /* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
790 int
if_ipsec_ioctl(struct ifnet * ifp,u_long cmd,void * data)791 if_ipsec_ioctl(struct ifnet *ifp, u_long cmd, void *data)
792 {
793 	struct ipsec_softc *sc  = ifp->if_softc;
794 	struct ipsec_variant *var = NULL;
795 	struct ifreq     *ifr = (struct ifreq*)data;
796 	struct ifaddr    *ifa = (struct ifaddr*)data;
797 	int error = 0, size;
798 	struct sockaddr *dst, *src;
799 	u_long mtu;
800 	u_short oflags = ifp->if_flags;
801 	int bound;
802 	struct psref psref;
803 
804 	switch (cmd) {
805 	case SIOCINITIFADDR:
806 		ifp->if_flags |= IFF_UP;
807 		ifa->ifa_rtrequest = p2p_rtrequest;
808 		break;
809 
810 	case SIOCSIFDSTADDR:
811 		break;
812 
813 	case SIOCADDMULTI:
814 	case SIOCDELMULTI:
815 		switch (ifr->ifr_addr.sa_family) {
816 #ifdef INET
817 		case AF_INET:	/* IP supports Multicast */
818 			break;
819 #endif /* INET */
820 #ifdef INET6
821 		case AF_INET6:	/* IP6 supports Multicast */
822 			break;
823 #endif /* INET6 */
824 		default:  /* Other protocols doesn't support Multicast */
825 			error = EAFNOSUPPORT;
826 			break;
827 		}
828 		break;
829 
830 	case SIOCSIFMTU:
831 		mtu = ifr->ifr_mtu;
832 		if (mtu < IPSEC_MTU_MIN || mtu > IPSEC_MTU_MAX)
833 			return EINVAL;
834 		else if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
835 			error = 0;
836 		break;
837 
838 #ifdef INET
839 	case SIOCSIFPHYADDR:
840 #endif
841 #ifdef INET6
842 	case SIOCSIFPHYADDR_IN6:
843 #endif /* INET6 */
844 	case SIOCSLIFPHYADDR:
845 		switch (cmd) {
846 #ifdef INET
847 		case SIOCSIFPHYADDR:
848 			src = (struct sockaddr *)
849 				&(((struct in_aliasreq *)data)->ifra_addr);
850 			dst = (struct sockaddr *)
851 				&(((struct in_aliasreq *)data)->ifra_dstaddr);
852 			break;
853 #endif /* INET */
854 #ifdef INET6
855 		case SIOCSIFPHYADDR_IN6:
856 			src = (struct sockaddr *)
857 				&(((struct in6_aliasreq *)data)->ifra_addr);
858 			dst = (struct sockaddr *)
859 				&(((struct in6_aliasreq *)data)->ifra_dstaddr);
860 			break;
861 #endif /* INET6 */
862 		case SIOCSLIFPHYADDR:
863 			src = (struct sockaddr *)
864 				&(((struct if_laddrreq *)data)->addr);
865 			dst = (struct sockaddr *)
866 				&(((struct if_laddrreq *)data)->dstaddr);
867 			break;
868 		default:
869 			return EINVAL;
870 		}
871 
872 		/* sa_family must be equal */
873 		if (src->sa_family != dst->sa_family)
874 			return EINVAL;
875 
876 		error = if_ipsec_check_salen(src);
877 		if (error)
878 			return error;
879 		error = if_ipsec_check_salen(dst);
880 		if (error)
881 			return error;
882 
883 		/* check sa_family looks sane for the cmd */
884 		switch (cmd) {
885 #ifdef INET
886 		case SIOCSIFPHYADDR:
887 			if (src->sa_family == AF_INET)
888 				break;
889 			return EAFNOSUPPORT;
890 #endif /* INET */
891 #ifdef INET6
892 		case SIOCSIFPHYADDR_IN6:
893 			if (src->sa_family == AF_INET6)
894 				break;
895 			return EAFNOSUPPORT;
896 #endif /* INET6 */
897 		case SIOCSLIFPHYADDR:
898 			/* checks done in the above */
899 			break;
900 		}
901 		/*
902 		 * calls if_ipsec_getref_variant() for other softcs to check
903 		 * address pair duplication
904 		 */
905 		bound = curlwp_bind();
906 		error = if_ipsec_set_tunnel(&sc->ipsec_if, src, dst);
907 		if (error)
908 			goto bad;
909 		if_link_state_change(&sc->ipsec_if, LINK_STATE_UP);
910 		curlwp_bindx(bound);
911 		break;
912 
913 	case SIOCDIFPHYADDR:
914 		bound = curlwp_bind();
915 		if_ipsec_delete_tunnel(&sc->ipsec_if);
916 		if_link_state_change(&sc->ipsec_if, LINK_STATE_DOWN);
917 		curlwp_bindx(bound);
918 		break;
919 
920 	case SIOCGIFPSRCADDR:
921 #ifdef INET6
922 	case SIOCGIFPSRCADDR_IN6:
923 #endif /* INET6 */
924 		bound = curlwp_bind();
925 		var = if_ipsec_getref_variant(sc, &psref);
926 		if (var->iv_psrc == NULL) {
927 			error = EADDRNOTAVAIL;
928 			goto bad;
929 		}
930 		src = var->iv_psrc;
931 		switch (cmd) {
932 #ifdef INET
933 		case SIOCGIFPSRCADDR:
934 			dst = &ifr->ifr_addr;
935 			size = sizeof(ifr->ifr_addr);
936 			break;
937 #endif /* INET */
938 #ifdef INET6
939 		case SIOCGIFPSRCADDR_IN6:
940 			dst = (struct sockaddr *)
941 				&(((struct in6_ifreq *)data)->ifr_addr);
942 			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
943 			break;
944 #endif /* INET6 */
945 		default:
946 			error = EADDRNOTAVAIL;
947 			goto bad;
948 		}
949 		if (src->sa_len > size) {
950 			error = EINVAL;
951 			goto bad;
952 		}
953 		error = IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, dst);
954 		if (error)
955 			goto bad;
956 		if_ipsec_putref_variant(var, &psref);
957 		curlwp_bindx(bound);
958 		break;
959 
960 	case SIOCGIFPDSTADDR:
961 #ifdef INET6
962 	case SIOCGIFPDSTADDR_IN6:
963 #endif /* INET6 */
964 		bound = curlwp_bind();
965 		var = if_ipsec_getref_variant(sc, &psref);
966 		if (var->iv_pdst == NULL) {
967 			error = EADDRNOTAVAIL;
968 			goto bad;
969 		}
970 		src = var->iv_pdst;
971 		switch (cmd) {
972 #ifdef INET
973 		case SIOCGIFPDSTADDR:
974 			dst = &ifr->ifr_addr;
975 			size = sizeof(ifr->ifr_addr);
976 			break;
977 #endif /* INET */
978 #ifdef INET6
979 		case SIOCGIFPDSTADDR_IN6:
980 			dst = (struct sockaddr *)
981 				&(((struct in6_ifreq *)data)->ifr_addr);
982 			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
983 			break;
984 #endif /* INET6 */
985 		default:
986 			error = EADDRNOTAVAIL;
987 			goto bad;
988 		}
989 		if (src->sa_len > size) {
990 			error = EINVAL;
991 			goto bad;
992 		}
993 		error = IF_IPSEC_GATHER_PDST_ADDR_PORT(var, dst);
994 		if (error)
995 			goto bad;
996 		if_ipsec_putref_variant(var, &psref);
997 		curlwp_bindx(bound);
998 		break;
999 
1000 	case SIOCGLIFPHYADDR:
1001 		bound = curlwp_bind();
1002 		var = if_ipsec_getref_variant(sc, &psref);
1003 		if (if_ipsec_variant_is_unconfigured(var)) {
1004 			error = EADDRNOTAVAIL;
1005 			goto bad;
1006 		}
1007 
1008 		/* copy src */
1009 		src = var->iv_psrc;
1010 		dst = (struct sockaddr *)
1011 			&(((struct if_laddrreq *)data)->addr);
1012 		size = sizeof(((struct if_laddrreq *)data)->addr);
1013 		if (src->sa_len > size) {
1014 			error = EINVAL;
1015 			goto bad;
1016 		}
1017 		error = IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, dst);
1018 		if (error)
1019 			goto bad;
1020 
1021 		/* copy dst */
1022 		src = var->iv_pdst;
1023 		dst = (struct sockaddr *)
1024 			&(((struct if_laddrreq *)data)->dstaddr);
1025 		size = sizeof(((struct if_laddrreq *)data)->dstaddr);
1026 		if (src->sa_len > size) {
1027 			error = EINVAL;
1028 			goto bad;
1029 		}
1030 		error = IF_IPSEC_GATHER_PDST_ADDR_PORT(var, dst);
1031 		if (error)
1032 			goto bad;
1033 		if_ipsec_putref_variant(var, &psref);
1034 		curlwp_bindx(bound);
1035 		break;
1036 
1037 	default:
1038 		error = ifioctl_common(ifp, cmd, data);
1039 		if (!error) {
1040 			bound = curlwp_bind();
1041 			error = if_ipsec_ensure_flags(&sc->ipsec_if, oflags);
1042 			if (error)
1043 				goto bad;
1044 			curlwp_bindx(bound);
1045 		}
1046 		break;
1047 	}
1048 	return error;
1049 
1050 bad:
1051 	if (var != NULL)
1052 		if_ipsec_putref_variant(var, &psref);
1053 	curlwp_bindx(bound);
1054 
1055 	return error;
1056 }
1057 
1058 struct encap_funcs {
1059 #ifdef INET
1060 	int (*ef_inet)(struct ipsec_variant *);
1061 #endif
1062 #ifdef INET6
1063 	int (*ef_inet6)(struct ipsec_variant *);
1064 #endif
1065 };
1066 
1067 static struct encap_funcs ipsec_encap_attach = {
1068 #ifdef INET
1069 	.ef_inet = ipsecif4_attach,
1070 #endif
1071 #ifdef INET6
1072 	.ef_inet6 = &ipsecif6_attach,
1073 #endif
1074 };
1075 
1076 static struct encap_funcs ipsec_encap_detach = {
1077 #ifdef INET
1078 	.ef_inet = ipsecif4_detach,
1079 #endif
1080 #ifdef INET6
1081 	.ef_inet6 = &ipsecif6_detach,
1082 #endif
1083 };
1084 
1085 static int
if_ipsec_encap_common(struct ipsec_variant * var,struct encap_funcs * funcs)1086 if_ipsec_encap_common(struct ipsec_variant *var, struct encap_funcs *funcs)
1087 {
1088 	int error;
1089 
1090 	KASSERT(var != NULL);
1091 	KASSERT(if_ipsec_variant_is_configured(var));
1092 
1093 	switch (var->iv_psrc->sa_family) {
1094 #ifdef INET
1095 	case AF_INET:
1096 		error = (funcs->ef_inet)(var);
1097 		break;
1098 #endif /* INET */
1099 #ifdef INET6
1100 	case AF_INET6:
1101 		error = (funcs->ef_inet6)(var);
1102 		break;
1103 #endif /* INET6 */
1104 	default:
1105 		error = EINVAL;
1106 		break;
1107 	}
1108 
1109 	return error;
1110 }
1111 
1112 static int
if_ipsec_encap_attach(struct ipsec_variant * var)1113 if_ipsec_encap_attach(struct ipsec_variant *var)
1114 {
1115 
1116 	return if_ipsec_encap_common(var, &ipsec_encap_attach);
1117 }
1118 
1119 static int
if_ipsec_encap_detach(struct ipsec_variant * var)1120 if_ipsec_encap_detach(struct ipsec_variant *var)
1121 {
1122 
1123 	return if_ipsec_encap_common(var, &ipsec_encap_detach);
1124 }
1125 
1126 /*
1127  * Validate and set ipsec(4) I/F configurations.
1128  *     (1) validate
1129  *         (1-1) Check the argument src and dst address pair will change
1130  *               configuration from current src and dst address pair.
1131  *         (1-2) Check any ipsec(4) I/F uses duplicated src and dst address pair
1132  *               with argument src and dst address pair, except for NAT-T shared
1133  *               tunnels.
1134  *     (2) set
1135  *         (2-1) Create variant for new configuration.
1136  *         (2-2) Create temporary "null" variant used to avoid to access
1137  *               dangling variant while SPs are deleted and added.
1138  *         (2-3) Swap variant include its SPs.
1139  *         (2-4) Cleanup last configurations.
1140  */
1141 static int
if_ipsec_set_tunnel(struct ifnet * ifp,struct sockaddr * src,struct sockaddr * dst)1142 if_ipsec_set_tunnel(struct ifnet *ifp,
1143     struct sockaddr *src, struct sockaddr *dst)
1144 {
1145 	struct ipsec_softc *sc = ifp->if_softc;
1146 	struct ipsec_softc *sc2;
1147 	struct ipsec_variant *ovar, *nvar, *nullvar;
1148 	struct sockaddr *osrc, *odst;
1149 	struct sockaddr *nsrc, *ndst;
1150 	in_port_t nsport = 0, ndport = 0;
1151 	int error;
1152 
1153 	error = encap_lock_enter();
1154 	if (error)
1155 		return error;
1156 
1157 	nsrc = sockaddr_dup(src, M_WAITOK);
1158 	ndst = sockaddr_dup(dst, M_WAITOK);
1159 	nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
1160 	nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
1161 
1162 	mutex_enter(&sc->ipsec_lock);
1163 
1164 	ovar = sc->ipsec_var;
1165 
1166 	switch(nsrc->sa_family) {
1167 #ifdef INET
1168 	case AF_INET:
1169 		nsport = satosin(src)->sin_port;
1170 		/*
1171 		 * avoid confuse SP when NAT-T disabled,
1172 		 * e.g.
1173 		 *     expected: 10.0.1.2[any] 10.0.1.1[any] 4(ipv4)
1174 		 *     confuse : 10.0.1.2[600] 10.0.1.1[600] 4(ipv4)
1175 		 */
1176 		satosin(nsrc)->sin_port = 0;
1177 		ndport = satosin(dst)->sin_port;
1178 		satosin(ndst)->sin_port = 0;
1179 		break;
1180 #endif /* INET */
1181 #ifdef INET6
1182 	case AF_INET6:
1183 		nsport = satosin6(src)->sin6_port;
1184 		satosin6(nsrc)->sin6_port = 0;
1185 		ndport = satosin6(dst)->sin6_port;
1186 		satosin6(ndst)->sin6_port = 0;
1187 		break;
1188 #endif /* INET6 */
1189 	default:
1190 		log(LOG_DEBUG,
1191 		    "%s: Invalid address family: %d.\n",
1192 		    __func__, src->sa_family);
1193 		error = EINVAL;
1194 		goto out;
1195 	}
1196 
1197 	/*
1198 	 * (1-1) Check the argument src and dst address pair will change
1199 	 *       configuration from current src and dst address pair.
1200 	 */
1201 	if ((ovar->iv_pdst && sockaddr_cmp(ovar->iv_pdst, dst) == 0) &&
1202 	    (ovar->iv_psrc && sockaddr_cmp(ovar->iv_psrc, src) == 0) &&
1203 	    (ovar->iv_sport == nsport && ovar->iv_dport == ndport)) {
1204 		/* address and port pair not changed. */
1205 		error = 0;
1206 		goto out;
1207 	}
1208 
1209 	/*
1210 	 * (1-2) Check any ipsec(4) I/F uses duplicated src and dst address pair
1211 	 *       with argument src and dst address pair, except for NAT-T shared
1212 	 *       tunnels.
1213 	 */
1214 	mutex_enter(&ipsec_softcs.lock);
1215 	LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
1216 		struct ipsec_variant *var2;
1217 		struct psref psref;
1218 
1219 		if (sc2 == sc)
1220 			continue;
1221 		var2 = if_ipsec_getref_variant(sc2, &psref);
1222 		if (if_ipsec_variant_is_unconfigured(var2)) {
1223 			if_ipsec_putref_variant(var2, &psref);
1224 			continue;
1225 		}
1226 		if (if_ipsec_nat_t(sc) || if_ipsec_nat_t(sc2)) {
1227 			if_ipsec_putref_variant(var2, &psref);
1228 			continue; /* NAT-T shared tunnel */
1229 		}
1230 		if (sockaddr_cmp(var2->iv_pdst, dst) == 0 &&
1231 		    sockaddr_cmp(var2->iv_psrc, src) == 0) {
1232 			if_ipsec_putref_variant(var2, &psref);
1233 			mutex_exit(&ipsec_softcs.lock);
1234 			error = EADDRNOTAVAIL;
1235 			goto out;
1236 		}
1237 
1238 		if_ipsec_putref_variant(var2, &psref);
1239 		/* XXX both end must be valid? (I mean, not 0.0.0.0) */
1240 	}
1241 	mutex_exit(&ipsec_softcs.lock);
1242 
1243 
1244 	osrc = ovar->iv_psrc;
1245 	odst = ovar->iv_pdst;
1246 
1247 	/*
1248 	 * (2-1) Create ipsec_variant for new configuration.
1249 	 */
1250 	if_ipsec_copy_variant(nvar, ovar);
1251 	nvar->iv_psrc = nsrc;
1252 	nvar->iv_pdst = ndst;
1253 	nvar->iv_sport = nsport;
1254 	nvar->iv_dport = ndport;
1255 	nvar->iv_encap_cookie4 = NULL;
1256 	nvar->iv_encap_cookie6 = NULL;
1257 	psref_target_init(&nvar->iv_psref, iv_psref_class);
1258 	error = if_ipsec_encap_attach(nvar);
1259 	if (error)
1260 		goto out;
1261 
1262 	/*
1263 	 * (2-2) Create temporary "null" variant.
1264 	 */
1265 	if_ipsec_copy_variant(nullvar, ovar);
1266 	if_ipsec_clear_config(nullvar);
1267 	psref_target_init(&nullvar->iv_psref, iv_psref_class);
1268 	/*
1269 	 * (2-3) Swap variant include its SPs.
1270 	 */
1271 	error = if_ipsec_update_variant(sc, nvar, nullvar);
1272 	if (error) {
1273 		if_ipsec_encap_detach(nvar);
1274 		goto out;
1275 	}
1276 
1277 	mutex_exit(&sc->ipsec_lock);
1278 
1279 	/*
1280 	 * (2-4) Cleanup last configurations.
1281 	 */
1282 	if (if_ipsec_variant_is_configured(ovar))
1283 		if_ipsec_encap_detach(ovar);
1284 	encap_lock_exit();
1285 
1286 	if (osrc != NULL)
1287 		sockaddr_free(osrc);
1288 	if (odst != NULL)
1289 		sockaddr_free(odst);
1290 	kmem_free(ovar, sizeof(*ovar));
1291 	kmem_free(nullvar, sizeof(*nullvar));
1292 
1293 	return 0;
1294 
1295 out:
1296 	mutex_exit(&sc->ipsec_lock);
1297 	encap_lock_exit();
1298 
1299 	sockaddr_free(nsrc);
1300 	sockaddr_free(ndst);
1301 	kmem_free(nvar, sizeof(*nvar));
1302 	kmem_free(nullvar, sizeof(*nullvar));
1303 
1304 	return error;
1305 }
1306 
1307 /*
1308  * Validate and delete ipsec(4) I/F configurations.
1309  *     (1) validate
1310  *         (1-1) Check current src and dst address pair are null,
1311  *               which means the ipsec(4) I/F is already done deletetunnel.
1312  *     (2) delete
1313  *         (2-1) Create variant for deleted status.
1314  *         (2-2) Create temporary "null" variant used to avoid to access
1315  *               dangling variant while SPs are deleted and added.
1316  *               NOTE:
1317  *               The contents of temporary "null" variant equal to the variant
1318  *               of (2-1), however two psref_target_destroy() synchronization
1319  *               points are necessary to avoid to access dangling variant
1320  *               while SPs are deleted and added. To implement that simply,
1321  *               we use the same manner as if_ipsec_set_tunnel(), that is,
1322  *               create extra "null" variant and use it temporarily.
1323  *         (2-3) Swap variant include its SPs.
1324  *         (2-4) Cleanup last configurations.
1325  */
1326 static void
if_ipsec_delete_tunnel(struct ifnet * ifp)1327 if_ipsec_delete_tunnel(struct ifnet *ifp)
1328 {
1329 	struct ipsec_softc *sc = ifp->if_softc;
1330 	struct ipsec_variant *ovar, *nvar, *nullvar;
1331 	struct sockaddr *osrc, *odst;
1332 	int error;
1333 
1334 	error = encap_lock_enter();
1335 	if (error)
1336 		return;
1337 
1338 	nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
1339 	nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
1340 
1341 	mutex_enter(&sc->ipsec_lock);
1342 
1343 	ovar = sc->ipsec_var;
1344 	osrc = ovar->iv_psrc;
1345 	odst = ovar->iv_pdst;
1346 	/*
1347 	 * (1-1) Check current src and dst address pair are null,
1348 	 *       which means the ipsec(4) I/F is already done deletetunnel.
1349 	 */
1350 	if (osrc == NULL || odst == NULL) {
1351 		/* address pair not changed. */
1352 		mutex_exit(&sc->ipsec_lock);
1353 		encap_lock_exit();
1354 		kmem_free(nvar, sizeof(*nvar));
1355 		kmem_free(nullvar, sizeof(*nullvar));
1356 		return;
1357 	}
1358 
1359 	/*
1360 	 * (2-1) Create variant for deleted status.
1361 	 */
1362 	if_ipsec_copy_variant(nvar, ovar);
1363 	if_ipsec_clear_config(nvar);
1364 	psref_target_init(&nvar->iv_psref, iv_psref_class);
1365 
1366 	/*
1367 	 * (2-2) Create temporary "null" variant used to avoid to access
1368 	 *       dangling variant while SPs are deleted and added.
1369 	 */
1370 	if_ipsec_copy_variant(nullvar, ovar);
1371 	if_ipsec_clear_config(nullvar);
1372 	psref_target_init(&nullvar->iv_psref, iv_psref_class);
1373 	/*
1374 	 * (2-3) Swap variant include its SPs.
1375 	 */
1376 	/* if_ipsec_update_variant() does not fail when delete SP only. */
1377 	(void)if_ipsec_update_variant(sc, nvar, nullvar);
1378 
1379 	mutex_exit(&sc->ipsec_lock);
1380 
1381 	/*
1382 	 * (2-4) Cleanup last configurations.
1383 	 */
1384 	if (if_ipsec_variant_is_configured(ovar))
1385 		if_ipsec_encap_detach(ovar);
1386 	encap_lock_exit();
1387 
1388 	sockaddr_free(osrc);
1389 	sockaddr_free(odst);
1390 	kmem_free(ovar, sizeof(*ovar));
1391 	kmem_free(nullvar, sizeof(*nullvar));
1392 }
1393 
1394 /*
1395  * Check IFF_NAT_T and IFF_FWD_IPV6 flags, therefore update SPs if needed.
1396  *     (1) check
1397  *         (1-1) Check flags are changed.
1398  *         (1-2) Check current src and dst address pair. If they are null,
1399  *               that means the ipsec(4) I/F is deletetunnel'ed, so it is
1400  *               not needed to update.
1401  *     (2) update
1402  *         (2-1) Create variant for new SPs.
1403  *         (2-2) Create temporary "null" variant used to avoid to access
1404  *               dangling variant while SPs are deleted and added.
1405  *               NOTE:
1406  *               There is the same problem as if_ipsec_delete_tunnel().
1407  *         (2-3) Swap variant include its SPs.
1408  *         (2-4) Cleanup unused configurations.
1409  *               NOTE: use the same encap_cookies.
1410  */
1411 static int
if_ipsec_ensure_flags(struct ifnet * ifp,u_short oflags)1412 if_ipsec_ensure_flags(struct ifnet *ifp, u_short oflags)
1413 {
1414 	struct ipsec_softc *sc = ifp->if_softc;
1415 	struct ipsec_variant *ovar, *nvar, *nullvar;
1416 	int error;
1417 
1418 	/*
1419 	 * (1) Check flags are changed.
1420 	 */
1421 	if ((oflags & (IFF_NAT_T|IFF_FWD_IPV6)) ==
1422 	    (ifp->if_flags & (IFF_NAT_T|IFF_FWD_IPV6)))
1423 		return 0; /* flags not changed. */
1424 
1425 	error = encap_lock_enter();
1426 	if (error)
1427 		return error;
1428 
1429 	nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
1430 	nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
1431 
1432 	mutex_enter(&sc->ipsec_lock);
1433 
1434 	ovar = sc->ipsec_var;
1435 	/*
1436 	 * (1-2) Check current src and dst address pair.
1437 	 */
1438 	if (if_ipsec_variant_is_unconfigured(ovar)) {
1439 		/* nothing to do */
1440 		mutex_exit(&sc->ipsec_lock);
1441 		encap_lock_exit();
1442 		kmem_free(nvar, sizeof(*nvar));
1443 		kmem_free(nullvar, sizeof(*nullvar));
1444 		return 0;
1445 	}
1446 
1447 	/*
1448 	 * (2-1) Create variant for new SPs.
1449 	 */
1450 	if_ipsec_copy_variant(nvar, ovar);
1451 	psref_target_init(&nvar->iv_psref, iv_psref_class);
1452 	/*
1453 	 * (2-2) Create temporary "null" variant used to avoid to access
1454 	 *       dangling variant while SPs are deleted and added.
1455 	 */
1456 	if_ipsec_copy_variant(nullvar, ovar);
1457 	if_ipsec_clear_config(nullvar);
1458 	psref_target_init(&nullvar->iv_psref, iv_psref_class);
1459 	/*
1460 	 * (2-3) Swap variant include its SPs.
1461 	 */
1462 	error = if_ipsec_update_variant(sc, nvar, nullvar);
1463 
1464 	mutex_exit(&sc->ipsec_lock);
1465 	encap_lock_exit();
1466 
1467 	/*
1468 	 * (2-4) Cleanup unused configurations.
1469 	 */
1470 	if (!error)
1471 		kmem_free(ovar, sizeof(*ovar));
1472 	else
1473 		kmem_free(nvar, sizeof(*ovar));
1474 	kmem_free(nullvar, sizeof(*nullvar));
1475 
1476 	return error;
1477 }
1478 
1479 /*
1480  * SPD management
1481  */
1482 
1483 /*
1484  * Share SP set with other NAT-T ipsec(4) I/F(s).
1485  *     Return 1, when "var" shares SP set.
1486  *     Return 0, when "var" cannot share SP set.
1487  *
1488  * NOTE:
1489  * if_ipsec_share_sp() and if_ipsec_unshare_sp() would require global lock
1490  * to exclude other ipsec(4) I/Fs set_tunnel/delete_tunnel. E.g. when ipsec0
1491  * and ipsec1 can share SP set, running ipsec0's set_tunnel and ipsec1's
1492  * set_tunnel causes race.
1493  * Currently, (fortunately) encap_lock works as this global lock.
1494  */
1495 static int
if_ipsec_share_sp(struct ipsec_variant * var)1496 if_ipsec_share_sp(struct ipsec_variant *var)
1497 {
1498 	struct ipsec_softc *sc = var->iv_softc;
1499 	struct ipsec_softc *sc2;
1500 	struct ipsec_variant *var2;
1501 	struct psref psref;
1502 
1503 	KASSERT(encap_lock_held());
1504 	KASSERT(var->iv_psrc != NULL && var->iv_pdst != NULL);
1505 
1506 	mutex_enter(&ipsec_softcs.lock);
1507 	LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
1508 		if (sc2 == sc)
1509 			continue;
1510 		var2 = if_ipsec_getref_variant(sc2, &psref);
1511 		if (if_ipsec_variant_is_unconfigured(var2)) {
1512 			if_ipsec_putref_variant(var2, &psref);
1513 			continue;
1514 		}
1515 		if (sockaddr_cmp(var2->iv_pdst, var->iv_pdst) != 0 ||
1516 		    sockaddr_cmp(var2->iv_psrc, var->iv_psrc) != 0) {
1517 			if_ipsec_putref_variant(var2, &psref);
1518 			continue;
1519 		}
1520 
1521 		break;
1522 	}
1523 	mutex_exit(&ipsec_softcs.lock);
1524 	if (sc2 == NULL)
1525 		return 0; /* not shared */
1526 
1527 	IV_SP_IN(var) = IV_SP_IN(var2);
1528 	IV_SP_IN6(var) = IV_SP_IN6(var2);
1529 	IV_SP_OUT(var) = IV_SP_OUT(var2);
1530 	IV_SP_OUT6(var) = IV_SP_OUT6(var2);
1531 
1532 	if_ipsec_putref_variant(var2, &psref);
1533 	return 1; /* shared */
1534 }
1535 
1536 /*
1537  * Unshare SP set with other NAT-T ipsec(4) I/F(s).
1538  *     Return 1, when "var" shared SP set, and then unshare them.
1539  *     Return 0, when "var" did not share SP set.
1540  *
1541  * NOTE:
1542  * See if_ipsec_share_sp()'s note.
1543  */
1544 static int
if_ipsec_unshare_sp(struct ipsec_variant * var)1545 if_ipsec_unshare_sp(struct ipsec_variant *var)
1546 {
1547 	struct ipsec_softc *sc = var->iv_softc;
1548 	struct ipsec_softc *sc2;
1549 	struct ipsec_variant *var2;
1550 	struct psref psref;
1551 
1552 	KASSERT(encap_lock_held());
1553 
1554 	if (!var->iv_pdst || !var->iv_psrc)
1555 		return 0;
1556 
1557 	mutex_enter(&ipsec_softcs.lock);
1558 	LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
1559 		if (sc2 == sc)
1560 			continue;
1561 		var2 = if_ipsec_getref_variant(sc2, &psref);
1562 		if (!var2->iv_pdst || !var2->iv_psrc) {
1563 			if_ipsec_putref_variant(var2, &psref);
1564 			continue;
1565 		}
1566 		if (sockaddr_cmp(var2->iv_pdst, var->iv_pdst) != 0 ||
1567 		    sockaddr_cmp(var2->iv_psrc, var->iv_psrc) != 0) {
1568 			if_ipsec_putref_variant(var2, &psref);
1569 			continue;
1570 		}
1571 
1572 		break;
1573 	}
1574 	mutex_exit(&ipsec_softcs.lock);
1575 	if (sc2 == NULL)
1576 		return 0; /* not shared */
1577 
1578 	IV_SP_IN(var) = NULL;
1579 	IV_SP_IN6(var) = NULL;
1580 	IV_SP_OUT(var) = NULL;
1581 	IV_SP_OUT6(var) = NULL;
1582 	if_ipsec_putref_variant(var2, &psref);
1583 	return 1; /* shared */
1584 }
1585 
1586 static inline void
if_ipsec_add_mbuf_optalign(struct mbuf * m0,void * data,size_t len,bool align)1587 if_ipsec_add_mbuf_optalign(struct mbuf *m0, void *data, size_t len, bool align)
1588 {
1589 	struct mbuf *m;
1590 
1591 	MGET(m, M_WAIT, MT_DATA);
1592 	if (align) {
1593 		m->m_len = PFKEY_ALIGN8(len);
1594 		memset(mtod(m, void *), 0, m->m_len);
1595 	} else
1596 		m->m_len = len;
1597 	m_copyback(m, 0, len, data);
1598 	m_cat(m0, m);
1599 }
1600 
/*
 * Append "len" bytes of "data" to the chain "m0"; shorthand for
 * if_ipsec_add_mbuf_optalign() with alignment enabled.
 */
static inline void
if_ipsec_add_mbuf(struct mbuf *m0, void *data, size_t len)
{
	const bool align = true;

	if_ipsec_add_mbuf_optalign(m0, data, len, align);
}
1607 
1608 static inline void
if_ipsec_add_mbuf_addr_port(struct mbuf * m0,struct sockaddr * addr,in_port_t port,bool align)1609 if_ipsec_add_mbuf_addr_port(struct mbuf *m0, struct sockaddr *addr, in_port_t port, bool align)
1610 {
1611 
1612 	if (port == 0) {
1613 		if_ipsec_add_mbuf_optalign(m0, addr, addr->sa_len, align);
1614 	} else {
1615 		union sockaddr_union addrport_u;
1616 		struct sockaddr *addrport = &addrport_u.sa;
1617 
1618 		if_ipsec_set_addr_port(addrport, addr, port);
1619 		if_ipsec_add_mbuf_optalign(m0, addrport, addrport->sa_len, align);
1620 	}
1621 }
1622 
1623 static inline void
if_ipsec_add_pad(struct mbuf * m0,size_t len)1624 if_ipsec_add_pad(struct mbuf *m0, size_t len)
1625 {
1626 	struct mbuf *m;
1627 
1628 	if (len == 0)
1629 		return;
1630 
1631 	MGET(m, M_WAIT, MT_DATA);
1632 	m->m_len = len;
1633 	memset(mtod(m, void *), 0, m->m_len);
1634 	m_cat(m0, m);
1635 }
1636 
1637 static inline size_t
if_ipsec_set_sadb_addr(struct sadb_address * saaddr,struct sockaddr * addr,int proto,uint16_t exttype)1638 if_ipsec_set_sadb_addr(struct sadb_address *saaddr, struct sockaddr *addr,
1639     int proto, uint16_t exttype)
1640 {
1641 	size_t size;
1642 
1643 	KASSERT(saaddr != NULL);
1644 	KASSERT(addr != NULL);
1645 
1646 	size = sizeof(*saaddr) + PFKEY_ALIGN8(addr->sa_len);
1647 	saaddr->sadb_address_len = PFKEY_UNIT64(size);
1648 	saaddr->sadb_address_exttype = exttype;
1649 	saaddr->sadb_address_proto = proto;
1650 	switch (addr->sa_family) {
1651 #ifdef INET
1652 	case AF_INET:
1653 		saaddr->sadb_address_prefixlen = sizeof(struct in_addr) << 3;
1654 		break;
1655 #endif /* INET */
1656 #ifdef INET6
1657 	case AF_INET6:
1658 		saaddr->sadb_address_prefixlen = sizeof(struct in6_addr) << 3;
1659 		break;
1660 #endif /* INET6 */
1661 	default:
1662 		log(LOG_DEBUG,
1663 		    "%s: Invalid address family: %d.\n",
1664 		    __func__, addr->sa_family);
1665 		break;
1666 	}
1667 	saaddr->sadb_address_reserved = 0;
1668 
1669 	return size;
1670 }
1671 
1672 static inline size_t
if_ipsec_set_sadb_src(struct sadb_address * sasrc,struct sockaddr * src,int proto)1673 if_ipsec_set_sadb_src(struct sadb_address *sasrc, struct sockaddr *src,
1674     int proto)
1675 {
1676 
1677 	return if_ipsec_set_sadb_addr(sasrc, src, proto,
1678 	    SADB_EXT_ADDRESS_SRC);
1679 }
1680 
1681 static inline size_t
if_ipsec_set_sadb_dst(struct sadb_address * sadst,struct sockaddr * dst,int proto)1682 if_ipsec_set_sadb_dst(struct sadb_address *sadst, struct sockaddr *dst,
1683     int proto)
1684 {
1685 
1686 	return if_ipsec_set_sadb_addr(sadst, dst, proto,
1687 	    SADB_EXT_ADDRESS_DST);
1688 }
1689 
1690 static inline size_t
if_ipsec_set_sadb_x_policy(struct sadb_x_policy * xpl,struct sadb_x_ipsecrequest * xisr,uint16_t policy,uint8_t dir,uint32_t id,uint8_t level,struct sockaddr * src,struct sockaddr * dst,uint16_t reqid)1691 if_ipsec_set_sadb_x_policy(struct sadb_x_policy *xpl,
1692     struct sadb_x_ipsecrequest *xisr, uint16_t policy, uint8_t dir, uint32_t id,
1693     uint8_t level, struct sockaddr *src, struct sockaddr *dst, uint16_t reqid)
1694 {
1695 	size_t size;
1696 
1697 	KASSERT(policy != IPSEC_POLICY_IPSEC || xisr != NULL);
1698 
1699 	size = sizeof(*xpl);
1700 	if (policy == IPSEC_POLICY_IPSEC) {
1701 		size += PFKEY_ALIGN8(sizeof(*xisr));
1702 		if (src != NULL && dst != NULL)
1703 			size += PFKEY_ALIGN8(src->sa_len + dst->sa_len);
1704 	}
1705 	xpl->sadb_x_policy_len = PFKEY_UNIT64(size);
1706 	xpl->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
1707 	xpl->sadb_x_policy_type = policy;
1708 	xpl->sadb_x_policy_dir = dir;
1709 	xpl->sadb_x_policy_flags = 0;
1710 	xpl->sadb_x_policy_id = id;
1711 	xpl->sadb_x_policy_reserved2 = 0;
1712 
1713 	if (policy == IPSEC_POLICY_IPSEC) {
1714 		xisr->sadb_x_ipsecrequest_len = PFKEY_ALIGN8(sizeof(*xisr));
1715 		if (src != NULL && dst != NULL)
1716 			xisr->sadb_x_ipsecrequest_len +=
1717 				PFKEY_ALIGN8(src->sa_len + dst->sa_len);
1718 		xisr->sadb_x_ipsecrequest_proto = IPPROTO_ESP;
1719 		xisr->sadb_x_ipsecrequest_mode = IPSEC_MODE_TRANSPORT;
1720 		xisr->sadb_x_ipsecrequest_level = level;
1721 		if (level == IPSEC_LEVEL_UNIQUE)
1722 			xisr->sadb_x_ipsecrequest_reqid = reqid;
1723 		else
1724 			xisr->sadb_x_ipsecrequest_reqid = 0;
1725 	}
1726 
1727 	return size;
1728 }
1729 
1730 static inline void
if_ipsec_set_sadb_msg(struct sadb_msg * msg,uint16_t extlen,uint8_t msgtype)1731 if_ipsec_set_sadb_msg(struct sadb_msg *msg, uint16_t extlen, uint8_t msgtype)
1732 {
1733 
1734 	KASSERT(msg != NULL);
1735 
1736 	msg->sadb_msg_version = PF_KEY_V2;
1737 	msg->sadb_msg_type = msgtype;
1738 	msg->sadb_msg_errno = 0;
1739 	msg->sadb_msg_satype = SADB_SATYPE_UNSPEC;
1740 	msg->sadb_msg_len = PFKEY_UNIT64(sizeof(*msg)) + extlen;
1741 	msg->sadb_msg_reserved = 0;
1742 	msg->sadb_msg_seq = 0; /* XXXX */
1743 	msg->sadb_msg_pid = 0; /* XXXX */
1744 }
1745 
1746 static inline void
if_ipsec_set_sadb_msg_add(struct sadb_msg * msg,uint16_t extlen)1747 if_ipsec_set_sadb_msg_add(struct sadb_msg *msg, uint16_t extlen)
1748 {
1749 
1750 	if_ipsec_set_sadb_msg(msg, extlen, SADB_X_SPDADD);
1751 }
1752 
1753 static inline void
if_ipsec_set_sadb_msg_del(struct sadb_msg * msg,uint16_t extlen)1754 if_ipsec_set_sadb_msg_del(struct sadb_msg *msg, uint16_t extlen)
1755 {
1756 
1757 	if_ipsec_set_sadb_msg(msg, extlen, SADB_X_SPDDELETE2);
1758 }
1759 
1760 static int
if_ipsec_set_addr_port(struct sockaddr * addrport,struct sockaddr * addr,in_port_t port)1761 if_ipsec_set_addr_port(struct sockaddr *addrport, struct sockaddr *addr,
1762     in_port_t port)
1763 {
1764 	int error = 0;
1765 
1766 	sockaddr_copy(addrport, addr->sa_len, addr);
1767 
1768 	switch (addr->sa_family) {
1769 #ifdef INET
1770 	case AF_INET: {
1771 		struct sockaddr_in *sin = satosin(addrport);
1772 		sin->sin_port = port;
1773 		break;
1774 	}
1775 #endif /* INET */
1776 #ifdef INET6
1777 	case AF_INET6: {
1778 		struct sockaddr_in6 *sin6 = satosin6(addrport);
1779 		sin6->sin6_port = port;
1780 		break;
1781 	}
1782 #endif /* INET6 */
1783 	default:
1784 		log(LOG_DEBUG,
1785 		    "%s: Invalid address family: %d.\n",
1786 		    __func__, addr->sa_family);
1787 		error = EINVAL;
1788 	}
1789 
1790 	return error;
1791 }
1792 
1793 static int
if_ipsec_get_reqids(struct ipsec_variant * var,u_int16_t reqids[REQID_INDEX_NUM])1794 if_ipsec_get_reqids(struct ipsec_variant *var, u_int16_t reqids[REQID_INDEX_NUM])
1795 {
1796 	struct ipsec_softc *sc = var->iv_softc;
1797 	struct ifnet *ifp = &sc->ipsec_if;
1798 
1799 	mutex_enter(&ipsec_softcs.lock);
1800 	if (ipsec_softcs.use_fixed_reqid) {
1801 		uint32_t unit, reqid_base;
1802 
1803 		unit = strtoul(ifp->if_xname + sizeof("ipsec") - 1, NULL, 10);
1804 		reqid_base = ipsec_softcs.reqid_base + unit * 2;
1805 		if (reqid_base + 1 > ipsec_softcs.reqid_last) {
1806 			log(LOG_ERR,
1807 			    "%s: invalid fixed reqid(%"PRIu32"), "
1808 			    "current range %"PRIu16" <= reqid <= %"PRIu16"\n",
1809 			    ifp->if_xname, reqid_base + 1,
1810 			    ipsec_softcs.reqid_base, ipsec_softcs.reqid_last);
1811 			mutex_exit(&ipsec_softcs.lock);
1812 			return ENOSPC;
1813 		}
1814 
1815 		/*
1816 		 * Use same reqid both inbound and outbound to reduce reqid.
1817 		 */
1818 		reqids[REQID_INDEX_IPV4IN] = reqid_base;
1819 		reqids[REQID_INDEX_IPV4OUT] = reqid_base;
1820 		reqids[REQID_INDEX_IPV6IN] = reqid_base + 1;
1821 		reqids[REQID_INDEX_IPV6OUT] = reqid_base + 1;
1822 	} else {
1823 		for (int i = 0; i < REQID_INDEX_NUM; i++)
1824 			reqids[i] = key_newreqid();
1825 	}
1826 	mutex_exit(&ipsec_softcs.lock);
1827 
1828 	return 0;
1829 }
1830 
1831 static struct secpolicy *
if_ipsec_add_sp0(struct sockaddr * src,in_port_t sport,struct sockaddr * dst,in_port_t dport,int dir,int proto,int level,u_int policy,uint16_t reqid)1832 if_ipsec_add_sp0(struct sockaddr *src, in_port_t sport,
1833     struct sockaddr *dst, in_port_t dport,
1834     int dir, int proto, int level, u_int policy, uint16_t reqid)
1835 {
1836 	struct sadb_msg msg;
1837 	struct sadb_address xsrc, xdst;
1838 	struct sadb_x_policy xpl;
1839 	struct sadb_x_ipsecrequest xisr;
1840 	size_t size;
1841 	size_t padlen;
1842 	uint16_t ext_msg_len = 0;
1843 	struct mbuf *m;
1844 
1845 	memset(&msg, 0, sizeof(msg));
1846 	memset(&xsrc, 0, sizeof(xsrc));
1847 	memset(&xdst, 0, sizeof(xdst));
1848 	memset(&xpl, 0, sizeof(xpl));
1849 	memset(&xisr, 0, sizeof(xisr));
1850 
1851 	MGETHDR(m, M_WAIT, MT_DATA);
1852 
1853 	size = if_ipsec_set_sadb_src(&xsrc, src, proto);
1854 	ext_msg_len += PFKEY_UNIT64(size);
1855 	size = if_ipsec_set_sadb_dst(&xdst, dst, proto);
1856 	ext_msg_len += PFKEY_UNIT64(size);
1857 	size = if_ipsec_set_sadb_x_policy(&xpl, &xisr, policy, dir, 0, level,
1858 	    NULL, NULL, reqid);
1859 	ext_msg_len += PFKEY_UNIT64(size);
1860 	if_ipsec_set_sadb_msg_add(&msg, ext_msg_len);
1861 
1862 	/* build PF_KEY message */
1863 
1864 	m->m_len = sizeof(msg);
1865 	m_copyback(m, 0, sizeof(msg), &msg);
1866 
1867 	if_ipsec_add_mbuf(m, &xsrc, sizeof(xsrc));
1868 	/*
1869 	 * secpolicy.spidx.{src, dst} must not be set port number,
1870 	 * even if it is used for NAT-T.
1871 	 */
1872 	if_ipsec_add_mbuf_addr_port(m, src, 0, true);
1873 	padlen = PFKEY_UNUNIT64(xsrc.sadb_address_len)
1874 		- (sizeof(xsrc) + PFKEY_ALIGN8(src->sa_len));
1875 	if_ipsec_add_pad(m, padlen);
1876 
1877 	if_ipsec_add_mbuf(m, &xdst, sizeof(xdst));
1878 	/* ditto */
1879 	if_ipsec_add_mbuf_addr_port(m, dst, 0, true);
1880 	padlen = PFKEY_UNUNIT64(xdst.sadb_address_len)
1881 		- (sizeof(xdst) + PFKEY_ALIGN8(dst->sa_len));
1882 	if_ipsec_add_pad(m, padlen);
1883 
1884 	if_ipsec_add_mbuf(m, &xpl, sizeof(xpl));
1885 	padlen = PFKEY_UNUNIT64(xpl.sadb_x_policy_len) - sizeof(xpl);
1886 	if (policy == IPSEC_POLICY_IPSEC) {
1887 		if_ipsec_add_mbuf(m, &xisr, sizeof(xisr));
1888 		padlen -= PFKEY_ALIGN8(sizeof(xisr));
1889 	}
1890 	if_ipsec_add_pad(m, padlen);
1891 
1892 	/* key_kpi_spdadd() has already done KEY_SP_REF(). */
1893 	return key_kpi_spdadd(m);
1894 }
1895 
1896 static int
if_ipsec_add_sp(struct ipsec_variant * var,struct sockaddr * src,in_port_t sport,struct sockaddr * dst,in_port_t dport)1897 if_ipsec_add_sp(struct ipsec_variant *var,
1898     struct sockaddr *src, in_port_t sport,
1899     struct sockaddr *dst, in_port_t dport)
1900 {
1901 	struct ipsec_softc *sc = var->iv_softc;
1902 	int level;
1903 	int error;
1904 	u_int v6policy;
1905 	u_int16_t reqids[REQID_INDEX_NUM];
1906 
1907 	/*
1908 	 * must delete sp before add it.
1909 	 */
1910 	KASSERT(IV_SP_IN(var) == NULL);
1911 	KASSERT(IV_SP_OUT(var) == NULL);
1912 	KASSERT(IV_SP_IN6(var) == NULL);
1913 	KASSERT(IV_SP_OUT6(var) == NULL);
1914 
1915 	/*
1916 	 * can be shared?
1917 	 */
1918 	if (if_ipsec_share_sp(var))
1919 		return 0;
1920 
1921 	if (if_ipsec_nat_t(sc))
1922 		level = IPSEC_LEVEL_REQUIRE;
1923 	else
1924 		level = IPSEC_LEVEL_UNIQUE;
1925 
1926 	if (if_ipsec_fwd_ipv6(sc))
1927 		v6policy = IPSEC_POLICY_IPSEC;
1928 	else
1929 		v6policy = IPSEC_POLICY_DISCARD;
1930 
1931 	error = if_ipsec_get_reqids(var, reqids);
1932 	if (error)
1933 		goto fail;
1934 
1935 	IV_SP_IN(var) = if_ipsec_add_sp0(dst, dport, src, sport,
1936 	    IPSEC_DIR_INBOUND, IPPROTO_IPIP, level, IPSEC_POLICY_IPSEC,
1937 	    reqids[REQID_INDEX_IPV4IN]);
1938 	if (IV_SP_IN(var) == NULL) {
1939 		error = EEXIST;
1940 		goto fail;
1941 	}
1942 	IV_SP_OUT(var) = if_ipsec_add_sp0(src, sport, dst, dport,
1943 	    IPSEC_DIR_OUTBOUND, IPPROTO_IPIP, level, IPSEC_POLICY_IPSEC,
1944 	    reqids[REQID_INDEX_IPV4OUT]);
1945 	if (IV_SP_OUT(var) == NULL) {
1946 		error = EEXIST;
1947 		goto fail;
1948 	}
1949 	IV_SP_IN6(var) = if_ipsec_add_sp0(dst, dport, src, sport,
1950 	    IPSEC_DIR_INBOUND, IPPROTO_IPV6, level, v6policy,
1951 	    reqids[REQID_INDEX_IPV6IN]);
1952 	if (IV_SP_IN6(var) == NULL) {
1953 		error = EEXIST;
1954 		goto fail;
1955 	}
1956 	IV_SP_OUT6(var) = if_ipsec_add_sp0(src, sport, dst, dport,
1957 	    IPSEC_DIR_OUTBOUND, IPPROTO_IPV6, level, v6policy,
1958 	    reqids[REQID_INDEX_IPV6OUT]);
1959 	if (IV_SP_OUT6(var) == NULL) {
1960 		error = EEXIST;
1961 		goto fail;
1962 	}
1963 
1964 	return 0;
1965 
1966 fail:
1967 	if (IV_SP_IN6(var) != NULL) {
1968 		if_ipsec_del_sp0(IV_SP_IN6(var));
1969 		IV_SP_IN6(var) = NULL;
1970 	}
1971 	if (IV_SP_OUT(var) != NULL) {
1972 		if_ipsec_del_sp0(IV_SP_OUT(var));
1973 		IV_SP_OUT(var) = NULL;
1974 	}
1975 	if (IV_SP_IN(var) != NULL) {
1976 		if_ipsec_del_sp0(IV_SP_IN(var));
1977 		IV_SP_IN(var) = NULL;
1978 	}
1979 
1980 	return error;
1981 }
1982 
1983 static int
if_ipsec_del_sp0(struct secpolicy * sp)1984 if_ipsec_del_sp0(struct secpolicy *sp)
1985 {
1986 	struct sadb_msg msg;
1987 	struct sadb_x_policy xpl;
1988 	size_t size;
1989 	uint16_t ext_msg_len = 0;
1990 	int error;
1991 	struct mbuf *m;
1992 
1993 	if (sp == NULL)
1994 		return 0;
1995 
1996 	memset(&msg, 0, sizeof(msg));
1997 	memset(&xpl, 0, sizeof(xpl));
1998 
1999 	MGETHDR(m, M_WAIT, MT_DATA);
2000 
2001 	size = if_ipsec_set_sadb_x_policy(&xpl, NULL, 0, 0, sp->id, 0, NULL, NULL, 0);
2002 	ext_msg_len += PFKEY_UNIT64(size);
2003 
2004 	if_ipsec_set_sadb_msg_del(&msg, ext_msg_len);
2005 
2006 	m->m_len = sizeof(msg);
2007 	m_copyback(m, 0, sizeof(msg), &msg);
2008 
2009 	if_ipsec_add_mbuf(m, &xpl, sizeof(xpl));
2010 
2011 	/*  unreference correspond to key_kpi_spdadd(). */
2012 	KEY_SP_UNREF(&sp);
2013 	error = key_kpi_spddelete2(m);
2014 	if (error != 0) {
2015 		log(LOG_ERR, "%s: cannot delete SP(ID=%u) (error=%d).\n",
2016 		    __func__, sp->id, error);
2017 	}
2018 	return error;
2019 }
2020 
2021 static void
if_ipsec_del_sp(struct ipsec_variant * var)2022 if_ipsec_del_sp(struct ipsec_variant *var)
2023 {
2024 
2025 	/* are the SPs shared? */
2026 	if (if_ipsec_unshare_sp(var))
2027 		return;
2028 
2029 	(void)if_ipsec_del_sp0(IV_SP_OUT(var));
2030 	(void)if_ipsec_del_sp0(IV_SP_IN(var));
2031 	(void)if_ipsec_del_sp0(IV_SP_OUT6(var));
2032 	(void)if_ipsec_del_sp0(IV_SP_IN6(var));
2033 	IV_SP_IN(var) = NULL;
2034 	IV_SP_IN6(var) = NULL;
2035 	IV_SP_OUT(var) = NULL;
2036 	IV_SP_OUT6(var) = NULL;
2037 }
2038 
2039 static int
if_ipsec_replace_sp(struct ipsec_softc * sc,struct ipsec_variant * ovar,struct ipsec_variant * nvar)2040 if_ipsec_replace_sp(struct ipsec_softc *sc, struct ipsec_variant *ovar,
2041     struct ipsec_variant *nvar)
2042 {
2043 	in_port_t src_port = 0;
2044 	in_port_t dst_port = 0;
2045 	struct sockaddr *src;
2046 	struct sockaddr *dst;
2047 	int error = 0;
2048 
2049 	KASSERT(mutex_owned(&sc->ipsec_lock));
2050 
2051 	if_ipsec_del_sp(ovar);
2052 
2053 	src = nvar->iv_psrc;
2054 	dst = nvar->iv_pdst;
2055 	if (if_ipsec_nat_t(sc)) {
2056 		/* NAT-T enabled */
2057 		src_port = nvar->iv_sport;
2058 		dst_port = nvar->iv_dport;
2059 	}
2060 	if (src && dst)
2061 		error = if_ipsec_add_sp(nvar, src, src_port, dst, dst_port);
2062 
2063 	return error;
2064 }
2065 
2066 /*
2067  * ipsec_variant and its SPs update API.
2068  *
2069  * Assumption:
2070  * reader side dereferences sc->ipsec_var in reader critical section only,
2071  * that is, all of reader sides do not reader the sc->ipsec_var after
2072  * pserialize_perform().
2073  */
2074 static int
if_ipsec_update_variant(struct ipsec_softc * sc,struct ipsec_variant * nvar,struct ipsec_variant * nullvar)2075 if_ipsec_update_variant(struct ipsec_softc *sc, struct ipsec_variant *nvar,
2076     struct ipsec_variant *nullvar)
2077 {
2078 	struct ifnet *ifp = &sc->ipsec_if;
2079 	struct ipsec_variant *ovar = sc->ipsec_var;
2080 	int error;
2081 
2082 	KASSERT(mutex_owned(&sc->ipsec_lock));
2083 
2084 	/*
2085 	 * To keep consistency between ipsec(4) I/F settings and SPs,
2086 	 * we stop packet processing while replacing SPs, that is, we set
2087 	 * "null" config variant to sc->ipsec_var.
2088 	 */
2089 	atomic_store_release(&sc->ipsec_var, nullvar);
2090 	pserialize_perform(sc->ipsec_psz);
2091 	psref_target_destroy(&ovar->iv_psref, iv_psref_class);
2092 
2093 	error = if_ipsec_replace_sp(sc, ovar, nvar);
2094 	if (!error)
2095 		atomic_store_release(&sc->ipsec_var, nvar);
2096 	else {
2097 		psref_target_init(&ovar->iv_psref, iv_psref_class);
2098 		atomic_store_release(&sc->ipsec_var, ovar); /* rollback */
2099 	}
2100 
2101 	pserialize_perform(sc->ipsec_psz);
2102 	psref_target_destroy(&nullvar->iv_psref, iv_psref_class);
2103 
2104 	if (if_ipsec_variant_is_configured(sc->ipsec_var))
2105 		ifp->if_flags |= IFF_RUNNING;
2106 	else
2107 		ifp->if_flags &= ~IFF_RUNNING;
2108 
2109 	return error;
2110 }
2111