1 /* $NetBSD: if_ipsec.c,v 1.36 2024/02/10 18:43:53 andvar Exp $ */
2
3 /*
4 * Copyright (c) 2017 Internet Initiative Japan Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: if_ipsec.c,v 1.36 2024/02/10 18:43:53 andvar Exp $");
31
32 #ifdef _KERNEL_OPT
33 #include "opt_inet.h"
34 #endif
35
36 #include <sys/param.h>
37 #include <sys/atomic.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/mbuf.h>
41 #include <sys/socket.h>
42 #include <sys/sockio.h>
43 #include <sys/errno.h>
44 #include <sys/ioctl.h>
45 #include <sys/time.h>
46 #include <sys/syslog.h>
47 #include <sys/cpu.h>
48 #include <sys/kmem.h>
49 #include <sys/mutex.h>
50 #include <sys/pserialize.h>
51 #include <sys/psref.h>
52 #include <sys/sysctl.h>
53
54 #include <net/if.h>
55 #include <net/if_types.h>
56 #include <net/route.h>
57 #include <net/bpf.h>
58 #include <net/pfkeyv2.h>
59
60 #include <netinet/in.h>
61 #include <netinet/in_systm.h>
62 #include <netinet/ip.h>
63 #ifdef INET
64 #include <netinet/in_var.h>
65 #endif /* INET */
66
67 #ifdef INET6
68 #include <netinet6/in6_var.h>
69 #include <netinet/ip6.h>
70 #include <netinet6/ip6_var.h>
71 #endif /* INET6 */
72
73 #include <netinet/ip_encap.h>
74
75 #include <net/if_ipsec.h>
76
77 #include <net/raw_cb.h>
78 #include <net/pfkeyv2.h>
79
80 #include <netipsec/key.h>
81 #include <netipsec/keydb.h> /* for union sockaddr_union */
82 #include <netipsec/ipsec.h>
83 #include <netipsec/ipsecif.h>
84
85 static int if_ipsec_clone_create(struct if_clone *, int);
86 static int if_ipsec_clone_destroy(struct ifnet *);
87
88 static inline int if_ipsec_out_direct(struct ipsec_variant *, struct mbuf *, int);
89 static inline void if_ipsec_in_enqueue(struct mbuf *, int, struct ifnet *);
90
91 static int if_ipsec_encap_attach(struct ipsec_variant *);
92 static int if_ipsec_encap_detach(struct ipsec_variant *);
93 static int if_ipsec_set_tunnel(struct ifnet *,
94 struct sockaddr *, struct sockaddr *);
95 static void if_ipsec_delete_tunnel(struct ifnet *);
96 static int if_ipsec_ensure_flags(struct ifnet *, u_short);
97 static void if_ipsec_attach0(struct ipsec_softc *);
98
99 static int if_ipsec_update_variant(struct ipsec_softc *,
100 struct ipsec_variant *, struct ipsec_variant *);
101
102 /* sadb_msg */
103 static inline void if_ipsec_add_mbuf(struct mbuf *, void *, size_t);
104 static inline void if_ipsec_add_pad(struct mbuf *, size_t);
105 static inline size_t if_ipsec_set_sadb_addr(struct sadb_address *,
106 struct sockaddr *, int, uint16_t);
107 static inline size_t if_ipsec_set_sadb_src(struct sadb_address *,
108 struct sockaddr *, int);
109 static inline size_t if_ipsec_set_sadb_dst(struct sadb_address *,
110 struct sockaddr *, int);
111 static inline size_t if_ipsec_set_sadb_x_policy(struct sadb_x_policy *,
112 struct sadb_x_ipsecrequest *, uint16_t, uint8_t, uint32_t, uint8_t,
113 struct sockaddr *, struct sockaddr *, uint16_t);
114 static inline void if_ipsec_set_sadb_msg(struct sadb_msg *, uint16_t, uint8_t);
115 static inline void if_ipsec_set_sadb_msg_add(struct sadb_msg *, uint16_t);
116 static inline void if_ipsec_set_sadb_msg_del(struct sadb_msg *, uint16_t);
117 /* SPD */
118 static int if_ipsec_share_sp(struct ipsec_variant *);
119 static int if_ipsec_unshare_sp(struct ipsec_variant *);
120 static inline struct secpolicy *if_ipsec_add_sp0(struct sockaddr *,
121 in_port_t, struct sockaddr *, in_port_t, int, int, int, u_int, uint16_t);
122 static inline int if_ipsec_del_sp0(struct secpolicy *);
123 static int if_ipsec_add_sp(struct ipsec_variant *,
124 struct sockaddr *, in_port_t, struct sockaddr *, in_port_t);
125 static void if_ipsec_del_sp(struct ipsec_variant *);
126 static int if_ipsec_replace_sp(struct ipsec_softc *, struct ipsec_variant *,
127 struct ipsec_variant *);
128
/*
 * Helpers used by the "get physical address" ioctls.
 *
 * NOTE(review): if_ipsec_set_addr_port() is defined outside this view;
 * judging from its callers it fills the first sockaddr from the second
 * one plus the given port and returns an errno value -- confirm.
 */
static int if_ipsec_set_addr_port(struct sockaddr *, struct sockaddr *,
    in_port_t);
/* Gather the variant's physical source address (iv_psrc/iv_sport) into target. */
#define IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, target) \
	if_ipsec_set_addr_port(target, (var)->iv_psrc, (var)->iv_sport)
/* Gather the variant's physical destination address (iv_pdst/iv_dport) into target. */
#define IF_IPSEC_GATHER_PDST_ADDR_PORT(var, target) \
	if_ipsec_set_addr_port(target, (var)->iv_pdst, (var)->iv_dport)
135
136 /*
137 * ipsec global variable definitions
138 */
139
/*
 * Driver-wide state shared by every ipsec(4) interface.
 *
 * This list is used in ioctl context only.
 *
 * "lock" is taken around every access to "list" and to the reqid
 * settings made by the clone create/destroy routines and the sysctl
 * handlers in this file.
 */
static struct {
	/* all existing ipsec softcs, linked through ipsec_list */
	LIST_HEAD(ipsec_sclist, ipsec_softc) list;
	/* "use_fixed_reqid" sysctl knob; only changeable while list is empty */
	bool use_fixed_reqid;
#define REQID_BASE_DEFAULT 0x2000
#define REQID_LAST_DEFAULT 0x2fff
	/* "reqid_base" / "reqid_last" sysctl knobs; same restriction as above */
	u_int16_t reqid_base;
	u_int16_t reqid_last;
	/* initialized in ipsecifattach() */
	kmutex_t lock;
} ipsec_softcs __cacheline_aligned = {
	.use_fixed_reqid = false,
	.reqid_base = REQID_BASE_DEFAULT,
	.reqid_last = REQID_LAST_DEFAULT,
};
154
/*
 * psref class used for ipsec_variant targets; created in ipsecifattach()
 * and handed to psref_target_init() when a variant is set up.
 */
struct psref_class *iv_psref_class __read_mostly;

/* Cloner for "ipsec" interfaces; registered by ipsecifattach(). */
struct if_clone ipsec_cloner =
    IF_CLONE_INITIALIZER("ipsec", if_ipsec_clone_create, if_ipsec_clone_destroy);
/* Nesting limit passed to if_tunnel_check_nesting() on output. */
static int max_ipsec_nesting = MAX_IPSEC_NEST;

/* sysctl log that records the nodes created by if_ipsec_sysctl_setup(). */
static struct sysctllog *if_ipsec_sysctl;

/*
 * RPS hash function used when received packets are enqueued; set to the
 * default in ipsecifattach() and exposed through the "rps_hash" sysctl.
 */
static pktq_rps_hash_func_t if_ipsec_pktq_rps_hash_p;

/*
 * NOTE(review): presumably indexes per-family/per-direction reqid slots;
 * the users are outside this view -- confirm.
 */
enum {
	REQID_INDEX_IPV4IN = 0,
	REQID_INDEX_IPV4OUT,
	REQID_INDEX_IPV6IN,
	REQID_INDEX_IPV6OUT,
	REQID_INDEX_NUM,
};
172
173 #ifdef INET6
174 static int
sysctl_if_ipsec_pmtu_global(SYSCTLFN_ARGS)175 sysctl_if_ipsec_pmtu_global(SYSCTLFN_ARGS)
176 {
177 int error, pmtu;
178 struct sysctlnode node = *rnode;
179
180 pmtu = ip6_ipsec_pmtu;
181 node.sysctl_data = &pmtu;
182 error = sysctl_lookup(SYSCTLFN_CALL(&node));
183 if (error || newp == NULL)
184 return error;
185
186 switch (pmtu) {
187 case IPSEC_PMTU_MINMTU:
188 case IPSEC_PMTU_OUTERMTU:
189 ip6_ipsec_pmtu = pmtu;
190 break;
191 default:
192 return EINVAL;
193 }
194
195 return 0;
196 }
197
198 static int
sysctl_if_ipsec_pmtu_perif(SYSCTLFN_ARGS)199 sysctl_if_ipsec_pmtu_perif(SYSCTLFN_ARGS)
200 {
201 int error, pmtu;
202 struct sysctlnode node = *rnode;
203 struct ipsec_softc *sc = (struct ipsec_softc *)node.sysctl_data;
204
205 pmtu = sc->ipsec_pmtu;
206 node.sysctl_data = &pmtu;
207 error = sysctl_lookup(SYSCTLFN_CALL(&node));
208 if (error || newp == NULL)
209 return error;
210
211 switch (pmtu) {
212 case IPSEC_PMTU_SYSDEFAULT:
213 case IPSEC_PMTU_MINMTU:
214 case IPSEC_PMTU_OUTERMTU:
215 sc->ipsec_pmtu = pmtu;
216 break;
217 default:
218 return EINVAL;
219 }
220
221 return 0;
222 }
223 #endif
224
225 static int
sysctl_if_ipsec_use_fixed_reqid(SYSCTLFN_ARGS)226 sysctl_if_ipsec_use_fixed_reqid(SYSCTLFN_ARGS)
227 {
228 bool fixed;
229 int error;
230 struct sysctlnode node = *rnode;
231
232 mutex_enter(&ipsec_softcs.lock);
233 fixed = ipsec_softcs.use_fixed_reqid;
234 node.sysctl_data = &fixed;
235 error = sysctl_lookup(SYSCTLFN_CALL(&node));
236 if (error || newp == NULL) {
237 mutex_exit(&ipsec_softcs.lock);
238 return error;
239 }
240
241 if (!LIST_EMPTY(&ipsec_softcs.list)) {
242 mutex_exit(&ipsec_softcs.lock);
243 return EBUSY;
244 }
245 ipsec_softcs.use_fixed_reqid = fixed;
246 mutex_exit(&ipsec_softcs.lock);
247
248 return 0;
249 }
250
251 static int
sysctl_if_ipsec_reqid_base(SYSCTLFN_ARGS)252 sysctl_if_ipsec_reqid_base(SYSCTLFN_ARGS)
253 {
254 int base;
255 int error;
256 struct sysctlnode node = *rnode;
257
258 mutex_enter(&ipsec_softcs.lock);
259 base = ipsec_softcs.reqid_base;
260 node.sysctl_data = &base;
261 error = sysctl_lookup(SYSCTLFN_CALL(&node));
262 if (error || newp == NULL) {
263 mutex_exit(&ipsec_softcs.lock);
264 return error;
265 }
266
267 if (!LIST_EMPTY(&ipsec_softcs.list)) {
268 mutex_exit(&ipsec_softcs.lock);
269 return EBUSY;
270 }
271 ipsec_softcs.reqid_base = base;
272 mutex_exit(&ipsec_softcs.lock);
273
274 return 0;
275 }
276
277 static int
sysctl_if_ipsec_reqid_last(SYSCTLFN_ARGS)278 sysctl_if_ipsec_reqid_last(SYSCTLFN_ARGS)
279 {
280 int last;
281 int error;
282 struct sysctlnode node = *rnode;
283
284 mutex_enter(&ipsec_softcs.lock);
285 last = ipsec_softcs.reqid_last;
286 node.sysctl_data = &last;
287 error = sysctl_lookup(SYSCTLFN_CALL(&node));
288 if (error || newp == NULL) {
289 mutex_exit(&ipsec_softcs.lock);
290 return error;
291 }
292
293 if (!LIST_EMPTY(&ipsec_softcs.list)) {
294 mutex_exit(&ipsec_softcs.lock);
295 return EBUSY;
296 }
297 ipsec_softcs.reqid_last = last;
298 mutex_exit(&ipsec_softcs.lock);
299
300 return 0;
301 }
302
303 static void
if_ipsec_sysctl_setup(void)304 if_ipsec_sysctl_setup(void)
305 {
306 const struct sysctlnode *node = NULL;
307
308 if_ipsec_sysctl = NULL;
309
310 #ifdef INET6
311 /*
312 * Previously create "net.inet6.ip6" entry to avoid sysctl_createv error.
313 */
314 sysctl_createv(NULL, 0, NULL, NULL,
315 CTLFLAG_PERMANENT,
316 CTLTYPE_NODE, "inet6",
317 SYSCTL_DESCR("PF_INET6 related settings"),
318 NULL, 0, NULL, 0,
319 CTL_NET, PF_INET6, CTL_EOL);
320 sysctl_createv(NULL, 0, NULL, NULL,
321 CTLFLAG_PERMANENT,
322 CTLTYPE_NODE, "ip6",
323 SYSCTL_DESCR("IPv6 related settings"),
324 NULL, 0, NULL, 0,
325 CTL_NET, PF_INET6, IPPROTO_IPV6, CTL_EOL);
326
327 sysctl_createv(&if_ipsec_sysctl, 0, NULL, NULL,
328 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
329 CTLTYPE_INT, "ipsecifhlim",
330 SYSCTL_DESCR("Default hop limit for a ipsec tunnel datagram"),
331 NULL, 0, &ip6_ipsec_hlim, 0,
332 CTL_NET, PF_INET6, IPPROTO_IPV6,
333 IPV6CTL_IPSEC_HLIM, CTL_EOL);
334
335 sysctl_createv(&if_ipsec_sysctl, 0, NULL, NULL,
336 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
337 CTLTYPE_INT, "ipsecifpmtu",
338 SYSCTL_DESCR("Default Path MTU setting for ipsec tunnels"),
339 sysctl_if_ipsec_pmtu_global, 0, NULL, 0,
340 CTL_NET, PF_INET6, IPPROTO_IPV6,
341 IPV6CTL_IPSEC_PMTU, CTL_EOL);
342 #endif
343
344 sysctl_createv(&if_ipsec_sysctl, 0, NULL, &node,
345 CTLFLAG_PERMANENT,
346 CTLTYPE_NODE, "ipsecif",
347 SYSCTL_DESCR("ipsecif global control"),
348 NULL, 0, NULL, 0,
349 CTL_NET, CTL_CREATE, CTL_EOL);
350
351 sysctl_createv(&if_ipsec_sysctl, 0, &node, NULL,
352 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
353 CTLTYPE_STRING, "rps_hash",
354 SYSCTL_DESCR("Interface rps hash function control"),
355 sysctl_pktq_rps_hash_handler, 0, (void *)&if_ipsec_pktq_rps_hash_p,
356 PKTQ_RPS_HASH_NAME_LEN,
357 CTL_CREATE, CTL_EOL);
358
359 sysctl_createv(&if_ipsec_sysctl, 0, &node, NULL,
360 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
361 CTLTYPE_BOOL, "use_fixed_reqid",
362 SYSCTL_DESCR("use fixed reqid for SP"),
363 sysctl_if_ipsec_use_fixed_reqid, 0, NULL, 0,
364 CTL_CREATE, CTL_EOL);
365 sysctl_createv(&if_ipsec_sysctl, 0, &node, NULL,
366 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
367 CTLTYPE_INT, "reqid_base",
368 SYSCTL_DESCR("base value of fixed reqid"),
369 sysctl_if_ipsec_reqid_base, 0, NULL, 0,
370 CTL_CREATE, CTL_EOL);
371 sysctl_createv(&if_ipsec_sysctl, 0, &node, NULL,
372 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
373 CTLTYPE_INT, "reqid_last",
374 SYSCTL_DESCR("last value of fixed reqid"),
375 sysctl_if_ipsec_reqid_last, 0, NULL, 0,
376 CTL_CREATE, CTL_EOL);
377
378 }
379
/*
 * Create the per-interface "pmtu" sysctl node below
 * net.interfaces.<ifname> and reset sc->ipsec_pmtu to
 * IPSEC_PMTU_SYSDEFAULT.  Does nothing unless INET6 is configured.
 *
 * clog: sysctl log the created nodes are recorded in.
 * sc:   softc of the interface; also stored as the node's data pointer.
 *
 * A failure to create the leaf is only logged.
 */
static void
if_ipsec_perif_sysctl_setup(struct sysctllog **clog, struct ipsec_softc *sc)
{
#ifdef INET6
	const struct sysctlnode *parent, *leaf;
	const char *xname = sc->ipsec_if.if_xname;
	int error;

	/*
	 * "net.interfaces" has already been created in
	 * sysctl_sndq_setup(); this call just looks it up.
	 */
	sysctl_createv(clog, 0, NULL, &parent,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "interfaces",
	    SYSCTL_DESCR("Per-interface controls"),
	    NULL, 0, NULL, 0,
	    CTL_NET, CTL_CREATE, CTL_EOL);

	/* Descend into (or create) the node named after this interface. */
	sysctl_createv(clog, 0, &parent, &parent,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, xname,
	    SYSCTL_DESCR("Interface controls"),
	    NULL, 0, NULL, 0,
	    CTL_CREATE, CTL_EOL);

	error = sysctl_createv(clog, 0, &parent, &leaf,
	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
	    CTLTYPE_INT, "pmtu",
	    SYSCTL_DESCR("Path MTU setting for this ipsec tunnel"),
	    sysctl_if_ipsec_pmtu_perif, 0, (void *)sc, 0,
	    CTL_CREATE, CTL_EOL);
	if (error != 0) {
		log(LOG_WARNING, "%s: could not attach sysctl node pmtu\n",
		    xname);
	}

	sc->ipsec_pmtu = IPSEC_PMTU_SYSDEFAULT;
#endif
}
417
418 /* ARGSUSED */
419 void
ipsecifattach(int count)420 ipsecifattach(int count)
421 {
422
423 mutex_init(&ipsec_softcs.lock, MUTEX_DEFAULT, IPL_NONE);
424 LIST_INIT(&ipsec_softcs.list);
425
426 iv_psref_class = psref_class_create("ipsecvar", IPL_SOFTNET);
427
428 if_ipsec_pktq_rps_hash_p = pktq_rps_hash_default;
429 if_ipsec_sysctl_setup();
430
431 if_clone_attach(&ipsec_cloner);
432 }
433
434 static int
if_ipsec_clone_create(struct if_clone * ifc,int unit)435 if_ipsec_clone_create(struct if_clone *ifc, int unit)
436 {
437 struct ipsec_softc *sc;
438 struct ipsec_variant *var;
439 struct ifnet *ifp;
440
441 sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
442
443 if_initname(&sc->ipsec_if, ifc->ifc_name, unit);
444
445 if_ipsec_attach0(sc);
446
447 ifp = &sc->ipsec_if;
448 if_ipsec_perif_sysctl_setup(&ifp->if_sysctl_log, sc);
449
450 var = kmem_zalloc(sizeof(*var), KM_SLEEP);
451 var->iv_softc = sc;
452 psref_target_init(&var->iv_psref, iv_psref_class);
453
454 sc->ipsec_var = var;
455 mutex_init(&sc->ipsec_lock, MUTEX_DEFAULT, IPL_NONE);
456 sc->ipsec_psz = pserialize_create();
457 sc->ipsec_ro_percpu = if_tunnel_alloc_ro_percpu();
458
459 mutex_enter(&ipsec_softcs.lock);
460 LIST_INSERT_HEAD(&ipsec_softcs.list, sc, ipsec_list);
461 mutex_exit(&ipsec_softcs.lock);
462 return 0;
463 }
464
465 static void
if_ipsec_attach0(struct ipsec_softc * sc)466 if_ipsec_attach0(struct ipsec_softc *sc)
467 {
468
469 sc->ipsec_if.if_addrlen = 0;
470 sc->ipsec_if.if_mtu = IPSEC_MTU;
471 sc->ipsec_if.if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
472 /* set ipsec(4) specific default flags. */
473 sc->ipsec_if.if_flags |= IFF_FWD_IPV6;
474 sc->ipsec_if.if_extflags = IFEF_MPSAFE;
475 sc->ipsec_if.if_ioctl = if_ipsec_ioctl;
476 sc->ipsec_if.if_output = if_ipsec_output;
477 sc->ipsec_if.if_type = IFT_IPSEC;
478 sc->ipsec_if.if_dlt = DLT_NULL;
479 sc->ipsec_if.if_softc = sc;
480 IFQ_SET_READY(&sc->ipsec_if.if_snd);
481 if_initialize(&sc->ipsec_if);
482 sc->ipsec_if.if_link_state = LINK_STATE_DOWN;
483 if_alloc_sadl(&sc->ipsec_if);
484 bpf_attach(&sc->ipsec_if, DLT_NULL, sizeof(u_int));
485 if_register(&sc->ipsec_if);
486 }
487
488 static int
if_ipsec_clone_destroy(struct ifnet * ifp)489 if_ipsec_clone_destroy(struct ifnet *ifp)
490 {
491 struct ipsec_softc *sc = ifp->if_softc;
492 struct ipsec_variant *var;
493 int bound;
494
495 mutex_enter(&ipsec_softcs.lock);
496 LIST_REMOVE(sc, ipsec_list);
497 mutex_exit(&ipsec_softcs.lock);
498
499 bound = curlwp_bind();
500 if_ipsec_delete_tunnel(&sc->ipsec_if);
501 curlwp_bindx(bound);
502
503 bpf_detach(ifp);
504 if_detach(ifp);
505
506 if_tunnel_free_ro_percpu(sc->ipsec_ro_percpu);
507
508 pserialize_destroy(sc->ipsec_psz);
509 mutex_destroy(&sc->ipsec_lock);
510
511 var = sc->ipsec_var;
512 kmem_free(var, sizeof(*var));
513 kmem_free(sc, sizeof(*sc));
514
515 return 0;
516 }
517
518 static inline bool
if_ipsec_nat_t(struct ipsec_softc * sc)519 if_ipsec_nat_t(struct ipsec_softc *sc)
520 {
521
522 return (sc->ipsec_if.if_flags & IFF_NAT_T) != 0;
523 }
524
525 static inline bool
if_ipsec_fwd_ipv6(struct ipsec_softc * sc)526 if_ipsec_fwd_ipv6(struct ipsec_softc *sc)
527 {
528
529 return (sc->ipsec_if.if_flags & IFF_FWD_IPV6) != 0;
530 }
531
532 int
if_ipsec_encap_func(struct mbuf * m,int off,int proto,void * arg)533 if_ipsec_encap_func(struct mbuf *m, int off, int proto, void *arg)
534 {
535 uint8_t v;
536 struct ipsec_softc *sc;
537 struct ipsec_variant *var = NULL;
538 struct psref psref;
539 int ret = 0;
540
541 sc = arg;
542 KASSERT(sc != NULL);
543
544 if ((sc->ipsec_if.if_flags & IFF_UP) == 0)
545 goto out;
546
547 var = if_ipsec_getref_variant(sc, &psref);
548 if (if_ipsec_variant_is_unconfigured(var))
549 goto out;
550
551 switch (proto) {
552 case IPPROTO_IPV4:
553 case IPPROTO_IPV6:
554 break;
555 default:
556 goto out;
557 }
558
559 m_copydata(m, 0, sizeof(v), &v);
560 v = (v >> 4) & 0xff; /* Get the IP version number. */
561
562 switch (v) {
563 #ifdef INET
564 case IPVERSION: {
565 struct ip ip;
566
567 if (m->m_pkthdr.len < sizeof(ip))
568 goto out;
569
570 m_copydata(m, 0, sizeof(ip), &ip);
571 if (var->iv_psrc->sa_family != AF_INET ||
572 var->iv_pdst->sa_family != AF_INET)
573 goto out;
574 ret = ipsecif4_encap_func(m, &ip, var);
575 break;
576 }
577 #endif
578 #ifdef INET6
579 case (IPV6_VERSION >> 4): {
580 struct ip6_hdr ip6;
581
582 if (m->m_pkthdr.len < sizeof(ip6))
583 goto out;
584
585 m_copydata(m, 0, sizeof(ip6), &ip6);
586 if (var->iv_psrc->sa_family != AF_INET6 ||
587 var->iv_pdst->sa_family != AF_INET6)
588 goto out;
589 ret = ipsecif6_encap_func(m, &ip6, var);
590 break;
591 }
592 #endif
593 default:
594 goto out;
595 }
596
597 out:
598 if (var != NULL)
599 if_ipsec_putref_variant(var, &psref);
600 return ret;
601 }
602
603 /*
604 * ipsec(4) I/F may cause infinite recursion calls when misconfigured.
605 * We'll prevent this by introducing upper limit.
606 */
607 static int
if_ipsec_check_nesting(struct ifnet * ifp,struct mbuf * m)608 if_ipsec_check_nesting(struct ifnet *ifp, struct mbuf *m)
609 {
610
611 return if_tunnel_check_nesting(ifp, m, max_ipsec_nesting);
612 }
613
614 int
if_ipsec_output(struct ifnet * ifp,struct mbuf * m,const struct sockaddr * dst,const struct rtentry * rt)615 if_ipsec_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
616 const struct rtentry *rt)
617 {
618 struct ipsec_softc *sc = ifp->if_softc;
619 struct ipsec_variant *var;
620 struct psref psref;
621 int error;
622 int bound;
623
624 IFQ_CLASSIFY(&ifp->if_snd, m, dst->sa_family);
625
626 error = if_ipsec_check_nesting(ifp, m);
627 if (error) {
628 m_freem(m);
629 goto noref_end;
630 }
631
632 if ((ifp->if_flags & IFF_UP) == 0) {
633 m_freem(m);
634 error = ENETDOWN;
635 goto noref_end;
636 }
637
638
639 bound = curlwp_bind();
640 var = if_ipsec_getref_variant(sc, &psref);
641 if (if_ipsec_variant_is_unconfigured(var)) {
642 m_freem(m);
643 error = ENETDOWN;
644 goto end;
645 }
646
647 m->m_flags &= ~(M_BCAST|M_MCAST);
648
649 /* use DLT_NULL encapsulation here to pass inner af type */
650 M_PREPEND(m, sizeof(int), M_DONTWAIT);
651 if (!m) {
652 error = ENOBUFS;
653 goto end;
654 }
655 *mtod(m, int *) = dst->sa_family;
656
657 #if INET6
658 /* drop IPv6 packet if IFF_FWD_IPV6 is not set */
659 if (dst->sa_family == AF_INET6 &&
660 !if_ipsec_fwd_ipv6(sc)) {
661 /*
662 * IPv6 packet is not allowed to forward,that is not error.
663 */
664 error = 0;
665 IF_DROP(&ifp->if_snd);
666 m_freem(m);
667 goto end;
668 }
669 #endif
670
671 error = if_ipsec_out_direct(var, m, dst->sa_family);
672
673 end:
674 if_ipsec_putref_variant(var, &psref);
675 curlwp_bindx(bound);
676 noref_end:
677 if (error)
678 if_statinc(ifp, if_oerrors);
679
680 return error;
681 }
682
683 static inline int
if_ipsec_out_direct(struct ipsec_variant * var,struct mbuf * m,int family)684 if_ipsec_out_direct(struct ipsec_variant *var, struct mbuf *m, int family)
685 {
686 struct ifnet *ifp = &var->iv_softc->ipsec_if;
687 int error;
688 int len;
689
690 KASSERT(if_ipsec_heldref_variant(var));
691 KASSERT(var->iv_output != NULL);
692
693 len = m->m_pkthdr.len;
694
695 /* input DLT_NULL frame to BPF */
696 bpf_mtap(ifp, m, BPF_D_OUT);
697
698 /* grab and chop off inner af type */
699 /* XXX need pullup? */
700 m_adj(m, sizeof(int));
701
702 error = var->iv_output(var, family, m);
703 if (error)
704 return error;
705
706 if_statadd2(ifp, if_opackets, 1, if_obytes, len);
707
708 return 0;
709 }
710
711 void
if_ipsec_input(struct mbuf * m,int af,struct ifnet * ifp)712 if_ipsec_input(struct mbuf *m, int af, struct ifnet *ifp)
713 {
714
715 KASSERT(ifp != NULL);
716
717 m_set_rcvif(m, ifp);
718
719 bpf_mtap_af(ifp, af, m, BPF_D_IN);
720
721 if_ipsec_in_enqueue(m, af, ifp);
722
723 return;
724 }
725
726 static inline void
if_ipsec_in_enqueue(struct mbuf * m,int af,struct ifnet * ifp)727 if_ipsec_in_enqueue(struct mbuf *m, int af, struct ifnet *ifp)
728 {
729 pktqueue_t *pktq;
730 int pktlen;
731
732 /*
733 * Put the packet to the network layer input queue according to the
734 * specified address family.
735 */
736 switch (af) {
737 #ifdef INET
738 case AF_INET:
739 pktq = ip_pktq;
740 break;
741 #endif
742 #ifdef INET6
743 case AF_INET6:
744 pktq = ip6_pktq;
745 break;
746 #endif
747 default:
748 if_statinc(ifp, if_ierrors);
749 m_freem(m);
750 return;
751 }
752
753 const uint32_t h = pktq_rps_hash(&if_ipsec_pktq_rps_hash_p, m);
754 pktlen = m->m_pkthdr.len;
755 if (__predict_true(pktq_enqueue(pktq, m, h))) {
756 if_statadd2(ifp, if_ibytes, pktlen, if_ipackets, 1);
757 } else {
758 if_statinc(ifp, if_iqdrops);
759 m_freem(m);
760 }
761
762 return;
763 }
764
765 static inline int
if_ipsec_check_salen(struct sockaddr * addr)766 if_ipsec_check_salen(struct sockaddr *addr)
767 {
768
769 switch (addr->sa_family) {
770 #ifdef INET
771 case AF_INET:
772 if (addr->sa_len != sizeof(struct sockaddr_in))
773 return EINVAL;
774 break;
775 #endif /* INET */
776 #ifdef INET6
777 case AF_INET6:
778 if (addr->sa_len != sizeof(struct sockaddr_in6))
779 return EINVAL;
780 break;
781 #endif /* INET6 */
782 default:
783 return EAFNOSUPPORT;
784 }
785
786 return 0;
787 }
788
/*
 * ioctl handler of ipsec(4).
 *
 * ifp:  the ipsec(4) interface.
 * cmd:  ioctl command.
 * data: command argument; its type depends on cmd.
 *
 * Handles interface address initialization, multicast add/delete, MTU
 * changes, and setting, deleting and reading the tunnel's physical
 * (outer) addresses.  Anything else goes to ifioctl_common(), after
 * which if_ipsec_ensure_flags() is called with the flags the interface
 * had on entry.
 *
 * Returns 0 or an errno value.  Paths that hold a variant reference or
 * have bound the LWP leave through "bad" on error, which releases both.
 */
/* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
int
if_ipsec_ioctl(struct ifnet *ifp, u_long cmd, void *data)
{
	struct ipsec_softc *sc = ifp->if_softc;
	struct ipsec_variant *var = NULL;
	struct ifreq *ifr = (struct ifreq*)data;
	struct ifaddr *ifa = (struct ifaddr*)data;
	int error = 0, size;
	struct sockaddr *dst, *src;
	u_long mtu;
	/* flags on entry, compared against later by if_ipsec_ensure_flags() */
	u_short oflags = ifp->if_flags;
	int bound;
	struct psref psref;

	switch (cmd) {
	case SIOCINITIFADDR:
		/* Assigning an address also brings the interface up. */
		ifp->if_flags |= IFF_UP;
		ifa->ifa_rtrequest = p2p_rtrequest;
		break;

	case SIOCSIFDSTADDR:
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		switch (ifr->ifr_addr.sa_family) {
#ifdef INET
		case AF_INET:	/* IP supports Multicast */
			break;
#endif /* INET */
#ifdef INET6
		case AF_INET6:	/* IP6 supports Multicast */
			break;
#endif /* INET6 */
		default:  /* Other protocols don't support Multicast */
			error = EAFNOSUPPORT;
			break;
		}
		break;

	case SIOCSIFMTU:
		/* Range-check here; ENETRESET from the common code is not an error. */
		mtu = ifr->ifr_mtu;
		if (mtu < IPSEC_MTU_MIN || mtu > IPSEC_MTU_MAX)
			return EINVAL;
		else if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
			error = 0;
		break;

#ifdef INET
	case SIOCSIFPHYADDR:
#endif
#ifdef INET6
	case SIOCSIFPHYADDR_IN6:
#endif /* INET6 */
	case SIOCSLIFPHYADDR:
		/* Locate the src/dst sockaddrs inside the command's argument. */
		switch (cmd) {
#ifdef INET
		case SIOCSIFPHYADDR:
			src = (struct sockaddr *)
				&(((struct in_aliasreq *)data)->ifra_addr);
			dst = (struct sockaddr *)
				&(((struct in_aliasreq *)data)->ifra_dstaddr);
			break;
#endif /* INET */
#ifdef INET6
		case SIOCSIFPHYADDR_IN6:
			src = (struct sockaddr *)
				&(((struct in6_aliasreq *)data)->ifra_addr);
			dst = (struct sockaddr *)
				&(((struct in6_aliasreq *)data)->ifra_dstaddr);
			break;
#endif /* INET6 */
		case SIOCSLIFPHYADDR:
			src = (struct sockaddr *)
				&(((struct if_laddrreq *)data)->addr);
			dst = (struct sockaddr *)
				&(((struct if_laddrreq *)data)->dstaddr);
			break;
		default:
			return EINVAL;
		}

		/* sa_family must be equal */
		if (src->sa_family != dst->sa_family)
			return EINVAL;

		/* sa_len must match the family; also rejects unknown families */
		error = if_ipsec_check_salen(src);
		if (error)
			return error;
		error = if_ipsec_check_salen(dst);
		if (error)
			return error;

		/* check sa_family looks sane for the cmd */
		switch (cmd) {
#ifdef INET
		case SIOCSIFPHYADDR:
			if (src->sa_family == AF_INET)
				break;
			return EAFNOSUPPORT;
#endif /* INET */
#ifdef INET6
		case SIOCSIFPHYADDR_IN6:
			if (src->sa_family == AF_INET6)
				break;
			return EAFNOSUPPORT;
#endif /* INET6 */
		case SIOCSLIFPHYADDR:
			/* checks done in the above */
			break;
		}
		/*
		 * calls if_ipsec_getref_variant() for other softcs to check
		 * address pair duplication
		 */
		bound = curlwp_bind();
		error = if_ipsec_set_tunnel(&sc->ipsec_if, src, dst);
		if (error)
			goto bad;
		/* The tunnel is configured now; report the link as up. */
		if_link_state_change(&sc->ipsec_if, LINK_STATE_UP);
		curlwp_bindx(bound);
		break;

	case SIOCDIFPHYADDR:
		bound = curlwp_bind();
		if_ipsec_delete_tunnel(&sc->ipsec_if);
		if_link_state_change(&sc->ipsec_if, LINK_STATE_DOWN);
		curlwp_bindx(bound);
		break;

	case SIOCGIFPSRCADDR:
#ifdef INET6
	case SIOCGIFPSRCADDR_IN6:
#endif /* INET6 */
		bound = curlwp_bind();
		var = if_ipsec_getref_variant(sc, &psref);
		if (var->iv_psrc == NULL) {
			error = EADDRNOTAVAIL;
			goto bad;
		}
		src = var->iv_psrc;
		/* Pick the output buffer (and its size) for this command. */
		switch (cmd) {
#ifdef INET
		case SIOCGIFPSRCADDR:
			dst = &ifr->ifr_addr;
			size = sizeof(ifr->ifr_addr);
			break;
#endif /* INET */
#ifdef INET6
		case SIOCGIFPSRCADDR_IN6:
			dst = (struct sockaddr *)
				&(((struct in6_ifreq *)data)->ifr_addr);
			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
			break;
#endif /* INET6 */
		default:
			error = EADDRNOTAVAIL;
			goto bad;
		}
		/* The stored address must fit into the caller's buffer. */
		if (src->sa_len > size) {
			error = EINVAL;
			goto bad;
		}
		error = IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, dst);
		if (error)
			goto bad;
		if_ipsec_putref_variant(var, &psref);
		curlwp_bindx(bound);
		break;

	case SIOCGIFPDSTADDR:
#ifdef INET6
	case SIOCGIFPDSTADDR_IN6:
#endif /* INET6 */
		bound = curlwp_bind();
		var = if_ipsec_getref_variant(sc, &psref);
		if (var->iv_pdst == NULL) {
			error = EADDRNOTAVAIL;
			goto bad;
		}
		/* "src" is the address being copied out, i.e. the tunnel dst. */
		src = var->iv_pdst;
		switch (cmd) {
#ifdef INET
		case SIOCGIFPDSTADDR:
			dst = &ifr->ifr_addr;
			size = sizeof(ifr->ifr_addr);
			break;
#endif /* INET */
#ifdef INET6
		case SIOCGIFPDSTADDR_IN6:
			dst = (struct sockaddr *)
				&(((struct in6_ifreq *)data)->ifr_addr);
			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
			break;
#endif /* INET6 */
		default:
			error = EADDRNOTAVAIL;
			goto bad;
		}
		/* The stored address must fit into the caller's buffer. */
		if (src->sa_len > size) {
			error = EINVAL;
			goto bad;
		}
		error = IF_IPSEC_GATHER_PDST_ADDR_PORT(var, dst);
		if (error)
			goto bad;
		if_ipsec_putref_variant(var, &psref);
		curlwp_bindx(bound);
		break;

	case SIOCGLIFPHYADDR:
		bound = curlwp_bind();
		var = if_ipsec_getref_variant(sc, &psref);
		if (if_ipsec_variant_is_unconfigured(var)) {
			error = EADDRNOTAVAIL;
			goto bad;
		}

		/* copy src */
		src = var->iv_psrc;
		dst = (struct sockaddr *)
			&(((struct if_laddrreq *)data)->addr);
		size = sizeof(((struct if_laddrreq *)data)->addr);
		if (src->sa_len > size) {
			error = EINVAL;
			goto bad;
		}
		error = IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, dst);
		if (error)
			goto bad;

		/* copy dst */
		src = var->iv_pdst;
		dst = (struct sockaddr *)
			&(((struct if_laddrreq *)data)->dstaddr);
		size = sizeof(((struct if_laddrreq *)data)->dstaddr);
		if (src->sa_len > size) {
			error = EINVAL;
			goto bad;
		}
		error = IF_IPSEC_GATHER_PDST_ADDR_PORT(var, dst);
		if (error)
			goto bad;
		if_ipsec_putref_variant(var, &psref);
		curlwp_bindx(bound);
		break;

	default:
		error = ifioctl_common(ifp, cmd, data);
		if (!error) {
			/* Let the driver react to flag changes made by the common code. */
			bound = curlwp_bind();
			error = if_ipsec_ensure_flags(&sc->ipsec_if, oflags);
			if (error)
				goto bad;
			curlwp_bindx(bound);
		}
		break;
	}
	return error;

bad:
	/* Error exit for paths that bound the LWP; var is NULL if no reference is held. */
	if (var != NULL)
		if_ipsec_putref_variant(var, &psref);
	curlwp_bindx(bound);

	return error;
}
1057
/*
 * Per-address-family operation table: one function pointer for each
 * configured family, each taking the variant to operate on and
 * returning an errno value.
 */
struct encap_funcs {
#ifdef INET
	int (*ef_inet)(struct ipsec_variant *);		/* AF_INET */
#endif
#ifdef INET6
	int (*ef_inet6)(struct ipsec_variant *);	/* AF_INET6 */
#endif
};
1066
1067 static struct encap_funcs ipsec_encap_attach = {
1068 #ifdef INET
1069 .ef_inet = ipsecif4_attach,
1070 #endif
1071 #ifdef INET6
1072 .ef_inet6 = &ipsecif6_attach,
1073 #endif
1074 };
1075
1076 static struct encap_funcs ipsec_encap_detach = {
1077 #ifdef INET
1078 .ef_inet = ipsecif4_detach,
1079 #endif
1080 #ifdef INET6
1081 .ef_inet6 = &ipsecif6_detach,
1082 #endif
1083 };
1084
1085 static int
if_ipsec_encap_common(struct ipsec_variant * var,struct encap_funcs * funcs)1086 if_ipsec_encap_common(struct ipsec_variant *var, struct encap_funcs *funcs)
1087 {
1088 int error;
1089
1090 KASSERT(var != NULL);
1091 KASSERT(if_ipsec_variant_is_configured(var));
1092
1093 switch (var->iv_psrc->sa_family) {
1094 #ifdef INET
1095 case AF_INET:
1096 error = (funcs->ef_inet)(var);
1097 break;
1098 #endif /* INET */
1099 #ifdef INET6
1100 case AF_INET6:
1101 error = (funcs->ef_inet6)(var);
1102 break;
1103 #endif /* INET6 */
1104 default:
1105 error = EINVAL;
1106 break;
1107 }
1108
1109 return error;
1110 }
1111
1112 static int
if_ipsec_encap_attach(struct ipsec_variant * var)1113 if_ipsec_encap_attach(struct ipsec_variant *var)
1114 {
1115
1116 return if_ipsec_encap_common(var, &ipsec_encap_attach);
1117 }
1118
1119 static int
if_ipsec_encap_detach(struct ipsec_variant * var)1120 if_ipsec_encap_detach(struct ipsec_variant *var)
1121 {
1122
1123 return if_ipsec_encap_common(var, &ipsec_encap_detach);
1124 }
1125
/*
 * Validate and set ipsec(4) I/F configurations.
 *     (1) validate
 *         (1-1) Check whether the argument src and dst address pair would
 *               change the configuration from the current src and dst
 *               address pair.
 *         (1-2) Check whether any other ipsec(4) I/F already uses the same
 *               src and dst address pair as the arguments, except for
 *               NAT-T shared tunnels.
 *     (2) set
 *         (2-1) Create a variant for the new configuration.
 *         (2-2) Create a temporary "null" variant, used to avoid accessing
 *               a dangling variant while SPs are deleted and added.
 *         (2-3) Swap the variant, including its SPs.
 *         (2-4) Clean up the previous configuration.
 */
1141 static int
if_ipsec_set_tunnel(struct ifnet * ifp,struct sockaddr * src,struct sockaddr * dst)1142 if_ipsec_set_tunnel(struct ifnet *ifp,
1143 struct sockaddr *src, struct sockaddr *dst)
1144 {
1145 struct ipsec_softc *sc = ifp->if_softc;
1146 struct ipsec_softc *sc2;
1147 struct ipsec_variant *ovar, *nvar, *nullvar;
1148 struct sockaddr *osrc, *odst;
1149 struct sockaddr *nsrc, *ndst;
1150 in_port_t nsport = 0, ndport = 0;
1151 int error;
1152
1153 error = encap_lock_enter();
1154 if (error)
1155 return error;
1156
1157 nsrc = sockaddr_dup(src, M_WAITOK);
1158 ndst = sockaddr_dup(dst, M_WAITOK);
1159 nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
1160 nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
1161
1162 mutex_enter(&sc->ipsec_lock);
1163
1164 ovar = sc->ipsec_var;
1165
1166 switch(nsrc->sa_family) {
1167 #ifdef INET
1168 case AF_INET:
1169 nsport = satosin(src)->sin_port;
1170 /*
1171 * avoid confuse SP when NAT-T disabled,
1172 * e.g.
1173 * expected: 10.0.1.2[any] 10.0.1.1[any] 4(ipv4)
1174 * confuse : 10.0.1.2[600] 10.0.1.1[600] 4(ipv4)
1175 */
1176 satosin(nsrc)->sin_port = 0;
1177 ndport = satosin(dst)->sin_port;
1178 satosin(ndst)->sin_port = 0;
1179 break;
1180 #endif /* INET */
1181 #ifdef INET6
1182 case AF_INET6:
1183 nsport = satosin6(src)->sin6_port;
1184 satosin6(nsrc)->sin6_port = 0;
1185 ndport = satosin6(dst)->sin6_port;
1186 satosin6(ndst)->sin6_port = 0;
1187 break;
1188 #endif /* INET6 */
1189 default:
1190 log(LOG_DEBUG,
1191 "%s: Invalid address family: %d.\n",
1192 __func__, src->sa_family);
1193 error = EINVAL;
1194 goto out;
1195 }
1196
1197 /*
1198 * (1-1) Check the argument src and dst address pair will change
1199 * configuration from current src and dst address pair.
1200 */
1201 if ((ovar->iv_pdst && sockaddr_cmp(ovar->iv_pdst, dst) == 0) &&
1202 (ovar->iv_psrc && sockaddr_cmp(ovar->iv_psrc, src) == 0) &&
1203 (ovar->iv_sport == nsport && ovar->iv_dport == ndport)) {
1204 /* address and port pair not changed. */
1205 error = 0;
1206 goto out;
1207 }
1208
1209 /*
1210 * (1-2) Check any ipsec(4) I/F uses duplicated src and dst address pair
1211 * with argument src and dst address pair, except for NAT-T shared
1212 * tunnels.
1213 */
1214 mutex_enter(&ipsec_softcs.lock);
1215 LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
1216 struct ipsec_variant *var2;
1217 struct psref psref;
1218
1219 if (sc2 == sc)
1220 continue;
1221 var2 = if_ipsec_getref_variant(sc2, &psref);
1222 if (if_ipsec_variant_is_unconfigured(var2)) {
1223 if_ipsec_putref_variant(var2, &psref);
1224 continue;
1225 }
1226 if (if_ipsec_nat_t(sc) || if_ipsec_nat_t(sc2)) {
1227 if_ipsec_putref_variant(var2, &psref);
1228 continue; /* NAT-T shared tunnel */
1229 }
1230 if (sockaddr_cmp(var2->iv_pdst, dst) == 0 &&
1231 sockaddr_cmp(var2->iv_psrc, src) == 0) {
1232 if_ipsec_putref_variant(var2, &psref);
1233 mutex_exit(&ipsec_softcs.lock);
1234 error = EADDRNOTAVAIL;
1235 goto out;
1236 }
1237
1238 if_ipsec_putref_variant(var2, &psref);
1239 /* XXX both end must be valid? (I mean, not 0.0.0.0) */
1240 }
1241 mutex_exit(&ipsec_softcs.lock);
1242
1243
1244 osrc = ovar->iv_psrc;
1245 odst = ovar->iv_pdst;
1246
1247 /*
1248 * (2-1) Create ipsec_variant for new configuration.
1249 */
1250 if_ipsec_copy_variant(nvar, ovar);
1251 nvar->iv_psrc = nsrc;
1252 nvar->iv_pdst = ndst;
1253 nvar->iv_sport = nsport;
1254 nvar->iv_dport = ndport;
1255 nvar->iv_encap_cookie4 = NULL;
1256 nvar->iv_encap_cookie6 = NULL;
1257 psref_target_init(&nvar->iv_psref, iv_psref_class);
1258 error = if_ipsec_encap_attach(nvar);
1259 if (error)
1260 goto out;
1261
1262 /*
1263 * (2-2) Create temporary "null" variant.
1264 */
1265 if_ipsec_copy_variant(nullvar, ovar);
1266 if_ipsec_clear_config(nullvar);
1267 psref_target_init(&nullvar->iv_psref, iv_psref_class);
1268 /*
1269 * (2-3) Swap variant include its SPs.
1270 */
1271 error = if_ipsec_update_variant(sc, nvar, nullvar);
1272 if (error) {
1273 if_ipsec_encap_detach(nvar);
1274 goto out;
1275 }
1276
1277 mutex_exit(&sc->ipsec_lock);
1278
1279 /*
1280 * (2-4) Cleanup last configurations.
1281 */
1282 if (if_ipsec_variant_is_configured(ovar))
1283 if_ipsec_encap_detach(ovar);
1284 encap_lock_exit();
1285
1286 if (osrc != NULL)
1287 sockaddr_free(osrc);
1288 if (odst != NULL)
1289 sockaddr_free(odst);
1290 kmem_free(ovar, sizeof(*ovar));
1291 kmem_free(nullvar, sizeof(*nullvar));
1292
1293 return 0;
1294
1295 out:
1296 mutex_exit(&sc->ipsec_lock);
1297 encap_lock_exit();
1298
1299 sockaddr_free(nsrc);
1300 sockaddr_free(ndst);
1301 kmem_free(nvar, sizeof(*nvar));
1302 kmem_free(nullvar, sizeof(*nullvar));
1303
1304 return error;
1305 }
1306
1307 /*
1308 * Validate and delete ipsec(4) I/F configurations.
1309 * (1) validate
1310 * (1-1) Check current src and dst address pair are null,
1311 * which means the ipsec(4) I/F is already done deletetunnel.
1312 * (2) delete
1313 * (2-1) Create variant for deleted status.
1314 * (2-2) Create temporary "null" variant used to avoid to access
1315 * dangling variant while SPs are deleted and added.
1316 * NOTE:
1317 * The contents of temporary "null" variant equal to the variant
1318 * of (2-1), however two psref_target_destroy() synchronization
1319 * points are necessary to avoid to access dangling variant
1320 * while SPs are deleted and added. To implement that simply,
1321 * we use the same manner as if_ipsec_set_tunnel(), that is,
1322 * create extra "null" variant and use it temporarily.
1323 * (2-3) Swap variant include its SPs.
1324 * (2-4) Cleanup last configurations.
1325 */
1326 static void
if_ipsec_delete_tunnel(struct ifnet * ifp)1327 if_ipsec_delete_tunnel(struct ifnet *ifp)
1328 {
1329 struct ipsec_softc *sc = ifp->if_softc;
1330 struct ipsec_variant *ovar, *nvar, *nullvar;
1331 struct sockaddr *osrc, *odst;
1332 int error;
1333
1334 error = encap_lock_enter();
1335 if (error)
1336 return;
1337
1338 nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
1339 nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
1340
1341 mutex_enter(&sc->ipsec_lock);
1342
1343 ovar = sc->ipsec_var;
1344 osrc = ovar->iv_psrc;
1345 odst = ovar->iv_pdst;
1346 /*
1347 * (1-1) Check current src and dst address pair are null,
1348 * which means the ipsec(4) I/F is already done deletetunnel.
1349 */
1350 if (osrc == NULL || odst == NULL) {
1351 /* address pair not changed. */
1352 mutex_exit(&sc->ipsec_lock);
1353 encap_lock_exit();
1354 kmem_free(nvar, sizeof(*nvar));
1355 kmem_free(nullvar, sizeof(*nullvar));
1356 return;
1357 }
1358
1359 /*
1360 * (2-1) Create variant for deleted status.
1361 */
1362 if_ipsec_copy_variant(nvar, ovar);
1363 if_ipsec_clear_config(nvar);
1364 psref_target_init(&nvar->iv_psref, iv_psref_class);
1365
1366 /*
1367 * (2-2) Create temporary "null" variant used to avoid to access
1368 * dangling variant while SPs are deleted and added.
1369 */
1370 if_ipsec_copy_variant(nullvar, ovar);
1371 if_ipsec_clear_config(nullvar);
1372 psref_target_init(&nullvar->iv_psref, iv_psref_class);
1373 /*
1374 * (2-3) Swap variant include its SPs.
1375 */
1376 /* if_ipsec_update_variant() does not fail when delete SP only. */
1377 (void)if_ipsec_update_variant(sc, nvar, nullvar);
1378
1379 mutex_exit(&sc->ipsec_lock);
1380
1381 /*
1382 * (2-4) Cleanup last configurations.
1383 */
1384 if (if_ipsec_variant_is_configured(ovar))
1385 if_ipsec_encap_detach(ovar);
1386 encap_lock_exit();
1387
1388 sockaddr_free(osrc);
1389 sockaddr_free(odst);
1390 kmem_free(ovar, sizeof(*ovar));
1391 kmem_free(nullvar, sizeof(*nullvar));
1392 }
1393
1394 /*
1395 * Check IFF_NAT_T and IFF_FWD_IPV6 flags, therefore update SPs if needed.
1396 * (1) check
1397 * (1-1) Check flags are changed.
1398 * (1-2) Check current src and dst address pair. If they are null,
1399 * that means the ipsec(4) I/F is deletetunnel'ed, so it is
1400 * not needed to update.
1401 * (2) update
1402 * (2-1) Create variant for new SPs.
1403 * (2-2) Create temporary "null" variant used to avoid to access
1404 * dangling variant while SPs are deleted and added.
1405 * NOTE:
1406 * There is the same problem as if_ipsec_delete_tunnel().
1407 * (2-3) Swap variant include its SPs.
1408 * (2-4) Cleanup unused configurations.
1409 * NOTE: use the same encap_cookies.
1410 */
1411 static int
if_ipsec_ensure_flags(struct ifnet * ifp,u_short oflags)1412 if_ipsec_ensure_flags(struct ifnet *ifp, u_short oflags)
1413 {
1414 struct ipsec_softc *sc = ifp->if_softc;
1415 struct ipsec_variant *ovar, *nvar, *nullvar;
1416 int error;
1417
1418 /*
1419 * (1) Check flags are changed.
1420 */
1421 if ((oflags & (IFF_NAT_T|IFF_FWD_IPV6)) ==
1422 (ifp->if_flags & (IFF_NAT_T|IFF_FWD_IPV6)))
1423 return 0; /* flags not changed. */
1424
1425 error = encap_lock_enter();
1426 if (error)
1427 return error;
1428
1429 nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
1430 nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
1431
1432 mutex_enter(&sc->ipsec_lock);
1433
1434 ovar = sc->ipsec_var;
1435 /*
1436 * (1-2) Check current src and dst address pair.
1437 */
1438 if (if_ipsec_variant_is_unconfigured(ovar)) {
1439 /* nothing to do */
1440 mutex_exit(&sc->ipsec_lock);
1441 encap_lock_exit();
1442 kmem_free(nvar, sizeof(*nvar));
1443 kmem_free(nullvar, sizeof(*nullvar));
1444 return 0;
1445 }
1446
1447 /*
1448 * (2-1) Create variant for new SPs.
1449 */
1450 if_ipsec_copy_variant(nvar, ovar);
1451 psref_target_init(&nvar->iv_psref, iv_psref_class);
1452 /*
1453 * (2-2) Create temporary "null" variant used to avoid to access
1454 * dangling variant while SPs are deleted and added.
1455 */
1456 if_ipsec_copy_variant(nullvar, ovar);
1457 if_ipsec_clear_config(nullvar);
1458 psref_target_init(&nullvar->iv_psref, iv_psref_class);
1459 /*
1460 * (2-3) Swap variant include its SPs.
1461 */
1462 error = if_ipsec_update_variant(sc, nvar, nullvar);
1463
1464 mutex_exit(&sc->ipsec_lock);
1465 encap_lock_exit();
1466
1467 /*
1468 * (2-4) Cleanup unused configurations.
1469 */
1470 if (!error)
1471 kmem_free(ovar, sizeof(*ovar));
1472 else
1473 kmem_free(nvar, sizeof(*ovar));
1474 kmem_free(nullvar, sizeof(*nullvar));
1475
1476 return error;
1477 }
1478
1479 /*
1480 * SPD management
1481 */
1482
1483 /*
1484 * Share SP set with other NAT-T ipsec(4) I/F(s).
1485 * Return 1, when "var" shares SP set.
1486 * Return 0, when "var" cannot share SP set.
1487 *
1488 * NOTE:
1489 * if_ipsec_share_sp() and if_ipsec_unshare_sp() would require global lock
1490 * to exclude other ipsec(4) I/Fs set_tunnel/delete_tunnel. E.g. when ipsec0
1491 * and ipsec1 can share SP set, running ipsec0's set_tunnel and ipsec1's
1492 * set_tunnel causes race.
1493 * Currently, (fortunately) encap_lock works as this global lock.
1494 */
1495 static int
if_ipsec_share_sp(struct ipsec_variant * var)1496 if_ipsec_share_sp(struct ipsec_variant *var)
1497 {
1498 struct ipsec_softc *sc = var->iv_softc;
1499 struct ipsec_softc *sc2;
1500 struct ipsec_variant *var2;
1501 struct psref psref;
1502
1503 KASSERT(encap_lock_held());
1504 KASSERT(var->iv_psrc != NULL && var->iv_pdst != NULL);
1505
1506 mutex_enter(&ipsec_softcs.lock);
1507 LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
1508 if (sc2 == sc)
1509 continue;
1510 var2 = if_ipsec_getref_variant(sc2, &psref);
1511 if (if_ipsec_variant_is_unconfigured(var2)) {
1512 if_ipsec_putref_variant(var2, &psref);
1513 continue;
1514 }
1515 if (sockaddr_cmp(var2->iv_pdst, var->iv_pdst) != 0 ||
1516 sockaddr_cmp(var2->iv_psrc, var->iv_psrc) != 0) {
1517 if_ipsec_putref_variant(var2, &psref);
1518 continue;
1519 }
1520
1521 break;
1522 }
1523 mutex_exit(&ipsec_softcs.lock);
1524 if (sc2 == NULL)
1525 return 0; /* not shared */
1526
1527 IV_SP_IN(var) = IV_SP_IN(var2);
1528 IV_SP_IN6(var) = IV_SP_IN6(var2);
1529 IV_SP_OUT(var) = IV_SP_OUT(var2);
1530 IV_SP_OUT6(var) = IV_SP_OUT6(var2);
1531
1532 if_ipsec_putref_variant(var2, &psref);
1533 return 1; /* shared */
1534 }
1535
1536 /*
1537 * Unshare SP set with other NAT-T ipsec(4) I/F(s).
1538 * Return 1, when "var" shared SP set, and then unshare them.
1539 * Return 0, when "var" did not share SP set.
1540 *
1541 * NOTE:
1542 * See if_ipsec_share_sp()'s note.
1543 */
1544 static int
if_ipsec_unshare_sp(struct ipsec_variant * var)1545 if_ipsec_unshare_sp(struct ipsec_variant *var)
1546 {
1547 struct ipsec_softc *sc = var->iv_softc;
1548 struct ipsec_softc *sc2;
1549 struct ipsec_variant *var2;
1550 struct psref psref;
1551
1552 KASSERT(encap_lock_held());
1553
1554 if (!var->iv_pdst || !var->iv_psrc)
1555 return 0;
1556
1557 mutex_enter(&ipsec_softcs.lock);
1558 LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
1559 if (sc2 == sc)
1560 continue;
1561 var2 = if_ipsec_getref_variant(sc2, &psref);
1562 if (!var2->iv_pdst || !var2->iv_psrc) {
1563 if_ipsec_putref_variant(var2, &psref);
1564 continue;
1565 }
1566 if (sockaddr_cmp(var2->iv_pdst, var->iv_pdst) != 0 ||
1567 sockaddr_cmp(var2->iv_psrc, var->iv_psrc) != 0) {
1568 if_ipsec_putref_variant(var2, &psref);
1569 continue;
1570 }
1571
1572 break;
1573 }
1574 mutex_exit(&ipsec_softcs.lock);
1575 if (sc2 == NULL)
1576 return 0; /* not shared */
1577
1578 IV_SP_IN(var) = NULL;
1579 IV_SP_IN6(var) = NULL;
1580 IV_SP_OUT(var) = NULL;
1581 IV_SP_OUT6(var) = NULL;
1582 if_ipsec_putref_variant(var2, &psref);
1583 return 1; /* shared */
1584 }
1585
1586 static inline void
if_ipsec_add_mbuf_optalign(struct mbuf * m0,void * data,size_t len,bool align)1587 if_ipsec_add_mbuf_optalign(struct mbuf *m0, void *data, size_t len, bool align)
1588 {
1589 struct mbuf *m;
1590
1591 MGET(m, M_WAIT, MT_DATA);
1592 if (align) {
1593 m->m_len = PFKEY_ALIGN8(len);
1594 memset(mtod(m, void *), 0, m->m_len);
1595 } else
1596 m->m_len = len;
1597 m_copyback(m, 0, len, data);
1598 m_cat(m0, m);
1599 }
1600
1601 static inline void
if_ipsec_add_mbuf(struct mbuf * m0,void * data,size_t len)1602 if_ipsec_add_mbuf(struct mbuf *m0, void *data, size_t len)
1603 {
1604
1605 if_ipsec_add_mbuf_optalign(m0, data, len, true);
1606 }
1607
1608 static inline void
if_ipsec_add_mbuf_addr_port(struct mbuf * m0,struct sockaddr * addr,in_port_t port,bool align)1609 if_ipsec_add_mbuf_addr_port(struct mbuf *m0, struct sockaddr *addr, in_port_t port, bool align)
1610 {
1611
1612 if (port == 0) {
1613 if_ipsec_add_mbuf_optalign(m0, addr, addr->sa_len, align);
1614 } else {
1615 union sockaddr_union addrport_u;
1616 struct sockaddr *addrport = &addrport_u.sa;
1617
1618 if_ipsec_set_addr_port(addrport, addr, port);
1619 if_ipsec_add_mbuf_optalign(m0, addrport, addrport->sa_len, align);
1620 }
1621 }
1622
1623 static inline void
if_ipsec_add_pad(struct mbuf * m0,size_t len)1624 if_ipsec_add_pad(struct mbuf *m0, size_t len)
1625 {
1626 struct mbuf *m;
1627
1628 if (len == 0)
1629 return;
1630
1631 MGET(m, M_WAIT, MT_DATA);
1632 m->m_len = len;
1633 memset(mtod(m, void *), 0, m->m_len);
1634 m_cat(m0, m);
1635 }
1636
1637 static inline size_t
if_ipsec_set_sadb_addr(struct sadb_address * saaddr,struct sockaddr * addr,int proto,uint16_t exttype)1638 if_ipsec_set_sadb_addr(struct sadb_address *saaddr, struct sockaddr *addr,
1639 int proto, uint16_t exttype)
1640 {
1641 size_t size;
1642
1643 KASSERT(saaddr != NULL);
1644 KASSERT(addr != NULL);
1645
1646 size = sizeof(*saaddr) + PFKEY_ALIGN8(addr->sa_len);
1647 saaddr->sadb_address_len = PFKEY_UNIT64(size);
1648 saaddr->sadb_address_exttype = exttype;
1649 saaddr->sadb_address_proto = proto;
1650 switch (addr->sa_family) {
1651 #ifdef INET
1652 case AF_INET:
1653 saaddr->sadb_address_prefixlen = sizeof(struct in_addr) << 3;
1654 break;
1655 #endif /* INET */
1656 #ifdef INET6
1657 case AF_INET6:
1658 saaddr->sadb_address_prefixlen = sizeof(struct in6_addr) << 3;
1659 break;
1660 #endif /* INET6 */
1661 default:
1662 log(LOG_DEBUG,
1663 "%s: Invalid address family: %d.\n",
1664 __func__, addr->sa_family);
1665 break;
1666 }
1667 saaddr->sadb_address_reserved = 0;
1668
1669 return size;
1670 }
1671
1672 static inline size_t
if_ipsec_set_sadb_src(struct sadb_address * sasrc,struct sockaddr * src,int proto)1673 if_ipsec_set_sadb_src(struct sadb_address *sasrc, struct sockaddr *src,
1674 int proto)
1675 {
1676
1677 return if_ipsec_set_sadb_addr(sasrc, src, proto,
1678 SADB_EXT_ADDRESS_SRC);
1679 }
1680
1681 static inline size_t
if_ipsec_set_sadb_dst(struct sadb_address * sadst,struct sockaddr * dst,int proto)1682 if_ipsec_set_sadb_dst(struct sadb_address *sadst, struct sockaddr *dst,
1683 int proto)
1684 {
1685
1686 return if_ipsec_set_sadb_addr(sadst, dst, proto,
1687 SADB_EXT_ADDRESS_DST);
1688 }
1689
1690 static inline size_t
if_ipsec_set_sadb_x_policy(struct sadb_x_policy * xpl,struct sadb_x_ipsecrequest * xisr,uint16_t policy,uint8_t dir,uint32_t id,uint8_t level,struct sockaddr * src,struct sockaddr * dst,uint16_t reqid)1691 if_ipsec_set_sadb_x_policy(struct sadb_x_policy *xpl,
1692 struct sadb_x_ipsecrequest *xisr, uint16_t policy, uint8_t dir, uint32_t id,
1693 uint8_t level, struct sockaddr *src, struct sockaddr *dst, uint16_t reqid)
1694 {
1695 size_t size;
1696
1697 KASSERT(policy != IPSEC_POLICY_IPSEC || xisr != NULL);
1698
1699 size = sizeof(*xpl);
1700 if (policy == IPSEC_POLICY_IPSEC) {
1701 size += PFKEY_ALIGN8(sizeof(*xisr));
1702 if (src != NULL && dst != NULL)
1703 size += PFKEY_ALIGN8(src->sa_len + dst->sa_len);
1704 }
1705 xpl->sadb_x_policy_len = PFKEY_UNIT64(size);
1706 xpl->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
1707 xpl->sadb_x_policy_type = policy;
1708 xpl->sadb_x_policy_dir = dir;
1709 xpl->sadb_x_policy_flags = 0;
1710 xpl->sadb_x_policy_id = id;
1711 xpl->sadb_x_policy_reserved2 = 0;
1712
1713 if (policy == IPSEC_POLICY_IPSEC) {
1714 xisr->sadb_x_ipsecrequest_len = PFKEY_ALIGN8(sizeof(*xisr));
1715 if (src != NULL && dst != NULL)
1716 xisr->sadb_x_ipsecrequest_len +=
1717 PFKEY_ALIGN8(src->sa_len + dst->sa_len);
1718 xisr->sadb_x_ipsecrequest_proto = IPPROTO_ESP;
1719 xisr->sadb_x_ipsecrequest_mode = IPSEC_MODE_TRANSPORT;
1720 xisr->sadb_x_ipsecrequest_level = level;
1721 if (level == IPSEC_LEVEL_UNIQUE)
1722 xisr->sadb_x_ipsecrequest_reqid = reqid;
1723 else
1724 xisr->sadb_x_ipsecrequest_reqid = 0;
1725 }
1726
1727 return size;
1728 }
1729
1730 static inline void
if_ipsec_set_sadb_msg(struct sadb_msg * msg,uint16_t extlen,uint8_t msgtype)1731 if_ipsec_set_sadb_msg(struct sadb_msg *msg, uint16_t extlen, uint8_t msgtype)
1732 {
1733
1734 KASSERT(msg != NULL);
1735
1736 msg->sadb_msg_version = PF_KEY_V2;
1737 msg->sadb_msg_type = msgtype;
1738 msg->sadb_msg_errno = 0;
1739 msg->sadb_msg_satype = SADB_SATYPE_UNSPEC;
1740 msg->sadb_msg_len = PFKEY_UNIT64(sizeof(*msg)) + extlen;
1741 msg->sadb_msg_reserved = 0;
1742 msg->sadb_msg_seq = 0; /* XXXX */
1743 msg->sadb_msg_pid = 0; /* XXXX */
1744 }
1745
1746 static inline void
if_ipsec_set_sadb_msg_add(struct sadb_msg * msg,uint16_t extlen)1747 if_ipsec_set_sadb_msg_add(struct sadb_msg *msg, uint16_t extlen)
1748 {
1749
1750 if_ipsec_set_sadb_msg(msg, extlen, SADB_X_SPDADD);
1751 }
1752
1753 static inline void
if_ipsec_set_sadb_msg_del(struct sadb_msg * msg,uint16_t extlen)1754 if_ipsec_set_sadb_msg_del(struct sadb_msg *msg, uint16_t extlen)
1755 {
1756
1757 if_ipsec_set_sadb_msg(msg, extlen, SADB_X_SPDDELETE2);
1758 }
1759
1760 static int
if_ipsec_set_addr_port(struct sockaddr * addrport,struct sockaddr * addr,in_port_t port)1761 if_ipsec_set_addr_port(struct sockaddr *addrport, struct sockaddr *addr,
1762 in_port_t port)
1763 {
1764 int error = 0;
1765
1766 sockaddr_copy(addrport, addr->sa_len, addr);
1767
1768 switch (addr->sa_family) {
1769 #ifdef INET
1770 case AF_INET: {
1771 struct sockaddr_in *sin = satosin(addrport);
1772 sin->sin_port = port;
1773 break;
1774 }
1775 #endif /* INET */
1776 #ifdef INET6
1777 case AF_INET6: {
1778 struct sockaddr_in6 *sin6 = satosin6(addrport);
1779 sin6->sin6_port = port;
1780 break;
1781 }
1782 #endif /* INET6 */
1783 default:
1784 log(LOG_DEBUG,
1785 "%s: Invalid address family: %d.\n",
1786 __func__, addr->sa_family);
1787 error = EINVAL;
1788 }
1789
1790 return error;
1791 }
1792
1793 static int
if_ipsec_get_reqids(struct ipsec_variant * var,u_int16_t reqids[REQID_INDEX_NUM])1794 if_ipsec_get_reqids(struct ipsec_variant *var, u_int16_t reqids[REQID_INDEX_NUM])
1795 {
1796 struct ipsec_softc *sc = var->iv_softc;
1797 struct ifnet *ifp = &sc->ipsec_if;
1798
1799 mutex_enter(&ipsec_softcs.lock);
1800 if (ipsec_softcs.use_fixed_reqid) {
1801 uint32_t unit, reqid_base;
1802
1803 unit = strtoul(ifp->if_xname + sizeof("ipsec") - 1, NULL, 10);
1804 reqid_base = ipsec_softcs.reqid_base + unit * 2;
1805 if (reqid_base + 1 > ipsec_softcs.reqid_last) {
1806 log(LOG_ERR,
1807 "%s: invalid fixed reqid(%"PRIu32"), "
1808 "current range %"PRIu16" <= reqid <= %"PRIu16"\n",
1809 ifp->if_xname, reqid_base + 1,
1810 ipsec_softcs.reqid_base, ipsec_softcs.reqid_last);
1811 mutex_exit(&ipsec_softcs.lock);
1812 return ENOSPC;
1813 }
1814
1815 /*
1816 * Use same reqid both inbound and outbound to reduce reqid.
1817 */
1818 reqids[REQID_INDEX_IPV4IN] = reqid_base;
1819 reqids[REQID_INDEX_IPV4OUT] = reqid_base;
1820 reqids[REQID_INDEX_IPV6IN] = reqid_base + 1;
1821 reqids[REQID_INDEX_IPV6OUT] = reqid_base + 1;
1822 } else {
1823 for (int i = 0; i < REQID_INDEX_NUM; i++)
1824 reqids[i] = key_newreqid();
1825 }
1826 mutex_exit(&ipsec_softcs.lock);
1827
1828 return 0;
1829 }
1830
1831 static struct secpolicy *
if_ipsec_add_sp0(struct sockaddr * src,in_port_t sport,struct sockaddr * dst,in_port_t dport,int dir,int proto,int level,u_int policy,uint16_t reqid)1832 if_ipsec_add_sp0(struct sockaddr *src, in_port_t sport,
1833 struct sockaddr *dst, in_port_t dport,
1834 int dir, int proto, int level, u_int policy, uint16_t reqid)
1835 {
1836 struct sadb_msg msg;
1837 struct sadb_address xsrc, xdst;
1838 struct sadb_x_policy xpl;
1839 struct sadb_x_ipsecrequest xisr;
1840 size_t size;
1841 size_t padlen;
1842 uint16_t ext_msg_len = 0;
1843 struct mbuf *m;
1844
1845 memset(&msg, 0, sizeof(msg));
1846 memset(&xsrc, 0, sizeof(xsrc));
1847 memset(&xdst, 0, sizeof(xdst));
1848 memset(&xpl, 0, sizeof(xpl));
1849 memset(&xisr, 0, sizeof(xisr));
1850
1851 MGETHDR(m, M_WAIT, MT_DATA);
1852
1853 size = if_ipsec_set_sadb_src(&xsrc, src, proto);
1854 ext_msg_len += PFKEY_UNIT64(size);
1855 size = if_ipsec_set_sadb_dst(&xdst, dst, proto);
1856 ext_msg_len += PFKEY_UNIT64(size);
1857 size = if_ipsec_set_sadb_x_policy(&xpl, &xisr, policy, dir, 0, level,
1858 NULL, NULL, reqid);
1859 ext_msg_len += PFKEY_UNIT64(size);
1860 if_ipsec_set_sadb_msg_add(&msg, ext_msg_len);
1861
1862 /* build PF_KEY message */
1863
1864 m->m_len = sizeof(msg);
1865 m_copyback(m, 0, sizeof(msg), &msg);
1866
1867 if_ipsec_add_mbuf(m, &xsrc, sizeof(xsrc));
1868 /*
1869 * secpolicy.spidx.{src, dst} must not be set port number,
1870 * even if it is used for NAT-T.
1871 */
1872 if_ipsec_add_mbuf_addr_port(m, src, 0, true);
1873 padlen = PFKEY_UNUNIT64(xsrc.sadb_address_len)
1874 - (sizeof(xsrc) + PFKEY_ALIGN8(src->sa_len));
1875 if_ipsec_add_pad(m, padlen);
1876
1877 if_ipsec_add_mbuf(m, &xdst, sizeof(xdst));
1878 /* ditto */
1879 if_ipsec_add_mbuf_addr_port(m, dst, 0, true);
1880 padlen = PFKEY_UNUNIT64(xdst.sadb_address_len)
1881 - (sizeof(xdst) + PFKEY_ALIGN8(dst->sa_len));
1882 if_ipsec_add_pad(m, padlen);
1883
1884 if_ipsec_add_mbuf(m, &xpl, sizeof(xpl));
1885 padlen = PFKEY_UNUNIT64(xpl.sadb_x_policy_len) - sizeof(xpl);
1886 if (policy == IPSEC_POLICY_IPSEC) {
1887 if_ipsec_add_mbuf(m, &xisr, sizeof(xisr));
1888 padlen -= PFKEY_ALIGN8(sizeof(xisr));
1889 }
1890 if_ipsec_add_pad(m, padlen);
1891
1892 /* key_kpi_spdadd() has already done KEY_SP_REF(). */
1893 return key_kpi_spdadd(m);
1894 }
1895
1896 static int
if_ipsec_add_sp(struct ipsec_variant * var,struct sockaddr * src,in_port_t sport,struct sockaddr * dst,in_port_t dport)1897 if_ipsec_add_sp(struct ipsec_variant *var,
1898 struct sockaddr *src, in_port_t sport,
1899 struct sockaddr *dst, in_port_t dport)
1900 {
1901 struct ipsec_softc *sc = var->iv_softc;
1902 int level;
1903 int error;
1904 u_int v6policy;
1905 u_int16_t reqids[REQID_INDEX_NUM];
1906
1907 /*
1908 * must delete sp before add it.
1909 */
1910 KASSERT(IV_SP_IN(var) == NULL);
1911 KASSERT(IV_SP_OUT(var) == NULL);
1912 KASSERT(IV_SP_IN6(var) == NULL);
1913 KASSERT(IV_SP_OUT6(var) == NULL);
1914
1915 /*
1916 * can be shared?
1917 */
1918 if (if_ipsec_share_sp(var))
1919 return 0;
1920
1921 if (if_ipsec_nat_t(sc))
1922 level = IPSEC_LEVEL_REQUIRE;
1923 else
1924 level = IPSEC_LEVEL_UNIQUE;
1925
1926 if (if_ipsec_fwd_ipv6(sc))
1927 v6policy = IPSEC_POLICY_IPSEC;
1928 else
1929 v6policy = IPSEC_POLICY_DISCARD;
1930
1931 error = if_ipsec_get_reqids(var, reqids);
1932 if (error)
1933 goto fail;
1934
1935 IV_SP_IN(var) = if_ipsec_add_sp0(dst, dport, src, sport,
1936 IPSEC_DIR_INBOUND, IPPROTO_IPIP, level, IPSEC_POLICY_IPSEC,
1937 reqids[REQID_INDEX_IPV4IN]);
1938 if (IV_SP_IN(var) == NULL) {
1939 error = EEXIST;
1940 goto fail;
1941 }
1942 IV_SP_OUT(var) = if_ipsec_add_sp0(src, sport, dst, dport,
1943 IPSEC_DIR_OUTBOUND, IPPROTO_IPIP, level, IPSEC_POLICY_IPSEC,
1944 reqids[REQID_INDEX_IPV4OUT]);
1945 if (IV_SP_OUT(var) == NULL) {
1946 error = EEXIST;
1947 goto fail;
1948 }
1949 IV_SP_IN6(var) = if_ipsec_add_sp0(dst, dport, src, sport,
1950 IPSEC_DIR_INBOUND, IPPROTO_IPV6, level, v6policy,
1951 reqids[REQID_INDEX_IPV6IN]);
1952 if (IV_SP_IN6(var) == NULL) {
1953 error = EEXIST;
1954 goto fail;
1955 }
1956 IV_SP_OUT6(var) = if_ipsec_add_sp0(src, sport, dst, dport,
1957 IPSEC_DIR_OUTBOUND, IPPROTO_IPV6, level, v6policy,
1958 reqids[REQID_INDEX_IPV6OUT]);
1959 if (IV_SP_OUT6(var) == NULL) {
1960 error = EEXIST;
1961 goto fail;
1962 }
1963
1964 return 0;
1965
1966 fail:
1967 if (IV_SP_IN6(var) != NULL) {
1968 if_ipsec_del_sp0(IV_SP_IN6(var));
1969 IV_SP_IN6(var) = NULL;
1970 }
1971 if (IV_SP_OUT(var) != NULL) {
1972 if_ipsec_del_sp0(IV_SP_OUT(var));
1973 IV_SP_OUT(var) = NULL;
1974 }
1975 if (IV_SP_IN(var) != NULL) {
1976 if_ipsec_del_sp0(IV_SP_IN(var));
1977 IV_SP_IN(var) = NULL;
1978 }
1979
1980 return error;
1981 }
1982
1983 static int
if_ipsec_del_sp0(struct secpolicy * sp)1984 if_ipsec_del_sp0(struct secpolicy *sp)
1985 {
1986 struct sadb_msg msg;
1987 struct sadb_x_policy xpl;
1988 size_t size;
1989 uint16_t ext_msg_len = 0;
1990 int error;
1991 struct mbuf *m;
1992
1993 if (sp == NULL)
1994 return 0;
1995
1996 memset(&msg, 0, sizeof(msg));
1997 memset(&xpl, 0, sizeof(xpl));
1998
1999 MGETHDR(m, M_WAIT, MT_DATA);
2000
2001 size = if_ipsec_set_sadb_x_policy(&xpl, NULL, 0, 0, sp->id, 0, NULL, NULL, 0);
2002 ext_msg_len += PFKEY_UNIT64(size);
2003
2004 if_ipsec_set_sadb_msg_del(&msg, ext_msg_len);
2005
2006 m->m_len = sizeof(msg);
2007 m_copyback(m, 0, sizeof(msg), &msg);
2008
2009 if_ipsec_add_mbuf(m, &xpl, sizeof(xpl));
2010
2011 /* unreference correspond to key_kpi_spdadd(). */
2012 KEY_SP_UNREF(&sp);
2013 error = key_kpi_spddelete2(m);
2014 if (error != 0) {
2015 log(LOG_ERR, "%s: cannot delete SP(ID=%u) (error=%d).\n",
2016 __func__, sp->id, error);
2017 }
2018 return error;
2019 }
2020
2021 static void
if_ipsec_del_sp(struct ipsec_variant * var)2022 if_ipsec_del_sp(struct ipsec_variant *var)
2023 {
2024
2025 /* are the SPs shared? */
2026 if (if_ipsec_unshare_sp(var))
2027 return;
2028
2029 (void)if_ipsec_del_sp0(IV_SP_OUT(var));
2030 (void)if_ipsec_del_sp0(IV_SP_IN(var));
2031 (void)if_ipsec_del_sp0(IV_SP_OUT6(var));
2032 (void)if_ipsec_del_sp0(IV_SP_IN6(var));
2033 IV_SP_IN(var) = NULL;
2034 IV_SP_IN6(var) = NULL;
2035 IV_SP_OUT(var) = NULL;
2036 IV_SP_OUT6(var) = NULL;
2037 }
2038
2039 static int
if_ipsec_replace_sp(struct ipsec_softc * sc,struct ipsec_variant * ovar,struct ipsec_variant * nvar)2040 if_ipsec_replace_sp(struct ipsec_softc *sc, struct ipsec_variant *ovar,
2041 struct ipsec_variant *nvar)
2042 {
2043 in_port_t src_port = 0;
2044 in_port_t dst_port = 0;
2045 struct sockaddr *src;
2046 struct sockaddr *dst;
2047 int error = 0;
2048
2049 KASSERT(mutex_owned(&sc->ipsec_lock));
2050
2051 if_ipsec_del_sp(ovar);
2052
2053 src = nvar->iv_psrc;
2054 dst = nvar->iv_pdst;
2055 if (if_ipsec_nat_t(sc)) {
2056 /* NAT-T enabled */
2057 src_port = nvar->iv_sport;
2058 dst_port = nvar->iv_dport;
2059 }
2060 if (src && dst)
2061 error = if_ipsec_add_sp(nvar, src, src_port, dst, dst_port);
2062
2063 return error;
2064 }
2065
2066 /*
2067 * ipsec_variant and its SPs update API.
2068 *
2069 * Assumption:
2070 * reader side dereferences sc->ipsec_var in reader critical section only,
2071 * that is, all of reader sides do not reader the sc->ipsec_var after
2072 * pserialize_perform().
2073 */
2074 static int
if_ipsec_update_variant(struct ipsec_softc * sc,struct ipsec_variant * nvar,struct ipsec_variant * nullvar)2075 if_ipsec_update_variant(struct ipsec_softc *sc, struct ipsec_variant *nvar,
2076 struct ipsec_variant *nullvar)
2077 {
2078 struct ifnet *ifp = &sc->ipsec_if;
2079 struct ipsec_variant *ovar = sc->ipsec_var;
2080 int error;
2081
2082 KASSERT(mutex_owned(&sc->ipsec_lock));
2083
2084 /*
2085 * To keep consistency between ipsec(4) I/F settings and SPs,
2086 * we stop packet processing while replacing SPs, that is, we set
2087 * "null" config variant to sc->ipsec_var.
2088 */
2089 atomic_store_release(&sc->ipsec_var, nullvar);
2090 pserialize_perform(sc->ipsec_psz);
2091 psref_target_destroy(&ovar->iv_psref, iv_psref_class);
2092
2093 error = if_ipsec_replace_sp(sc, ovar, nvar);
2094 if (!error)
2095 atomic_store_release(&sc->ipsec_var, nvar);
2096 else {
2097 psref_target_init(&ovar->iv_psref, iv_psref_class);
2098 atomic_store_release(&sc->ipsec_var, ovar); /* rollback */
2099 }
2100
2101 pserialize_perform(sc->ipsec_psz);
2102 psref_target_destroy(&nullvar->iv_psref, iv_psref_class);
2103
2104 if (if_ipsec_variant_is_configured(sc->ipsec_var))
2105 ifp->if_flags |= IFF_RUNNING;
2106 else
2107 ifp->if_flags &= ~IFF_RUNNING;
2108
2109 return error;
2110 }
2111