1 /* $NetBSD: if_ipsec.c,v 1.31 2021/10/11 05:13:11 knakahara Exp $ */ 2 3 /* 4 * Copyright (c) 2017 Internet Initiative Japan Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __KERNEL_RCSID(0, "$NetBSD: if_ipsec.c,v 1.31 2021/10/11 05:13:11 knakahara Exp $"); 31 32 #ifdef _KERNEL_OPT 33 #include "opt_inet.h" 34 #endif 35 36 #include <sys/param.h> 37 #include <sys/atomic.h> 38 #include <sys/systm.h> 39 #include <sys/kernel.h> 40 #include <sys/mbuf.h> 41 #include <sys/socket.h> 42 #include <sys/sockio.h> 43 #include <sys/errno.h> 44 #include <sys/ioctl.h> 45 #include <sys/time.h> 46 #include <sys/syslog.h> 47 #include <sys/cpu.h> 48 #include <sys/kmem.h> 49 #include <sys/mutex.h> 50 #include <sys/pserialize.h> 51 #include <sys/psref.h> 52 #include <sys/sysctl.h> 53 54 #include <net/if.h> 55 #include <net/if_types.h> 56 #include <net/route.h> 57 #include <net/bpf.h> 58 #include <net/pfkeyv2.h> 59 60 #include <netinet/in.h> 61 #include <netinet/in_systm.h> 62 #include <netinet/ip.h> 63 #ifdef INET 64 #include <netinet/in_var.h> 65 #endif /* INET */ 66 67 #ifdef INET6 68 #include <netinet6/in6_var.h> 69 #include <netinet/ip6.h> 70 #include <netinet6/ip6_var.h> 71 #endif /* INET6 */ 72 73 #include <netinet/ip_encap.h> 74 75 #include <net/if_ipsec.h> 76 77 #include <net/raw_cb.h> 78 #include <net/pfkeyv2.h> 79 80 #include <netipsec/key.h> 81 #include <netipsec/keydb.h> /* for union sockaddr_union */ 82 #include <netipsec/ipsec.h> 83 #include <netipsec/ipsecif.h> 84 85 static int if_ipsec_clone_create(struct if_clone *, int); 86 static int if_ipsec_clone_destroy(struct ifnet *); 87 88 static inline int if_ipsec_out_direct(struct ipsec_variant *, struct mbuf *, int); 89 static inline void if_ipsec_in_enqueue(struct mbuf *, int, struct ifnet *); 90 91 static int if_ipsec_encap_attach(struct ipsec_variant *); 92 static int if_ipsec_encap_detach(struct ipsec_variant *); 93 static int if_ipsec_set_tunnel(struct ifnet *, 94 struct sockaddr *, struct sockaddr *); 95 static void if_ipsec_delete_tunnel(struct ifnet *); 96 static int if_ipsec_ensure_flags(struct ifnet *, u_short); 97 static void if_ipsec_attach0(struct ipsec_softc *); 98 99 static int if_ipsec_update_variant(struct ipsec_softc *, 100 struct ipsec_variant *, struct ipsec_variant *); 101 102 /* sadb_msg */ 103 static inline void if_ipsec_add_mbuf(struct mbuf *, void *, size_t); 104 static inline void if_ipsec_add_pad(struct mbuf *, size_t); 105 static inline size_t if_ipsec_set_sadb_addr(struct sadb_address *, 106 struct sockaddr *, int, uint16_t); 107 static inline size_t if_ipsec_set_sadb_src(struct sadb_address *, 108 struct sockaddr *, int); 109 static inline size_t if_ipsec_set_sadb_dst(struct sadb_address *, 110 struct sockaddr *, int); 111 static inline size_t if_ipsec_set_sadb_x_policy(struct sadb_x_policy *, 112 struct sadb_x_ipsecrequest *, uint16_t, uint8_t, uint32_t, uint8_t, 113 struct sockaddr *, struct sockaddr *); 114 static inline void if_ipsec_set_sadb_msg(struct sadb_msg *, uint16_t, uint8_t); 115 static inline void if_ipsec_set_sadb_msg_add(struct sadb_msg *, uint16_t); 116 static inline void if_ipsec_set_sadb_msg_del(struct sadb_msg *, uint16_t); 117 /* SPD */ 118 static int if_ipsec_share_sp(struct ipsec_variant *); 119 static int if_ipsec_unshare_sp(struct ipsec_variant *); 120 static inline struct secpolicy *if_ipsec_add_sp0(struct sockaddr *, 121 in_port_t, struct sockaddr *, in_port_t, int, int, int, u_int); 122 static inline int if_ipsec_del_sp0(struct secpolicy *); 123 static int if_ipsec_add_sp(struct ipsec_variant *, 124 struct sockaddr *, in_port_t, struct sockaddr *, in_port_t); 125 static void if_ipsec_del_sp(struct ipsec_variant *); 126 static int if_ipsec_replace_sp(struct ipsec_softc *, struct ipsec_variant *, 127 struct ipsec_variant *); 128 129 static int if_ipsec_set_addr_port(struct sockaddr *, struct sockaddr *, 130 in_port_t); 131 #define IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, target) \ 132 if_ipsec_set_addr_port(target, (var)->iv_psrc, (var)->iv_sport) 133 #define IF_IPSEC_GATHER_PDST_ADDR_PORT(var, target) \ 134 if_ipsec_set_addr_port(target, (var)->iv_pdst, (var)->iv_dport) 135 136 /* 137 * ipsec global variable definitions 138 */ 139 140 /* This list is used in ioctl context only. */ 141 static struct { 142 LIST_HEAD(ipsec_sclist, ipsec_softc) list; 143 kmutex_t lock; 144 } ipsec_softcs __cacheline_aligned; 145 146 struct psref_class *iv_psref_class __read_mostly; 147 148 struct if_clone ipsec_cloner = 149 IF_CLONE_INITIALIZER("ipsec", if_ipsec_clone_create, if_ipsec_clone_destroy); 150 static int max_ipsec_nesting = MAX_IPSEC_NEST; 151 152 static struct sysctllog *if_ipsec_sysctl; 153 154 static pktq_rps_hash_func_t if_ipsec_pktq_rps_hash_p; 155 156 #ifdef INET6 157 static int 158 sysctl_if_ipsec_pmtu_global(SYSCTLFN_ARGS) 159 { 160 int error, pmtu; 161 struct sysctlnode node = *rnode; 162 163 pmtu = ip6_ipsec_pmtu; 164 node.sysctl_data = &pmtu; 165 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 166 if (error || newp == NULL) 167 return error; 168 169 switch (pmtu) { 170 case IPSEC_PMTU_MINMTU: 171 case IPSEC_PMTU_OUTERMTU: 172 ip6_ipsec_pmtu = pmtu; 173 break; 174 default: 175 return EINVAL; 176 } 177 178 return 0; 179 } 180 181 static int 182 sysctl_if_ipsec_pmtu_perif(SYSCTLFN_ARGS) 183 { 184 int error, pmtu; 185 struct sysctlnode node = *rnode; 186 struct ipsec_softc *sc = (struct ipsec_softc *)node.sysctl_data; 187 188 pmtu = sc->ipsec_pmtu; 189 node.sysctl_data = &pmtu; 190 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 191 if (error || newp == NULL) 192 return error; 193 194 switch (pmtu) { 195 case IPSEC_PMTU_SYSDEFAULT: 196 case IPSEC_PMTU_MINMTU: 197 case IPSEC_PMTU_OUTERMTU: 198 sc->ipsec_pmtu = pmtu; 199 break; 200 default: 201 return EINVAL; 202 } 203 204 return 0; 205 } 206 #endif 207 208 static void 209 if_ipsec_sysctl_setup(void) 210 { 211 const struct sysctlnode *node = NULL; 212 213 if_ipsec_sysctl = NULL; 214 215 #ifdef INET6 216 /* 217 * Previously create "net.inet6.ip6" entry to avoid sysctl_createv error. 218 */ 219 sysctl_createv(NULL, 0, NULL, NULL, 220 CTLFLAG_PERMANENT, 221 CTLTYPE_NODE, "inet6", 222 SYSCTL_DESCR("PF_INET6 related settings"), 223 NULL, 0, NULL, 0, 224 CTL_NET, PF_INET6, CTL_EOL); 225 sysctl_createv(NULL, 0, NULL, NULL, 226 CTLFLAG_PERMANENT, 227 CTLTYPE_NODE, "ip6", 228 SYSCTL_DESCR("IPv6 related settings"), 229 NULL, 0, NULL, 0, 230 CTL_NET, PF_INET6, IPPROTO_IPV6, CTL_EOL); 231 232 sysctl_createv(&if_ipsec_sysctl, 0, NULL, NULL, 233 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 234 CTLTYPE_INT, "ipsecifhlim", 235 SYSCTL_DESCR("Default hop limit for a ipsec tunnel datagram"), 236 NULL, 0, &ip6_ipsec_hlim, 0, 237 CTL_NET, PF_INET6, IPPROTO_IPV6, 238 IPV6CTL_IPSEC_HLIM, CTL_EOL); 239 240 sysctl_createv(&if_ipsec_sysctl, 0, NULL, NULL, 241 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 242 CTLTYPE_INT, "ipsecifpmtu", 243 SYSCTL_DESCR("Default Path MTU setting for ipsec tunnels"), 244 sysctl_if_ipsec_pmtu_global, 0, NULL, 0, 245 CTL_NET, PF_INET6, IPPROTO_IPV6, 246 IPV6CTL_IPSEC_PMTU, CTL_EOL); 247 #endif 248 249 sysctl_createv(&if_ipsec_sysctl, 0, NULL, &node, 250 CTLFLAG_PERMANENT, 251 CTLTYPE_NODE, "ipsecif", 252 SYSCTL_DESCR("ipsecif global control"), 253 NULL, 0, NULL, 0, 254 CTL_NET, CTL_CREATE, CTL_EOL); 255 256 sysctl_createv(&if_ipsec_sysctl, 0, &node, NULL, 257 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 258 CTLTYPE_STRING, "rps_hash", 259 SYSCTL_DESCR("Interface rps hash function control"), 260 sysctl_pktq_rps_hash_handler, 0, (void *)&if_ipsec_pktq_rps_hash_p, 261 PKTQ_RPS_HASH_NAME_LEN, 262 CTL_CREATE, CTL_EOL); 263 } 264 265 static void 266 if_ipsec_perif_sysctl_setup(struct sysctllog **clog, struct ipsec_softc *sc) 267 { 268 #ifdef INET6 269 const struct sysctlnode *cnode, *rnode; 270 struct ifnet *ifp = &sc->ipsec_if; 271 const char *ifname = ifp->if_xname; 272 int rv; 273 274 /* 275 * Already created in sysctl_sndq_setup(). 276 */ 277 sysctl_createv(clog, 0, NULL, &rnode, 278 CTLFLAG_PERMANENT, 279 CTLTYPE_NODE, "interfaces", 280 SYSCTL_DESCR("Per-interface controls"), 281 NULL, 0, NULL, 0, 282 CTL_NET, CTL_CREATE, CTL_EOL); 283 sysctl_createv(clog, 0, &rnode, &rnode, 284 CTLFLAG_PERMANENT, 285 CTLTYPE_NODE, ifname, 286 SYSCTL_DESCR("Interface controls"), 287 NULL, 0, NULL, 0, 288 CTL_CREATE, CTL_EOL); 289 290 rv = sysctl_createv(clog, 0, &rnode, &cnode, 291 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 292 CTLTYPE_INT, "pmtu", 293 SYSCTL_DESCR("Path MTU setting for this ipsec tunnel"), 294 sysctl_if_ipsec_pmtu_perif, 0, (void *)sc, 0, 295 CTL_CREATE, CTL_EOL); 296 if (rv != 0) 297 log(LOG_WARNING, "%s: could not attach sysctl node pmtu\n", ifname); 298 299 sc->ipsec_pmtu = IPSEC_PMTU_SYSDEFAULT; 300 #endif 301 } 302 303 /* ARGSUSED */ 304 void 305 ipsecifattach(int count) 306 { 307 308 mutex_init(&ipsec_softcs.lock, MUTEX_DEFAULT, IPL_NONE); 309 LIST_INIT(&ipsec_softcs.list); 310 311 iv_psref_class = psref_class_create("ipsecvar", IPL_SOFTNET); 312 313 if_ipsec_pktq_rps_hash_p = pktq_rps_hash_default; 314 if_ipsec_sysctl_setup(); 315 316 if_clone_attach(&ipsec_cloner); 317 } 318 319 static int 320 if_ipsec_clone_create(struct if_clone *ifc, int unit) 321 { 322 struct ipsec_softc *sc; 323 struct ipsec_variant *var; 324 struct ifnet *ifp; 325 326 sc = kmem_zalloc(sizeof(*sc), KM_SLEEP); 327 328 if_initname(&sc->ipsec_if, ifc->ifc_name, unit); 329 330 if_ipsec_attach0(sc); 331 332 ifp = &sc->ipsec_if; 333 if_ipsec_perif_sysctl_setup(&ifp->if_sysctl_log, sc); 334 335 var = kmem_zalloc(sizeof(*var), KM_SLEEP); 336 var->iv_softc = sc; 337 psref_target_init(&var->iv_psref, iv_psref_class); 338 339 sc->ipsec_var = var; 340 mutex_init(&sc->ipsec_lock, MUTEX_DEFAULT, IPL_NONE); 341 sc->ipsec_psz = pserialize_create(); 342 sc->ipsec_ro_percpu = if_tunnel_alloc_ro_percpu(); 343 344 mutex_enter(&ipsec_softcs.lock); 345 LIST_INSERT_HEAD(&ipsec_softcs.list, sc, ipsec_list); 346 mutex_exit(&ipsec_softcs.lock); 347 return 0; 348 } 349 350 static void 351 if_ipsec_attach0(struct ipsec_softc *sc) 352 { 353 354 sc->ipsec_if.if_addrlen = 0; 355 sc->ipsec_if.if_mtu = IPSEC_MTU; 356 sc->ipsec_if.if_flags = IFF_POINTOPOINT | IFF_MULTICAST; 357 /* set ipsec(4) specific default flags. */ 358 sc->ipsec_if.if_flags |= IFF_FWD_IPV6; 359 sc->ipsec_if.if_extflags = IFEF_MPSAFE; 360 sc->ipsec_if.if_ioctl = if_ipsec_ioctl; 361 sc->ipsec_if.if_output = if_ipsec_output; 362 sc->ipsec_if.if_type = IFT_IPSEC; 363 sc->ipsec_if.if_dlt = DLT_NULL; 364 sc->ipsec_if.if_softc = sc; 365 IFQ_SET_READY(&sc->ipsec_if.if_snd); 366 if_initialize(&sc->ipsec_if); 367 sc->ipsec_if.if_link_state = LINK_STATE_DOWN; 368 if_alloc_sadl(&sc->ipsec_if); 369 bpf_attach(&sc->ipsec_if, DLT_NULL, sizeof(u_int)); 370 if_register(&sc->ipsec_if); 371 } 372 373 static int 374 if_ipsec_clone_destroy(struct ifnet *ifp) 375 { 376 struct ipsec_softc *sc = ifp->if_softc; 377 struct ipsec_variant *var; 378 int bound; 379 380 mutex_enter(&ipsec_softcs.lock); 381 LIST_REMOVE(sc, ipsec_list); 382 mutex_exit(&ipsec_softcs.lock); 383 384 bound = curlwp_bind(); 385 if_ipsec_delete_tunnel(&sc->ipsec_if); 386 curlwp_bindx(bound); 387 388 bpf_detach(ifp); 389 if_detach(ifp); 390 391 if_tunnel_free_ro_percpu(sc->ipsec_ro_percpu); 392 393 pserialize_destroy(sc->ipsec_psz); 394 mutex_destroy(&sc->ipsec_lock); 395 396 var = sc->ipsec_var; 397 kmem_free(var, sizeof(*var)); 398 kmem_free(sc, sizeof(*sc)); 399 400 return 0; 401 } 402 403 static inline bool 404 if_ipsec_nat_t(struct ipsec_softc *sc) 405 { 406 407 return (sc->ipsec_if.if_flags & IFF_NAT_T) != 0; 408 } 409 410 static inline bool 411 if_ipsec_fwd_ipv6(struct ipsec_softc *sc) 412 { 413 414 return (sc->ipsec_if.if_flags & IFF_FWD_IPV6) != 0; 415 } 416 417 int 418 if_ipsec_encap_func(struct mbuf *m, int off, int proto, void *arg) 419 { 420 uint8_t v; 421 struct ipsec_softc *sc; 422 struct ipsec_variant *var = NULL; 423 struct psref psref; 424 int ret = 0; 425 426 sc = arg; 427 KASSERT(sc != NULL); 428 429 if ((sc->ipsec_if.if_flags & IFF_UP) == 0) 430 goto out; 431 432 var = if_ipsec_getref_variant(sc, &psref); 433 if (if_ipsec_variant_is_unconfigured(var)) 434 goto out; 435 436 switch (proto) { 437 case IPPROTO_IPV4: 438 case IPPROTO_IPV6: 439 break; 440 default: 441 goto out; 442 } 443 444 m_copydata(m, 0, sizeof(v), &v); 445 v = (v >> 4) & 0xff; /* Get the IP version number. */ 446 447 switch (v) { 448 #ifdef INET 449 case IPVERSION: { 450 struct ip ip; 451 452 if (m->m_pkthdr.len < sizeof(ip)) 453 goto out; 454 455 m_copydata(m, 0, sizeof(ip), &ip); 456 if (var->iv_psrc->sa_family != AF_INET || 457 var->iv_pdst->sa_family != AF_INET) 458 goto out; 459 ret = ipsecif4_encap_func(m, &ip, var); 460 break; 461 } 462 #endif 463 #ifdef INET6 464 case (IPV6_VERSION >> 4): { 465 struct ip6_hdr ip6; 466 467 if (m->m_pkthdr.len < sizeof(ip6)) 468 goto out; 469 470 m_copydata(m, 0, sizeof(ip6), &ip6); 471 if (var->iv_psrc->sa_family != AF_INET6 || 472 var->iv_pdst->sa_family != AF_INET6) 473 goto out; 474 ret = ipsecif6_encap_func(m, &ip6, var); 475 break; 476 } 477 #endif 478 default: 479 goto out; 480 } 481 482 out: 483 if (var != NULL) 484 if_ipsec_putref_variant(var, &psref); 485 return ret; 486 } 487 488 /* 489 * ipsec(4) I/F may cause infinite recursion calls when misconfigured. 490 * We'll prevent this by introducing upper limit. 491 */ 492 static int 493 if_ipsec_check_nesting(struct ifnet *ifp, struct mbuf *m) 494 { 495 496 return if_tunnel_check_nesting(ifp, m, max_ipsec_nesting); 497 } 498 499 int 500 if_ipsec_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, 501 const struct rtentry *rt) 502 { 503 struct ipsec_softc *sc = ifp->if_softc; 504 struct ipsec_variant *var; 505 struct psref psref; 506 int error; 507 int bound; 508 509 IFQ_CLASSIFY(&ifp->if_snd, m, dst->sa_family); 510 511 error = if_ipsec_check_nesting(ifp, m); 512 if (error) { 513 m_freem(m); 514 goto noref_end; 515 } 516 517 if ((ifp->if_flags & IFF_UP) == 0) { 518 m_freem(m); 519 error = ENETDOWN; 520 goto noref_end; 521 } 522 523 524 bound = curlwp_bind(); 525 var = if_ipsec_getref_variant(sc, &psref); 526 if (if_ipsec_variant_is_unconfigured(var)) { 527 m_freem(m); 528 error = ENETDOWN; 529 goto end; 530 } 531 532 m->m_flags &= ~(M_BCAST|M_MCAST); 533 534 /* use DLT_NULL encapsulation here to pass inner af type */ 535 M_PREPEND(m, sizeof(int), M_DONTWAIT); 536 if (!m) { 537 error = ENOBUFS; 538 goto end; 539 } 540 *mtod(m, int *) = dst->sa_family; 541 542 #if INET6 543 /* drop IPv6 packet if IFF_FWD_IPV6 is not set */ 544 if (dst->sa_family == AF_INET6 && 545 !if_ipsec_fwd_ipv6(sc)) { 546 /* 547 * IPv6 packet is not allowed to forward,that is not error. 548 */ 549 error = 0; 550 IF_DROP(&ifp->if_snd); 551 m_freem(m); 552 goto end; 553 } 554 #endif 555 556 error = if_ipsec_out_direct(var, m, dst->sa_family); 557 558 end: 559 if_ipsec_putref_variant(var, &psref); 560 curlwp_bindx(bound); 561 noref_end: 562 if (error) 563 if_statinc(ifp, if_oerrors); 564 565 return error; 566 } 567 568 static inline int 569 if_ipsec_out_direct(struct ipsec_variant *var, struct mbuf *m, int family) 570 { 571 struct ifnet *ifp = &var->iv_softc->ipsec_if; 572 int error; 573 int len; 574 575 KASSERT(if_ipsec_heldref_variant(var)); 576 KASSERT(var->iv_output != NULL); 577 578 len = m->m_pkthdr.len; 579 580 /* input DLT_NULL frame to BPF */ 581 bpf_mtap(ifp, m, BPF_D_OUT); 582 583 /* grab and chop off inner af type */ 584 /* XXX need pullup? */ 585 m_adj(m, sizeof(int)); 586 587 error = var->iv_output(var, family, m); 588 if (error) 589 return error; 590 591 if_statadd2(ifp, if_opackets, 1, if_obytes, len); 592 593 return 0; 594 } 595 596 void 597 if_ipsec_input(struct mbuf *m, int af, struct ifnet *ifp) 598 { 599 600 KASSERT(ifp != NULL); 601 602 m_set_rcvif(m, ifp); 603 604 bpf_mtap_af(ifp, af, m, BPF_D_IN); 605 606 if_ipsec_in_enqueue(m, af, ifp); 607 608 return; 609 } 610 611 static inline void 612 if_ipsec_in_enqueue(struct mbuf *m, int af, struct ifnet *ifp) 613 { 614 pktqueue_t *pktq; 615 int pktlen; 616 617 /* 618 * Put the packet to the network layer input queue according to the 619 * specified address family. 620 */ 621 switch (af) { 622 #ifdef INET 623 case AF_INET: 624 pktq = ip_pktq; 625 break; 626 #endif 627 #ifdef INET6 628 case AF_INET6: 629 pktq = ip6_pktq; 630 break; 631 #endif 632 default: 633 if_statinc(ifp, if_ierrors); 634 m_freem(m); 635 return; 636 } 637 638 const uint32_t h = pktq_rps_hash(&if_ipsec_pktq_rps_hash_p, m); 639 pktlen = m->m_pkthdr.len; 640 if (__predict_true(pktq_enqueue(pktq, m, h))) { 641 if_statadd2(ifp, if_ibytes, pktlen, if_ipackets, 1); 642 } else { 643 if_statinc(ifp, if_iqdrops); 644 m_freem(m); 645 } 646 647 return; 648 } 649 650 static inline int 651 if_ipsec_check_salen(struct sockaddr *addr) 652 { 653 654 switch (addr->sa_family) { 655 #ifdef INET 656 case AF_INET: 657 if (addr->sa_len != sizeof(struct sockaddr_in)) 658 return EINVAL; 659 break; 660 #endif /* INET */ 661 #ifdef INET6 662 case AF_INET6: 663 if (addr->sa_len != sizeof(struct sockaddr_in6)) 664 return EINVAL; 665 break; 666 #endif /* INET6 */ 667 default: 668 return EAFNOSUPPORT; 669 } 670 671 return 0; 672 } 673 674 /* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */ 675 int 676 if_ipsec_ioctl(struct ifnet *ifp, u_long cmd, void *data) 677 { 678 struct ipsec_softc *sc = ifp->if_softc; 679 struct ipsec_variant *var = NULL; 680 struct ifreq *ifr = (struct ifreq*)data; 681 struct ifaddr *ifa = (struct ifaddr*)data; 682 int error = 0, size; 683 struct sockaddr *dst, *src; 684 u_long mtu; 685 u_short oflags = ifp->if_flags; 686 int bound; 687 struct psref psref; 688 689 switch (cmd) { 690 case SIOCINITIFADDR: 691 ifp->if_flags |= IFF_UP; 692 ifa->ifa_rtrequest = p2p_rtrequest; 693 break; 694 695 case SIOCSIFDSTADDR: 696 break; 697 698 case SIOCADDMULTI: 699 case SIOCDELMULTI: 700 switch (ifr->ifr_addr.sa_family) { 701 #ifdef INET 702 case AF_INET: /* IP supports Multicast */ 703 break; 704 #endif /* INET */ 705 #ifdef INET6 706 case AF_INET6: /* IP6 supports Multicast */ 707 break; 708 #endif /* INET6 */ 709 default: /* Other protocols doesn't support Multicast */ 710 error = EAFNOSUPPORT; 711 break; 712 } 713 break; 714 715 case SIOCSIFMTU: 716 mtu = ifr->ifr_mtu; 717 if (mtu < IPSEC_MTU_MIN || mtu > IPSEC_MTU_MAX) 718 return EINVAL; 719 else if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET) 720 error = 0; 721 break; 722 723 #ifdef INET 724 case SIOCSIFPHYADDR: 725 #endif 726 #ifdef INET6 727 case SIOCSIFPHYADDR_IN6: 728 #endif /* INET6 */ 729 case SIOCSLIFPHYADDR: 730 switch (cmd) { 731 #ifdef INET 732 case SIOCSIFPHYADDR: 733 src = (struct sockaddr *) 734 &(((struct in_aliasreq *)data)->ifra_addr); 735 dst = (struct sockaddr *) 736 &(((struct in_aliasreq *)data)->ifra_dstaddr); 737 break; 738 #endif /* INET */ 739 #ifdef INET6 740 case SIOCSIFPHYADDR_IN6: 741 src = (struct sockaddr *) 742 &(((struct in6_aliasreq *)data)->ifra_addr); 743 dst = (struct sockaddr *) 744 &(((struct in6_aliasreq *)data)->ifra_dstaddr); 745 break; 746 #endif /* INET6 */ 747 case SIOCSLIFPHYADDR: 748 src = (struct sockaddr *) 749 &(((struct if_laddrreq *)data)->addr); 750 dst = (struct sockaddr *) 751 &(((struct if_laddrreq *)data)->dstaddr); 752 break; 753 default: 754 return EINVAL; 755 } 756 757 /* sa_family must be equal */ 758 if (src->sa_family != dst->sa_family) 759 return EINVAL; 760 761 error = if_ipsec_check_salen(src); 762 if (error) 763 return error; 764 error = if_ipsec_check_salen(dst); 765 if (error) 766 return error; 767 768 /* check sa_family looks sane for the cmd */ 769 switch (cmd) { 770 #ifdef INET 771 case SIOCSIFPHYADDR: 772 if (src->sa_family == AF_INET) 773 break; 774 return EAFNOSUPPORT; 775 #endif /* INET */ 776 #ifdef INET6 777 case SIOCSIFPHYADDR_IN6: 778 if (src->sa_family == AF_INET6) 779 break; 780 return EAFNOSUPPORT; 781 #endif /* INET6 */ 782 case SIOCSLIFPHYADDR: 783 /* checks done in the above */ 784 break; 785 } 786 /* 787 * calls if_ipsec_getref_variant() for other softcs to check 788 * address pair duplicattion 789 */ 790 bound = curlwp_bind(); 791 error = if_ipsec_set_tunnel(&sc->ipsec_if, src, dst); 792 if (error) 793 goto bad; 794 if_link_state_change(&sc->ipsec_if, LINK_STATE_UP); 795 curlwp_bindx(bound); 796 break; 797 798 case SIOCDIFPHYADDR: 799 bound = curlwp_bind(); 800 if_ipsec_delete_tunnel(&sc->ipsec_if); 801 if_link_state_change(&sc->ipsec_if, LINK_STATE_DOWN); 802 curlwp_bindx(bound); 803 break; 804 805 case SIOCGIFPSRCADDR: 806 #ifdef INET6 807 case SIOCGIFPSRCADDR_IN6: 808 #endif /* INET6 */ 809 bound = curlwp_bind(); 810 var = if_ipsec_getref_variant(sc, &psref); 811 if (var->iv_psrc == NULL) { 812 error = EADDRNOTAVAIL; 813 goto bad; 814 } 815 src = var->iv_psrc; 816 switch (cmd) { 817 #ifdef INET 818 case SIOCGIFPSRCADDR: 819 dst = &ifr->ifr_addr; 820 size = sizeof(ifr->ifr_addr); 821 break; 822 #endif /* INET */ 823 #ifdef INET6 824 case SIOCGIFPSRCADDR_IN6: 825 dst = (struct sockaddr *) 826 &(((struct in6_ifreq *)data)->ifr_addr); 827 size = sizeof(((struct in6_ifreq *)data)->ifr_addr); 828 break; 829 #endif /* INET6 */ 830 default: 831 error = EADDRNOTAVAIL; 832 goto bad; 833 } 834 if (src->sa_len > size) { 835 error = EINVAL; 836 goto bad; 837 } 838 error = IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, dst); 839 if (error) 840 goto bad; 841 if_ipsec_putref_variant(var, &psref); 842 curlwp_bindx(bound); 843 break; 844 845 case SIOCGIFPDSTADDR: 846 #ifdef INET6 847 case SIOCGIFPDSTADDR_IN6: 848 #endif /* INET6 */ 849 bound = curlwp_bind(); 850 var = if_ipsec_getref_variant(sc, &psref); 851 if (var->iv_pdst == NULL) { 852 error = EADDRNOTAVAIL; 853 goto bad; 854 } 855 src = var->iv_pdst; 856 switch (cmd) { 857 #ifdef INET 858 case SIOCGIFPDSTADDR: 859 dst = &ifr->ifr_addr; 860 size = sizeof(ifr->ifr_addr); 861 break; 862 #endif /* INET */ 863 #ifdef INET6 864 case SIOCGIFPDSTADDR_IN6: 865 dst = (struct sockaddr *) 866 &(((struct in6_ifreq *)data)->ifr_addr); 867 size = sizeof(((struct in6_ifreq *)data)->ifr_addr); 868 break; 869 #endif /* INET6 */ 870 default: 871 error = EADDRNOTAVAIL; 872 goto bad; 873 } 874 if (src->sa_len > size) { 875 error = EINVAL; 876 goto bad; 877 } 878 error = IF_IPSEC_GATHER_PDST_ADDR_PORT(var, dst); 879 if (error) 880 goto bad; 881 if_ipsec_putref_variant(var, &psref); 882 curlwp_bindx(bound); 883 break; 884 885 case SIOCGLIFPHYADDR: 886 bound = curlwp_bind(); 887 var = if_ipsec_getref_variant(sc, &psref); 888 if (if_ipsec_variant_is_unconfigured(var)) { 889 error = EADDRNOTAVAIL; 890 goto bad; 891 } 892 893 /* copy src */ 894 src = var->iv_psrc; 895 dst = (struct sockaddr *) 896 &(((struct if_laddrreq *)data)->addr); 897 size = sizeof(((struct if_laddrreq *)data)->addr); 898 if (src->sa_len > size) { 899 error = EINVAL; 900 goto bad; 901 } 902 error = IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, dst); 903 if (error) 904 goto bad; 905 906 /* copy dst */ 907 src = var->iv_pdst; 908 dst = (struct sockaddr *) 909 &(((struct if_laddrreq *)data)->dstaddr); 910 size = sizeof(((struct if_laddrreq *)data)->dstaddr); 911 if (src->sa_len > size) { 912 error = EINVAL; 913 goto bad; 914 } 915 error = IF_IPSEC_GATHER_PDST_ADDR_PORT(var, dst); 916 if (error) 917 goto bad; 918 if_ipsec_putref_variant(var, &psref); 919 curlwp_bindx(bound); 920 break; 921 922 default: 923 error = ifioctl_common(ifp, cmd, data); 924 if (!error) { 925 bound = curlwp_bind(); 926 error = if_ipsec_ensure_flags(&sc->ipsec_if, oflags); 927 if (error) 928 goto bad; 929 curlwp_bindx(bound); 930 } 931 break; 932 } 933 return error; 934 935 bad: 936 if (var != NULL) 937 if_ipsec_putref_variant(var, &psref); 938 curlwp_bindx(bound); 939 940 return error; 941 } 942 943 struct encap_funcs { 944 #ifdef INET 945 int (*ef_inet)(struct ipsec_variant *); 946 #endif 947 #ifdef INET6 948 int (*ef_inet6)(struct ipsec_variant *); 949 #endif 950 }; 951 952 static struct encap_funcs ipsec_encap_attach = { 953 #ifdef INET 954 .ef_inet = ipsecif4_attach, 955 #endif 956 #ifdef INET6 957 .ef_inet6 = &ipsecif6_attach, 958 #endif 959 }; 960 961 static struct encap_funcs ipsec_encap_detach = { 962 #ifdef INET 963 .ef_inet = ipsecif4_detach, 964 #endif 965 #ifdef INET6 966 .ef_inet6 = &ipsecif6_detach, 967 #endif 968 }; 969 970 static int 971 if_ipsec_encap_common(struct ipsec_variant *var, struct encap_funcs *funcs) 972 { 973 int error; 974 975 KASSERT(var != NULL); 976 KASSERT(if_ipsec_variant_is_configured(var)); 977 978 switch (var->iv_psrc->sa_family) { 979 #ifdef INET 980 case AF_INET: 981 error = (funcs->ef_inet)(var); 982 break; 983 #endif /* INET */ 984 #ifdef INET6 985 case AF_INET6: 986 error = (funcs->ef_inet6)(var); 987 break; 988 #endif /* INET6 */ 989 default: 990 error = EINVAL; 991 break; 992 } 993 994 return error; 995 } 996 997 static int 998 if_ipsec_encap_attach(struct ipsec_variant *var) 999 { 1000 1001 return if_ipsec_encap_common(var, &ipsec_encap_attach); 1002 } 1003 1004 static int 1005 if_ipsec_encap_detach(struct ipsec_variant *var) 1006 { 1007 1008 return if_ipsec_encap_common(var, &ipsec_encap_detach); 1009 } 1010 1011 /* 1012 * Validate and set ipsec(4) I/F configurations. 1013 * (1) validate 1014 * (1-1) Check the argument src and dst address pair will change 1015 * configuration from current src and dst address pair. 1016 * (1-2) Check any ipsec(4) I/F uses duplicated src and dst address pair 1017 * with argument src and dst address pair, except for NAT-T shared 1018 * tunnels. 1019 * (2) set 1020 * (2-1) Create variant for new configuration. 1021 * (2-2) Create temporary "null" variant used to avoid to access 1022 * dangling variant while SPs are deleted and added. 1023 * (2-3) Swap variant include its SPs. 1024 * (2-4) Cleanup last configurations. 1025 */ 1026 static int 1027 if_ipsec_set_tunnel(struct ifnet *ifp, 1028 struct sockaddr *src, struct sockaddr *dst) 1029 { 1030 struct ipsec_softc *sc = ifp->if_softc; 1031 struct ipsec_softc *sc2; 1032 struct ipsec_variant *ovar, *nvar, *nullvar; 1033 struct sockaddr *osrc, *odst; 1034 struct sockaddr *nsrc, *ndst; 1035 in_port_t nsport = 0, ndport = 0; 1036 int error; 1037 1038 error = encap_lock_enter(); 1039 if (error) 1040 return error; 1041 1042 nsrc = sockaddr_dup(src, M_WAITOK); 1043 ndst = sockaddr_dup(dst, M_WAITOK); 1044 nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP); 1045 nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP); 1046 1047 mutex_enter(&sc->ipsec_lock); 1048 1049 ovar = sc->ipsec_var; 1050 1051 switch(nsrc->sa_family) { 1052 #ifdef INET 1053 case AF_INET: 1054 nsport = satosin(src)->sin_port; 1055 /* 1056 * avoid confuse SP when NAT-T disabled, 1057 * e.g. 1058 * expected: 10.0.1.2[any] 10.0.1.1[any] 4(ipv4) 1059 * confuse : 10.0.1.2[600] 10.0.1.1[600] 4(ipv4) 1060 */ 1061 satosin(nsrc)->sin_port = 0; 1062 ndport = satosin(dst)->sin_port; 1063 satosin(ndst)->sin_port = 0; 1064 break; 1065 #endif /* INET */ 1066 #ifdef INET6 1067 case AF_INET6: 1068 nsport = satosin6(src)->sin6_port; 1069 satosin6(nsrc)->sin6_port = 0; 1070 ndport = satosin6(dst)->sin6_port; 1071 satosin6(ndst)->sin6_port = 0; 1072 break; 1073 #endif /* INET6 */ 1074 default: 1075 log(LOG_DEBUG, 1076 "%s: Invalid address family: %d.\n", 1077 __func__, src->sa_family); 1078 error = EINVAL; 1079 goto out; 1080 } 1081 1082 /* 1083 * (1-1) Check the argument src and dst address pair will change 1084 * configuration from current src and dst address pair. 1085 */ 1086 if ((ovar->iv_pdst && sockaddr_cmp(ovar->iv_pdst, dst) == 0) && 1087 (ovar->iv_psrc && sockaddr_cmp(ovar->iv_psrc, src) == 0) && 1088 (ovar->iv_sport == nsport && ovar->iv_dport == ndport)) { 1089 /* address and port pair not changed. */ 1090 error = 0; 1091 goto out; 1092 } 1093 1094 /* 1095 * (1-2) Check any ipsec(4) I/F uses duplicated src and dst address pair 1096 * with argument src and dst address pair, except for NAT-T shared 1097 * tunnels. 1098 */ 1099 mutex_enter(&ipsec_softcs.lock); 1100 LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) { 1101 struct ipsec_variant *var2; 1102 struct psref psref; 1103 1104 if (sc2 == sc) 1105 continue; 1106 var2 = if_ipsec_getref_variant(sc2, &psref); 1107 if (if_ipsec_variant_is_unconfigured(var2)) { 1108 if_ipsec_putref_variant(var2, &psref); 1109 continue; 1110 } 1111 if (if_ipsec_nat_t(sc) || if_ipsec_nat_t(sc2)) { 1112 if_ipsec_putref_variant(var2, &psref); 1113 continue; /* NAT-T shared tunnel */ 1114 } 1115 if (sockaddr_cmp(var2->iv_pdst, dst) == 0 && 1116 sockaddr_cmp(var2->iv_psrc, src) == 0) { 1117 if_ipsec_putref_variant(var2, &psref); 1118 mutex_exit(&ipsec_softcs.lock); 1119 error = EADDRNOTAVAIL; 1120 goto out; 1121 } 1122 1123 if_ipsec_putref_variant(var2, &psref); 1124 /* XXX both end must be valid? (I mean, not 0.0.0.0) */ 1125 } 1126 mutex_exit(&ipsec_softcs.lock); 1127 1128 1129 osrc = ovar->iv_psrc; 1130 odst = ovar->iv_pdst; 1131 1132 /* 1133 * (2-1) Create ipsec_variant for new configuration. 1134 */ 1135 if_ipsec_copy_variant(nvar, ovar); 1136 nvar->iv_psrc = nsrc; 1137 nvar->iv_pdst = ndst; 1138 nvar->iv_sport = nsport; 1139 nvar->iv_dport = ndport; 1140 nvar->iv_encap_cookie4 = NULL; 1141 nvar->iv_encap_cookie6 = NULL; 1142 psref_target_init(&nvar->iv_psref, iv_psref_class); 1143 error = if_ipsec_encap_attach(nvar); 1144 if (error) 1145 goto out; 1146 1147 /* 1148 * (2-2) Create temporary "null" variant. 1149 */ 1150 if_ipsec_copy_variant(nullvar, ovar); 1151 if_ipsec_clear_config(nullvar); 1152 psref_target_init(&nullvar->iv_psref, iv_psref_class); 1153 /* 1154 * (2-3) Swap variant include its SPs. 1155 */ 1156 error = if_ipsec_update_variant(sc, nvar, nullvar); 1157 if (error) { 1158 if_ipsec_encap_detach(nvar); 1159 goto out; 1160 } 1161 1162 mutex_exit(&sc->ipsec_lock); 1163 1164 /* 1165 * (2-4) Cleanup last configurations. 1166 */ 1167 if (if_ipsec_variant_is_configured(ovar)) 1168 if_ipsec_encap_detach(ovar); 1169 encap_lock_exit(); 1170 1171 if (osrc != NULL) 1172 sockaddr_free(osrc); 1173 if (odst != NULL) 1174 sockaddr_free(odst); 1175 kmem_free(ovar, sizeof(*ovar)); 1176 kmem_free(nullvar, sizeof(*nullvar)); 1177 1178 return 0; 1179 1180 out: 1181 mutex_exit(&sc->ipsec_lock); 1182 encap_lock_exit(); 1183 1184 sockaddr_free(nsrc); 1185 sockaddr_free(ndst); 1186 kmem_free(nvar, sizeof(*nvar)); 1187 kmem_free(nullvar, sizeof(*nullvar)); 1188 1189 return error; 1190 } 1191 1192 /* 1193 * Validate and delete ipsec(4) I/F configurations. 1194 * (1) validate 1195 * (1-1) Check current src and dst address pair are null, 1196 * which means the ipsec(4) I/F is already done deletetunnel. 1197 * (2) delete 1198 * (2-1) Create variant for deleted status. 1199 * (2-2) Create temporary "null" variant used to avoid to access 1200 * dangling variant while SPs are deleted and added. 1201 * NOTE: 1202 * The contents of temporary "null" variant equal to the variant 1203 * of (2-1), however two psref_target_destroy() synchronization 1204 * points are necessary to avoid to access dangling variant 1205 * while SPs are deleted and added. To implement that simply, 1206 * we use the same manner as if_ipsec_set_tunnel(), that is, 1207 * create extra "null" variant and use it temporarily. 1208 * (2-3) Swap variant include its SPs. 1209 * (2-4) Cleanup last configurations. 1210 */ 1211 static void 1212 if_ipsec_delete_tunnel(struct ifnet *ifp) 1213 { 1214 struct ipsec_softc *sc = ifp->if_softc; 1215 struct ipsec_variant *ovar, *nvar, *nullvar; 1216 struct sockaddr *osrc, *odst; 1217 int error; 1218 1219 error = encap_lock_enter(); 1220 if (error) 1221 return; 1222 1223 nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP); 1224 nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP); 1225 1226 mutex_enter(&sc->ipsec_lock); 1227 1228 ovar = sc->ipsec_var; 1229 osrc = ovar->iv_psrc; 1230 odst = ovar->iv_pdst; 1231 /* 1232 * (1-1) Check current src and dst address pair are null, 1233 * which means the ipsec(4) I/F is already done deletetunnel. 1234 */ 1235 if (osrc == NULL || odst == NULL) { 1236 /* address pair not changed. */ 1237 mutex_exit(&sc->ipsec_lock); 1238 encap_lock_exit(); 1239 kmem_free(nvar, sizeof(*nvar)); 1240 kmem_free(nullvar, sizeof(*nullvar)); 1241 return; 1242 } 1243 1244 /* 1245 * (2-1) Create variant for deleted status. 1246 */ 1247 if_ipsec_copy_variant(nvar, ovar); 1248 if_ipsec_clear_config(nvar); 1249 psref_target_init(&nvar->iv_psref, iv_psref_class); 1250 1251 /* 1252 * (2-2) Create temporary "null" variant used to avoid to access 1253 * dangling variant while SPs are deleted and added. 1254 */ 1255 if_ipsec_copy_variant(nullvar, ovar); 1256 if_ipsec_clear_config(nullvar); 1257 psref_target_init(&nullvar->iv_psref, iv_psref_class); 1258 /* 1259 * (2-3) Swap variant include its SPs. 1260 */ 1261 /* if_ipsec_update_variant() does not fail when delete SP only. */ 1262 (void)if_ipsec_update_variant(sc, nvar, nullvar); 1263 1264 mutex_exit(&sc->ipsec_lock); 1265 1266 /* 1267 * (2-4) Cleanup last configurations. 1268 */ 1269 if (if_ipsec_variant_is_configured(ovar)) 1270 if_ipsec_encap_detach(ovar); 1271 encap_lock_exit(); 1272 1273 sockaddr_free(osrc); 1274 sockaddr_free(odst); 1275 kmem_free(ovar, sizeof(*ovar)); 1276 kmem_free(nullvar, sizeof(*nullvar)); 1277 } 1278 1279 /* 1280 * Check IFF_NAT_T and IFF_FWD_IPV6 flags, therefore update SPs if needed. 1281 * (1) check 1282 * (1-1) Check flags are changed. 1283 * (1-2) Check current src and dst address pair. If they are null, 1284 * that means the ipsec(4) I/F is deletetunnel'ed, so it is 1285 * not needed to update. 1286 * (2) update 1287 * (2-1) Create variant for new SPs. 1288 * (2-2) Create temporary "null" variant used to avoid to access 1289 * dangling variant while SPs are deleted and added. 1290 * NOTE: 1291 * There is the same problem as if_ipsec_delete_tunnel(). 1292 * (2-3) Swap variant include its SPs. 1293 * (2-4) Cleanup unused configurations. 1294 * NOTE: use the same encap_cookies. 1295 */ 1296 static int 1297 if_ipsec_ensure_flags(struct ifnet *ifp, u_short oflags) 1298 { 1299 struct ipsec_softc *sc = ifp->if_softc; 1300 struct ipsec_variant *ovar, *nvar, *nullvar; 1301 int error; 1302 1303 /* 1304 * (1) Check flags are changed. 1305 */ 1306 if ((oflags & (IFF_NAT_T|IFF_FWD_IPV6)) == 1307 (ifp->if_flags & (IFF_NAT_T|IFF_FWD_IPV6))) 1308 return 0; /* flags not changed. */ 1309 1310 error = encap_lock_enter(); 1311 if (error) 1312 return error; 1313 1314 nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP); 1315 nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP); 1316 1317 mutex_enter(&sc->ipsec_lock); 1318 1319 ovar = sc->ipsec_var; 1320 /* 1321 * (1-2) Check current src and dst address pair. 1322 */ 1323 if (if_ipsec_variant_is_unconfigured(ovar)) { 1324 /* nothing to do */ 1325 mutex_exit(&sc->ipsec_lock); 1326 encap_lock_exit(); 1327 kmem_free(nvar, sizeof(*nvar)); 1328 kmem_free(nullvar, sizeof(*nullvar)); 1329 return 0; 1330 } 1331 1332 /* 1333 * (2-1) Create variant for new SPs. 1334 */ 1335 if_ipsec_copy_variant(nvar, ovar); 1336 psref_target_init(&nvar->iv_psref, iv_psref_class); 1337 /* 1338 * (2-2) Create temporary "null" variant used to avoid to access 1339 * dangling variant while SPs are deleted and added. 1340 */ 1341 if_ipsec_copy_variant(nullvar, ovar); 1342 if_ipsec_clear_config(nullvar); 1343 psref_target_init(&nullvar->iv_psref, iv_psref_class); 1344 /* 1345 * (2-3) Swap variant include its SPs. 1346 */ 1347 error = if_ipsec_update_variant(sc, nvar, nullvar); 1348 1349 mutex_exit(&sc->ipsec_lock); 1350 encap_lock_exit(); 1351 1352 /* 1353 * (2-4) Cleanup unused configurations. 1354 */ 1355 if (!error) 1356 kmem_free(ovar, sizeof(*ovar)); 1357 else 1358 kmem_free(nvar, sizeof(*ovar)); 1359 kmem_free(nullvar, sizeof(*nullvar)); 1360 1361 return error; 1362 } 1363 1364 /* 1365 * SPD management 1366 */ 1367 1368 /* 1369 * Share SP set with other NAT-T ipsec(4) I/F(s). 1370 * Return 1, when "var" shares SP set. 1371 * Return 0, when "var" cannot share SP set. 1372 * 1373 * NOTE: 1374 * if_ipsec_share_sp() and if_ipsec_unshare_sp() would require global lock 1375 * to exclude other ipsec(4) I/Fs set_tunnel/delete_tunnel. E.g. when ipsec0 1376 * and ipsec1 can share SP set, running ipsec0's set_tunnel and ipsec1's 1377 * set_tunnel causes race. 1378 * Currently, (fortunately) encap_lock works as this global lock. 1379 */ 1380 static int 1381 if_ipsec_share_sp(struct ipsec_variant *var) 1382 { 1383 struct ipsec_softc *sc = var->iv_softc; 1384 struct ipsec_softc *sc2; 1385 struct ipsec_variant *var2; 1386 struct psref psref; 1387 1388 KASSERT(encap_lock_held()); 1389 KASSERT(var->iv_psrc != NULL && var->iv_pdst != NULL); 1390 1391 mutex_enter(&ipsec_softcs.lock); 1392 LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) { 1393 if (sc2 == sc) 1394 continue; 1395 var2 = if_ipsec_getref_variant(sc2, &psref); 1396 if (if_ipsec_variant_is_unconfigured(var2)) { 1397 if_ipsec_putref_variant(var2, &psref); 1398 continue; 1399 } 1400 if (sockaddr_cmp(var2->iv_pdst, var->iv_pdst) != 0 || 1401 sockaddr_cmp(var2->iv_psrc, var->iv_psrc) != 0) { 1402 if_ipsec_putref_variant(var2, &psref); 1403 continue; 1404 } 1405 1406 break; 1407 } 1408 mutex_exit(&ipsec_softcs.lock); 1409 if (sc2 == NULL) 1410 return 0; /* not shared */ 1411 1412 IV_SP_IN(var) = IV_SP_IN(var2); 1413 IV_SP_IN6(var) = IV_SP_IN6(var2); 1414 IV_SP_OUT(var) = IV_SP_OUT(var2); 1415 IV_SP_OUT6(var) = IV_SP_OUT6(var2); 1416 1417 if_ipsec_putref_variant(var2, &psref); 1418 return 1; /* shared */ 1419 } 1420 1421 /* 1422 * Unshare SP set with other NAT-T ipsec(4) I/F(s). 1423 * Return 1, when "var" shared SP set, and then unshare them. 1424 * Return 0, when "var" did not share SP set. 1425 * 1426 * NOTE: 1427 * See if_ipsec_share_sp()'s note. 1428 */ 1429 static int 1430 if_ipsec_unshare_sp(struct ipsec_variant *var) 1431 { 1432 struct ipsec_softc *sc = var->iv_softc; 1433 struct ipsec_softc *sc2; 1434 struct ipsec_variant *var2; 1435 struct psref psref; 1436 1437 KASSERT(encap_lock_held()); 1438 1439 if (!var->iv_pdst || !var->iv_psrc) 1440 return 0; 1441 1442 mutex_enter(&ipsec_softcs.lock); 1443 LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) { 1444 if (sc2 == sc) 1445 continue; 1446 var2 = if_ipsec_getref_variant(sc2, &psref); 1447 if (!var2->iv_pdst || !var2->iv_psrc) { 1448 if_ipsec_putref_variant(var2, &psref); 1449 continue; 1450 } 1451 if (sockaddr_cmp(var2->iv_pdst, var->iv_pdst) != 0 || 1452 sockaddr_cmp(var2->iv_psrc, var->iv_psrc) != 0) { 1453 if_ipsec_putref_variant(var2, &psref); 1454 continue; 1455 } 1456 1457 break; 1458 } 1459 mutex_exit(&ipsec_softcs.lock); 1460 if (sc2 == NULL) 1461 return 0; /* not shared */ 1462 1463 IV_SP_IN(var) = NULL; 1464 IV_SP_IN6(var) = NULL; 1465 IV_SP_OUT(var) = NULL; 1466 IV_SP_OUT6(var) = NULL; 1467 if_ipsec_putref_variant(var2, &psref); 1468 return 1; /* shared */ 1469 } 1470 1471 static inline void 1472 if_ipsec_add_mbuf_optalign(struct mbuf *m0, void *data, size_t len, bool align) 1473 { 1474 struct mbuf *m; 1475 1476 MGET(m, M_WAIT, MT_DATA); 1477 if (align) { 1478 m->m_len = PFKEY_ALIGN8(len); 1479 memset(mtod(m, void *), 0, m->m_len); 1480 } else 1481 m->m_len = len; 1482 m_copyback(m, 0, len, data); 1483 m_cat(m0, m); 1484 } 1485 1486 static inline void 1487 if_ipsec_add_mbuf(struct mbuf *m0, void *data, size_t len) 1488 { 1489 1490 if_ipsec_add_mbuf_optalign(m0, data, len, true); 1491 } 1492 1493 static inline void 1494 if_ipsec_add_mbuf_addr_port(struct mbuf *m0, struct sockaddr *addr, in_port_t port, bool align) 1495 { 1496 1497 if (port == 0) { 1498 if_ipsec_add_mbuf_optalign(m0, addr, addr->sa_len, align); 1499 } else { 1500 union sockaddr_union addrport_u; 1501 struct sockaddr *addrport = &addrport_u.sa; 1502 1503 if_ipsec_set_addr_port(addrport, addr, port); 1504 if_ipsec_add_mbuf_optalign(m0, addrport, addrport->sa_len, align); 1505 } 1506 } 1507 1508 static inline void 1509 if_ipsec_add_pad(struct mbuf *m0, size_t len) 1510 { 1511 struct mbuf *m; 1512 1513 if (len == 0) 1514 return; 1515 1516 MGET(m, M_WAIT, MT_DATA); 1517 m->m_len = len; 1518 memset(mtod(m, void *), 0, m->m_len); 1519 m_cat(m0, m); 1520 } 1521 1522 static inline size_t 1523 if_ipsec_set_sadb_addr(struct sadb_address *saaddr, struct sockaddr *addr, 1524 int proto, uint16_t exttype) 1525 { 1526 size_t size; 1527 1528 KASSERT(saaddr != NULL); 1529 KASSERT(addr != NULL); 1530 1531 size = sizeof(*saaddr) + PFKEY_ALIGN8(addr->sa_len); 1532 saaddr->sadb_address_len = PFKEY_UNIT64(size); 1533 saaddr->sadb_address_exttype = exttype; 1534 saaddr->sadb_address_proto = proto; 1535 switch (addr->sa_family) { 1536 #ifdef INET 1537 case AF_INET: 1538 saaddr->sadb_address_prefixlen = sizeof(struct in_addr) << 3; 1539 break; 1540 #endif /* INET */ 1541 #ifdef INET6 1542 case AF_INET6: 1543 saaddr->sadb_address_prefixlen = sizeof(struct in6_addr) << 3; 1544 break; 1545 #endif /* INET6 */ 1546 default: 1547 log(LOG_DEBUG, 1548 "%s: Invalid address family: %d.\n", 1549 __func__, addr->sa_family); 1550 break; 1551 } 1552 saaddr->sadb_address_reserved = 0; 1553 1554 return size; 1555 } 1556 1557 static inline size_t 1558 if_ipsec_set_sadb_src(struct sadb_address *sasrc, struct sockaddr *src, 1559 int proto) 1560 { 1561 1562 return if_ipsec_set_sadb_addr(sasrc, src, proto, 1563 SADB_EXT_ADDRESS_SRC); 1564 } 1565 1566 static inline size_t 1567 if_ipsec_set_sadb_dst(struct sadb_address *sadst, struct sockaddr *dst, 1568 int proto) 1569 { 1570 1571 return if_ipsec_set_sadb_addr(sadst, dst, proto, 1572 SADB_EXT_ADDRESS_DST); 1573 } 1574 1575 static inline size_t 1576 if_ipsec_set_sadb_x_policy(struct sadb_x_policy *xpl, 1577 struct sadb_x_ipsecrequest *xisr, uint16_t policy, uint8_t dir, uint32_t id, 1578 uint8_t level, struct sockaddr *src, struct sockaddr *dst) 1579 { 1580 size_t size; 1581 1582 KASSERT(policy != IPSEC_POLICY_IPSEC || xisr != NULL); 1583 1584 size = sizeof(*xpl); 1585 if (policy == IPSEC_POLICY_IPSEC) { 1586 size += PFKEY_ALIGN8(sizeof(*xisr)); 1587 if (src != NULL && dst != NULL) 1588 size += PFKEY_ALIGN8(src->sa_len + dst->sa_len); 1589 } 1590 xpl->sadb_x_policy_len = PFKEY_UNIT64(size); 1591 xpl->sadb_x_policy_exttype = SADB_X_EXT_POLICY; 1592 xpl->sadb_x_policy_type = policy; 1593 xpl->sadb_x_policy_dir = dir; 1594 xpl->sadb_x_policy_reserved = 0; 1595 xpl->sadb_x_policy_id = id; 1596 xpl->sadb_x_policy_reserved2 = 0; 1597 1598 if (policy == IPSEC_POLICY_IPSEC) { 1599 xisr->sadb_x_ipsecrequest_len = PFKEY_ALIGN8(sizeof(*xisr)); 1600 if (src != NULL && dst != NULL) 1601 xisr->sadb_x_ipsecrequest_len += 1602 PFKEY_ALIGN8(src->sa_len + dst->sa_len); 1603 xisr->sadb_x_ipsecrequest_proto = IPPROTO_ESP; 1604 xisr->sadb_x_ipsecrequest_mode = IPSEC_MODE_TRANSPORT; 1605 xisr->sadb_x_ipsecrequest_level = level; 1606 if (level == IPSEC_LEVEL_UNIQUE) 1607 xisr->sadb_x_ipsecrequest_reqid = key_newreqid(); 1608 else 1609 xisr->sadb_x_ipsecrequest_reqid = 0; 1610 } 1611 1612 return size; 1613 } 1614 1615 static inline void 1616 if_ipsec_set_sadb_msg(struct sadb_msg *msg, uint16_t extlen, uint8_t msgtype) 1617 { 1618 1619 KASSERT(msg != NULL); 1620 1621 msg->sadb_msg_version = PF_KEY_V2; 1622 msg->sadb_msg_type = msgtype; 1623 msg->sadb_msg_errno = 0; 1624 msg->sadb_msg_satype = SADB_SATYPE_UNSPEC; 1625 msg->sadb_msg_len = PFKEY_UNIT64(sizeof(*msg)) + extlen; 1626 msg->sadb_msg_reserved = 0; 1627 msg->sadb_msg_seq = 0; /* XXXX */ 1628 msg->sadb_msg_pid = 0; /* XXXX */ 1629 } 1630 1631 static inline void 1632 if_ipsec_set_sadb_msg_add(struct sadb_msg *msg, uint16_t extlen) 1633 { 1634 1635 if_ipsec_set_sadb_msg(msg, extlen, SADB_X_SPDADD); 1636 } 1637 1638 static inline void 1639 if_ipsec_set_sadb_msg_del(struct sadb_msg *msg, uint16_t extlen) 1640 { 1641 1642 if_ipsec_set_sadb_msg(msg, extlen, SADB_X_SPDDELETE2); 1643 } 1644 1645 static int 1646 if_ipsec_set_addr_port(struct sockaddr *addrport, struct sockaddr *addr, 1647 in_port_t port) 1648 { 1649 int error = 0; 1650 1651 sockaddr_copy(addrport, addr->sa_len, addr); 1652 1653 switch (addr->sa_family) { 1654 #ifdef INET 1655 case AF_INET: { 1656 struct sockaddr_in *sin = satosin(addrport); 1657 sin->sin_port = port; 1658 break; 1659 } 1660 #endif /* INET */ 1661 #ifdef INET6 1662 case AF_INET6: { 1663 struct sockaddr_in6 *sin6 = satosin6(addrport); 1664 sin6->sin6_port = port; 1665 break; 1666 } 1667 #endif /* INET6 */ 1668 default: 1669 log(LOG_DEBUG, 1670 "%s: Invalid address family: %d.\n", 1671 __func__, addr->sa_family); 1672 error = EINVAL; 1673 } 1674 1675 return error; 1676 } 1677 1678 static struct secpolicy * 1679 if_ipsec_add_sp0(struct sockaddr *src, in_port_t sport, 1680 struct sockaddr *dst, in_port_t dport, 1681 int dir, int proto, int level, u_int policy) 1682 { 1683 struct sadb_msg msg; 1684 struct sadb_address xsrc, xdst; 1685 struct sadb_x_policy xpl; 1686 struct sadb_x_ipsecrequest xisr; 1687 size_t size; 1688 size_t padlen; 1689 uint16_t ext_msg_len = 0; 1690 struct mbuf *m; 1691 1692 memset(&msg, 0, sizeof(msg)); 1693 memset(&xsrc, 0, sizeof(xsrc)); 1694 memset(&xdst, 0, sizeof(xdst)); 1695 memset(&xpl, 0, sizeof(xpl)); 1696 memset(&xisr, 0, sizeof(xisr)); 1697 1698 MGETHDR(m, M_WAIT, MT_DATA); 1699 1700 size = if_ipsec_set_sadb_src(&xsrc, src, proto); 1701 ext_msg_len += PFKEY_UNIT64(size); 1702 size = if_ipsec_set_sadb_dst(&xdst, dst, proto); 1703 ext_msg_len += PFKEY_UNIT64(size); 1704 size = if_ipsec_set_sadb_x_policy(&xpl, &xisr, policy, dir, 0, level, NULL, NULL); 1705 ext_msg_len += PFKEY_UNIT64(size); 1706 if_ipsec_set_sadb_msg_add(&msg, ext_msg_len); 1707 1708 /* build PF_KEY message */ 1709 1710 m->m_len = sizeof(msg); 1711 m_copyback(m, 0, sizeof(msg), &msg); 1712 1713 if_ipsec_add_mbuf(m, &xsrc, sizeof(xsrc)); 1714 /* 1715 * secpolicy.spidx.{src, dst} must not be set port number, 1716 * even if it is used for NAT-T. 1717 */ 1718 if_ipsec_add_mbuf_addr_port(m, src, 0, true); 1719 padlen = PFKEY_UNUNIT64(xsrc.sadb_address_len) 1720 - (sizeof(xsrc) + PFKEY_ALIGN8(src->sa_len)); 1721 if_ipsec_add_pad(m, padlen); 1722 1723 if_ipsec_add_mbuf(m, &xdst, sizeof(xdst)); 1724 /* ditto */ 1725 if_ipsec_add_mbuf_addr_port(m, dst, 0, true); 1726 padlen = PFKEY_UNUNIT64(xdst.sadb_address_len) 1727 - (sizeof(xdst) + PFKEY_ALIGN8(dst->sa_len)); 1728 if_ipsec_add_pad(m, padlen); 1729 1730 if_ipsec_add_mbuf(m, &xpl, sizeof(xpl)); 1731 padlen = PFKEY_UNUNIT64(xpl.sadb_x_policy_len) - sizeof(xpl); 1732 if (policy == IPSEC_POLICY_IPSEC) { 1733 if_ipsec_add_mbuf(m, &xisr, sizeof(xisr)); 1734 padlen -= PFKEY_ALIGN8(sizeof(xisr)); 1735 } 1736 if_ipsec_add_pad(m, padlen); 1737 1738 /* key_kpi_spdadd() has already done KEY_SP_REF(). */ 1739 return key_kpi_spdadd(m); 1740 } 1741 1742 static int 1743 if_ipsec_add_sp(struct ipsec_variant *var, 1744 struct sockaddr *src, in_port_t sport, 1745 struct sockaddr *dst, in_port_t dport) 1746 { 1747 struct ipsec_softc *sc = var->iv_softc; 1748 int level; 1749 u_int v6policy; 1750 1751 /* 1752 * must delete sp before add it. 1753 */ 1754 KASSERT(IV_SP_IN(var) == NULL); 1755 KASSERT(IV_SP_OUT(var) == NULL); 1756 KASSERT(IV_SP_IN6(var) == NULL); 1757 KASSERT(IV_SP_OUT6(var) == NULL); 1758 1759 /* 1760 * can be shared? 1761 */ 1762 if (if_ipsec_share_sp(var)) 1763 return 0; 1764 1765 if (if_ipsec_nat_t(sc)) 1766 level = IPSEC_LEVEL_REQUIRE; 1767 else 1768 level = IPSEC_LEVEL_UNIQUE; 1769 1770 if (if_ipsec_fwd_ipv6(sc)) 1771 v6policy = IPSEC_POLICY_IPSEC; 1772 else 1773 v6policy = IPSEC_POLICY_DISCARD; 1774 1775 IV_SP_IN(var) = if_ipsec_add_sp0(dst, dport, src, sport, 1776 IPSEC_DIR_INBOUND, IPPROTO_IPIP, level, IPSEC_POLICY_IPSEC); 1777 if (IV_SP_IN(var) == NULL) 1778 goto fail; 1779 IV_SP_OUT(var) = if_ipsec_add_sp0(src, sport, dst, dport, 1780 IPSEC_DIR_OUTBOUND, IPPROTO_IPIP, level, IPSEC_POLICY_IPSEC); 1781 if (IV_SP_OUT(var) == NULL) 1782 goto fail; 1783 IV_SP_IN6(var) = if_ipsec_add_sp0(dst, dport, src, sport, 1784 IPSEC_DIR_INBOUND, IPPROTO_IPV6, level, v6policy); 1785 if (IV_SP_IN6(var) == NULL) 1786 goto fail; 1787 IV_SP_OUT6(var) = if_ipsec_add_sp0(src, sport, dst, dport, 1788 IPSEC_DIR_OUTBOUND, IPPROTO_IPV6, level, v6policy); 1789 if (IV_SP_OUT6(var) == NULL) 1790 goto fail; 1791 1792 return 0; 1793 1794 fail: 1795 if (IV_SP_IN6(var) != NULL) { 1796 if_ipsec_del_sp0(IV_SP_IN6(var)); 1797 IV_SP_IN6(var) = NULL; 1798 } 1799 if (IV_SP_OUT(var) != NULL) { 1800 if_ipsec_del_sp0(IV_SP_OUT(var)); 1801 IV_SP_OUT(var) = NULL; 1802 } 1803 if (IV_SP_IN(var) != NULL) { 1804 if_ipsec_del_sp0(IV_SP_IN(var)); 1805 IV_SP_IN(var) = NULL; 1806 } 1807 1808 return EEXIST; 1809 } 1810 1811 static int 1812 if_ipsec_del_sp0(struct secpolicy *sp) 1813 { 1814 struct sadb_msg msg; 1815 struct sadb_x_policy xpl; 1816 size_t size; 1817 uint16_t ext_msg_len = 0; 1818 int error; 1819 struct mbuf *m; 1820 1821 if (sp == NULL) 1822 return 0; 1823 1824 memset(&msg, 0, sizeof(msg)); 1825 memset(&xpl, 0, sizeof(xpl)); 1826 1827 MGETHDR(m, M_WAIT, MT_DATA); 1828 1829 size = if_ipsec_set_sadb_x_policy(&xpl, NULL, 0, 0, sp->id, 0, NULL, NULL); 1830 ext_msg_len += PFKEY_UNIT64(size); 1831 1832 if_ipsec_set_sadb_msg_del(&msg, ext_msg_len); 1833 1834 m->m_len = sizeof(msg); 1835 m_copyback(m, 0, sizeof(msg), &msg); 1836 1837 if_ipsec_add_mbuf(m, &xpl, sizeof(xpl)); 1838 1839 /* unreference correspond to key_kpi_spdadd(). */ 1840 KEY_SP_UNREF(&sp); 1841 error = key_kpi_spddelete2(m); 1842 if (error != 0) { 1843 log(LOG_ERR, "%s: cannot delete SP(ID=%u) (error=%d).\n", 1844 __func__, sp->id, error); 1845 } 1846 return error; 1847 } 1848 1849 static void 1850 if_ipsec_del_sp(struct ipsec_variant *var) 1851 { 1852 1853 /* are the SPs shared? */ 1854 if (if_ipsec_unshare_sp(var)) 1855 return; 1856 1857 (void)if_ipsec_del_sp0(IV_SP_OUT(var)); 1858 (void)if_ipsec_del_sp0(IV_SP_IN(var)); 1859 (void)if_ipsec_del_sp0(IV_SP_OUT6(var)); 1860 (void)if_ipsec_del_sp0(IV_SP_IN6(var)); 1861 IV_SP_IN(var) = NULL; 1862 IV_SP_IN6(var) = NULL; 1863 IV_SP_OUT(var) = NULL; 1864 IV_SP_OUT6(var) = NULL; 1865 } 1866 1867 static int 1868 if_ipsec_replace_sp(struct ipsec_softc *sc, struct ipsec_variant *ovar, 1869 struct ipsec_variant *nvar) 1870 { 1871 in_port_t src_port = 0; 1872 in_port_t dst_port = 0; 1873 struct sockaddr *src; 1874 struct sockaddr *dst; 1875 int error = 0; 1876 1877 KASSERT(mutex_owned(&sc->ipsec_lock)); 1878 1879 if_ipsec_del_sp(ovar); 1880 1881 src = nvar->iv_psrc; 1882 dst = nvar->iv_pdst; 1883 if (if_ipsec_nat_t(sc)) { 1884 /* NAT-T enabled */ 1885 src_port = nvar->iv_sport; 1886 dst_port = nvar->iv_dport; 1887 } 1888 if (src && dst) 1889 error = if_ipsec_add_sp(nvar, src, src_port, dst, dst_port); 1890 1891 return error; 1892 } 1893 1894 /* 1895 * ipsec_variant and its SPs update API. 1896 * 1897 * Assumption: 1898 * reader side dereferences sc->ipsec_var in reader critical section only, 1899 * that is, all of reader sides do not reader the sc->ipsec_var after 1900 * pserialize_perform(). 1901 */ 1902 static int 1903 if_ipsec_update_variant(struct ipsec_softc *sc, struct ipsec_variant *nvar, 1904 struct ipsec_variant *nullvar) 1905 { 1906 struct ifnet *ifp = &sc->ipsec_if; 1907 struct ipsec_variant *ovar = sc->ipsec_var; 1908 int error; 1909 1910 KASSERT(mutex_owned(&sc->ipsec_lock)); 1911 1912 /* 1913 * To keep consistency between ipsec(4) I/F settings and SPs, 1914 * we stop packet processing while replacing SPs, that is, we set 1915 * "null" config variant to sc->ipsec_var. 1916 */ 1917 atomic_store_release(&sc->ipsec_var, nullvar); 1918 pserialize_perform(sc->ipsec_psz); 1919 psref_target_destroy(&ovar->iv_psref, iv_psref_class); 1920 1921 error = if_ipsec_replace_sp(sc, ovar, nvar); 1922 if (!error) 1923 atomic_store_release(&sc->ipsec_var, nvar); 1924 else { 1925 psref_target_init(&ovar->iv_psref, iv_psref_class); 1926 atomic_store_release(&sc->ipsec_var, ovar); /* rollback */ 1927 } 1928 1929 pserialize_perform(sc->ipsec_psz); 1930 psref_target_destroy(&nullvar->iv_psref, iv_psref_class); 1931 1932 if (if_ipsec_variant_is_configured(sc->ipsec_var)) 1933 ifp->if_flags |= IFF_RUNNING; 1934 else 1935 ifp->if_flags &= ~IFF_RUNNING; 1936 1937 return error; 1938 } 1939