1 /* $NetBSD: in6_pcb.c,v 1.177 2022/11/04 09:04:27 ozaki-r Exp $ */ 2 /* $KAME: in6_pcb.c,v 1.84 2001/02/08 18:02:08 itojun Exp $ */ 3 4 /* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1982, 1986, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 * 61 * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 62 */ 63 64 #include <sys/cdefs.h> 65 __KERNEL_RCSID(0, "$NetBSD: in6_pcb.c,v 1.177 2022/11/04 09:04:27 ozaki-r Exp $"); 66 67 #ifdef _KERNEL_OPT 68 #include "opt_inet.h" 69 #include "opt_ipsec.h" 70 #endif 71 72 #include <sys/param.h> 73 #include <sys/systm.h> 74 #include <sys/mbuf.h> 75 #include <sys/protosw.h> 76 #include <sys/socket.h> 77 #include <sys/socketvar.h> 78 #include <sys/ioctl.h> 79 #include <sys/errno.h> 80 #include <sys/time.h> 81 #include <sys/proc.h> 82 #include <sys/kauth.h> 83 #include <sys/domain.h> 84 #include <sys/once.h> 85 86 #include <net/if.h> 87 #include <net/route.h> 88 89 #include <netinet/in.h> 90 #include <netinet/in_var.h> 91 #include <netinet/in_systm.h> 92 #include <netinet/ip.h> 93 #include <netinet/in_pcb.h> 94 #include <netinet/ip6.h> 95 #include <netinet/portalgo.h> 96 #include <netinet6/ip6_var.h> 97 #include <netinet6/in6_pcb.h> 98 #include <netinet6/scope6_var.h> 99 100 #include "faith.h" 101 102 #ifdef IPSEC 103 #include <netipsec/ipsec.h> 104 #include <netipsec/ipsec6.h> 105 #include <netipsec/key.h> 106 #endif /* IPSEC */ 107 108 #include <netinet/tcp_vtw.h> 109 110 const struct in6_addr zeroin6_addr; 111 112 #define IN6PCBHASH_PORT(table, lport) \ 113 &(table)->inpt_porthashtbl[ntohs(lport) & (table)->inpt_porthash] 114 #define IN6PCBHASH_BIND(table, laddr, lport) \ 115 &(table)->inpt_bindhashtbl[ \ 116 (((laddr)->s6_addr32[0] ^ (laddr)->s6_addr32[1] ^ \ 117 (laddr)->s6_addr32[2] ^ (laddr)->s6_addr32[3]) + ntohs(lport)) & \ 118 (table)->inpt_bindhash] 119 #define IN6PCBHASH_CONNECT(table, faddr, fport, laddr, lport) \ 120 &(table)->inpt_bindhashtbl[ \ 121 ((((faddr)->s6_addr32[0] ^ (faddr)->s6_addr32[1] ^ \ 122 (faddr)->s6_addr32[2] ^ (faddr)->s6_addr32[3]) + ntohs(fport)) + \ 123 (((laddr)->s6_addr32[0] ^ (laddr)->s6_addr32[1] ^ \ 124 (laddr)->s6_addr32[2] ^ (laddr)->s6_addr32[3]) + \ 125 ntohs(lport))) & (table)->inpt_bindhash] 126 127 int ip6_anonportmin = IPV6PORT_ANONMIN; 128 int ip6_anonportmax = IPV6PORT_ANONMAX; 129 int ip6_lowportmin = IPV6PORT_RESERVEDMIN; 130 int ip6_lowportmax = IPV6PORT_RESERVEDMAX; 131 132 void 133 in6pcb_init(struct inpcbtable *table, int bindhashsize, int connecthashsize) 134 { 135 136 inpcb_init(table, bindhashsize, connecthashsize); 137 table->inpt_lastport = (in_port_t)ip6_anonportmax; 138 } 139 140 /* 141 * Bind address from sin6 to inp. 142 */ 143 static int 144 in6pcb_bind_addr(struct inpcb *inp, struct sockaddr_in6 *sin6, struct lwp *l) 145 { 146 int error; 147 int s; 148 149 /* 150 * We should check the family, but old programs 151 * incorrectly fail to initialize it. 152 */ 153 if (sin6->sin6_family != AF_INET6) 154 return EAFNOSUPPORT; 155 156 #ifndef INET 157 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) 158 return EADDRNOTAVAIL; 159 #endif 160 161 if ((error = sa6_embedscope(sin6, ip6_use_defzone)) != 0) 162 return error; 163 164 s = pserialize_read_enter(); 165 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 166 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) { 167 error = EINVAL; 168 goto out; 169 } 170 if (sin6->sin6_addr.s6_addr32[3]) { 171 struct sockaddr_in sin; 172 173 memset(&sin, 0, sizeof(sin)); 174 sin.sin_len = sizeof(sin); 175 sin.sin_family = AF_INET; 176 bcopy(&sin6->sin6_addr.s6_addr32[3], 177 &sin.sin_addr, sizeof(sin.sin_addr)); 178 if (!IN_MULTICAST(sin.sin_addr.s_addr)) { 179 struct ifaddr *ifa; 180 ifa = ifa_ifwithaddr((struct sockaddr *)&sin); 181 if (ifa == NULL && 182 (inp->inp_flags & IN6P_BINDANY) == 0) { 183 error = EADDRNOTAVAIL; 184 goto out; 185 } 186 } 187 } 188 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 189 // succeed 190 } else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 191 struct ifaddr *ifa = NULL; 192 193 if ((inp->inp_flags & IN6P_FAITH) == 0) { 194 ifa = ifa_ifwithaddr(sin6tosa(sin6)); 195 if (ifa == NULL && 196 (inp->inp_flags & IN6P_BINDANY) == 0) { 197 error = EADDRNOTAVAIL; 198 goto out; 199 } 200 } 201 202 /* 203 * bind to an anycast address might accidentally 204 * cause sending a packet with an anycast source 205 * address, so we forbid it. 206 * 207 * We should allow to bind to a deprecated address, 208 * since the application dare to use it. 209 * But, can we assume that they are careful enough 210 * to check if the address is deprecated or not? 211 * Maybe, as a safeguard, we should have a setsockopt 212 * flag to control the bind(2) behavior against 213 * deprecated addresses (default: forbid bind(2)). 214 */ 215 if (ifa && 216 ifatoia6(ifa)->ia6_flags & 217 (IN6_IFF_ANYCAST | IN6_IFF_DUPLICATED)) { 218 error = EADDRNOTAVAIL; 219 goto out; 220 } 221 } 222 in6p_laddr(inp) = sin6->sin6_addr; 223 error = 0; 224 out: 225 pserialize_read_exit(s); 226 return error; 227 } 228 229 /* 230 * Bind port from sin6 to inp. 231 */ 232 static int 233 in6pcb_bind_port(struct inpcb *inp, struct sockaddr_in6 *sin6, struct lwp *l) 234 { 235 struct inpcbtable *table = inp->inp_table; 236 struct socket *so = inp->inp_socket; 237 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); 238 int error; 239 240 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0 && 241 ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 || 242 (so->so_options & SO_ACCEPTCONN) == 0)) 243 wild = 1; 244 245 if (sin6->sin6_port != 0) { 246 enum kauth_network_req req; 247 248 #ifndef IPNOPRIVPORTS 249 if (ntohs(sin6->sin6_port) < IPV6PORT_RESERVED) 250 req = KAUTH_REQ_NETWORK_BIND_PRIVPORT; 251 else 252 #endif /* IPNOPRIVPORTS */ 253 req = KAUTH_REQ_NETWORK_BIND_PORT; 254 255 error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_BIND, 256 req, so, sin6, NULL); 257 if (error) 258 return EACCES; 259 } 260 261 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 262 /* 263 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 264 * allow compepte duplication of binding if 265 * SO_REUSEPORT is set, or if SO_REUSEADDR is set 266 * and a multicast address is bound on both 267 * new and duplicated sockets. 268 */ 269 if (so->so_options & (SO_REUSEADDR | SO_REUSEPORT)) 270 reuseport = SO_REUSEADDR|SO_REUSEPORT; 271 } 272 273 if (sin6->sin6_port != 0) { 274 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 275 #ifdef INET 276 struct inpcb *t; 277 struct vestigial_inpcb vestige; 278 279 t = inpcb_lookup_local(table, 280 *(struct in_addr *)&sin6->sin6_addr.s6_addr32[3], 281 sin6->sin6_port, wild, &vestige); 282 if (t && (reuseport & t->inp_socket->so_options) == 0) 283 return EADDRINUSE; 284 if (!t 285 && vestige.valid 286 && !(reuseport && vestige.reuse_port)) 287 return EADDRINUSE; 288 #else 289 return EADDRNOTAVAIL; 290 #endif 291 } 292 293 { 294 struct inpcb *t; 295 struct vestigial_inpcb vestige; 296 297 t = in6pcb_lookup_local(table, &sin6->sin6_addr, 298 sin6->sin6_port, wild, &vestige); 299 if (t && (reuseport & t->inp_socket->so_options) == 0) 300 return EADDRINUSE; 301 if (!t 302 && vestige.valid 303 && !(reuseport && vestige.reuse_port)) 304 return EADDRINUSE; 305 } 306 } 307 308 if (sin6->sin6_port == 0) { 309 int e; 310 e = in6pcb_set_port(sin6, inp, l); 311 if (e != 0) 312 return e; 313 } else { 314 inp->inp_lport = sin6->sin6_port; 315 inpcb_set_state(inp, INP_BOUND); 316 } 317 318 LIST_REMOVE(inp, inp_lhash); 319 LIST_INSERT_HEAD(IN6PCBHASH_PORT(table, inp->inp_lport), 320 inp, inp_lhash); 321 322 return 0; 323 } 324 325 int 326 in6pcb_bind(void *v, struct sockaddr_in6 *sin6, struct lwp *l) 327 { 328 struct inpcb *inp = v; 329 struct sockaddr_in6 lsin6; 330 int error; 331 332 if (inp->inp_af != AF_INET6) 333 return EINVAL; 334 335 /* 336 * If we already have a local port or a local address it means we're 337 * bounded. 338 */ 339 if (inp->inp_lport || !(IN6_IS_ADDR_UNSPECIFIED(&in6p_laddr(inp)) || 340 (IN6_IS_ADDR_V4MAPPED(&in6p_laddr(inp)) && 341 in6p_laddr(inp).s6_addr32[3] == 0))) 342 return EINVAL; 343 344 if (NULL != sin6) { 345 /* We were provided a sockaddr_in6 to use. */ 346 if (sin6->sin6_len != sizeof(*sin6)) 347 return EINVAL; 348 } else { 349 /* We always bind to *something*, even if it's "anything". */ 350 lsin6 = *((const struct sockaddr_in6 *) 351 inp->inp_socket->so_proto->pr_domain->dom_sa_any); 352 sin6 = &lsin6; 353 } 354 355 /* Bind address. */ 356 error = in6pcb_bind_addr(inp, sin6, l); 357 if (error) 358 return error; 359 360 /* Bind port. */ 361 error = in6pcb_bind_port(inp, sin6, l); 362 if (error) { 363 /* 364 * Reset the address here to "any" so we don't "leak" the 365 * inpcb. 366 */ 367 in6p_laddr(inp) = in6addr_any; 368 369 return error; 370 } 371 372 373 #if 0 374 in6p_flowinfo(inp) = 0; /* XXX */ 375 #endif 376 return 0; 377 } 378 379 /* 380 * Connect from a socket to a specified address. 381 * Both address and port must be specified in argument sin6. 382 * If don't have a local address for this socket yet, 383 * then pick one. 384 */ 385 int 386 in6pcb_connect(void *v, struct sockaddr_in6 *sin6, struct lwp *l) 387 { 388 struct inpcb *inp = v; 389 struct in6_addr *in6a = NULL; 390 struct in6_addr ia6; 391 struct ifnet *ifp = NULL; /* outgoing interface */ 392 int error = 0; 393 int scope_ambiguous = 0; 394 #ifdef INET 395 struct in6_addr mapped; 396 #endif 397 struct sockaddr_in6 tmp; 398 struct vestigial_inpcb vestige; 399 struct psref psref; 400 int bound; 401 402 (void)&in6a; /* XXX fool gcc */ 403 404 if (inp->inp_af != AF_INET6) 405 return EINVAL; 406 407 if (sin6->sin6_len != sizeof(*sin6)) 408 return EINVAL; 409 if (sin6->sin6_family != AF_INET6) 410 return EAFNOSUPPORT; 411 if (sin6->sin6_port == 0) 412 return EADDRNOTAVAIL; 413 414 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) && 415 inp->inp_socket->so_type == SOCK_STREAM) 416 return EADDRNOTAVAIL; 417 418 if (sin6->sin6_scope_id == 0 && !ip6_use_defzone) 419 scope_ambiguous = 1; 420 if ((error = sa6_embedscope(sin6, ip6_use_defzone)) != 0) 421 return error; 422 423 /* sanity check for mapped address case */ 424 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 425 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) 426 return EINVAL; 427 if (IN6_IS_ADDR_UNSPECIFIED(&in6p_laddr(inp))) 428 in6p_laddr(inp).s6_addr16[5] = htons(0xffff); 429 if (!IN6_IS_ADDR_V4MAPPED(&in6p_laddr(inp))) 430 return EINVAL; 431 } else 432 { 433 if (IN6_IS_ADDR_V4MAPPED(&in6p_laddr(inp))) 434 return EINVAL; 435 } 436 437 /* protect *sin6 from overwrites */ 438 tmp = *sin6; 439 sin6 = &tmp; 440 441 bound = curlwp_bind(); 442 /* Source address selection. */ 443 if (IN6_IS_ADDR_V4MAPPED(&in6p_laddr(inp)) && 444 in6p_laddr(inp).s6_addr32[3] == 0) { 445 #ifdef INET 446 struct sockaddr_in sin; 447 struct in_ifaddr *ia4; 448 struct psref _psref; 449 450 memset(&sin, 0, sizeof(sin)); 451 sin.sin_len = sizeof(sin); 452 sin.sin_family = AF_INET; 453 memcpy(&sin.sin_addr, &sin6->sin6_addr.s6_addr32[3], 454 sizeof(sin.sin_addr)); 455 ia4 = in_selectsrc(&sin, &inp->inp_route, 456 inp->inp_socket->so_options, NULL, &error, &_psref); 457 if (ia4 == NULL) { 458 if (error == 0) 459 error = EADDRNOTAVAIL; 460 curlwp_bindx(bound); 461 return error; 462 } 463 memset(&mapped, 0, sizeof(mapped)); 464 mapped.s6_addr16[5] = htons(0xffff); 465 memcpy(&mapped.s6_addr32[3], &IA_SIN(ia4)->sin_addr, 466 sizeof(IA_SIN(ia4)->sin_addr)); 467 ia4_release(ia4, &_psref); 468 in6a = &mapped; 469 #else 470 curlwp_bindx(bound); 471 return EADDRNOTAVAIL; 472 #endif 473 } else { 474 /* 475 * XXX: in6_selectsrc might replace the bound local address 476 * with the address specified by setsockopt(IPV6_PKTINFO). 477 * Is it the intended behavior? 478 */ 479 error = in6_selectsrc(sin6, in6p_outputopts(inp), 480 in6p_moptions(inp), &inp->inp_route, &in6p_laddr(inp), 481 &ifp, &psref, &ia6); 482 if (error == 0) 483 in6a = &ia6; 484 if (ifp && scope_ambiguous && 485 (error = in6_setscope(&sin6->sin6_addr, ifp, NULL)) != 0) { 486 if_put(ifp, &psref); 487 curlwp_bindx(bound); 488 return error; 489 } 490 491 if (in6a == NULL) { 492 if_put(ifp, &psref); 493 curlwp_bindx(bound); 494 if (error == 0) 495 error = EADDRNOTAVAIL; 496 return error; 497 } 498 } 499 500 if (ifp != NULL) { 501 in6p_ip6(inp).ip6_hlim = (u_int8_t)in6pcb_selecthlim(inp, ifp); 502 if_put(ifp, &psref); 503 } else 504 in6p_ip6(inp).ip6_hlim = (u_int8_t)in6pcb_selecthlim_rt(inp); 505 curlwp_bindx(bound); 506 507 if (in6pcb_lookup(inp->inp_table, &sin6->sin6_addr, 508 sin6->sin6_port, 509 IN6_IS_ADDR_UNSPECIFIED(&in6p_laddr(inp)) ? in6a : &in6p_laddr(inp), 510 inp->inp_lport, 0, &vestige) 511 || vestige.valid) 512 return EADDRINUSE; 513 if (IN6_IS_ADDR_UNSPECIFIED(&in6p_laddr(inp)) || 514 (IN6_IS_ADDR_V4MAPPED(&in6p_laddr(inp)) && 515 in6p_laddr(inp).s6_addr32[3] == 0)) 516 { 517 if (inp->inp_lport == 0) { 518 error = in6pcb_bind(inp, NULL, l); 519 if (error != 0) 520 return error; 521 } 522 in6p_laddr(inp) = *in6a; 523 } 524 in6p_faddr(inp) = sin6->sin6_addr; 525 inp->inp_fport = sin6->sin6_port; 526 527 /* Late bind, if needed */ 528 if (inp->inp_bindportonsend) { 529 struct sockaddr_in6 lsin = *((const struct sockaddr_in6 *) 530 inp->inp_socket->so_proto->pr_domain->dom_sa_any); 531 lsin.sin6_addr = in6p_laddr(inp); 532 lsin.sin6_port = 0; 533 534 if ((error = in6pcb_bind_port(inp, &lsin, l)) != 0) 535 return error; 536 } 537 538 inpcb_set_state(inp, INP_CONNECTED); 539 in6p_flowinfo(inp) &= ~IPV6_FLOWLABEL_MASK; 540 if (ip6_auto_flowlabel) 541 in6p_flowinfo(inp) |= 542 (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK); 543 #if defined(IPSEC) 544 if (ipsec_enabled && inp->inp_socket->so_type == SOCK_STREAM) 545 ipsec_pcbconn(inp->inp_sp); 546 #endif 547 return 0; 548 } 549 550 void 551 in6pcb_disconnect(struct inpcb *inp) 552 { 553 memset((void *)&in6p_faddr(inp), 0, sizeof(in6p_faddr(inp))); 554 inp->inp_fport = 0; 555 inpcb_set_state(inp, INP_BOUND); 556 in6p_flowinfo(inp) &= ~IPV6_FLOWLABEL_MASK; 557 #if defined(IPSEC) 558 if (ipsec_enabled) 559 ipsec_pcbdisconn(inp->inp_sp); 560 #endif 561 if (inp->inp_socket->so_state & SS_NOFDREF) 562 inpcb_destroy(inp); 563 } 564 565 void 566 in6pcb_fetch_sockaddr(struct inpcb *inp, struct sockaddr_in6 *sin6) 567 { 568 569 if (inp->inp_af != AF_INET6) 570 return; 571 572 sockaddr_in6_init(sin6, &in6p_laddr(inp), inp->inp_lport, 0, 0); 573 (void)sa6_recoverscope(sin6); /* XXX: should catch errors */ 574 } 575 576 void 577 in6pcb_fetch_peeraddr(struct inpcb *inp, struct sockaddr_in6 *sin6) 578 { 579 580 if (inp->inp_af != AF_INET6) 581 return; 582 583 sockaddr_in6_init(sin6, &in6p_faddr(inp), inp->inp_fport, 0, 0); 584 (void)sa6_recoverscope(sin6); /* XXX: should catch errors */ 585 } 586 587 /* 588 * Pass some notification to all connections of a protocol 589 * associated with address dst. The local address and/or port numbers 590 * may be specified to limit the search. The "usual action" will be 591 * taken, depending on the ctlinput cmd. The caller must filter any 592 * cmds that are uninteresting (e.g., no error in the map). 593 * Call the protocol specific routine (if any) to report 594 * any errors for each matching socket. 595 * 596 * Must be called at splsoftnet. 597 * 598 * Note: src (4th arg) carries the flowlabel value on the original IPv6 599 * header, in sin6_flowinfo member. 600 */ 601 int 602 in6pcb_notify(struct inpcbtable *table, const struct sockaddr *dst, 603 u_int fport_arg, const struct sockaddr *src, u_int lport_arg, int cmd, 604 void *cmdarg, void (*notify)(struct inpcb *, int)) 605 { 606 struct inpcb *inp; 607 struct sockaddr_in6 sa6_src; 608 const struct sockaddr_in6 *sa6_dst; 609 in_port_t fport = fport_arg, lport = lport_arg; 610 int errno; 611 int nmatch = 0; 612 u_int32_t flowinfo; 613 614 if ((unsigned)cmd >= PRC_NCMDS || dst->sa_family != AF_INET6) 615 return 0; 616 617 sa6_dst = (const struct sockaddr_in6 *)dst; 618 if (IN6_IS_ADDR_UNSPECIFIED(&sa6_dst->sin6_addr)) 619 return 0; 620 621 /* 622 * note that src can be NULL when we get notify by local fragmentation. 623 */ 624 sa6_src = (src == NULL) ? sa6_any : *(const struct sockaddr_in6 *)src; 625 flowinfo = sa6_src.sin6_flowinfo; 626 627 /* 628 * Redirects go to all references to the destination, 629 * and use in6pcb_rtchange to invalidate the route cache. 630 * Dead host indications: also use in6pcb_rtchange to invalidate 631 * the cache, and deliver the error to all the sockets. 632 * Otherwise, if we have knowledge of the local port and address, 633 * deliver only to that socket. 634 */ 635 if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) { 636 fport = 0; 637 lport = 0; 638 memset((void *)&sa6_src.sin6_addr, 0, sizeof(sa6_src.sin6_addr)); 639 640 if (cmd != PRC_HOSTDEAD) 641 notify = in6pcb_rtchange; 642 } 643 644 errno = inet6ctlerrmap[cmd]; 645 TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) { 646 struct rtentry *rt = NULL; 647 648 if (inp->inp_af != AF_INET6) 649 continue; 650 651 /* 652 * Under the following condition, notify of redirects 653 * to the pcb, without making address matches against inpcb. 654 * - redirect notification is arrived. 655 * - the inpcb is unconnected. 656 * - the inpcb is caching !RTF_HOST routing entry. 657 * - the ICMPv6 notification is from the gateway cached in the 658 * inpcb. i.e. ICMPv6 notification is from nexthop gateway 659 * the inpcb used very recently. 660 * 661 * This is to improve interaction between netbsd/openbsd 662 * redirect handling code, and inpcb route cache code. 663 * without the clause, !RTF_HOST routing entry (which carries 664 * gateway used by inpcb right before the ICMPv6 redirect) 665 * will be cached forever in unconnected inpcb. 666 * 667 * There still is a question regarding to what is TRT: 668 * - On bsdi/freebsd, RTF_HOST (cloned) routing entry will be 669 * generated on packet output. inpcb will always cache 670 * RTF_HOST routing entry so there's no need for the clause 671 * (ICMPv6 redirect will update RTF_HOST routing entry, 672 * and inpcb is caching it already). 673 * However, bsdi/freebsd are vulnerable to local DoS attacks 674 * due to the cloned routing entries. 675 * - Specwise, "destination cache" is mentioned in RFC2461. 676 * Jinmei says that it implies bsdi/freebsd behavior, itojun 677 * is not really convinced. 678 * - Having hiwat/lowat on # of cloned host route (redirect/ 679 * pmtud) may be a good idea. netbsd/openbsd has it. see 680 * icmp6_mtudisc_update(). 681 */ 682 if ((PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) && 683 IN6_IS_ADDR_UNSPECIFIED(&in6p_laddr(inp)) && 684 (rt = rtcache_validate(&inp->inp_route)) != NULL && 685 !(rt->rt_flags & RTF_HOST)) { 686 const struct sockaddr_in6 *dst6; 687 688 dst6 = (const struct sockaddr_in6 *) 689 rtcache_getdst(&inp->inp_route); 690 if (dst6 == NULL) 691 ; 692 else if (IN6_ARE_ADDR_EQUAL(&dst6->sin6_addr, 693 &sa6_dst->sin6_addr)) { 694 rtcache_unref(rt, &inp->inp_route); 695 goto do_notify; 696 } 697 } 698 rtcache_unref(rt, &inp->inp_route); 699 700 /* 701 * If the error designates a new path MTU for a destination 702 * and the application (associated with this socket) wanted to 703 * know the value, notify. Note that we notify for all 704 * disconnected sockets if the corresponding application 705 * wanted. This is because some UDP applications keep sending 706 * sockets disconnected. 707 * XXX: should we avoid to notify the value to TCP sockets? 708 */ 709 if (cmd == PRC_MSGSIZE && (inp->inp_flags & IN6P_MTU) != 0 && 710 (IN6_IS_ADDR_UNSPECIFIED(&in6p_faddr(inp)) || 711 IN6_ARE_ADDR_EQUAL(&in6p_faddr(inp), &sa6_dst->sin6_addr))) { 712 ip6_notify_pmtu(inp, (const struct sockaddr_in6 *)dst, 713 (u_int32_t *)cmdarg); 714 } 715 716 /* 717 * Detect if we should notify the error. If no source and 718 * destination ports are specified, but non-zero flowinfo and 719 * local address match, notify the error. This is the case 720 * when the error is delivered with an encrypted buffer 721 * by ESP. Otherwise, just compare addresses and ports 722 * as usual. 723 */ 724 if (lport == 0 && fport == 0 && flowinfo && 725 inp->inp_socket != NULL && 726 flowinfo == (in6p_flowinfo(inp) & IPV6_FLOWLABEL_MASK) && 727 IN6_ARE_ADDR_EQUAL(&in6p_laddr(inp), &sa6_src.sin6_addr)) 728 goto do_notify; 729 else if (!IN6_ARE_ADDR_EQUAL(&in6p_faddr(inp), 730 &sa6_dst->sin6_addr) || 731 inp->inp_socket == NULL || 732 (lport && inp->inp_lport != lport) || 733 (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) && 734 !IN6_ARE_ADDR_EQUAL(&in6p_laddr(inp), 735 &sa6_src.sin6_addr)) || 736 (fport && inp->inp_fport != fport)) 737 continue; 738 739 do_notify: 740 if (notify) 741 (*notify)(inp, errno); 742 nmatch++; 743 } 744 return nmatch; 745 } 746 747 void 748 in6pcb_purgeif0(struct inpcbtable *table, struct ifnet *ifp) 749 { 750 struct inpcb *inp; 751 struct ip6_moptions *im6o; 752 struct in6_multi_mship *imm, *nimm; 753 754 KASSERT(ifp != NULL); 755 756 TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) { 757 bool need_unlock = false; 758 if (inp->inp_af != AF_INET6) 759 continue; 760 761 /* The caller holds either one of inps' lock */ 762 if (!inp_locked(inp)) { 763 inp_lock(inp); 764 need_unlock = true; 765 } 766 im6o = in6p_moptions(inp); 767 if (im6o) { 768 /* 769 * Unselect the outgoing interface if it is being 770 * detached. 771 */ 772 if (im6o->im6o_multicast_if_index == ifp->if_index) 773 im6o->im6o_multicast_if_index = 0; 774 775 /* 776 * Drop multicast group membership if we joined 777 * through the interface being detached. 778 * XXX controversial - is it really legal for kernel 779 * to force this? 780 */ 781 LIST_FOREACH_SAFE(imm, &im6o->im6o_memberships, 782 i6mm_chain, nimm) { 783 if (imm->i6mm_maddr->in6m_ifp == ifp) { 784 LIST_REMOVE(imm, i6mm_chain); 785 in6_leavegroup(imm); 786 } 787 } 788 } 789 790 in_purgeifmcast(inp->inp_moptions, ifp); 791 792 if (need_unlock) 793 inp_unlock(inp); 794 } 795 } 796 797 void 798 in6pcb_purgeif(struct inpcbtable *table, struct ifnet *ifp) 799 { 800 struct rtentry *rt; 801 struct inpcb *inp; 802 803 TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) { 804 if (inp->inp_af != AF_INET6) 805 continue; 806 if ((rt = rtcache_validate(&inp->inp_route)) != NULL && 807 rt->rt_ifp == ifp) { 808 rtcache_unref(rt, &inp->inp_route); 809 in6pcb_rtchange(inp, 0); 810 } else 811 rtcache_unref(rt, &inp->inp_route); 812 } 813 } 814 815 /* 816 * After a routing change, flush old routing. A new route can be 817 * allocated the next time output is attempted. 818 */ 819 void 820 in6pcb_rtchange(struct inpcb *inp, int errno) 821 { 822 if (inp->inp_af != AF_INET6) 823 return; 824 825 rtcache_free(&inp->inp_route); 826 /* 827 * A new route can be allocated the next time 828 * output is attempted. 829 */ 830 } 831 832 struct inpcb * 833 in6pcb_lookup_local(struct inpcbtable *table, struct in6_addr *laddr6, 834 u_int lport_arg, int lookup_wildcard, struct vestigial_inpcb *vp) 835 { 836 struct inpcbhead *head; 837 struct inpcb *inp, *match = NULL; 838 int matchwild = 3, wildcard; 839 in_port_t lport = lport_arg; 840 841 if (vp) 842 vp->valid = 0; 843 844 head = IN6PCBHASH_PORT(table, lport); 845 LIST_FOREACH(inp, head, inp_lhash) { 846 if (inp->inp_af != AF_INET6) 847 continue; 848 849 if (inp->inp_lport != lport) 850 continue; 851 wildcard = 0; 852 if (IN6_IS_ADDR_V4MAPPED(&in6p_faddr(inp))) { 853 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) 854 continue; 855 } 856 if (!IN6_IS_ADDR_UNSPECIFIED(&in6p_faddr(inp))) 857 wildcard++; 858 if (IN6_IS_ADDR_V4MAPPED(&in6p_laddr(inp))) { 859 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) 860 continue; 861 if (!IN6_IS_ADDR_V4MAPPED(laddr6)) 862 continue; 863 864 /* duplicate of IPv4 logic */ 865 wildcard = 0; 866 if (IN6_IS_ADDR_V4MAPPED(&in6p_faddr(inp)) && 867 in6p_faddr(inp).s6_addr32[3]) 868 wildcard++; 869 if (!in6p_laddr(inp).s6_addr32[3]) { 870 if (laddr6->s6_addr32[3]) 871 wildcard++; 872 } else { 873 if (!laddr6->s6_addr32[3]) 874 wildcard++; 875 else { 876 if (in6p_laddr(inp).s6_addr32[3] != 877 laddr6->s6_addr32[3]) 878 continue; 879 } 880 } 881 } else if (IN6_IS_ADDR_UNSPECIFIED(&in6p_laddr(inp))) { 882 if (IN6_IS_ADDR_V4MAPPED(laddr6)) { 883 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) 884 continue; 885 } 886 if (!IN6_IS_ADDR_UNSPECIFIED(laddr6)) 887 wildcard++; 888 } else { 889 if (IN6_IS_ADDR_V4MAPPED(laddr6)) { 890 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) 891 continue; 892 } 893 if (IN6_IS_ADDR_UNSPECIFIED(laddr6)) 894 wildcard++; 895 else { 896 if (!IN6_ARE_ADDR_EQUAL(&in6p_laddr(inp), 897 laddr6)) 898 continue; 899 } 900 } 901 if (wildcard && !lookup_wildcard) 902 continue; 903 if (wildcard < matchwild) { 904 match = inp; 905 matchwild = wildcard; 906 if (matchwild == 0) 907 break; 908 } 909 } 910 if (match && matchwild == 0) 911 return match; 912 913 if (vp && table->vestige && table->vestige->init_ports6) { 914 struct vestigial_inpcb better; 915 bool has_better = false; 916 void *state; 917 918 state = (*table->vestige->init_ports6)(laddr6, 919 lport_arg, 920 lookup_wildcard); 921 while (table->vestige 922 && (*table->vestige->next_port6)(state, vp)) { 923 924 if (vp->lport != lport) 925 continue; 926 wildcard = 0; 927 if (!IN6_IS_ADDR_UNSPECIFIED(&vp->faddr.v6)) 928 wildcard++; 929 if (IN6_IS_ADDR_UNSPECIFIED(&vp->laddr.v6)) { 930 if (!IN6_IS_ADDR_UNSPECIFIED(laddr6)) 931 wildcard++; 932 } else { 933 if (IN6_IS_ADDR_V4MAPPED(laddr6)) { 934 if (vp->v6only) 935 continue; 936 } 937 if (IN6_IS_ADDR_UNSPECIFIED(laddr6)) 938 wildcard++; 939 else { 940 if (!IN6_ARE_ADDR_EQUAL(&vp->laddr.v6, laddr6)) 941 continue; 942 } 943 } 944 if (wildcard && !lookup_wildcard) 945 continue; 946 if (wildcard < matchwild) { 947 better = *vp; 948 has_better = true; 949 950 matchwild = wildcard; 951 if (matchwild == 0) 952 break; 953 } 954 } 955 956 if (has_better) { 957 *vp = better; 958 return 0; 959 } 960 } 961 return match; 962 } 963 964 /* 965 * WARNING: return value (rtentry) could be IPv4 one if inpcb is connected to 966 * IPv4 mapped address. 967 */ 968 struct rtentry * 969 in6pcb_rtentry(struct inpcb *inp) 970 { 971 struct rtentry *rt; 972 struct route *ro; 973 union { 974 const struct sockaddr *sa; 975 const struct sockaddr_in6 *sa6; 976 #ifdef INET 977 const struct sockaddr_in *sa4; 978 #endif 979 } cdst; 980 981 ro = &inp->inp_route; 982 983 if (inp->inp_af != AF_INET6) 984 return NULL; 985 986 cdst.sa = rtcache_getdst(ro); 987 if (cdst.sa == NULL) 988 ; 989 #ifdef INET 990 else if (cdst.sa->sa_family == AF_INET) { 991 KASSERT(IN6_IS_ADDR_V4MAPPED(&in6p_faddr(inp))); 992 if (cdst.sa4->sin_addr.s_addr != in6p_faddr(inp).s6_addr32[3]) 993 rtcache_free(ro); 994 } 995 #endif 996 else { 997 if (!IN6_ARE_ADDR_EQUAL(&cdst.sa6->sin6_addr, 998 &in6p_faddr(inp))) 999 rtcache_free(ro); 1000 } 1001 if ((rt = rtcache_validate(ro)) == NULL) 1002 rt = rtcache_update(ro, 1); 1003 #ifdef INET 1004 if (rt == NULL && IN6_IS_ADDR_V4MAPPED(&in6p_faddr(inp))) { 1005 union { 1006 struct sockaddr dst; 1007 struct sockaddr_in dst4; 1008 } u; 1009 struct in_addr addr; 1010 1011 addr.s_addr = in6p_faddr(inp).s6_addr32[3]; 1012 1013 sockaddr_in_init(&u.dst4, &addr, 0); 1014 if (rtcache_setdst(ro, &u.dst) != 0) 1015 return NULL; 1016 1017 rt = rtcache_init(ro); 1018 } else 1019 #endif 1020 if (rt == NULL && !IN6_IS_ADDR_UNSPECIFIED(&in6p_faddr(inp))) { 1021 union { 1022 struct sockaddr dst; 1023 struct sockaddr_in6 dst6; 1024 } u; 1025 1026 sockaddr_in6_init(&u.dst6, &in6p_faddr(inp), 0, 0, 0); 1027 if (rtcache_setdst(ro, &u.dst) != 0) 1028 return NULL; 1029 1030 rt = rtcache_init(ro); 1031 } 1032 return rt; 1033 } 1034 1035 void 1036 in6pcb_rtentry_unref(struct rtentry *rt, struct inpcb *inp) 1037 { 1038 1039 rtcache_unref(rt, &inp->inp_route); 1040 } 1041 1042 struct inpcb * 1043 in6pcb_lookup(struct inpcbtable *table, const struct in6_addr *faddr6, 1044 u_int fport_arg, const struct in6_addr *laddr6, u_int lport_arg, 1045 int faith, 1046 struct vestigial_inpcb *vp) 1047 { 1048 struct inpcbhead *head; 1049 struct inpcb *inp; 1050 in_port_t fport = fport_arg, lport = lport_arg; 1051 1052 if (vp) 1053 vp->valid = 0; 1054 1055 head = IN6PCBHASH_CONNECT(table, faddr6, fport, laddr6, lport); 1056 LIST_FOREACH(inp, head, inp_hash) { 1057 if (inp->inp_af != AF_INET6) 1058 continue; 1059 1060 /* find exact match on both source and dest */ 1061 if (inp->inp_fport != fport) 1062 continue; 1063 if (inp->inp_lport != lport) 1064 continue; 1065 if (IN6_IS_ADDR_UNSPECIFIED(&in6p_faddr(inp))) 1066 continue; 1067 if (!IN6_ARE_ADDR_EQUAL(&in6p_faddr(inp), faddr6)) 1068 continue; 1069 if (IN6_IS_ADDR_UNSPECIFIED(&in6p_laddr(inp))) 1070 continue; 1071 if (!IN6_ARE_ADDR_EQUAL(&in6p_laddr(inp), laddr6)) 1072 continue; 1073 if ((IN6_IS_ADDR_V4MAPPED(laddr6) || 1074 IN6_IS_ADDR_V4MAPPED(faddr6)) && 1075 (inp->inp_flags & IN6P_IPV6_V6ONLY)) 1076 continue; 1077 return inp; 1078 } 1079 if (vp && table->vestige) { 1080 if ((*table->vestige->lookup6)(faddr6, fport_arg, 1081 laddr6, lport_arg, vp)) 1082 return NULL; 1083 } 1084 1085 return NULL; 1086 } 1087 1088 struct inpcb * 1089 in6pcb_lookup_bound(struct inpcbtable *table, const struct in6_addr *laddr6, 1090 u_int lport_arg, int faith) 1091 { 1092 struct inpcbhead *head; 1093 struct inpcb *inp; 1094 in_port_t lport = lport_arg; 1095 #ifdef INET 1096 struct in6_addr zero_mapped; 1097 #endif 1098 1099 head = IN6PCBHASH_BIND(table, laddr6, lport); 1100 LIST_FOREACH(inp, head, inp_hash) { 1101 if (inp->inp_af != AF_INET6) 1102 continue; 1103 1104 if (faith && (inp->inp_flags & IN6P_FAITH) == 0) 1105 continue; 1106 if (inp->inp_fport != 0) 1107 continue; 1108 if (inp->inp_lport != lport) 1109 continue; 1110 if (IN6_IS_ADDR_V4MAPPED(laddr6) && 1111 (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) 1112 continue; 1113 if (IN6_ARE_ADDR_EQUAL(&in6p_laddr(inp), laddr6)) 1114 goto out; 1115 } 1116 #ifdef INET 1117 if (IN6_IS_ADDR_V4MAPPED(laddr6)) { 1118 memset(&zero_mapped, 0, sizeof(zero_mapped)); 1119 zero_mapped.s6_addr16[5] = 0xffff; 1120 head = IN6PCBHASH_BIND(table, &zero_mapped, lport); 1121 LIST_FOREACH(inp, head, inp_hash) { 1122 if (inp->inp_af != AF_INET6) 1123 continue; 1124 1125 if (faith && (inp->inp_flags & IN6P_FAITH) == 0) 1126 continue; 1127 if (inp->inp_fport != 0) 1128 continue; 1129 if (inp->inp_lport != lport) 1130 continue; 1131 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) 1132 continue; 1133 if (IN6_ARE_ADDR_EQUAL(&in6p_laddr(inp), &zero_mapped)) 1134 goto out; 1135 } 1136 } 1137 #endif 1138 head = IN6PCBHASH_BIND(table, &zeroin6_addr, lport); 1139 LIST_FOREACH(inp, head, inp_hash) { 1140 if (inp->inp_af != AF_INET6) 1141 continue; 1142 1143 if (faith && (inp->inp_flags & IN6P_FAITH) == 0) 1144 continue; 1145 if (inp->inp_fport != 0) 1146 continue; 1147 if (inp->inp_lport != lport) 1148 continue; 1149 if (IN6_IS_ADDR_V4MAPPED(laddr6) && 1150 (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) 1151 continue; 1152 if (IN6_ARE_ADDR_EQUAL(&in6p_laddr(inp), &zeroin6_addr)) 1153 goto out; 1154 } 1155 return NULL; 1156 1157 out: 1158 if (inp != LIST_FIRST(head)) { 1159 LIST_REMOVE(inp, inp_hash); 1160 LIST_INSERT_HEAD(head, inp, inp_hash); 1161 } 1162 return inp; 1163 } 1164 1165 void 1166 in6pcb_set_state(struct inpcb *inp, int state) 1167 { 1168 1169 if (inp->inp_af != AF_INET6) 1170 return; 1171 1172 if (inp->inp_state > INP_ATTACHED) 1173 LIST_REMOVE(inp, inp_hash); 1174 1175 switch (state) { 1176 case INP_BOUND: 1177 LIST_INSERT_HEAD(IN6PCBHASH_BIND(inp->inp_table, 1178 &in6p_laddr(inp), inp->inp_lport), inp, 1179 inp_hash); 1180 break; 1181 case INP_CONNECTED: 1182 LIST_INSERT_HEAD(IN6PCBHASH_CONNECT(inp->inp_table, 1183 &in6p_faddr(inp), inp->inp_fport, 1184 &in6p_laddr(inp), inp->inp_lport), inp, 1185 inp_hash); 1186 break; 1187 } 1188 1189 inp->inp_state = state; 1190 } 1191