1 /* $NetBSD: in_pcb.c,v 1.202 2022/11/04 09:05:41 ozaki-r Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1998, 2011 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to The NetBSD Foundation 37 * by Coyote Point Systems, Inc. 
38 * This code is derived from software contributed to The NetBSD Foundation 39 * by Public Access Networks Corporation ("Panix"). It was developed under 40 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 52 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 53 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 54 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 55 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 56 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 57 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 58 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 59 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 60 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 61 * POSSIBILITY OF SUCH DAMAGE. 62 */ 63 64 /* 65 * Copyright (c) 1982, 1986, 1991, 1993, 1995 66 * The Regents of the University of California. All rights reserved. 67 * 68 * Redistribution and use in source and binary forms, with or without 69 * modification, are permitted provided that the following conditions 70 * are met: 71 * 1. Redistributions of source code must retain the above copyright 72 * notice, this list of conditions and the following disclaimer. 
73 * 2. Redistributions in binary form must reproduce the above copyright 74 * notice, this list of conditions and the following disclaimer in the 75 * documentation and/or other materials provided with the distribution. 76 * 3. Neither the name of the University nor the names of its contributors 77 * may be used to endorse or promote products derived from this software 78 * without specific prior written permission. 79 * 80 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 81 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 82 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 83 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 84 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 85 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 86 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 87 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 88 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 89 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 90 * SUCH DAMAGE. 
*
 *	@(#)in_pcb.c	8.4 (Berkeley) 5/24/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: in_pcb.c,v 1.202 2022/11/04 09:05:41 ozaki-r Exp $");

#ifdef _KERNEL_OPT
#include "opt_inet.h"
#include "opt_ipsec.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/ioctl.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/once.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/kauth.h>
#include <sys/uidinfo.h>
#include <sys/domain.h>

#include <net/if.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#include <netinet/portalgo.h>

#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/in6_pcb.h>
#endif

#ifdef IPSEC
#include <netipsec/ipsec.h>
#include <netipsec/key.h>
#endif /* IPSEC */

#include <netinet/tcp_vtw.h>

/*
 * Never assigned in this file, so it stays all-zero; used here as the
 * "no address" value when clearing a foreign address and when probing
 * the wildcard bucket of the bind hash.
 */
struct	in_addr zeroin_addr;

/*
 * Hash bucket selectors.  Each expands to the address of one list head
 * in the given table.  Addresses and ports are passed in network byte
 * order (the macros apply ntohl()/ntohs() themselves) and the sum is
 * reduced with the per-table value that hashinit() stored alongside
 * the bucket array.
 */

/* Bucket keyed on the local port only. */
#define	INPCBHASH_PORT(table, lport) \
	&(table)->inpt_porthashtbl[ntohs(lport) & (table)->inpt_porthash]
/* Bucket keyed on local address + local port. */
#define	INPCBHASH_BIND(table, laddr, lport) \
	&(table)->inpt_bindhashtbl[ \
	    ((ntohl((laddr).s_addr) + ntohs(lport))) & (table)->inpt_bindhash]
/* Bucket keyed on the full (faddr, fport, laddr, lport) tuple. */
#define	INPCBHASH_CONNECT(table, faddr, fport, laddr, lport) \
	&(table)->inpt_connecthashtbl[ \
	    ((ntohl((faddr).s_addr) + ntohs(fport)) + \
	     (ntohl((laddr).s_addr) + ntohs(lport))) & (table)->inpt_connecthash]

/* Port range limits, initialised from the IPPORT_* constants. */
int	anonportmin = IPPORT_ANONMIN;
int	anonportmax = IPPORT_ANONMAX;
int	lowportmin  = IPPORT_RESERVEDMIN;
int	lowportmax  = IPPORT_RESERVEDMAX;

/* Allocators for PCBs; created once by inpcb_poolinit(). */
static pool_cache_t	in4pcb_pool_cache;
#ifdef INET6
static pool_cache_t	in6pcb_pool_cache;
#endif 163 164 static int 165 inpcb_poolinit(void) 166 { 167 168 in4pcb_pool_cache = pool_cache_init(sizeof(struct in4pcb), coherency_unit, 169 0, 0, "in4pcbpl", NULL, IPL_NET, NULL, NULL, NULL); 170 #ifdef INET6 171 in6pcb_pool_cache = pool_cache_init(sizeof(struct in6pcb), coherency_unit, 172 0, 0, "in6pcbpl", NULL, IPL_NET, NULL, NULL, NULL); 173 #endif 174 return 0; 175 } 176 177 void 178 inpcb_init(struct inpcbtable *table, int bindhashsize, int connecthashsize) 179 { 180 static ONCE_DECL(control); 181 182 TAILQ_INIT(&table->inpt_queue); 183 table->inpt_porthashtbl = hashinit(bindhashsize, HASH_LIST, true, 184 &table->inpt_porthash); 185 table->inpt_bindhashtbl = hashinit(bindhashsize, HASH_LIST, true, 186 &table->inpt_bindhash); 187 table->inpt_connecthashtbl = hashinit(connecthashsize, HASH_LIST, true, 188 &table->inpt_connecthash); 189 table->inpt_lastlow = IPPORT_RESERVEDMAX; 190 table->inpt_lastport = (in_port_t)anonportmax; 191 192 RUN_ONCE(&control, inpcb_poolinit); 193 } 194 195 /* 196 * inpcb_create: construct a new PCB and associated with a given socket. 197 * Sets the PCB state to INP_ATTACHED and makes PCB globally visible. 
*/
/*
 * so is the socket to attach to; v is the struct inpcbtable the new PCB
 * is linked into.
 *
 * Returns 0 on success, ENOBUFS when the pool allocation fails, or the
 * error reported by ipsec_init_pcbpolicy() when IPSEC is enabled.
 */
int
inpcb_create(struct socket *so, void *v)
{
	struct inpcbtable *table = v;
	struct inpcb *inp;
	int s;

	/* The allocation may return NULL; checked right below. */
#ifdef INET6
	KASSERT(soaf(so) == AF_INET || soaf(so) == AF_INET6);

	if (soaf(so) == AF_INET)
		inp = pool_cache_get(in4pcb_pool_cache, PR_NOWAIT);
	else
		inp = pool_cache_get(in6pcb_pool_cache, PR_NOWAIT);
#else
	KASSERT(soaf(so) == AF_INET);
	inp = pool_cache_get(in4pcb_pool_cache, PR_NOWAIT);
#endif
	if (inp == NULL)
		return ENOBUFS;
	/* Zero the whole family-specific structure, not just the header. */
	if (soaf(so) == AF_INET)
		memset(inp, 0, sizeof(struct in4pcb));
#ifdef INET6
	else
		memset(inp, 0, sizeof(struct in6pcb));
#endif
	inp->inp_af = soaf(so);
	inp->inp_table = table;
	inp->inp_socket = so;
	inp->inp_portalgo = PORTALGO_DEFAULT;
	inp->inp_bindportonsend = false;

	if (inp->inp_af == AF_INET) {
		in4p_errormtu(inp) = -1;
		in4p_prefsrcip(inp).s_addr = INADDR_ANY;
	}
#ifdef INET6
	else {
		in6p_hops6(inp) = -1;	/* use kernel default */
		if (ip6_v6only)
			inp->inp_flags |= IN6P_IPV6_V6ONLY;
	}
#endif
#if defined(IPSEC)
	if (ipsec_enabled) {
		int error = ipsec_init_pcbpolicy(so, &inp->inp_sp);
		if (error != 0) {
			/* Hand the PCB back to the pool it came from. */
#ifdef INET6
			if (inp->inp_af == AF_INET)
				pool_cache_put(in4pcb_pool_cache, inp);
			else
				pool_cache_put(in6pcb_pool_cache, inp);
#else
			KASSERT(inp->inp_af == AF_INET);
			pool_cache_put(in4pcb_pool_cache, inp);
#endif
			return error;
		}
		inp->inp_sp->sp_inp = inp;
	}
#endif
	so->so_pcb = inp;
	s = splsoftnet();
	/*
	 * Publish the PCB: table queue plus the port hash.  inp_lport is
	 * still 0 here, so it lands in the bucket for port 0 until
	 * inpcb_bind_port() moves it.
	 */
	TAILQ_INSERT_HEAD(&table->inpt_queue, inp, inp_queue);
	LIST_INSERT_HEAD(INPCBHASH_PORT(table, inp->inp_lport), inp,
	    inp_lhash);
	inpcb_set_state(inp, INP_ATTACHED);
	splx(s);
	return 0;
}

/*
 * inpcb_set_port: pick a local port automatically for the PCB.
 *
 * The request is authorized through kauth first (any refusal is
 * reported as EACCES), then portalgo_randport() chooses the port.  On
 * success the PCB is flagged INP_ANONPORT, the chosen port is recorded
 * in the table as the last low or last anonymous port, inp_lport is
 * set (network byte order) and the PCB moves to INP_BOUND.
 *
 * Returns 0, EACCES, or the error from portalgo_randport().
 */
static int
inpcb_set_port(struct sockaddr_in *sin, struct inpcb *inp, kauth_cred_t cred)
{
	struct inpcbtable *table = inp->inp_table;
	struct socket *so = inp->inp_socket;
	in_port_t *lastport;
	in_port_t lport = 0;
	enum kauth_network_req req;
	int error;

	if (inp->inp_flags & INP_LOWPORT) {
#ifndef IPNOPRIVPORTS
		req = KAUTH_REQ_NETWORK_BIND_PRIVPORT;
#else
		req = KAUTH_REQ_NETWORK_BIND_PORT;
#endif

		lastport = &table->inpt_lastlow;
	} else {
		req = KAUTH_REQ_NETWORK_BIND_PORT;

		lastport = &table->inpt_lastport;
	}

	/* XXX-kauth: KAUTH_REQ_NETWORK_BIND_AUTOASSIGN_{,PRIV}PORT */
	error = kauth_authorize_network(cred, KAUTH_NETWORK_BIND, req, so, sin,
	    NULL);
	if (error)
		return EACCES;

       /*
        * Use RFC6056 randomized port selection
        */
	error = portalgo_randport(&lport, inp, cred);
	if (error)
		return error;

	inp->inp_flags |= INP_ANONPORT;
	*lastport = lport;
	/* lport is stored in the table as-is, in the PCB after htons(). */
	lport = htons(lport);
	inp->inp_lport = lport;
	inpcb_set_state(inp, INP_BOUND);

	return 0;
}

/*
 * inpcb_bindableaddr: decide whether sin may be used as a local address.
 *
 * Multicast and the null address are always accepted.  Any other
 * address must resolve to an interface address (in_get_ia(), falling
 * back to ifa_ifwithaddr()) that is not flagged IN_IFF_DUPLICATED;
 * if no interface address is found at all, INP_BINDANY on the PCB
 * still allows it.
 *
 * Returns 0 if acceptable, EAFNOSUPPORT for a non-AF_INET sockaddr,
 * EADDRNOTAVAIL otherwise.  cred is not used by this function.
 */
int
inpcb_bindableaddr(const struct inpcb *inp, struct sockaddr_in *sin,
    kauth_cred_t cred)
{
	int error = EADDRNOTAVAIL;
	struct ifaddr *ifa = NULL;
	int s;

	if (sin->sin_family != AF_INET)
		return EAFNOSUPPORT;

	s = pserialize_read_enter();
	if (IN_MULTICAST(sin->sin_addr.s_addr)) {
		/* Always succeed; port reuse handled in inpcb_bind_port(). */
	} else if (!in_nullhost(sin->sin_addr)) {
		struct in_ifaddr *ia;

		ia = in_get_ia(sin->sin_addr);
		/* check for broadcast addresses */
		if (ia == NULL) {
			ifa = ifa_ifwithaddr(sintosa(sin));
			if (ifa != NULL)
				ia = ifatoia(ifa);
			else if ((inp->inp_flags & INP_BINDANY) != 0) {
				/* Not a local address, but explicitly allowed. */
				error = 0;
				goto error;
			}
		}
		if (ia == NULL)
			goto error;
		if (ia->ia4_flags & IN_IFF_DUPLICATED)
			goto error;
	}
	error = 0;
 error:
	/* Common exit for success and failure: leave the read section. */
	pserialize_read_exit(s);
	return error;
}

/*
 * inpcb_bind_addr: validate sin with inpcb_bindableaddr() and, when it
 * is acceptable, store it as the PCB's local address.  Returns the
 * validation result.
 */
static int
inpcb_bind_addr(struct inpcb *inp, struct sockaddr_in *sin, kauth_cred_t cred)
{
	int error;

	error = inpcb_bindableaddr(inp, sin, cred);
	if (error == 0)
		in4p_laddr(inp) = sin->sin_addr;
	return error;
}

/*
 * inpcb_bind_port: assign the local port named in sin to the PCB.
 *
 * A zero port delegates to inpcb_set_port().  Otherwise the request is
 * authorized through kauth (privileged-port request below
 * IPPORT_RESERVED unless IPNOPRIVPORTS), then checked for conflicts
 * against live PCBs and against vestigial entries reported through
 * the lookup routines.  On success the PCB is rehashed in the port
 * hash under its new port.
 *
 * Returns 0, EACCES when authorization is refused, EADDRINUSE on a
 * conflict, or the error from inpcb_set_port().
 */
static int
inpcb_bind_port(struct inpcb *inp, struct sockaddr_in *sin, kauth_cred_t cred)
{
	struct inpcbtable *table = inp->inp_table;
	struct socket *so = inp->inp_socket;
	int reuseport = (so->so_options & SO_REUSEPORT);
	int wild = 0, error;

	if (IN_MULTICAST(sin->sin_addr.s_addr)) {
		/*
		 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
		 * allow complete duplication of binding if
		 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
		 * and a multicast address is bound on both
		 * new and duplicated sockets.
		 */
		if (so->so_options & (SO_REUSEADDR | SO_REUSEPORT))
			reuseport = SO_REUSEADDR|SO_REUSEPORT;
	}

	if (sin->sin_port == 0) {
		error = inpcb_set_port(sin, inp, cred);
		if (error)
			return error;
	} else {
		struct inpcb *t;
		vestigial_inpcb_t vestige;
#ifdef INET6
		struct inpcb *t6;
		struct in6_addr mapped;
#endif
		enum kauth_network_req req;

		/*
		 * Without any reuse option, the conflict lookups below
		 * also accept wildcard matches.
		 */
		if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
			wild = 1;

#ifndef IPNOPRIVPORTS
		if (ntohs(sin->sin_port) < IPPORT_RESERVED)
			req = KAUTH_REQ_NETWORK_BIND_PRIVPORT;
		else
#endif /* !IPNOPRIVPORTS */
			req = KAUTH_REQ_NETWORK_BIND_PORT;

		error = kauth_authorize_network(cred, KAUTH_NETWORK_BIND, req,
		    so, sin, NULL);
		if (error)
			return EACCES;

#ifdef INET6
		/* Check the IPv4-mapped form against the IPv6 lookup. */
		in6_in_2_v4mapin6(&sin->sin_addr, &mapped);
		t6 = in6pcb_lookup_local(table, &mapped, sin->sin_port, wild, &vestige);
		if (t6 && (reuseport & t6->inp_socket->so_options) == 0)
			return EADDRINUSE;
		/*
		 * NOTE(review): this vestigial test (flags must differ)
		 * is not the same predicate as the IPv4 one further
		 * down (both must be set) -- confirm that is intended.
		 */
		if (!t6 && vestige.valid) {
		    if (!!reuseport != !!vestige.reuse_port) {
			return EADDRINUSE;
		    }
		}
#endif

		/* XXX-kauth */
		/*
		 * Extra ownership check for a non-zero uid binding a
		 * non-multicast address: refuse a port held by a
		 * different uid unless both sides are wildcard-bound and
		 * the holder set SO_REUSEPORT.
		 */
		if (so->so_uidinfo->ui_uid && !IN_MULTICAST(sin->sin_addr.s_addr)) {
			t = inpcb_lookup_local(table, sin->sin_addr, sin->sin_port, 1, &vestige);
			/*
			 * XXX:	investigate ramifications of loosening this
			 *	restriction so that as long as both ports have
			 *	SO_REUSEPORT allow the bind
			 */
			if (t &&
			    (!in_nullhost(sin->sin_addr) ||
			     !in_nullhost(in4p_laddr(t)) ||
			     (t->inp_socket->so_options & SO_REUSEPORT) == 0)
			    && (so->so_uidinfo->ui_uid != t->inp_socket->so_uidinfo->ui_uid)) {
				return EADDRINUSE;
			}
			if (!t && vestige.valid) {
				if ((!in_nullhost(sin->sin_addr)
				     || !in_nullhost(vestige.laddr.v4)
				     || !vestige.reuse_port)
				    && so->so_uidinfo->ui_uid != vestige.uid) {
					return EADDRINUSE;
				}
			}
		}
		/* General conflict check, honouring the reuse options. */
		t = inpcb_lookup_local(table, sin->sin_addr, sin->sin_port, wild, &vestige);
		if (t && (reuseport & t->inp_socket->so_options) == 0)
			return EADDRINUSE;
		if (!t
		    && vestige.valid
		    && !(reuseport && vestige.reuse_port))
			return EADDRINUSE;

		inp->inp_lport = sin->sin_port;
		inpcb_set_state(inp, INP_BOUND);
	}

	/* Move the PCB to the port-hash bucket of its new local port. */
	LIST_REMOVE(inp, inp_lhash);
	LIST_INSERT_HEAD(INPCBHASH_PORT(table, inp->inp_lport), inp,
	    inp_lhash);

	return 0;
}

/*
 * inpcb_bind: assign a local IP address and port number to the PCB.
 *
 * If the address is not a wildcard, verify that it corresponds to a
 * local interface.  If a port is specified and it is privileged, then
 * check the permission.  Check whether the address or port is in use,
 * and if so, whether we can re-use them.
 *
 * A NULL sin binds to the protocol domain's dom_sa_any address.
 * Returns EINVAL if the PCB is not AF_INET, already has a local
 * address or port, or sin has the wrong length; otherwise the result
 * of the address and port binding steps.
 */
int
inpcb_bind(void *v, struct sockaddr_in *sin, struct lwp *l)
{
	struct inpcb *inp = v;
	struct sockaddr_in lsin;
	int error;

	if (inp->inp_af != AF_INET)
		return EINVAL;

	if (inp->inp_lport || !in_nullhost(in4p_laddr(inp)))
		return EINVAL;

	if (NULL != sin) {
		if (sin->sin_len != sizeof(*sin))
			return EINVAL;
	} else {
		/* Work on a private copy so the shared template is not modified. */
		lsin = *((const struct sockaddr_in *)
		    inp->inp_socket->so_proto->pr_domain->dom_sa_any);
		sin = &lsin;
	}

	/* Bind address. */
	error = inpcb_bind_addr(inp, sin, l->l_cred);
	if (error)
		return error;

	/* Bind port. */
	error = inpcb_bind_port(inp, sin, l->l_cred);
	if (error) {
		/* Undo the address assignment made above. */
		in4p_laddr(inp).s_addr = INADDR_ANY;

		return error;
	}

	return 0;
}

/*
 * inpcb_connect: connect from a socket to a specified address, i.e.,
 * assign a foreign IP address and port number to the PCB.
 *
 * Both address and port must be specified in the name argument.
 * If there is no local address for this socket yet, then pick one.
521 */ 522 int 523 inpcb_connect(void *v, struct sockaddr_in *sin, struct lwp *l) 524 { 525 struct inpcb *inp = v; 526 vestigial_inpcb_t vestige; 527 int error; 528 struct in_addr laddr; 529 530 if (inp->inp_af != AF_INET) 531 return EINVAL; 532 533 if (sin->sin_len != sizeof (*sin)) 534 return EINVAL; 535 if (sin->sin_family != AF_INET) 536 return EAFNOSUPPORT; 537 if (sin->sin_port == 0) 538 return EADDRNOTAVAIL; 539 540 if (IN_MULTICAST(sin->sin_addr.s_addr) && 541 inp->inp_socket->so_type == SOCK_STREAM) 542 return EADDRNOTAVAIL; 543 544 if (!IN_ADDRLIST_READER_EMPTY()) { 545 /* 546 * If the destination address is INADDR_ANY, 547 * use any local address (likely loopback). 548 * If the supplied address is INADDR_BROADCAST, 549 * use the broadcast address of an interface 550 * which supports broadcast. (loopback does not) 551 */ 552 553 if (in_nullhost(sin->sin_addr)) { 554 /* XXX racy */ 555 sin->sin_addr = 556 IN_ADDRLIST_READER_FIRST()->ia_addr.sin_addr; 557 } else if (sin->sin_addr.s_addr == INADDR_BROADCAST) { 558 struct in_ifaddr *ia; 559 int s = pserialize_read_enter(); 560 IN_ADDRLIST_READER_FOREACH(ia) { 561 if (ia->ia_ifp->if_flags & IFF_BROADCAST) { 562 sin->sin_addr = 563 ia->ia_broadaddr.sin_addr; 564 break; 565 } 566 } 567 pserialize_read_exit(s); 568 } 569 } 570 /* 571 * If we haven't bound which network number to use as ours, 572 * we will use the number of the outgoing interface. 573 * This depends on having done a routing lookup, which 574 * we will probably have to do anyway, so we might 575 * as well do it now. On the other hand if we are 576 * sending to multiple destinations we may have already 577 * done the lookup, so see if we can use the route 578 * from before. In any case, we only 579 * chose a port number once, even if sending to multiple 580 * destinations. 
581 */ 582 if (in_nullhost(in4p_laddr(inp))) { 583 int xerror; 584 struct in_ifaddr *ia, *_ia; 585 int s; 586 struct psref psref; 587 int bound; 588 589 bound = curlwp_bind(); 590 ia = in_selectsrc(sin, &inp->inp_route, 591 inp->inp_socket->so_options, inp->inp_moptions, &xerror, 592 &psref); 593 if (ia == NULL) { 594 curlwp_bindx(bound); 595 if (xerror == 0) 596 xerror = EADDRNOTAVAIL; 597 return xerror; 598 } 599 s = pserialize_read_enter(); 600 _ia = in_get_ia(IA_SIN(ia)->sin_addr); 601 if (_ia == NULL && (inp->inp_flags & INP_BINDANY) == 0) { 602 pserialize_read_exit(s); 603 ia4_release(ia, &psref); 604 curlwp_bindx(bound); 605 return EADDRNOTAVAIL; 606 } 607 pserialize_read_exit(s); 608 laddr = IA_SIN(ia)->sin_addr; 609 ia4_release(ia, &psref); 610 curlwp_bindx(bound); 611 } else 612 laddr = in4p_laddr(inp); 613 if (inpcb_lookup(inp->inp_table, sin->sin_addr, sin->sin_port, 614 laddr, inp->inp_lport, &vestige) != NULL || 615 vestige.valid) { 616 return EADDRINUSE; 617 } 618 if (in_nullhost(in4p_laddr(inp))) { 619 if (inp->inp_lport == 0) { 620 error = inpcb_bind(inp, NULL, l); 621 /* 622 * This used to ignore the return value 623 * completely, but we need to check for 624 * ephemeral port shortage. 625 * And attempts to request low ports if not root. 
626 */ 627 if (error != 0) 628 return error; 629 } 630 in4p_laddr(inp) = laddr; 631 } 632 in4p_faddr(inp) = sin->sin_addr; 633 inp->inp_fport = sin->sin_port; 634 635 /* Late bind, if needed */ 636 if (inp->inp_bindportonsend) { 637 struct sockaddr_in lsin = *((const struct sockaddr_in *) 638 inp->inp_socket->so_proto->pr_domain->dom_sa_any); 639 lsin.sin_addr = in4p_laddr(inp); 640 lsin.sin_port = 0; 641 642 if ((error = inpcb_bind_port(inp, &lsin, l->l_cred)) != 0) 643 return error; 644 } 645 646 inpcb_set_state(inp, INP_CONNECTED); 647 #if defined(IPSEC) 648 if (ipsec_enabled && inp->inp_socket->so_type == SOCK_STREAM) 649 ipsec_pcbconn(inp->inp_sp); 650 #endif 651 return 0; 652 } 653 654 /* 655 * inpcb_disconnect: remove any foreign IP/port association. 656 * 657 * Note: destroys the PCB if socket was closed. 658 */ 659 void 660 inpcb_disconnect(void *v) 661 { 662 struct inpcb *inp = v; 663 664 if (inp->inp_af != AF_INET) 665 return; 666 667 in4p_faddr(inp) = zeroin_addr; 668 inp->inp_fport = 0; 669 inpcb_set_state(inp, INP_BOUND); 670 #if defined(IPSEC) 671 if (ipsec_enabled) 672 ipsec_pcbdisconn(inp->inp_sp); 673 #endif 674 if (inp->inp_socket->so_state & SS_NOFDREF) 675 inpcb_destroy(inp); 676 } 677 678 /* 679 * inpcb_destroy: destroy PCB as well as the associated socket. 
680 */ 681 void 682 inpcb_destroy(void *v) 683 { 684 struct inpcb *inp = v; 685 struct socket *so = inp->inp_socket; 686 int s; 687 688 KASSERT(inp->inp_af == AF_INET || inp->inp_af == AF_INET6); 689 690 #if defined(IPSEC) 691 if (ipsec_enabled) 692 ipsec_delete_pcbpolicy(inp); 693 #endif 694 so->so_pcb = NULL; 695 696 s = splsoftnet(); 697 inpcb_set_state(inp, INP_ATTACHED); 698 LIST_REMOVE(inp, inp_lhash); 699 TAILQ_REMOVE(&inp->inp_table->inpt_queue, inp, inp_queue); 700 splx(s); 701 702 if (inp->inp_options) { 703 m_free(inp->inp_options); 704 } 705 rtcache_free(&inp->inp_route); 706 ip_freemoptions(inp->inp_moptions); 707 #ifdef INET6 708 if (inp->inp_af == AF_INET6) { 709 if (in6p_outputopts(inp) != NULL) { 710 ip6_clearpktopts(in6p_outputopts(inp), -1); 711 free(in6p_outputopts(inp), M_IP6OPT); 712 } 713 ip6_freemoptions(in6p_moptions(inp)); 714 } 715 #endif 716 sofree(so); /* drops the socket's lock */ 717 718 #ifdef INET6 719 if (inp->inp_af == AF_INET) 720 pool_cache_put(in4pcb_pool_cache, inp); 721 else 722 pool_cache_put(in6pcb_pool_cache, inp); 723 #else 724 KASSERT(inp->inp_af == AF_INET); 725 pool_cache_put(in4pcb_pool_cache, inp); 726 #endif 727 mutex_enter(softnet_lock); /* reacquire the softnet_lock */ 728 } 729 730 /* 731 * inpcb_fetch_sockaddr: fetch the local IP address and port number. 732 */ 733 void 734 inpcb_fetch_sockaddr(struct inpcb *inp, struct sockaddr_in *sin) 735 { 736 737 if (inp->inp_af != AF_INET) 738 return; 739 740 sockaddr_in_init(sin, &in4p_laddr(inp), inp->inp_lport); 741 } 742 743 /* 744 * inpcb_fetch_peeraddr: fetch the foreign IP address and port number. 745 */ 746 void 747 inpcb_fetch_peeraddr(struct inpcb *inp, struct sockaddr_in *sin) 748 { 749 750 if (inp->inp_af != AF_INET) 751 return; 752 753 sockaddr_in_init(sin, &in4p_faddr(inp), inp->inp_fport); 754 } 755 756 /* 757 * inpcb_notify: pass some notification to all connections of a protocol 758 * associated with destination address. 
The local address and/or port 759 * numbers may be specified to limit the search. The "usual action" will 760 * be taken, depending on the command. 761 * 762 * The caller must filter any commands that are not interesting (e.g., 763 * no error in the map). Call the protocol specific routine (if any) to 764 * report any errors for each matching socket. 765 * 766 * Must be called at splsoftnet. 767 */ 768 int 769 inpcb_notify(struct inpcbtable *table, struct in_addr faddr, u_int fport_arg, 770 struct in_addr laddr, u_int lport_arg, int errno, 771 void (*notify)(struct inpcb *, int)) 772 { 773 struct inpcbhead *head; 774 struct inpcb *inp; 775 in_port_t fport = fport_arg, lport = lport_arg; 776 int nmatch; 777 778 if (in_nullhost(faddr) || notify == NULL) 779 return 0; 780 781 nmatch = 0; 782 head = INPCBHASH_CONNECT(table, faddr, fport, laddr, lport); 783 LIST_FOREACH(inp, head, inp_hash) { 784 if (inp->inp_af != AF_INET) 785 continue; 786 787 if (in_hosteq(in4p_faddr(inp), faddr) && 788 inp->inp_fport == fport && 789 inp->inp_lport == lport && 790 in_hosteq(in4p_laddr(inp), laddr)) { 791 (*notify)(inp, errno); 792 nmatch++; 793 } 794 } 795 return nmatch; 796 } 797 798 void 799 inpcb_notifyall(struct inpcbtable *table, struct in_addr faddr, int errno, 800 void (*notify)(struct inpcb *, int)) 801 { 802 struct inpcb *inp; 803 804 if (in_nullhost(faddr) || notify == NULL) 805 return; 806 807 TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) { 808 if (inp->inp_af != AF_INET) 809 continue; 810 if (in_hosteq(in4p_faddr(inp), faddr)) 811 (*notify)(inp, errno); 812 } 813 } 814 815 void 816 in_purgeifmcast(struct ip_moptions *imo, struct ifnet *ifp) 817 { 818 int i, gap; 819 820 /* The owner of imo should be protected by solock */ 821 KASSERT(ifp != NULL); 822 823 if (imo == NULL) 824 return; 825 826 /* 827 * Unselect the outgoing interface if it is being 828 * detached. 
829 */ 830 if (imo->imo_multicast_if_index == ifp->if_index) 831 imo->imo_multicast_if_index = 0; 832 833 /* 834 * Drop multicast group membership if we joined 835 * through the interface being detached. 836 */ 837 for (i = 0, gap = 0; i < imo->imo_num_memberships; i++) { 838 if (imo->imo_membership[i]->inm_ifp == ifp) { 839 in_delmulti(imo->imo_membership[i]); 840 gap++; 841 } else if (gap != 0) 842 imo->imo_membership[i - gap] = imo->imo_membership[i]; 843 } 844 imo->imo_num_memberships -= gap; 845 } 846 847 void 848 inpcb_purgeif0(struct inpcbtable *table, struct ifnet *ifp) 849 { 850 struct inpcb *inp; 851 852 TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) { 853 bool need_unlock = false; 854 855 if (inp->inp_af != AF_INET) 856 continue; 857 858 /* The caller holds either one of inps' lock */ 859 if (!inp_locked(inp)) { 860 inp_lock(inp); 861 need_unlock = true; 862 } 863 864 in_purgeifmcast(inp->inp_moptions, ifp); 865 866 if (need_unlock) 867 inp_unlock(inp); 868 } 869 } 870 871 void 872 inpcb_purgeif(struct inpcbtable *table, struct ifnet *ifp) 873 { 874 struct rtentry *rt; 875 struct inpcb *inp; 876 877 TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) { 878 if (inp->inp_af != AF_INET) 879 continue; 880 if ((rt = rtcache_validate(&inp->inp_route)) != NULL && 881 rt->rt_ifp == ifp) { 882 rtcache_unref(rt, &inp->inp_route); 883 inpcb_rtchange(inp, 0); 884 } else 885 rtcache_unref(rt, &inp->inp_route); 886 } 887 } 888 889 /* 890 * inpcb_losing: check for alternatives when higher level complains about 891 * service problems. For now, invalidate cached routing information. 892 * If the route was created dynamically (by a redirect), time to try a 893 * default gateway again. 
894 */ 895 void 896 inpcb_losing(struct inpcb *inp) 897 { 898 struct rtentry *rt; 899 struct rt_addrinfo info; 900 901 if (inp->inp_af != AF_INET) 902 return; 903 904 if ((rt = rtcache_validate(&inp->inp_route)) == NULL) 905 return; 906 907 memset(&info, 0, sizeof(info)); 908 info.rti_info[RTAX_DST] = rtcache_getdst(&inp->inp_route); 909 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 910 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 911 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); 912 if (rt->rt_flags & RTF_DYNAMIC) { 913 int error; 914 struct rtentry *nrt; 915 916 error = rtrequest(RTM_DELETE, rt_getkey(rt), 917 rt->rt_gateway, rt_mask(rt), rt->rt_flags, &nrt); 918 rtcache_unref(rt, &inp->inp_route); 919 if (error == 0) { 920 rt_newmsg_dynamic(RTM_DELETE, nrt); 921 rt_free(nrt); 922 } 923 } else 924 rtcache_unref(rt, &inp->inp_route); 925 /* 926 * A new route can be allocated 927 * the next time output is attempted. 928 */ 929 rtcache_free(&inp->inp_route); 930 } 931 932 /* 933 * inpcb_rtchange: after a routing change, flush old routing. 934 * A new route can be allocated the next time output is attempted. 935 */ 936 void 937 inpcb_rtchange(struct inpcb *inp, int errno) 938 { 939 940 if (inp->inp_af != AF_INET) 941 return; 942 943 rtcache_free(&inp->inp_route); 944 945 /* XXX SHOULD NOTIFY HIGHER-LEVEL PROTOCOLS */ 946 } 947 948 /* 949 * inpcb_lookup_local: find a PCB by looking at the local port and matching 950 * the local address or resolving the wildcards. Primarily used to detect 951 * when the local address is already in use. 
952 */ 953 struct inpcb * 954 inpcb_lookup_local(struct inpcbtable *table, struct in_addr laddr, 955 u_int lport_arg, int lookup_wildcard, vestigial_inpcb_t *vp) 956 { 957 struct inpcbhead *head; 958 struct inpcb *inp; 959 struct inpcb *match = NULL; 960 int matchwild = 3; 961 int wildcard; 962 in_port_t lport = lport_arg; 963 964 if (vp) 965 vp->valid = 0; 966 967 head = INPCBHASH_PORT(table, lport); 968 LIST_FOREACH(inp, head, inp_lhash) { 969 if (inp->inp_af != AF_INET) 970 continue; 971 if (inp->inp_lport != lport) 972 continue; 973 /* 974 * check if inp's faddr and laddr match with ours. 975 * our faddr is considered null. 976 * count the number of wildcard matches. (0 - 2) 977 * 978 * null null match 979 * A null wildcard match 980 * null B wildcard match 981 * A B non match 982 * A A match 983 */ 984 wildcard = 0; 985 if (!in_nullhost(in4p_faddr(inp))) 986 wildcard++; 987 if (in_nullhost(in4p_laddr(inp))) { 988 if (!in_nullhost(laddr)) 989 wildcard++; 990 } else { 991 if (in_nullhost(laddr)) 992 wildcard++; 993 else { 994 if (!in_hosteq(in4p_laddr(inp), laddr)) 995 continue; 996 } 997 } 998 if (wildcard && !lookup_wildcard) 999 continue; 1000 /* 1001 * prefer an address with less wildcards. 
1002 */ 1003 if (wildcard < matchwild) { 1004 match = inp; 1005 matchwild = wildcard; 1006 if (matchwild == 0) 1007 break; 1008 } 1009 } 1010 if (match && matchwild == 0) 1011 return match; 1012 1013 if (vp && table->vestige) { 1014 void *state = (*table->vestige->init_ports4)(laddr, lport_arg, lookup_wildcard); 1015 vestigial_inpcb_t better; 1016 bool has_better = false; 1017 1018 while (table->vestige 1019 && (*table->vestige->next_port4)(state, vp)) { 1020 1021 if (vp->lport != lport) 1022 continue; 1023 wildcard = 0; 1024 if (!in_nullhost(vp->faddr.v4)) 1025 wildcard++; 1026 if (in_nullhost(vp->laddr.v4)) { 1027 if (!in_nullhost(laddr)) 1028 wildcard++; 1029 } else { 1030 if (in_nullhost(laddr)) 1031 wildcard++; 1032 else { 1033 if (!in_hosteq(vp->laddr.v4, laddr)) 1034 continue; 1035 } 1036 } 1037 if (wildcard && !lookup_wildcard) 1038 continue; 1039 if (wildcard < matchwild) { 1040 better = *vp; 1041 has_better = true; 1042 1043 matchwild = wildcard; 1044 if (matchwild == 0) 1045 break; 1046 } 1047 } 1048 1049 if (has_better) { 1050 *vp = better; 1051 return 0; 1052 } 1053 } 1054 1055 return match; 1056 } 1057 1058 #ifdef DIAGNOSTIC 1059 int inpcb_notifymiss = 0; 1060 #endif 1061 1062 /* 1063 * inpcb_lookup: perform a full 4-tuple PCB lookup. 
1064 */ 1065 struct inpcb * 1066 inpcb_lookup(struct inpcbtable *table, 1067 struct in_addr faddr, u_int fport_arg, 1068 struct in_addr laddr, u_int lport_arg, 1069 vestigial_inpcb_t *vp) 1070 { 1071 struct inpcbhead *head; 1072 struct inpcb *inp; 1073 in_port_t fport = fport_arg, lport = lport_arg; 1074 1075 if (vp) 1076 vp->valid = 0; 1077 1078 head = INPCBHASH_CONNECT(table, faddr, fport, laddr, lport); 1079 LIST_FOREACH(inp, head, inp_hash) { 1080 if (inp->inp_af != AF_INET) 1081 continue; 1082 1083 if (in_hosteq(in4p_faddr(inp), faddr) && 1084 inp->inp_fport == fport && 1085 inp->inp_lport == lport && 1086 in_hosteq(in4p_laddr(inp), laddr)) 1087 goto out; 1088 } 1089 if (vp && table->vestige) { 1090 if ((*table->vestige->lookup4)(faddr, fport_arg, 1091 laddr, lport_arg, vp)) 1092 return 0; 1093 } 1094 1095 #ifdef DIAGNOSTIC 1096 if (inpcb_notifymiss) { 1097 printf("inpcb_lookup: faddr=%08x fport=%d laddr=%08x lport=%d\n", 1098 ntohl(faddr.s_addr), ntohs(fport), 1099 ntohl(laddr.s_addr), ntohs(lport)); 1100 } 1101 #endif 1102 return 0; 1103 1104 out: 1105 /* Move this PCB to the head of hash chain. */ 1106 if (inp != LIST_FIRST(head)) { 1107 LIST_REMOVE(inp, inp_hash); 1108 LIST_INSERT_HEAD(head, inp, inp_hash); 1109 } 1110 return inp; 1111 } 1112 1113 /* 1114 * inpcb_lookup_bound: find a PCB by looking at the local address and port. 1115 * Primarily used to find the listening (i.e., already bound) socket. 
1116 */ 1117 struct inpcb * 1118 inpcb_lookup_bound(struct inpcbtable *table, 1119 struct in_addr laddr, u_int lport_arg) 1120 { 1121 struct inpcbhead *head; 1122 struct inpcb *inp; 1123 in_port_t lport = lport_arg; 1124 1125 head = INPCBHASH_BIND(table, laddr, lport); 1126 LIST_FOREACH(inp, head, inp_hash) { 1127 if (inp->inp_af != AF_INET) 1128 continue; 1129 1130 if (inp->inp_lport == lport && 1131 in_hosteq(in4p_laddr(inp), laddr)) 1132 goto out; 1133 } 1134 head = INPCBHASH_BIND(table, zeroin_addr, lport); 1135 LIST_FOREACH(inp, head, inp_hash) { 1136 if (inp->inp_af != AF_INET) 1137 continue; 1138 1139 if (inp->inp_lport == lport && 1140 in_hosteq(in4p_laddr(inp), zeroin_addr)) 1141 goto out; 1142 } 1143 #ifdef DIAGNOSTIC 1144 if (inpcb_notifymiss) { 1145 printf("inpcb_lookup_bound: laddr=%08x lport=%d\n", 1146 ntohl(laddr.s_addr), ntohs(lport)); 1147 } 1148 #endif 1149 return 0; 1150 1151 out: 1152 /* Move this PCB to the head of hash chain. */ 1153 if (inp != LIST_FIRST(head)) { 1154 LIST_REMOVE(inp, inp_hash); 1155 LIST_INSERT_HEAD(head, inp, inp_hash); 1156 } 1157 return inp; 1158 } 1159 1160 void 1161 inpcb_set_state(struct inpcb *inp, int state) 1162 { 1163 1164 #ifdef INET6 1165 if (inp->inp_af == AF_INET6) { 1166 in6pcb_set_state(inp, state); 1167 return; 1168 } 1169 #else 1170 if (inp->inp_af != AF_INET) 1171 return; 1172 #endif 1173 1174 if (inp->inp_state > INP_ATTACHED) 1175 LIST_REMOVE(inp, inp_hash); 1176 1177 switch (state) { 1178 case INP_BOUND: 1179 LIST_INSERT_HEAD(INPCBHASH_BIND(inp->inp_table, 1180 in4p_laddr(inp), inp->inp_lport), inp, 1181 inp_hash); 1182 break; 1183 case INP_CONNECTED: 1184 LIST_INSERT_HEAD(INPCBHASH_CONNECT(inp->inp_table, 1185 in4p_faddr(inp), inp->inp_fport, 1186 in4p_laddr(inp), inp->inp_lport), inp, 1187 inp_hash); 1188 break; 1189 } 1190 1191 inp->inp_state = state; 1192 } 1193 1194 struct rtentry * 1195 inpcb_rtentry(struct inpcb *inp) 1196 { 1197 struct route *ro; 1198 union { 1199 struct sockaddr dst; 1200 
struct sockaddr_in dst4; 1201 } u; 1202 1203 #ifdef INET6 1204 if (inp->inp_af == AF_INET6) 1205 return in6pcb_rtentry(inp); 1206 #endif 1207 if (inp->inp_af != AF_INET) 1208 return NULL; 1209 1210 ro = &inp->inp_route; 1211 1212 sockaddr_in_init(&u.dst4, &in4p_faddr(inp), 0); 1213 return rtcache_lookup(ro, &u.dst); 1214 } 1215 1216 void 1217 inpcb_rtentry_unref(struct rtentry *rt, struct inpcb *inp) 1218 { 1219 1220 rtcache_unref(rt, &inp->inp_route); 1221 } 1222