1 /* $OpenBSD: in_pcb.c,v 1.157 2014/07/12 21:06:34 yasuoka Exp $ */ 2 /* $NetBSD: in_pcb.c,v 1.25 1996/02/13 23:41:53 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 33 * 34 * NRL grants permission for redistribution and use in source and binary 35 * forms, with or without modification, of the software and documentation 36 * created at NRL provided that the following conditions are met: 37 * 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgements: 45 * This product includes software developed by the University of 46 * California, Berkeley and its contributors. 47 * This product includes software developed at the Information 48 * Technology Division, US Naval Research Laboratory. 49 * 4. Neither the name of the NRL nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 64 * 65 * The views and conclusions contained in the software and documentation 66 * are those of the authors and should not be interpreted as representing 67 * official policies, either expressed or implied, of the US Naval 68 * Research Laboratory (NRL). 69 */ 70 71 #include "pf.h" 72 73 #include <sys/param.h> 74 #include <sys/systm.h> 75 #include <sys/mbuf.h> 76 #include <sys/protosw.h> 77 #include <sys/socket.h> 78 #include <sys/socketvar.h> 79 #include <sys/proc.h> 80 #include <sys/domain.h> 81 #include <sys/pool.h> 82 83 #include <net/if.h> 84 #include <net/route.h> 85 #include <net/pfvar.h> 86 87 #include <netinet/in.h> 88 #include <netinet/in_systm.h> 89 #include <netinet/ip.h> 90 #include <netinet/in_pcb.h> 91 #include <netinet/in_var.h> 92 #include <netinet/ip_var.h> 93 #include <dev/rndvar.h> 94 95 #include <sys/mount.h> 96 #include <nfs/nfsproto.h> 97 98 #ifdef INET6 99 #include <netinet6/in6_var.h> 100 #include <netinet6/ip6_var.h> 101 #endif /* INET6 */ 102 #ifdef IPSEC 103 #include <netinet/ip_esp.h> 104 #endif /* IPSEC */ 105 106 struct in_addr zeroin_addr; 107 108 /* 109 * These configure the range of local port addresses assigned to 110 * "unspecified" outgoing connections/packets/whatever. 111 */ 112 int ipport_firstauto = IPPORT_RESERVED; 113 int ipport_lastauto = IPPORT_USERRESERVED; 114 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; 115 int ipport_hilastauto = IPPORT_HILASTAUTO; 116 117 struct baddynamicports baddynamicports; 118 struct pool inpcb_pool; 119 int inpcb_pool_initialized = 0; 120 121 int in_pcbresize (struct inpcbtable *, int); 122 123 #define INPCBHASH_LOADFACTOR(_x) (((_x) * 3) / 4) 124 125 #define INPCBHASH(table, faddr, fport, laddr, lport, rdom) \ 126 &(table)->inpt_hashtbl[(ntohl((faddr)->s_addr) + \ 127 ntohs((fport)) + ntohs((lport)) + (rdom)) & (table->inpt_hash)] 128 129 #define IN6PCBHASH(table, faddr, fport, laddr, lport, rdom) \ 130 &(table)->inpt_hashtbl[(ntohl((faddr)->s6_addr32[0] ^ \ 131 (faddr)->s6_addr32[3]) + ntohs((fport)) + ntohs((lport)) + (rdom)) & \ 132 (table->inpt_hash)] 133 134 #define INPCBLHASH(table, lport, rdom) \ 135 &(table)->inpt_lhashtbl[(ntohs((lport)) + (rdom)) & table->inpt_lhash] 136 137 void 138 in_pcbinit(struct inpcbtable *table, int hashsize) 139 { 140 141 TAILQ_INIT(&table->inpt_queue); 142 table->inpt_hashtbl = hashinit(hashsize, M_PCB, M_NOWAIT, 143 &table->inpt_hash); 144 if (table->inpt_hashtbl == NULL) 145 panic("in_pcbinit: hashinit failed"); 146 table->inpt_lhashtbl = hashinit(hashsize, M_PCB, M_NOWAIT, 147 &table->inpt_lhash); 148 if (table->inpt_lhashtbl == NULL) 149 panic("in_pcbinit: hashinit failed for lport"); 150 table->inpt_lastport = 0; 151 table->inpt_count = 0; 152 } 153 154 /* 155 * Check if the specified port is invalid for dynamic allocation. 156 */ 157 int 158 in_baddynamic(u_int16_t port, u_int16_t proto) 159 { 160 switch (proto) { 161 case IPPROTO_TCP: 162 return (DP_ISSET(baddynamicports.tcp, port)); 163 case IPPROTO_UDP: 164 #ifdef IPSEC 165 /* Cannot preset this as it is a sysctl */ 166 if (port == udpencap_port) 167 return (1); 168 #endif 169 return (DP_ISSET(baddynamicports.udp, port)); 170 default: 171 return (0); 172 } 173 } 174 175 int 176 in_pcballoc(struct socket *so, struct inpcbtable *table) 177 { 178 struct inpcb *inp; 179 int s; 180 181 splsoftassert(IPL_SOFTNET); 182 183 if (inpcb_pool_initialized == 0) { 184 pool_init(&inpcb_pool, sizeof(struct inpcb), 0, 0, 0, 185 "inpcbpl", NULL); 186 inpcb_pool_initialized = 1; 187 } 188 inp = pool_get(&inpcb_pool, PR_NOWAIT|PR_ZERO); 189 if (inp == NULL) 190 return (ENOBUFS); 191 inp->inp_table = table; 192 inp->inp_socket = so; 193 inp->inp_seclevel[SL_AUTH] = IPSEC_AUTH_LEVEL_DEFAULT; 194 inp->inp_seclevel[SL_ESP_TRANS] = IPSEC_ESP_TRANS_LEVEL_DEFAULT; 195 inp->inp_seclevel[SL_ESP_NETWORK] = IPSEC_ESP_NETWORK_LEVEL_DEFAULT; 196 inp->inp_seclevel[SL_IPCOMP] = IPSEC_IPCOMP_LEVEL_DEFAULT; 197 inp->inp_rtableid = curproc->p_p->ps_rtableid; 198 s = splnet(); 199 if (table->inpt_hash != 0 && 200 table->inpt_count++ > INPCBHASH_LOADFACTOR(table->inpt_hash)) 201 (void)in_pcbresize(table, (table->inpt_hash + 1) * 2); 202 TAILQ_INSERT_HEAD(&table->inpt_queue, inp, inp_queue); 203 LIST_INSERT_HEAD(INPCBLHASH(table, inp->inp_lport, 204 inp->inp_rtableid), inp, inp_lhash); 205 LIST_INSERT_HEAD(INPCBHASH(table, &inp->inp_faddr, inp->inp_fport, 206 &inp->inp_laddr, inp->inp_lport, rtable_l2(inp->inp_rtableid)), 207 inp, inp_hash); 208 splx(s); 209 so->so_pcb = inp; 210 inp->inp_hops = -1; 211 212 #ifdef INET6 213 /* 214 * Small change in this function to set the INP_IPV6 flag so routines 215 * outside pcb-specific routines don't need to use sotopf(), and all 216 * of its pointer chasing, later. 217 */ 218 if (sotopf(so) == PF_INET6) 219 inp->inp_flags = INP_IPV6; 220 inp->inp_cksum6 = -1; 221 #endif /* INET6 */ 222 return (0); 223 } 224 225 int 226 in_pcbbind(struct inpcb *inp, struct mbuf *nam, struct proc *p) 227 { 228 struct socket *so = inp->inp_socket; 229 struct inpcbtable *table = inp->inp_table; 230 u_int16_t *lastport = &inp->inp_table->inpt_lastport; 231 struct sockaddr_in *sin; 232 u_int16_t lport = 0; 233 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); 234 int error; 235 236 #ifdef INET6 237 if (sotopf(so) == PF_INET6) 238 return in6_pcbbind(inp, nam, p); 239 #endif /* INET6 */ 240 241 if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY) 242 return (EINVAL); 243 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0 && 244 ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 || 245 (so->so_options & SO_ACCEPTCONN) == 0)) 246 wild = INPLOOKUP_WILDCARD; 247 if (nam) { 248 sin = mtod(nam, struct sockaddr_in *); 249 if (nam->m_len != sizeof (*sin)) 250 return (EINVAL); 251 #ifdef notdef 252 /* 253 * We should check the family, but old programs 254 * incorrectly fail to initialize it. 255 */ 256 if (sin->sin_family != AF_INET) 257 return (EAFNOSUPPORT); 258 #endif 259 lport = sin->sin_port; 260 if (IN_MULTICAST(sin->sin_addr.s_addr)) { 261 /* 262 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 263 * allow complete duplication of binding if 264 * SO_REUSEPORT is set, or if SO_REUSEADDR is set 265 * and a multicast address is bound on both 266 * new and duplicated sockets. 267 */ 268 if (so->so_options & SO_REUSEADDR) 269 reuseport = SO_REUSEADDR|SO_REUSEPORT; 270 } else if (sin->sin_addr.s_addr != INADDR_ANY) { 271 sin->sin_port = 0; /* yech... */ 272 if (!((so->so_options & SO_BINDANY) || 273 (sin->sin_addr.s_addr == INADDR_BROADCAST && 274 so->so_type == SOCK_DGRAM))) { 275 struct in_ifaddr *ia; 276 277 ia = ifatoia(ifa_ifwithaddr(sintosa(sin), 278 inp->inp_rtableid)); 279 if (ia == NULL) 280 return (EADDRNOTAVAIL); 281 282 /* SOCK_RAW does not use in_pcbbind() */ 283 if (so->so_type != SOCK_DGRAM && 284 sin->sin_addr.s_addr != 285 ia->ia_addr.sin_addr.s_addr) 286 return (EADDRNOTAVAIL); 287 } 288 } 289 if (lport) { 290 struct inpcb *t; 291 292 /* GROSS */ 293 if (ntohs(lport) < IPPORT_RESERVED && 294 (error = suser(p, 0))) 295 return (EACCES); 296 if (so->so_euid) { 297 t = in_pcblookup(table, &zeroin_addr, 0, 298 &sin->sin_addr, lport, INPLOOKUP_WILDCARD, 299 inp->inp_rtableid); 300 if (t && (so->so_euid != t->inp_socket->so_euid)) 301 return (EADDRINUSE); 302 } 303 t = in_pcblookup(table, &zeroin_addr, 0, 304 &sin->sin_addr, lport, wild, inp->inp_rtableid); 305 if (t && (reuseport & t->inp_socket->so_options) == 0) 306 return (EADDRINUSE); 307 } 308 inp->inp_laddr = sin->sin_addr; 309 } 310 if (lport == 0) { 311 u_int16_t first, last; 312 int count; 313 314 if (inp->inp_flags & INP_HIGHPORT) { 315 first = ipport_hifirstauto; /* sysctl */ 316 last = ipport_hilastauto; 317 } else if (inp->inp_flags & INP_LOWPORT) { 318 if ((error = suser(p, 0))) 319 return (EACCES); 320 first = IPPORT_RESERVED-1; /* 1023 */ 321 last = 600; /* not IPPORT_RESERVED/2 */ 322 } else { 323 first = ipport_firstauto; /* sysctl */ 324 last = ipport_lastauto; 325 } 326 327 /* 328 * Simple check to ensure all ports are not used up causing 329 * a deadlock here. 330 * 331 * We split the two cases (up and down) so that the direction 332 * is not being tested on each round of the loop. 333 */ 334 335 if (first > last) { 336 /* 337 * counting down 338 */ 339 count = first - last; 340 if (count) 341 *lastport = first - arc4random_uniform(count); 342 343 do { 344 if (count-- < 0) /* completely used? */ 345 return (EADDRNOTAVAIL); 346 --*lastport; 347 if (*lastport > first || *lastport < last) 348 *lastport = first; 349 lport = htons(*lastport); 350 } while (in_baddynamic(*lastport, so->so_proto->pr_protocol) || 351 in_pcblookup(table, &zeroin_addr, 0, 352 &inp->inp_laddr, lport, wild, inp->inp_rtableid)); 353 } else { 354 /* 355 * counting up 356 */ 357 count = last - first; 358 if (count) 359 *lastport = first + arc4random_uniform(count); 360 361 do { 362 if (count-- < 0) /* completely used? */ 363 return (EADDRNOTAVAIL); 364 ++*lastport; 365 if (*lastport < first || *lastport > last) 366 *lastport = first; 367 lport = htons(*lastport); 368 } while (in_baddynamic(*lastport, so->so_proto->pr_protocol) || 369 in_pcblookup(table, &zeroin_addr, 0, 370 &inp->inp_laddr, lport, wild, inp->inp_rtableid)); 371 } 372 } 373 inp->inp_lport = lport; 374 in_pcbrehash(inp); 375 return (0); 376 } 377 378 /* 379 * Connect from a socket to a specified address. 380 * Both address and port must be specified in argument sin. 381 * If don't have a local address for this socket yet, 382 * then pick one. 383 */ 384 int 385 in_pcbconnect(struct inpcb *inp, struct mbuf *nam) 386 { 387 struct in_addr *ina = NULL; 388 struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *); 389 int error; 390 391 #ifdef INET6 392 if (sotopf(inp->inp_socket) == PF_INET6) 393 return (in6_pcbconnect(inp, nam)); 394 if ((inp->inp_flags & INP_IPV6) != 0) 395 panic("IPv6 pcb passed into in_pcbconnect"); 396 #endif /* INET6 */ 397 398 if (nam->m_len != sizeof (*sin)) 399 return (EINVAL); 400 if (sin->sin_family != AF_INET) 401 return (EAFNOSUPPORT); 402 if (sin->sin_port == 0) 403 return (EADDRNOTAVAIL); 404 405 error = in_selectsrc(&ina, sin, inp->inp_moptions, &inp->inp_route, 406 &inp->inp_laddr, inp->inp_rtableid); 407 if (error) 408 return (error); 409 410 if (in_pcbhashlookup(inp->inp_table, sin->sin_addr, sin->sin_port, 411 *ina, inp->inp_lport, inp->inp_rtableid) != 0) 412 return (EADDRINUSE); 413 414 KASSERT(inp->inp_laddr.s_addr == INADDR_ANY || inp->inp_lport); 415 416 if (inp->inp_laddr.s_addr == INADDR_ANY) { 417 if (inp->inp_lport == 0 && 418 in_pcbbind(inp, NULL, curproc) == EADDRNOTAVAIL) 419 return (EADDRNOTAVAIL); 420 inp->inp_laddr = *ina; 421 } 422 inp->inp_faddr = sin->sin_addr; 423 inp->inp_fport = sin->sin_port; 424 in_pcbrehash(inp); 425 #ifdef IPSEC 426 { 427 /* Cause an IPsec SA to be established. */ 428 /* error is just ignored */ 429 ipsp_spd_inp(NULL, AF_INET, 0, &error, IPSP_DIRECTION_OUT, 430 NULL, inp, NULL); 431 } 432 #endif 433 return (0); 434 } 435 436 void 437 in_pcbdisconnect(struct inpcb *inp) 438 { 439 switch (sotopf(inp->inp_socket)) { 440 #ifdef INET6 441 case PF_INET6: 442 inp->inp_faddr6 = in6addr_any; 443 break; 444 #endif 445 case PF_INET: 446 inp->inp_faddr.s_addr = INADDR_ANY; 447 break; 448 } 449 450 inp->inp_fport = 0; 451 in_pcbrehash(inp); 452 if (inp->inp_socket->so_state & SS_NOFDREF) 453 in_pcbdetach(inp); 454 } 455 456 void 457 in_pcbdetach(struct inpcb *inp) 458 { 459 struct socket *so = inp->inp_socket; 460 int s; 461 462 splsoftassert(IPL_SOFTNET); 463 464 so->so_pcb = 0; 465 sofree(so); 466 if (inp->inp_options) 467 m_freem(inp->inp_options); 468 if (inp->inp_route.ro_rt) 469 rtfree(inp->inp_route.ro_rt); 470 #ifdef INET6 471 if (inp->inp_flags & INP_IPV6) { 472 ip6_freepcbopts(inp->inp_outputopts6); 473 ip6_freemoptions(inp->inp_moptions6); 474 } else 475 #endif 476 ip_freemoptions(inp->inp_moptions); 477 #ifdef IPSEC 478 /* IPsec cleanup here */ 479 if (inp->inp_tdb_in) 480 TAILQ_REMOVE(&inp->inp_tdb_in->tdb_inp_in, 481 inp, inp_tdb_in_next); 482 if (inp->inp_tdb_out) 483 TAILQ_REMOVE(&inp->inp_tdb_out->tdb_inp_out, inp, 484 inp_tdb_out_next); 485 if (inp->inp_ipsec_remotecred) 486 ipsp_reffree(inp->inp_ipsec_remotecred); 487 if (inp->inp_ipsec_remoteauth) 488 ipsp_reffree(inp->inp_ipsec_remoteauth); 489 if (inp->inp_ipo) 490 ipsec_delete_policy(inp->inp_ipo); 491 #endif 492 #if NPF > 0 493 if (inp->inp_pf_sk) { 494 struct pf_state_key *sk; 495 struct pf_state_item *si; 496 497 sk = inp->inp_pf_sk; 498 TAILQ_FOREACH(si, &sk->states, entry) 499 if (sk == si->s->key[PF_SK_STACK] && si->s->rule.ptr && 500 si->s->rule.ptr->divert.port) { 501 pf_unlink_state(si->s); 502 break; 503 } 504 /* pf_unlink_state() may have detached the state */ 505 if (inp->inp_pf_sk) 506 inp->inp_pf_sk->inp = NULL; 507 } 508 #endif 509 s = splnet(); 510 LIST_REMOVE(inp, inp_lhash); 511 LIST_REMOVE(inp, inp_hash); 512 TAILQ_REMOVE(&inp->inp_table->inpt_queue, inp, inp_queue); 513 inp->inp_table->inpt_count--; 514 splx(s); 515 pool_put(&inpcb_pool, inp); 516 } 517 518 void 519 in_setsockaddr(struct inpcb *inp, struct mbuf *nam) 520 { 521 struct sockaddr_in *sin; 522 523 nam->m_len = sizeof (*sin); 524 sin = mtod(nam, struct sockaddr_in *); 525 bzero((caddr_t)sin, sizeof (*sin)); 526 sin->sin_family = AF_INET; 527 sin->sin_len = sizeof(*sin); 528 sin->sin_port = inp->inp_lport; 529 sin->sin_addr = inp->inp_laddr; 530 } 531 532 void 533 in_setpeeraddr(struct inpcb *inp, struct mbuf *nam) 534 { 535 struct sockaddr_in *sin; 536 537 #ifdef INET6 538 if (sotopf(inp->inp_socket) == PF_INET6) { 539 in6_setpeeraddr(inp, nam); 540 return; 541 } 542 #endif /* INET6 */ 543 544 nam->m_len = sizeof (*sin); 545 sin = mtod(nam, struct sockaddr_in *); 546 bzero((caddr_t)sin, sizeof (*sin)); 547 sin->sin_family = AF_INET; 548 sin->sin_len = sizeof(*sin); 549 sin->sin_port = inp->inp_fport; 550 sin->sin_addr = inp->inp_faddr; 551 } 552 553 /* 554 * Pass some notification to all connections of a protocol 555 * associated with address dst. The "usual action" will be 556 * taken, depending on the ctlinput cmd. The caller must filter any 557 * cmds that are uninteresting (e.g., no error in the map). 558 * Call the protocol specific routine (if any) to report 559 * any errors for each matching socket. 560 * 561 * Must be called at splsoftnet. 562 */ 563 void 564 in_pcbnotifyall(struct inpcbtable *table, struct sockaddr *dst, u_int rdomain, 565 int errno, void (*notify)(struct inpcb *, int)) 566 { 567 struct inpcb *inp, *ninp; 568 struct in_addr faddr; 569 570 splsoftassert(IPL_SOFTNET); 571 572 #ifdef INET6 573 /* 574 * See in6_pcbnotify() for IPv6 codepath. By the time this 575 * gets called, the addresses passed are either definitely IPv4 or 576 * IPv6; *_pcbnotify() never gets called with v4-mapped v6 addresses. 577 */ 578 #endif /* INET6 */ 579 580 if (dst->sa_family != AF_INET) 581 return; 582 faddr = satosin(dst)->sin_addr; 583 if (faddr.s_addr == INADDR_ANY) 584 return; 585 586 rdomain = rtable_l2(rdomain); 587 TAILQ_FOREACH_SAFE(inp, &table->inpt_queue, inp_queue, ninp) { 588 #ifdef INET6 589 if (inp->inp_flags & INP_IPV6) 590 continue; 591 #endif 592 if (inp->inp_faddr.s_addr != faddr.s_addr || 593 rtable_l2(inp->inp_rtableid) != rdomain || 594 inp->inp_socket == 0) { 595 continue; 596 } 597 if (notify) 598 (*notify)(inp, errno); 599 } 600 } 601 602 /* 603 * Check for alternatives when higher level complains 604 * about service problems. For now, invalidate cached 605 * routing information. If the route was created dynamically 606 * (by a redirect), time to try a default gateway again. 607 */ 608 void 609 in_losing(struct inpcb *inp) 610 { 611 struct rtentry *rt; 612 struct rt_addrinfo info; 613 614 if ((rt = inp->inp_route.ro_rt)) { 615 inp->inp_route.ro_rt = 0; 616 bzero((caddr_t)&info, sizeof(info)); 617 info.rti_flags = rt->rt_flags; 618 info.rti_info[RTAX_DST] = &inp->inp_route.ro_dst; 619 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 620 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 621 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, rt->rt_ifp, 0, 622 inp->inp_rtableid); 623 if (rt->rt_flags & RTF_DYNAMIC) 624 (void)rtrequest1(RTM_DELETE, &info, rt->rt_priority, 625 (struct rtentry **)0, inp->inp_rtableid); 626 /* 627 * A new route can be allocated 628 * the next time output is attempted. 629 * rtfree() needs to be called in anycase because the inp 630 * is still holding a reference to rt. 631 */ 632 rtfree(rt); 633 } 634 } 635 636 /* 637 * After a routing change, flush old routing 638 * and allocate a (hopefully) better one. 639 */ 640 void 641 in_rtchange(struct inpcb *inp, int errno) 642 { 643 if (inp->inp_route.ro_rt) { 644 rtfree(inp->inp_route.ro_rt); 645 inp->inp_route.ro_rt = 0; 646 /* 647 * A new route can be allocated the next time 648 * output is attempted. 649 */ 650 } 651 } 652 653 struct inpcb * 654 in_pcblookup(struct inpcbtable *table, void *faddrp, u_int fport_arg, 655 void *laddrp, u_int lport_arg, int flags, u_int rdomain) 656 { 657 struct inpcb *inp, *match = NULL; 658 int matchwild = 3, wildcard; 659 u_int16_t fport = fport_arg, lport = lport_arg; 660 struct in_addr faddr = *(struct in_addr *)faddrp; 661 struct in_addr laddr = *(struct in_addr *)laddrp; 662 663 rdomain = rtable_l2(rdomain); /* convert passed rtableid to rdomain */ 664 LIST_FOREACH(inp, INPCBLHASH(table, lport, rdomain), inp_lhash) { 665 if (rtable_l2(inp->inp_rtableid) != rdomain) 666 continue; 667 if (inp->inp_lport != lport) 668 continue; 669 wildcard = 0; 670 #ifdef INET6 671 if (flags & INPLOOKUP_IPV6) { 672 struct in6_addr *laddr6 = (struct in6_addr *)laddrp; 673 struct in6_addr *faddr6 = (struct in6_addr *)faddrp; 674 675 if (!(inp->inp_flags & INP_IPV6)) 676 continue; 677 678 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6)) { 679 if (IN6_IS_ADDR_UNSPECIFIED(laddr6)) 680 wildcard++; 681 else if (!IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, laddr6)) 682 continue; 683 } else { 684 if (!IN6_IS_ADDR_UNSPECIFIED(laddr6)) 685 wildcard++; 686 } 687 688 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) { 689 if (IN6_IS_ADDR_UNSPECIFIED(faddr6)) 690 wildcard++; 691 else if (!IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6, 692 faddr6) || inp->inp_fport != fport) 693 continue; 694 } else { 695 if (!IN6_IS_ADDR_UNSPECIFIED(faddr6)) 696 wildcard++; 697 } 698 } else 699 #endif /* INET6 */ 700 { 701 #ifdef INET6 702 if (inp->inp_flags & INP_IPV6) 703 continue; 704 #endif /* INET6 */ 705 706 if (inp->inp_faddr.s_addr != INADDR_ANY) { 707 if (faddr.s_addr == INADDR_ANY) 708 wildcard++; 709 else if (inp->inp_faddr.s_addr != faddr.s_addr || 710 inp->inp_fport != fport) 711 continue; 712 } else { 713 if (faddr.s_addr != INADDR_ANY) 714 wildcard++; 715 } 716 if (inp->inp_laddr.s_addr != INADDR_ANY) { 717 if (laddr.s_addr == INADDR_ANY) 718 wildcard++; 719 else if (inp->inp_laddr.s_addr != laddr.s_addr) 720 continue; 721 } else { 722 if (laddr.s_addr != INADDR_ANY) 723 wildcard++; 724 } 725 } 726 if ((!wildcard || (flags & INPLOOKUP_WILDCARD)) && 727 wildcard < matchwild) { 728 match = inp; 729 if ((matchwild = wildcard) == 0) 730 break; 731 } 732 } 733 return (match); 734 } 735 736 struct rtentry * 737 in_pcbrtentry(struct inpcb *inp) 738 { 739 struct route *ro; 740 741 ro = &inp->inp_route; 742 743 /* check if route is still valid */ 744 if (ro->ro_rt && (ro->ro_rt->rt_flags & RTF_UP) == 0) { 745 RTFREE(ro->ro_rt); 746 ro->ro_rt = NULL; 747 } 748 749 /* 750 * No route yet, so try to acquire one. 751 */ 752 if (ro->ro_rt == NULL) { 753 #ifdef INET6 754 bzero(ro, sizeof(struct route_in6)); 755 #else 756 bzero(ro, sizeof(struct route)); 757 #endif 758 759 switch(sotopf(inp->inp_socket)) { 760 #ifdef INET6 761 case PF_INET6: 762 if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) 763 break; 764 ro->ro_dst.sa_family = AF_INET6; 765 ro->ro_dst.sa_len = sizeof(struct sockaddr_in6); 766 satosin6(&ro->ro_dst)->sin6_addr = inp->inp_faddr6; 767 ro->ro_tableid = inp->inp_rtableid; 768 rtalloc_mpath(ro, &inp->inp_laddr6.s6_addr32[0]); 769 break; 770 #endif /* INET6 */ 771 case PF_INET: 772 if (inp->inp_faddr.s_addr == INADDR_ANY) 773 break; 774 ro->ro_dst.sa_family = AF_INET; 775 ro->ro_dst.sa_len = sizeof(struct sockaddr_in); 776 satosin(&ro->ro_dst)->sin_addr = inp->inp_faddr; 777 ro->ro_tableid = inp->inp_rtableid; 778 rtalloc_mpath(ro, &inp->inp_laddr.s_addr); 779 break; 780 } 781 } 782 return (ro->ro_rt); 783 } 784 785 /* 786 * Return an IPv4 address, which is the most appropriate for a given 787 * destination. 788 * If necessary, this function lookups the routing table and returns 789 * an entry to the caller for later use. 790 */ 791 int 792 in_selectsrc(struct in_addr **insrc, struct sockaddr_in *sin, 793 struct ip_moptions *mopts, struct route *ro, struct in_addr *laddr, 794 u_int rtableid) 795 { 796 struct sockaddr_in *sin2; 797 struct in_ifaddr *ia = NULL; 798 799 /* 800 * If the source address is not specified but the socket(if any) 801 * is already bound, use the bound address. 802 */ 803 if (laddr && laddr->s_addr != INADDR_ANY) { 804 *insrc = laddr; 805 return (0); 806 } 807 808 /* 809 * If the destination address is multicast and an outgoing 810 * interface has been set as a multicast option, use the 811 * address of that interface as our source address. 812 */ 813 if (IN_MULTICAST(sin->sin_addr.s_addr) && mopts != NULL) { 814 struct ifnet *ifp; 815 816 ifp = mopts->imo_multicast_ifp; 817 if (ifp != NULL) { 818 if (ifp->if_rdomain == rtable_l2(rtableid)) 819 IFP_TO_IA(ifp, ia); 820 if (ia == NULL) 821 return (EADDRNOTAVAIL); 822 823 *insrc = &ia->ia_addr.sin_addr; 824 return (0); 825 } 826 } 827 /* 828 * If route is known or can be allocated now, 829 * our src addr is taken from the i/f, else punt. 830 */ 831 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || 832 (satosin(&ro->ro_dst)->sin_addr.s_addr != sin->sin_addr.s_addr))) { 833 RTFREE(ro->ro_rt); 834 ro->ro_rt = NULL; 835 } 836 if ((ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) { 837 /* No route yet, so try to acquire one */ 838 ro->ro_dst.sa_family = AF_INET; 839 ro->ro_dst.sa_len = sizeof(struct sockaddr_in); 840 satosin(&ro->ro_dst)->sin_addr = sin->sin_addr; 841 ro->ro_tableid = rtableid; 842 rtalloc_mpath(ro, NULL); 843 844 /* 845 * It is important to bzero out the rest of the 846 * struct sockaddr_in when mixing v6 & v4! 847 */ 848 sin2 = (struct sockaddr_in *)&ro->ro_dst; 849 bzero(sin2->sin_zero, sizeof(sin2->sin_zero)); 850 } 851 /* 852 * If we found a route, use the address 853 * corresponding to the outgoing interface. 854 */ 855 if (ro->ro_rt && ro->ro_rt->rt_ifp) 856 ia = ifatoia(ro->ro_rt->rt_ifa); 857 858 if (ia == NULL) 859 return (EADDRNOTAVAIL); 860 861 *insrc = &ia->ia_addr.sin_addr; 862 return (0); 863 } 864 865 void 866 in_pcbrehash(struct inpcb *inp) 867 { 868 struct inpcbtable *table = inp->inp_table; 869 int s; 870 871 s = splnet(); 872 LIST_REMOVE(inp, inp_lhash); 873 LIST_INSERT_HEAD(INPCBLHASH(table, inp->inp_lport, inp->inp_rtableid), 874 inp, inp_lhash); 875 LIST_REMOVE(inp, inp_hash); 876 #ifdef INET6 877 if (inp->inp_flags & INP_IPV6) { 878 LIST_INSERT_HEAD(IN6PCBHASH(table, &inp->inp_faddr6, 879 inp->inp_fport, &inp->inp_laddr6, inp->inp_lport, 880 rtable_l2(inp->inp_rtableid)), inp, inp_hash); 881 } else { 882 #endif /* INET6 */ 883 LIST_INSERT_HEAD(INPCBHASH(table, &inp->inp_faddr, 884 inp->inp_fport, &inp->inp_laddr, inp->inp_lport, 885 rtable_l2(inp->inp_rtableid)), inp, inp_hash); 886 #ifdef INET6 887 } 888 #endif /* INET6 */ 889 splx(s); 890 } 891 892 int 893 in_pcbresize(struct inpcbtable *table, int hashsize) 894 { 895 u_long nhash, nlhash; 896 void *nhashtbl, *nlhashtbl, *ohashtbl, *olhashtbl; 897 struct inpcb *inp0, *inp1; 898 899 ohashtbl = table->inpt_hashtbl; 900 olhashtbl = table->inpt_lhashtbl; 901 902 nhashtbl = hashinit(hashsize, M_PCB, M_NOWAIT, &nhash); 903 nlhashtbl = hashinit(hashsize, M_PCB, M_NOWAIT, &nlhash); 904 if (nhashtbl == NULL || nlhashtbl == NULL) { 905 if (nhashtbl != NULL) 906 free(nhashtbl, M_PCB, 0); 907 if (nlhashtbl != NULL) 908 free(nlhashtbl, M_PCB, 0); 909 return (ENOBUFS); 910 } 911 table->inpt_hashtbl = nhashtbl; 912 table->inpt_lhashtbl = nlhashtbl; 913 table->inpt_hash = nhash; 914 table->inpt_lhash = nlhash; 915 916 TAILQ_FOREACH_SAFE(inp0, &table->inpt_queue, inp_queue, inp1) { 917 in_pcbrehash(inp0); 918 } 919 free(ohashtbl, M_PCB, 0); 920 free(olhashtbl, M_PCB, 0); 921 922 return (0); 923 } 924 925 #ifdef DIAGNOSTIC 926 int in_pcbnotifymiss = 0; 927 #endif 928 929 /* 930 * The in(6)_pcbhashlookup functions are used to locate connected sockets 931 * quickly: 932 * faddr.fport <-> laddr.lport 933 * No wildcard matching is done so that listening sockets are not found. 934 * If the functions return NULL in(6)_pcblookup_listen can be used to 935 * find a listening/bound socket that may accept the connection. 936 * After those two lookups no other are necessary. 937 */ 938 struct inpcb * 939 in_pcbhashlookup(struct inpcbtable *table, struct in_addr faddr, 940 u_int fport_arg, struct in_addr laddr, u_int lport_arg, u_int rdomain) 941 { 942 struct inpcbhead *head; 943 struct inpcb *inp; 944 u_int16_t fport = fport_arg, lport = lport_arg; 945 946 rdomain = rtable_l2(rdomain); /* convert passed rtableid to rdomain */ 947 head = INPCBHASH(table, &faddr, fport, &laddr, lport, rdomain); 948 LIST_FOREACH(inp, head, inp_hash) { 949 #ifdef INET6 950 if (inp->inp_flags & INP_IPV6) 951 continue; /*XXX*/ 952 #endif 953 if (inp->inp_faddr.s_addr == faddr.s_addr && 954 inp->inp_fport == fport && inp->inp_lport == lport && 955 inp->inp_laddr.s_addr == laddr.s_addr && 956 rtable_l2(inp->inp_rtableid) == rdomain) { 957 /* 958 * Move this PCB to the head of hash chain so that 959 * repeated accesses are quicker. This is analogous to 960 * the historic single-entry PCB cache. 961 */ 962 if (inp != LIST_FIRST(head)) { 963 LIST_REMOVE(inp, inp_hash); 964 LIST_INSERT_HEAD(head, inp, inp_hash); 965 } 966 break; 967 } 968 } 969 #ifdef DIAGNOSTIC 970 if (inp == NULL && in_pcbnotifymiss) { 971 printf("in_pcbhashlookup: faddr=%08x fport=%d laddr=%08x lport=%d rdom=%d\n", 972 ntohl(faddr.s_addr), ntohs(fport), 973 ntohl(laddr.s_addr), ntohs(lport), rdomain); 974 } 975 #endif 976 return (inp); 977 } 978 979 #ifdef INET6 980 struct inpcb * 981 in6_pcbhashlookup(struct inpcbtable *table, const struct in6_addr *faddr, 982 u_int fport_arg, const struct in6_addr *laddr, u_int lport_arg, 983 u_int rtable) 984 { 985 struct inpcbhead *head; 986 struct inpcb *inp; 987 u_int16_t fport = fport_arg, lport = lport_arg; 988 989 rtable = rtable_l2(rtable); /* convert passed rtableid to rdomain */ 990 head = IN6PCBHASH(table, faddr, fport, laddr, lport, rtable); 991 LIST_FOREACH(inp, head, inp_hash) { 992 if (!(inp->inp_flags & INP_IPV6)) 993 continue; 994 if (IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6, faddr) && 995 inp->inp_fport == fport && inp->inp_lport == lport && 996 IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, laddr) && 997 rtable_l2(inp->inp_rtableid) == rtable) { 998 /* 999 * Move this PCB to the head of hash chain so that 1000 * repeated accesses are quicker. This is analogous to 1001 * the historic single-entry PCB cache. 1002 */ 1003 if (inp != LIST_FIRST(head)) { 1004 LIST_REMOVE(inp, inp_hash); 1005 LIST_INSERT_HEAD(head, inp, inp_hash); 1006 } 1007 break; 1008 } 1009 } 1010 #ifdef DIAGNOSTIC 1011 if (inp == NULL && in_pcbnotifymiss) { 1012 printf("in6_pcbhashlookup: faddr="); 1013 printf(" fport=%d laddr=", ntohs(fport)); 1014 printf(" lport=%d\n", ntohs(lport)); 1015 } 1016 #endif 1017 return (inp); 1018 } 1019 #endif /* INET6 */ 1020 1021 /* 1022 * The in(6)_pcblookup_listen functions are used to locate listening 1023 * sockets quickly. This are sockets with unspecified foreign address 1024 * and port: 1025 * *.* <-> laddr.lport 1026 * *.* <-> *.lport 1027 */ 1028 struct inpcb * 1029 in_pcblookup_listen(struct inpcbtable *table, struct in_addr laddr, 1030 u_int lport_arg, int reverse, struct mbuf *m, u_int rdomain) 1031 { 1032 struct inpcbhead *head; 1033 struct in_addr *key1, *key2; 1034 struct inpcb *inp; 1035 u_int16_t lport = lport_arg; 1036 1037 rdomain = rtable_l2(rdomain); /* convert passed rtableid to rdomain */ 1038 #if NPF > 0 1039 if (m && m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) { 1040 struct pf_divert *divert; 1041 1042 if ((divert = pf_find_divert(m)) == NULL) 1043 return (NULL); 1044 key1 = key2 = &divert->addr.v4; 1045 lport = divert->port; 1046 } else 1047 #endif 1048 if (reverse) { 1049 key1 = &zeroin_addr; 1050 key2 = &laddr; 1051 } else { 1052 key1 = &laddr; 1053 key2 = &zeroin_addr; 1054 } 1055 1056 head = INPCBHASH(table, &zeroin_addr, 0, key1, lport, rdomain); 1057 LIST_FOREACH(inp, head, inp_hash) { 1058 #ifdef INET6 1059 if (inp->inp_flags & INP_IPV6) 1060 continue; /*XXX*/ 1061 #endif 1062 if (inp->inp_lport == lport && inp->inp_fport == 0 && 1063 inp->inp_laddr.s_addr == key1->s_addr && 1064 inp->inp_faddr.s_addr == INADDR_ANY && 1065 rtable_l2(inp->inp_rtableid) == rdomain) 1066 break; 1067 } 1068 if (inp == NULL && key1->s_addr != key2->s_addr) { 1069 head = INPCBHASH(table, &zeroin_addr, 0, key2, lport, rdomain); 1070 LIST_FOREACH(inp, head, inp_hash) { 1071 #ifdef INET6 1072 if (inp->inp_flags & INP_IPV6) 1073 continue; /*XXX*/ 1074 #endif 1075 if (inp->inp_lport == lport && inp->inp_fport == 0 && 1076 inp->inp_laddr.s_addr == key2->s_addr && 1077 inp->inp_faddr.s_addr == INADDR_ANY && 1078 rtable_l2(inp->inp_rtableid) == rdomain) 1079 break; 1080 } 1081 } 1082 #ifdef DIAGNOSTIC 1083 if (inp == NULL && in_pcbnotifymiss) { 1084 printf("in_pcblookup_listen: laddr=%08x lport=%d\n", 1085 ntohl(laddr.s_addr), ntohs(lport)); 1086 } 1087 #endif 1088 /* 1089 * Move this PCB to the head of hash chain so that 1090 * repeated accesses are quicker. This is analogous to 1091 * the historic single-entry PCB cache. 1092 */ 1093 if (inp != NULL && inp != LIST_FIRST(head)) { 1094 LIST_REMOVE(inp, inp_hash); 1095 LIST_INSERT_HEAD(head, inp, inp_hash); 1096 } 1097 return (inp); 1098 } 1099 1100 #ifdef INET6 1101 struct inpcb * 1102 in6_pcblookup_listen(struct inpcbtable *table, struct in6_addr *laddr, 1103 u_int lport_arg, int reverse, struct mbuf *m, u_int rtable) 1104 { 1105 struct inpcbhead *head; 1106 struct in6_addr *key1, *key2; 1107 struct inpcb *inp; 1108 u_int16_t lport = lport_arg; 1109 1110 rtable = rtable_l2(rtable); /* convert passed rtableid to rdomain */ 1111 #if NPF > 0 1112 if (m && m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) { 1113 struct pf_divert *divert; 1114 1115 if ((divert = pf_find_divert(m)) == NULL) 1116 return (NULL); 1117 key1 = key2 = &divert->addr.v6; 1118 lport = divert->port; 1119 } else 1120 #endif 1121 if (reverse) { 1122 key1 = &zeroin6_addr; 1123 key2 = laddr; 1124 } else { 1125 key1 = laddr; 1126 key2 = &zeroin6_addr; 1127 } 1128 1129 head = IN6PCBHASH(table, &zeroin6_addr, 0, key1, lport, rtable); 1130 LIST_FOREACH(inp, head, inp_hash) { 1131 if (!(inp->inp_flags & INP_IPV6)) 1132 continue; 1133 if (inp->inp_lport == lport && inp->inp_fport == 0 && 1134 IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, key1) && 1135 IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6) && 1136 rtable_l2(inp->inp_rtableid) == rtable) 1137 break; 1138 } 1139 if (inp == NULL && ! IN6_ARE_ADDR_EQUAL(key1, key2)) { 1140 head = IN6PCBHASH(table, &zeroin6_addr, 0, key2, lport, rtable); 1141 LIST_FOREACH(inp, head, inp_hash) { 1142 if (!(inp->inp_flags & INP_IPV6)) 1143 continue; 1144 if (inp->inp_lport == lport && inp->inp_fport == 0 && 1145 IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, key2) && 1146 IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6) && 1147 rtable_l2(inp->inp_rtableid) == rtable) 1148 break; 1149 } 1150 } 1151 #ifdef DIAGNOSTIC 1152 if (inp == NULL && in_pcbnotifymiss) { 1153 printf("in6_pcblookup_listen: laddr= lport=%d\n", 1154 ntohs(lport)); 1155 } 1156 #endif 1157 /* 1158 * Move this PCB to the head of hash chain so that 1159 * repeated accesses are quicker. This is analogous to 1160 * the historic single-entry PCB cache. 1161 */ 1162 if (inp != NULL && inp != LIST_FIRST(head)) { 1163 LIST_REMOVE(inp, inp_hash); 1164 LIST_INSERT_HEAD(head, inp, inp_hash); 1165 } 1166 return (inp); 1167 } 1168 #endif /* INET6 */ 1169