1 /* $OpenBSD: in_pcb.c,v 1.104 2009/03/15 19:40:41 miod Exp $ */ 2 /* $NetBSD: in_pcb.c,v 1.25 1996/02/13 23:41:53 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 33 * 34 * NRL grants permission for redistribution and use in source and binary 35 * forms, with or without modification, of the software and documentation 36 * created at NRL provided that the following conditions are met: 37 * 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgements: 45 * This product includes software developed by the University of 46 * California, Berkeley and its contributors. 47 * This product includes software developed at the Information 48 * Technology Division, US Naval Research Laboratory. 49 * 4. Neither the name of the NRL nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 64 * 65 * The views and conclusions contained in the software and documentation 66 * are those of the authors and should not be interpreted as representing 67 * official policies, either expressed or implied, of the US Naval 68 * Research Laboratory (NRL). 69 */ 70 71 #include "pf.h" 72 73 #include <sys/param.h> 74 #include <sys/systm.h> 75 #include <sys/mbuf.h> 76 #include <sys/protosw.h> 77 #include <sys/socket.h> 78 #include <sys/socketvar.h> 79 #include <sys/proc.h> 80 #include <sys/domain.h> 81 #include <sys/pool.h> 82 83 #include <net/if.h> 84 #include <net/route.h> 85 #include <net/pfvar.h> 86 87 #include <netinet/in.h> 88 #include <netinet/in_systm.h> 89 #include <netinet/ip.h> 90 #include <netinet/in_pcb.h> 91 #include <netinet/in_var.h> 92 #include <netinet/ip_var.h> 93 #include <dev/rndvar.h> 94 95 #include <sys/mount.h> 96 #include <nfs/nfsproto.h> 97 98 #ifdef INET6 99 #include <netinet6/ip6_var.h> 100 #endif /* INET6 */ 101 #ifdef IPSEC 102 #include <netinet/ip_esp.h> 103 #endif /* IPSEC */ 104 105 struct in_addr zeroin_addr; 106 107 extern int ipsec_auth_default_level; 108 extern int ipsec_esp_trans_default_level; 109 extern int ipsec_esp_network_default_level; 110 extern int ipsec_ipcomp_default_level; 111 112 /* 113 * These configure the range of local port addresses assigned to 114 * "unspecified" outgoing connections/packets/whatever. 115 */ 116 int ipport_firstauto = IPPORT_RESERVED; 117 int ipport_lastauto = IPPORT_USERRESERVED; 118 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; 119 int ipport_hilastauto = IPPORT_HILASTAUTO; 120 121 struct pool inpcb_pool; 122 int inpcb_pool_initialized = 0; 123 124 #define INPCBHASH(table, faddr, fport, laddr, lport) \ 125 &(table)->inpt_hashtbl[(ntohl((faddr)->s_addr) + \ 126 ntohs((fport)) + ntohs((lport))) & (table->inpt_hash)] 127 128 #define IN6PCBHASH(table, faddr, fport, laddr, lport) \ 129 &(table)->inpt_hashtbl[(ntohl((faddr)->s6_addr32[0] ^ \ 130 (faddr)->s6_addr32[3]) + ntohs((fport)) + ntohs((lport))) & \ 131 (table->inpt_hash)] 132 133 #define INPCBLHASH(table, lport) \ 134 &(table)->inpt_lhashtbl[lport & table->inpt_lhash] 135 136 void 137 in_pcbinit(table, hashsize) 138 struct inpcbtable *table; 139 int hashsize; 140 { 141 142 CIRCLEQ_INIT(&table->inpt_queue); 143 table->inpt_hashtbl = hashinit(hashsize, M_PCB, M_NOWAIT, 144 &table->inpt_hash); 145 if (table->inpt_hashtbl == NULL) 146 panic("in_pcbinit: hashinit failed"); 147 table->inpt_lhashtbl = hashinit(hashsize, M_PCB, M_NOWAIT, 148 &table->inpt_lhash); 149 if (table->inpt_lhashtbl == NULL) 150 panic("in_pcbinit: hashinit failed for lport"); 151 table->inpt_lastport = 0; 152 } 153 154 struct baddynamicports baddynamicports; 155 156 /* 157 * Check if the specified port is invalid for dynamic allocation. 158 */ 159 int 160 in_baddynamic(u_int16_t port, u_int16_t proto) 161 { 162 switch (proto) { 163 case IPPROTO_TCP: 164 return (DP_ISSET(baddynamicports.tcp, port)); 165 case IPPROTO_UDP: 166 #ifdef IPSEC 167 /* Cannot preset this as it is a sysctl */ 168 if (port == udpencap_port) 169 return (1); 170 #endif 171 return (DP_ISSET(baddynamicports.udp, port)); 172 default: 173 return (0); 174 } 175 } 176 177 int 178 in_pcballoc(so, v) 179 struct socket *so; 180 void *v; 181 { 182 struct inpcbtable *table = v; 183 struct inpcb *inp; 184 int s; 185 186 if (inpcb_pool_initialized == 0) { 187 pool_init(&inpcb_pool, sizeof(struct inpcb), 0, 0, 0, 188 "inpcbpl", NULL); 189 inpcb_pool_initialized = 1; 190 } 191 inp = pool_get(&inpcb_pool, PR_NOWAIT); 192 if (inp == NULL) 193 return (ENOBUFS); 194 bzero((caddr_t)inp, sizeof(*inp)); 195 inp->inp_table = table; 196 inp->inp_socket = so; 197 inp->inp_seclevel[SL_AUTH] = ipsec_auth_default_level; 198 inp->inp_seclevel[SL_ESP_TRANS] = ipsec_esp_trans_default_level; 199 inp->inp_seclevel[SL_ESP_NETWORK] = ipsec_esp_network_default_level; 200 inp->inp_seclevel[SL_IPCOMP] = ipsec_ipcomp_default_level; 201 s = splnet(); 202 CIRCLEQ_INSERT_HEAD(&table->inpt_queue, inp, inp_queue); 203 LIST_INSERT_HEAD(INPCBLHASH(table, inp->inp_lport), inp, inp_lhash); 204 LIST_INSERT_HEAD(INPCBHASH(table, &inp->inp_faddr, inp->inp_fport, 205 &inp->inp_laddr, inp->inp_lport), inp, inp_hash); 206 splx(s); 207 so->so_pcb = inp; 208 inp->inp_hops = -1; 209 210 #ifdef INET6 211 /* 212 * Small change in this function to set the INP_IPV6 flag so routines 213 * outside pcb-specific routines don't need to use sotopf(), and all 214 * of its pointer chasing, later. 215 */ 216 if (sotopf(so) == PF_INET6) 217 inp->inp_flags = INP_IPV6; 218 inp->in6p_cksum = -1; 219 #endif /* INET6 */ 220 return (0); 221 } 222 223 int 224 in_pcbbind(v, nam, p) 225 void *v; 226 struct mbuf *nam; 227 struct proc *p; 228 { 229 struct inpcb *inp = v; 230 struct socket *so = inp->inp_socket; 231 struct inpcbtable *table = inp->inp_table; 232 u_int16_t *lastport = &inp->inp_table->inpt_lastport; 233 struct sockaddr_in *sin; 234 u_int16_t lport = 0; 235 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); 236 int error; 237 238 #ifdef INET6 239 if (sotopf(so) == PF_INET6) 240 return in6_pcbbind(inp, nam, p); 241 #endif /* INET6 */ 242 243 if (TAILQ_EMPTY(&in_ifaddr)) 244 return (EADDRNOTAVAIL); 245 if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY) 246 return (EINVAL); 247 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0 && 248 ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 || 249 (so->so_options & SO_ACCEPTCONN) == 0)) 250 wild = INPLOOKUP_WILDCARD; 251 if (nam) { 252 sin = mtod(nam, struct sockaddr_in *); 253 if (nam->m_len != sizeof (*sin)) 254 return (EINVAL); 255 #ifdef notdef 256 /* 257 * We should check the family, but old programs 258 * incorrectly fail to initialize it. 259 */ 260 if (sin->sin_family != AF_INET) 261 return (EAFNOSUPPORT); 262 #endif 263 lport = sin->sin_port; 264 if (IN_MULTICAST(sin->sin_addr.s_addr)) { 265 /* 266 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 267 * allow complete duplication of binding if 268 * SO_REUSEPORT is set, or if SO_REUSEADDR is set 269 * and a multicast address is bound on both 270 * new and duplicated sockets. 271 */ 272 if (so->so_options & SO_REUSEADDR) 273 reuseport = SO_REUSEADDR|SO_REUSEPORT; 274 } else if (sin->sin_addr.s_addr != INADDR_ANY) { 275 sin->sin_port = 0; /* yech... */ 276 if (!(so->so_options & SO_BINDANY) && 277 in_iawithaddr(sin->sin_addr, NULL) == 0) 278 return (EADDRNOTAVAIL); 279 } 280 if (lport) { 281 struct inpcb *t; 282 283 /* GROSS */ 284 if (ntohs(lport) < IPPORT_RESERVED && 285 (error = suser(p, 0))) 286 return (EACCES); 287 if (so->so_euid) { 288 t = in_pcblookup(table, &zeroin_addr, 0, 289 &sin->sin_addr, lport, INPLOOKUP_WILDCARD); 290 if (t && (so->so_euid != t->inp_socket->so_euid)) 291 return (EADDRINUSE); 292 } 293 t = in_pcblookup(table, &zeroin_addr, 0, 294 &sin->sin_addr, lport, wild); 295 if (t && (reuseport & t->inp_socket->so_options) == 0) 296 return (EADDRINUSE); 297 } 298 inp->inp_laddr = sin->sin_addr; 299 } 300 if (lport == 0) { 301 u_int16_t first, last; 302 int count; 303 304 if (inp->inp_flags & INP_HIGHPORT) { 305 first = ipport_hifirstauto; /* sysctl */ 306 last = ipport_hilastauto; 307 } else if (inp->inp_flags & INP_LOWPORT) { 308 if ((error = suser(p, 0))) 309 return (EACCES); 310 first = IPPORT_RESERVED-1; /* 1023 */ 311 last = 600; /* not IPPORT_RESERVED/2 */ 312 } else { 313 first = ipport_firstauto; /* sysctl */ 314 last = ipport_lastauto; 315 } 316 317 /* 318 * Simple check to ensure all ports are not used up causing 319 * a deadlock here. 320 * 321 * We split the two cases (up and down) so that the direction 322 * is not being tested on each round of the loop. 323 */ 324 325 if (first > last) { 326 /* 327 * counting down 328 */ 329 count = first - last; 330 if (count) 331 *lastport = first - arc4random_uniform(count); 332 333 do { 334 if (count-- < 0) /* completely used? */ 335 return (EADDRNOTAVAIL); 336 --*lastport; 337 if (*lastport > first || *lastport < last) 338 *lastport = first; 339 lport = htons(*lastport); 340 } while (in_baddynamic(*lastport, so->so_proto->pr_protocol) || 341 in_pcblookup(table, &zeroin_addr, 0, 342 &inp->inp_laddr, lport, wild)); 343 } else { 344 /* 345 * counting up 346 */ 347 count = last - first; 348 if (count) 349 *lastport = first + arc4random_uniform(count); 350 351 do { 352 if (count-- < 0) /* completely used? */ 353 return (EADDRNOTAVAIL); 354 ++*lastport; 355 if (*lastport < first || *lastport > last) 356 *lastport = first; 357 lport = htons(*lastport); 358 } while (in_baddynamic(*lastport, so->so_proto->pr_protocol) || 359 in_pcblookup(table, &zeroin_addr, 0, 360 &inp->inp_laddr, lport, wild)); 361 } 362 } 363 inp->inp_lport = lport; 364 in_pcbrehash(inp); 365 return (0); 366 } 367 368 /* 369 * Connect from a socket to a specified address. 370 * Both address and port must be specified in argument sin. 371 * If don't have a local address for this socket yet, 372 * then pick one. 373 */ 374 int 375 in_pcbconnect(v, nam) 376 void *v; 377 struct mbuf *nam; 378 { 379 struct inpcb *inp = v; 380 struct sockaddr_in *ifaddr = NULL; 381 struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *); 382 383 #ifdef INET6 384 if (sotopf(inp->inp_socket) == PF_INET6) 385 return (in6_pcbconnect(inp, nam)); 386 if ((inp->inp_flags & INP_IPV6) != 0) 387 panic("IPv6 pcb passed into in_pcbconnect"); 388 #endif /* INET6 */ 389 390 if (nam->m_len != sizeof (*sin)) 391 return (EINVAL); 392 if (sin->sin_family != AF_INET) 393 return (EAFNOSUPPORT); 394 if (sin->sin_port == 0) 395 return (EADDRNOTAVAIL); 396 if (!TAILQ_EMPTY(&in_ifaddr)) { 397 /* 398 * If the destination address is INADDR_ANY, 399 * use the primary local address. 400 * If the supplied address is INADDR_BROADCAST, 401 * and the primary interface supports broadcast, 402 * choose the broadcast address for that interface. 403 */ 404 if (sin->sin_addr.s_addr == INADDR_ANY) 405 sin->sin_addr = TAILQ_FIRST(&in_ifaddr)->ia_addr.sin_addr; 406 else if (sin->sin_addr.s_addr == INADDR_BROADCAST && 407 (TAILQ_FIRST(&in_ifaddr)->ia_ifp->if_flags & IFF_BROADCAST)) 408 sin->sin_addr = TAILQ_FIRST(&in_ifaddr)->ia_broadaddr.sin_addr; 409 } 410 if (inp->inp_laddr.s_addr == INADDR_ANY) { 411 int error; 412 ifaddr = in_selectsrc(sin, &inp->inp_route, 413 inp->inp_socket->so_options, inp->inp_moptions, &error); 414 if (ifaddr == NULL) { 415 if (error == 0) 416 error = EADDRNOTAVAIL; 417 return error; 418 } 419 } 420 if (in_pcbhashlookup(inp->inp_table, sin->sin_addr, sin->sin_port, 421 inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr->sin_addr, 422 inp->inp_lport) != 0) 423 return (EADDRINUSE); 424 if (inp->inp_laddr.s_addr == INADDR_ANY) { 425 if (inp->inp_lport == 0 && 426 in_pcbbind(inp, NULL, curproc) == EADDRNOTAVAIL) 427 return (EADDRNOTAVAIL); 428 inp->inp_laddr = ifaddr->sin_addr; 429 } 430 inp->inp_faddr = sin->sin_addr; 431 inp->inp_fport = sin->sin_port; 432 in_pcbrehash(inp); 433 #ifdef IPSEC 434 { 435 int error; /* This is just ignored */ 436 437 /* Cause an IPsec SA to be established. */ 438 ipsp_spd_inp(NULL, AF_INET, 0, &error, IPSP_DIRECTION_OUT, 439 NULL, inp, NULL); 440 } 441 #endif 442 return (0); 443 } 444 445 void 446 in_pcbdisconnect(v) 447 void *v; 448 { 449 struct inpcb *inp = v; 450 451 switch (sotopf(inp->inp_socket)) { 452 #ifdef INET6 453 case PF_INET6: 454 inp->inp_faddr6 = in6addr_any; 455 break; 456 #endif 457 case PF_INET: 458 inp->inp_faddr.s_addr = INADDR_ANY; 459 break; 460 } 461 462 inp->inp_fport = 0; 463 in_pcbrehash(inp); 464 if (inp->inp_socket->so_state & SS_NOFDREF) 465 in_pcbdetach(inp); 466 } 467 468 void 469 in_pcbdetach(v) 470 void *v; 471 { 472 struct inpcb *inp = v; 473 struct socket *so = inp->inp_socket; 474 int s; 475 476 so->so_pcb = 0; 477 sofree(so); 478 if (inp->inp_options) 479 (void)m_freem(inp->inp_options); 480 if (inp->inp_route.ro_rt) 481 rtfree(inp->inp_route.ro_rt); 482 #ifdef INET6 483 if (inp->inp_flags & INP_IPV6) { 484 ip6_freepcbopts(inp->inp_outputopts6); 485 ip6_freemoptions(inp->inp_moptions6); 486 } else 487 #endif 488 ip_freemoptions(inp->inp_moptions); 489 #ifdef IPSEC 490 /* IPsec cleanup here */ 491 s = spltdb(); 492 if (inp->inp_tdb_in) 493 TAILQ_REMOVE(&inp->inp_tdb_in->tdb_inp_in, 494 inp, inp_tdb_in_next); 495 if (inp->inp_tdb_out) 496 TAILQ_REMOVE(&inp->inp_tdb_out->tdb_inp_out, inp, 497 inp_tdb_out_next); 498 if (inp->inp_ipsec_remotecred) 499 ipsp_reffree(inp->inp_ipsec_remotecred); 500 if (inp->inp_ipsec_remoteauth) 501 ipsp_reffree(inp->inp_ipsec_remoteauth); 502 if (inp->inp_ipo) 503 ipsec_delete_policy(inp->inp_ipo); 504 splx(s); 505 #endif 506 #if NPF > 0 507 if (inp->inp_pf_sk) 508 ((struct pf_state_key *)inp->inp_pf_sk)->inp = NULL; 509 #endif 510 s = splnet(); 511 LIST_REMOVE(inp, inp_lhash); 512 LIST_REMOVE(inp, inp_hash); 513 CIRCLEQ_REMOVE(&inp->inp_table->inpt_queue, inp, inp_queue); 514 splx(s); 515 pool_put(&inpcb_pool, inp); 516 } 517 518 void 519 in_setsockaddr(inp, nam) 520 struct inpcb *inp; 521 struct mbuf *nam; 522 { 523 struct sockaddr_in *sin; 524 525 nam->m_len = sizeof (*sin); 526 sin = mtod(nam, struct sockaddr_in *); 527 bzero((caddr_t)sin, sizeof (*sin)); 528 sin->sin_family = AF_INET; 529 sin->sin_len = sizeof(*sin); 530 sin->sin_port = inp->inp_lport; 531 sin->sin_addr = inp->inp_laddr; 532 } 533 534 void 535 in_setpeeraddr(inp, nam) 536 struct inpcb *inp; 537 struct mbuf *nam; 538 { 539 struct sockaddr_in *sin; 540 541 #ifdef INET6 542 if (sotopf(inp->inp_socket) == PF_INET6) { 543 in6_setpeeraddr(inp, nam); 544 return; 545 } 546 #endif /* INET6 */ 547 548 nam->m_len = sizeof (*sin); 549 sin = mtod(nam, struct sockaddr_in *); 550 bzero((caddr_t)sin, sizeof (*sin)); 551 sin->sin_family = AF_INET; 552 sin->sin_len = sizeof(*sin); 553 sin->sin_port = inp->inp_fport; 554 sin->sin_addr = inp->inp_faddr; 555 } 556 557 /* 558 * Pass some notification to all connections of a protocol 559 * associated with address dst. The "usual action" will be 560 * taken, depending on the ctlinput cmd. The caller must filter any 561 * cmds that are uninteresting (e.g., no error in the map). 562 * Call the protocol specific routine (if any) to report 563 * any errors for each matching socket. 564 * 565 * Must be called at splsoftnet. 566 */ 567 void 568 in_pcbnotifyall(table, dst, errno, notify) 569 struct inpcbtable *table; 570 struct sockaddr *dst; 571 int errno; 572 void (*notify)(struct inpcb *, int); 573 { 574 struct inpcb *inp, *oinp; 575 struct in_addr faddr; 576 577 splsoftassert(IPL_SOFTNET); 578 579 #ifdef INET6 580 /* 581 * See in6_pcbnotify() for IPv6 codepath. By the time this 582 * gets called, the addresses passed are either definitely IPv4 or 583 * IPv6; *_pcbnotify() never gets called with v4-mapped v6 addresses. 584 */ 585 #endif /* INET6 */ 586 587 if (dst->sa_family != AF_INET) 588 return; 589 faddr = satosin(dst)->sin_addr; 590 if (faddr.s_addr == INADDR_ANY) 591 return; 592 593 for (inp = CIRCLEQ_FIRST(&table->inpt_queue); 594 inp != CIRCLEQ_END(&table->inpt_queue);) { 595 #ifdef INET6 596 if (inp->inp_flags & INP_IPV6) { 597 inp = CIRCLEQ_NEXT(inp, inp_queue); 598 continue; 599 } 600 #endif 601 if (inp->inp_faddr.s_addr != faddr.s_addr || 602 inp->inp_socket == 0) { 603 inp = CIRCLEQ_NEXT(inp, inp_queue); 604 continue; 605 } 606 oinp = inp; 607 inp = CIRCLEQ_NEXT(inp, inp_queue); 608 if (notify) 609 (*notify)(oinp, errno); 610 } 611 } 612 613 /* 614 * Check for alternatives when higher level complains 615 * about service problems. For now, invalidate cached 616 * routing information. If the route was created dynamically 617 * (by a redirect), time to try a default gateway again. 618 */ 619 void 620 in_losing(inp) 621 struct inpcb *inp; 622 { 623 struct rtentry *rt; 624 struct rt_addrinfo info; 625 626 if ((rt = inp->inp_route.ro_rt)) { 627 inp->inp_route.ro_rt = 0; 628 bzero((caddr_t)&info, sizeof(info)); 629 info.rti_flags = rt->rt_flags; 630 info.rti_info[RTAX_DST] = &inp->inp_route.ro_dst; 631 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 632 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 633 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, rt->rt_ifp, 0, 0); 634 if (rt->rt_flags & RTF_DYNAMIC) 635 (void)rtrequest1(RTM_DELETE, &info, rt->rt_priority, 636 (struct rtentry **)0, 0); 637 /* 638 * A new route can be allocated 639 * the next time output is attempted. 640 * rtfree() needs to be called in anycase because the inp 641 * is still holding a reference to rt. 642 */ 643 rtfree(rt); 644 } 645 } 646 647 /* 648 * After a routing change, flush old routing 649 * and allocate a (hopefully) better one. 650 */ 651 void 652 in_rtchange(inp, errno) 653 struct inpcb *inp; 654 int errno; 655 { 656 if (inp->inp_route.ro_rt) { 657 rtfree(inp->inp_route.ro_rt); 658 inp->inp_route.ro_rt = 0; 659 /* 660 * A new route can be allocated the next time 661 * output is attempted. 662 */ 663 } 664 } 665 666 struct inpcb * 667 in_pcblookup(table, faddrp, fport_arg, laddrp, lport_arg, flags) 668 struct inpcbtable *table; 669 void *faddrp, *laddrp; 670 u_int fport_arg, lport_arg; 671 int flags; 672 { 673 struct inpcb *inp, *match = 0; 674 int matchwild = 3, wildcard; 675 u_int16_t fport = fport_arg, lport = lport_arg; 676 struct in_addr faddr = *(struct in_addr *)faddrp; 677 struct in_addr laddr = *(struct in_addr *)laddrp; 678 679 for (inp = LIST_FIRST(INPCBLHASH(table, lport)); inp; 680 inp = LIST_NEXT(inp, inp_lhash)) { 681 if (inp->inp_lport != lport) 682 continue; 683 wildcard = 0; 684 #ifdef INET6 685 if (flags & INPLOOKUP_IPV6) { 686 struct in6_addr *laddr6 = (struct in6_addr *)laddrp; 687 struct in6_addr *faddr6 = (struct in6_addr *)faddrp; 688 689 if (!(inp->inp_flags & INP_IPV6)) 690 continue; 691 692 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6)) { 693 if (IN6_IS_ADDR_UNSPECIFIED(laddr6)) 694 wildcard++; 695 else if (!IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, laddr6)) 696 continue; 697 } else { 698 if (!IN6_IS_ADDR_UNSPECIFIED(laddr6)) 699 wildcard++; 700 } 701 702 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) { 703 if (IN6_IS_ADDR_UNSPECIFIED(faddr6)) 704 wildcard++; 705 else if (!IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6, 706 faddr6) || inp->inp_fport != fport) 707 continue; 708 } else { 709 if (!IN6_IS_ADDR_UNSPECIFIED(faddr6)) 710 wildcard++; 711 } 712 } else 713 #endif /* INET6 */ 714 { 715 #ifdef INET6 716 if (inp->inp_flags & INP_IPV6) 717 continue; 718 #endif /* INET6 */ 719 720 if (inp->inp_faddr.s_addr != INADDR_ANY) { 721 if (faddr.s_addr == INADDR_ANY) 722 wildcard++; 723 else if (inp->inp_faddr.s_addr != faddr.s_addr || 724 inp->inp_fport != fport) 725 continue; 726 } else { 727 if (faddr.s_addr != INADDR_ANY) 728 wildcard++; 729 } 730 if (inp->inp_laddr.s_addr != INADDR_ANY) { 731 if (laddr.s_addr == INADDR_ANY) 732 wildcard++; 733 else if (inp->inp_laddr.s_addr != laddr.s_addr) 734 continue; 735 } else { 736 if (laddr.s_addr != INADDR_ANY) 737 wildcard++; 738 } 739 } 740 if ((!wildcard || (flags & INPLOOKUP_WILDCARD)) && 741 wildcard < matchwild) { 742 match = inp; 743 if ((matchwild = wildcard) == 0) 744 break; 745 } 746 } 747 return (match); 748 } 749 750 struct rtentry * 751 in_pcbrtentry(inp) 752 struct inpcb *inp; 753 { 754 struct route *ro; 755 756 ro = &inp->inp_route; 757 758 /* 759 * No route yet, so try to acquire one. 760 */ 761 if (ro->ro_rt == NULL) { 762 #ifdef INET6 763 bzero(ro, sizeof(struct route_in6)); 764 #else 765 bzero(ro, sizeof(struct route)); 766 #endif 767 768 switch(sotopf(inp->inp_socket)) { 769 #ifdef INET6 770 case PF_INET6: 771 if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) 772 break; 773 ro->ro_dst.sa_family = AF_INET6; 774 ro->ro_dst.sa_len = sizeof(struct sockaddr_in6); 775 ((struct sockaddr_in6 *) &ro->ro_dst)->sin6_addr = 776 inp->inp_faddr6; 777 rtalloc_mpath(ro, &inp->inp_laddr6.s6_addr32[0], 0); 778 break; 779 #endif /* INET6 */ 780 case PF_INET: 781 if (inp->inp_faddr.s_addr == INADDR_ANY) 782 break; 783 ro->ro_dst.sa_family = AF_INET; 784 ro->ro_dst.sa_len = sizeof(ro->ro_dst); 785 satosin(&ro->ro_dst)->sin_addr = inp->inp_faddr; 786 rtalloc_mpath(ro, &inp->inp_laddr.s_addr, 0); 787 break; 788 } 789 } 790 return (ro->ro_rt); 791 } 792 793 struct sockaddr_in * 794 in_selectsrc(sin, ro, soopts, mopts, errorp) 795 struct sockaddr_in *sin; 796 struct route *ro; 797 int soopts; 798 struct ip_moptions *mopts; 799 int *errorp; 800 { 801 struct sockaddr_in *sin2; 802 struct in_ifaddr *ia; 803 804 ia = (struct in_ifaddr *)0; 805 /* 806 * If route is known or can be allocated now, 807 * our src addr is taken from the i/f, else punt. 808 */ 809 if (ro->ro_rt && 810 (satosin(&ro->ro_dst)->sin_addr.s_addr != 811 sin->sin_addr.s_addr || 812 soopts & SO_DONTROUTE)) { 813 RTFREE(ro->ro_rt); 814 ro->ro_rt = (struct rtentry *)0; 815 } 816 if ((soopts & SO_DONTROUTE) == 0 && /*XXX*/ 817 (ro->ro_rt == (struct rtentry *)0 || 818 ro->ro_rt->rt_ifp == (struct ifnet *)0)) { 819 /* No route yet, so try to acquire one */ 820 ro->ro_dst.sa_family = AF_INET; 821 ro->ro_dst.sa_len = sizeof(struct sockaddr_in); 822 satosin(&ro->ro_dst)->sin_addr = sin->sin_addr; 823 rtalloc_mpath(ro, NULL, 0); 824 825 /* 826 * It is important to bzero out the rest of the 827 * struct sockaddr_in when mixing v6 & v4! 828 */ 829 sin2 = (struct sockaddr_in *)&ro->ro_dst; 830 bzero(sin2->sin_zero, sizeof(sin2->sin_zero)); 831 } 832 /* 833 * If we found a route, use the address 834 * corresponding to the outgoing interface 835 * unless it is the loopback (in case a route 836 * to our address on another net goes to loopback). 837 */ 838 if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) 839 ia = ifatoia(ro->ro_rt->rt_ifa); 840 if (ia == 0) { 841 u_int16_t fport = sin->sin_port; 842 843 sin->sin_port = 0; 844 ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin))); 845 if (ia == 0) 846 ia = ifatoia(ifa_ifwithnet(sintosa(sin))); 847 sin->sin_port = fport; 848 if (ia == 0) 849 ia = TAILQ_FIRST(&in_ifaddr); 850 if (ia == 0) { 851 *errorp = EADDRNOTAVAIL; 852 return NULL; 853 } 854 } 855 /* 856 * If the destination address is multicast and an outgoing 857 * interface has been set as a multicast option, use the 858 * address of that interface as our source address. 859 */ 860 if (IN_MULTICAST(sin->sin_addr.s_addr) && mopts != NULL) { 861 struct ip_moptions *imo; 862 struct ifnet *ifp; 863 864 imo = mopts; 865 if (imo->imo_multicast_ifp != NULL) { 866 ifp = imo->imo_multicast_ifp; 867 TAILQ_FOREACH(ia, &in_ifaddr, ia_list) 868 if (ia->ia_ifp == ifp) 869 break; 870 if (ia == 0) { 871 *errorp = EADDRNOTAVAIL; 872 return NULL; 873 } 874 } 875 } 876 return satosin(&ia->ia_addr); 877 } 878 879 void 880 in_pcbrehash(inp) 881 struct inpcb *inp; 882 { 883 struct inpcbtable *table = inp->inp_table; 884 int s; 885 886 s = splnet(); 887 LIST_REMOVE(inp, inp_lhash); 888 LIST_INSERT_HEAD(INPCBLHASH(table, inp->inp_lport), inp, inp_lhash); 889 LIST_REMOVE(inp, inp_hash); 890 #ifdef INET6 891 if (inp->inp_flags & INP_IPV6) { 892 LIST_INSERT_HEAD(IN6PCBHASH(table, &inp->inp_faddr6, 893 inp->inp_fport, &inp->inp_laddr6, inp->inp_lport), 894 inp, inp_hash); 895 } else { 896 #endif /* INET6 */ 897 LIST_INSERT_HEAD(INPCBHASH(table, &inp->inp_faddr, 898 inp->inp_fport, &inp->inp_laddr, inp->inp_lport), 899 inp, inp_hash); 900 #ifdef INET6 901 } 902 #endif /* INET6 */ 903 splx(s); 904 } 905 906 #ifdef DIAGNOSTIC 907 int in_pcbnotifymiss = 0; 908 #endif 909 910 /* 911 * The in(6)_pcbhashlookup functions are used to locate connected sockets 912 * quickly: 913 * faddr.fport <-> laddr.lport 914 * No wildcard matching is done so that listening sockets are not found. 915 * If the functions return NULL in(6)_pcblookup_listen can be used to 916 * find a listening/bound socket that may accept the connection. 917 * After those two lookups no other are necessary. 918 */ 919 struct inpcb * 920 in_pcbhashlookup(table, faddr, fport_arg, laddr, lport_arg) 921 struct inpcbtable *table; 922 struct in_addr faddr, laddr; 923 u_int fport_arg, lport_arg; 924 { 925 struct inpcbhead *head; 926 struct inpcb *inp; 927 u_int16_t fport = fport_arg, lport = lport_arg; 928 929 head = INPCBHASH(table, &faddr, fport, &laddr, lport); 930 LIST_FOREACH(inp, head, inp_hash) { 931 #ifdef INET6 932 if (inp->inp_flags & INP_IPV6) 933 continue; /*XXX*/ 934 #endif 935 if (inp->inp_faddr.s_addr == faddr.s_addr && 936 inp->inp_fport == fport && 937 inp->inp_lport == lport && 938 inp->inp_laddr.s_addr == laddr.s_addr) { 939 /* 940 * Move this PCB to the head of hash chain so that 941 * repeated accesses are quicker. This is analogous to 942 * the historic single-entry PCB cache. 943 */ 944 if (inp != LIST_FIRST(head)) { 945 LIST_REMOVE(inp, inp_hash); 946 LIST_INSERT_HEAD(head, inp, inp_hash); 947 } 948 break; 949 } 950 } 951 #ifdef DIAGNOSTIC 952 if (inp == NULL && in_pcbnotifymiss) { 953 printf("in_pcbhashlookup: faddr=%08x fport=%d laddr=%08x lport=%d\n", 954 ntohl(faddr.s_addr), ntohs(fport), 955 ntohl(laddr.s_addr), ntohs(lport)); 956 } 957 #endif 958 return (inp); 959 } 960 961 #ifdef INET6 962 struct inpcb * 963 in6_pcbhashlookup(table, faddr, fport_arg, laddr, lport_arg) 964 struct inpcbtable *table; 965 struct in6_addr *faddr, *laddr; 966 u_int fport_arg, lport_arg; 967 { 968 struct inpcbhead *head; 969 struct inpcb *inp; 970 u_int16_t fport = fport_arg, lport = lport_arg; 971 972 head = IN6PCBHASH(table, faddr, fport, laddr, lport); 973 LIST_FOREACH(inp, head, inp_hash) { 974 if (!(inp->inp_flags & INP_IPV6)) 975 continue; 976 if (IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6, faddr) && 977 inp->inp_fport == fport && inp->inp_lport == lport && 978 IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, laddr)) { 979 /* 980 * Move this PCB to the head of hash chain so that 981 * repeated accesses are quicker. This is analogous to 982 * the historic single-entry PCB cache. 983 */ 984 if (inp != LIST_FIRST(head)) { 985 LIST_REMOVE(inp, inp_hash); 986 LIST_INSERT_HEAD(head, inp, inp_hash); 987 } 988 break; 989 } 990 } 991 #ifdef DIAGNOSTIC 992 if (inp == NULL && in_pcbnotifymiss) { 993 printf("in6_pcbhashlookup: faddr="); 994 printf(" fport=%d laddr=", ntohs(fport)); 995 printf(" lport=%d\n", ntohs(lport)); 996 } 997 #endif 998 return (inp); 999 } 1000 #endif /* INET6 */ 1001 1002 /* 1003 * The in(6)_pcblookup_listen functions are used to locate listening 1004 * sockets quickly. This are sockets with unspecified foreign address 1005 * and port: 1006 * *.* <-> laddr.lport 1007 * *.* <-> *.lport 1008 */ 1009 struct inpcb * 1010 in_pcblookup_listen(struct inpcbtable *table, struct in_addr laddr, 1011 u_int lport_arg, int reverse, struct mbuf *m) 1012 { 1013 struct inpcbhead *head; 1014 struct in_addr *key1, *key2; 1015 struct inpcb *inp; 1016 u_int16_t lport = lport_arg; 1017 1018 #if NPF > 0 1019 if (m && m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) { 1020 struct pf_divert *divert; 1021 1022 if ((divert = pf_find_divert(m)) == NULL) 1023 return (NULL); 1024 key1 = key2 = &divert->addr.ipv4; 1025 lport = divert->port; 1026 } else 1027 #endif 1028 if (reverse) { 1029 key1 = &zeroin_addr; 1030 key2 = &laddr; 1031 } else { 1032 key1 = &laddr; 1033 key2 = &zeroin_addr; 1034 } 1035 1036 head = INPCBHASH(table, &zeroin_addr, 0, key1, lport); 1037 LIST_FOREACH(inp, head, inp_hash) { 1038 #ifdef INET6 1039 if (inp->inp_flags & INP_IPV6) 1040 continue; /*XXX*/ 1041 #endif 1042 if (inp->inp_lport == lport && inp->inp_fport == 0 && 1043 inp->inp_laddr.s_addr == key1->s_addr && 1044 inp->inp_faddr.s_addr == INADDR_ANY) 1045 break; 1046 } 1047 if (inp == NULL && key1->s_addr != key2->s_addr) { 1048 head = INPCBHASH(table, &zeroin_addr, 0, key2, lport); 1049 LIST_FOREACH(inp, head, inp_hash) { 1050 #ifdef INET6 1051 if (inp->inp_flags & INP_IPV6) 1052 continue; /*XXX*/ 1053 #endif 1054 if (inp->inp_lport == lport && inp->inp_fport == 0 && 1055 inp->inp_laddr.s_addr == key2->s_addr && 1056 inp->inp_faddr.s_addr == INADDR_ANY) 1057 break; 1058 } 1059 } 1060 #ifdef DIAGNOSTIC 1061 if (inp == NULL && in_pcbnotifymiss) { 1062 printf("in_pcblookup_listen: laddr=%08x lport=%d\n", 1063 ntohl(laddr.s_addr), ntohs(lport)); 1064 } 1065 #endif 1066 /* 1067 * Move this PCB to the head of hash chain so that 1068 * repeated accesses are quicker. This is analogous to 1069 * the historic single-entry PCB cache. 1070 */ 1071 if (inp != NULL && inp != LIST_FIRST(head)) { 1072 LIST_REMOVE(inp, inp_hash); 1073 LIST_INSERT_HEAD(head, inp, inp_hash); 1074 } 1075 return (inp); 1076 } 1077 1078 #ifdef INET6 1079 struct inpcb * 1080 in6_pcblookup_listen(struct inpcbtable *table, struct in6_addr *laddr, 1081 u_int lport_arg, int reverse, struct mbuf *m) 1082 { 1083 struct inpcbhead *head; 1084 struct in6_addr *key1, *key2; 1085 struct inpcb *inp; 1086 u_int16_t lport = lport_arg; 1087 1088 #if NPF > 0 1089 if (m && m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) { 1090 struct pf_divert *divert; 1091 1092 if ((divert = pf_find_divert(m)) == NULL) 1093 return (NULL); 1094 key1 = key2 = &divert->addr.ipv6; 1095 lport = divert->port; 1096 } else 1097 #endif 1098 if (reverse) { 1099 key1 = &zeroin6_addr; 1100 key2 = laddr; 1101 } else { 1102 key1 = laddr; 1103 key2 = &zeroin6_addr; 1104 } 1105 1106 head = IN6PCBHASH(table, &zeroin6_addr, 0, key1, lport); 1107 LIST_FOREACH(inp, head, inp_hash) { 1108 if (!(inp->inp_flags & INP_IPV6)) 1109 continue; 1110 if (inp->inp_lport == lport && inp->inp_fport == 0 && 1111 IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, key1) && 1112 IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) 1113 break; 1114 } 1115 if (inp == NULL && ! IN6_ARE_ADDR_EQUAL(key1, key2)) { 1116 head = IN6PCBHASH(table, &zeroin6_addr, 0, key2, lport); 1117 LIST_FOREACH(inp, head, inp_hash) { 1118 if (!(inp->inp_flags & INP_IPV6)) 1119 continue; 1120 if (inp->inp_lport == lport && inp->inp_fport == 0 && 1121 IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, key2) && 1122 IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) 1123 break; 1124 } 1125 } 1126 #ifdef DIAGNOSTIC 1127 if (inp == NULL && in_pcbnotifymiss) { 1128 printf("in6_pcblookup_listen: laddr= lport=%d\n", 1129 ntohs(lport)); 1130 } 1131 #endif 1132 /* 1133 * Move this PCB to the head of hash chain so that 1134 * repeated accesses are quicker. This is analogous to 1135 * the historic single-entry PCB cache. 1136 */ 1137 if (inp != NULL && inp != LIST_FIRST(head)) { 1138 LIST_REMOVE(inp, inp_hash); 1139 LIST_INSERT_HEAD(head, inp, inp_hash); 1140 } 1141 return (inp); 1142 } 1143 #endif /* INET6 */ 1144