1 /* $OpenBSD: in_pcb.c,v 1.108 2009/11/13 20:54:05 claudio Exp $ */ 2 /* $NetBSD: in_pcb.c,v 1.25 1996/02/13 23:41:53 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 33 * 34 * NRL grants permission for redistribution and use in source and binary 35 * forms, with or without modification, of the software and documentation 36 * created at NRL provided that the following conditions are met: 37 * 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgements: 45 * This product includes software developed by the University of 46 * California, Berkeley and its contributors. 47 * This product includes software developed at the Information 48 * Technology Division, US Naval Research Laboratory. 49 * 4. Neither the name of the NRL nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 64 * 65 * The views and conclusions contained in the software and documentation 66 * are those of the authors and should not be interpreted as representing 67 * official policies, either expressed or implied, of the US Naval 68 * Research Laboratory (NRL). 69 */ 70 71 #include "pf.h" 72 73 #include <sys/param.h> 74 #include <sys/systm.h> 75 #include <sys/mbuf.h> 76 #include <sys/protosw.h> 77 #include <sys/socket.h> 78 #include <sys/socketvar.h> 79 #include <sys/proc.h> 80 #include <sys/domain.h> 81 #include <sys/pool.h> 82 83 #include <net/if.h> 84 #include <net/route.h> 85 #include <net/pfvar.h> 86 87 #include <netinet/in.h> 88 #include <netinet/in_systm.h> 89 #include <netinet/ip.h> 90 #include <netinet/in_pcb.h> 91 #include <netinet/in_var.h> 92 #include <netinet/ip_var.h> 93 #include <dev/rndvar.h> 94 95 #include <sys/mount.h> 96 #include <nfs/nfsproto.h> 97 98 #ifdef INET6 99 #include <netinet6/ip6_var.h> 100 #endif /* INET6 */ 101 #ifdef IPSEC 102 #include <netinet/ip_esp.h> 103 #endif /* IPSEC */ 104 105 struct in_addr zeroin_addr; 106 107 extern int ipsec_auth_default_level; 108 extern int ipsec_esp_trans_default_level; 109 extern int ipsec_esp_network_default_level; 110 extern int ipsec_ipcomp_default_level; 111 112 /* 113 * These configure the range of local port addresses assigned to 114 * "unspecified" outgoing connections/packets/whatever. 115 */ 116 int ipport_firstauto = IPPORT_RESERVED; 117 int ipport_lastauto = IPPORT_USERRESERVED; 118 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; 119 int ipport_hilastauto = IPPORT_HILASTAUTO; 120 121 struct pool inpcb_pool; 122 int inpcb_pool_initialized = 0; 123 124 #define INPCBHASH(table, faddr, fport, laddr, lport, rdom) \ 125 &(table)->inpt_hashtbl[(ntohl((faddr)->s_addr) + \ 126 ntohs((fport)) + ntohs((lport)) + (rdom)) & (table->inpt_hash)] 127 128 #define IN6PCBHASH(table, faddr, fport, laddr, lport) \ 129 &(table)->inpt_hashtbl[(ntohl((faddr)->s6_addr32[0] ^ \ 130 (faddr)->s6_addr32[3]) + ntohs((fport)) + ntohs((lport))) & \ 131 (table->inpt_hash)] 132 133 #define INPCBLHASH(table, lport, rdom) \ 134 &(table)->inpt_lhashtbl[(ntohs((lport)) + (rdom)) & table->inpt_lhash] 135 136 void 137 in_pcbinit(table, hashsize) 138 struct inpcbtable *table; 139 int hashsize; 140 { 141 142 CIRCLEQ_INIT(&table->inpt_queue); 143 table->inpt_hashtbl = hashinit(hashsize, M_PCB, M_NOWAIT, 144 &table->inpt_hash); 145 if (table->inpt_hashtbl == NULL) 146 panic("in_pcbinit: hashinit failed"); 147 table->inpt_lhashtbl = hashinit(hashsize, M_PCB, M_NOWAIT, 148 &table->inpt_lhash); 149 if (table->inpt_lhashtbl == NULL) 150 panic("in_pcbinit: hashinit failed for lport"); 151 table->inpt_lastport = 0; 152 } 153 154 struct baddynamicports baddynamicports; 155 156 /* 157 * Check if the specified port is invalid for dynamic allocation. 158 */ 159 int 160 in_baddynamic(u_int16_t port, u_int16_t proto) 161 { 162 switch (proto) { 163 case IPPROTO_TCP: 164 return (DP_ISSET(baddynamicports.tcp, port)); 165 case IPPROTO_UDP: 166 #ifdef IPSEC 167 /* Cannot preset this as it is a sysctl */ 168 if (port == udpencap_port) 169 return (1); 170 #endif 171 return (DP_ISSET(baddynamicports.udp, port)); 172 default: 173 return (0); 174 } 175 } 176 177 int 178 in_pcballoc(so, v) 179 struct socket *so; 180 void *v; 181 { 182 struct inpcbtable *table = v; 183 struct inpcb *inp; 184 int s; 185 186 if (inpcb_pool_initialized == 0) { 187 pool_init(&inpcb_pool, sizeof(struct inpcb), 0, 0, 0, 188 "inpcbpl", NULL); 189 inpcb_pool_initialized = 1; 190 } 191 inp = pool_get(&inpcb_pool, PR_NOWAIT); 192 if (inp == NULL) 193 return (ENOBUFS); 194 bzero((caddr_t)inp, sizeof(*inp)); 195 inp->inp_table = table; 196 inp->inp_socket = so; 197 inp->inp_seclevel[SL_AUTH] = ipsec_auth_default_level; 198 inp->inp_seclevel[SL_ESP_TRANS] = ipsec_esp_trans_default_level; 199 inp->inp_seclevel[SL_ESP_NETWORK] = ipsec_esp_network_default_level; 200 inp->inp_seclevel[SL_IPCOMP] = ipsec_ipcomp_default_level; 201 s = splnet(); 202 CIRCLEQ_INSERT_HEAD(&table->inpt_queue, inp, inp_queue); 203 LIST_INSERT_HEAD(INPCBLHASH(table, inp->inp_lport, 204 inp->inp_rdomain), inp, inp_lhash); 205 LIST_INSERT_HEAD(INPCBHASH(table, &inp->inp_faddr, inp->inp_fport, 206 &inp->inp_laddr, inp->inp_lport, inp->inp_rdomain), 207 inp, inp_hash); 208 splx(s); 209 so->so_pcb = inp; 210 inp->inp_hops = -1; 211 212 #ifdef INET6 213 /* 214 * Small change in this function to set the INP_IPV6 flag so routines 215 * outside pcb-specific routines don't need to use sotopf(), and all 216 * of its pointer chasing, later. 217 */ 218 if (sotopf(so) == PF_INET6) 219 inp->inp_flags = INP_IPV6; 220 inp->in6p_cksum = -1; 221 #endif /* INET6 */ 222 return (0); 223 } 224 225 int 226 in_pcbbind(v, nam, p) 227 void *v; 228 struct mbuf *nam; 229 struct proc *p; 230 { 231 struct inpcb *inp = v; 232 struct socket *so = inp->inp_socket; 233 struct inpcbtable *table = inp->inp_table; 234 u_int16_t *lastport = &inp->inp_table->inpt_lastport; 235 struct sockaddr_in *sin; 236 u_int16_t lport = 0; 237 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); 238 int error; 239 240 #ifdef INET6 241 if (sotopf(so) == PF_INET6) 242 return in6_pcbbind(inp, nam, p); 243 #endif /* INET6 */ 244 245 if (TAILQ_EMPTY(&in_ifaddr)) 246 return (EADDRNOTAVAIL); 247 if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY) 248 return (EINVAL); 249 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0 && 250 ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 || 251 (so->so_options & SO_ACCEPTCONN) == 0)) 252 wild = INPLOOKUP_WILDCARD; 253 if (nam) { 254 sin = mtod(nam, struct sockaddr_in *); 255 if (nam->m_len != sizeof (*sin)) 256 return (EINVAL); 257 #ifdef notdef 258 /* 259 * We should check the family, but old programs 260 * incorrectly fail to initialize it. 261 */ 262 if (sin->sin_family != AF_INET) 263 return (EAFNOSUPPORT); 264 #endif 265 lport = sin->sin_port; 266 if (IN_MULTICAST(sin->sin_addr.s_addr)) { 267 /* 268 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 269 * allow complete duplication of binding if 270 * SO_REUSEPORT is set, or if SO_REUSEADDR is set 271 * and a multicast address is bound on both 272 * new and duplicated sockets. 273 */ 274 if (so->so_options & SO_REUSEADDR) 275 reuseport = SO_REUSEADDR|SO_REUSEPORT; 276 } else if (sin->sin_addr.s_addr != INADDR_ANY) { 277 sin->sin_port = 0; /* yech... */ 278 if (!(so->so_options & SO_BINDANY) && 279 in_iawithaddr(sin->sin_addr, NULL, 280 inp->inp_rdomain) == 0) 281 return (EADDRNOTAVAIL); 282 } 283 if (lport) { 284 struct inpcb *t; 285 286 /* GROSS */ 287 if (ntohs(lport) < IPPORT_RESERVED && 288 (error = suser(p, 0))) 289 return (EACCES); 290 if (so->so_euid) { 291 t = in_pcblookup(table, &zeroin_addr, 0, 292 &sin->sin_addr, lport, INPLOOKUP_WILDCARD, 293 inp->inp_rdomain); 294 if (t && (so->so_euid != t->inp_socket->so_euid)) 295 return (EADDRINUSE); 296 } 297 t = in_pcblookup(table, &zeroin_addr, 0, 298 &sin->sin_addr, lport, wild, inp->inp_rdomain); 299 if (t && (reuseport & t->inp_socket->so_options) == 0) 300 return (EADDRINUSE); 301 } 302 inp->inp_laddr = sin->sin_addr; 303 } 304 if (lport == 0) { 305 u_int16_t first, last; 306 int count; 307 308 if (inp->inp_flags & INP_HIGHPORT) { 309 first = ipport_hifirstauto; /* sysctl */ 310 last = ipport_hilastauto; 311 } else if (inp->inp_flags & INP_LOWPORT) { 312 if ((error = suser(p, 0))) 313 return (EACCES); 314 first = IPPORT_RESERVED-1; /* 1023 */ 315 last = 600; /* not IPPORT_RESERVED/2 */ 316 } else { 317 first = ipport_firstauto; /* sysctl */ 318 last = ipport_lastauto; 319 } 320 321 /* 322 * Simple check to ensure all ports are not used up causing 323 * a deadlock here. 324 * 325 * We split the two cases (up and down) so that the direction 326 * is not being tested on each round of the loop. 327 */ 328 329 if (first > last) { 330 /* 331 * counting down 332 */ 333 count = first - last; 334 if (count) 335 *lastport = first - arc4random_uniform(count); 336 337 do { 338 if (count-- < 0) /* completely used? */ 339 return (EADDRNOTAVAIL); 340 --*lastport; 341 if (*lastport > first || *lastport < last) 342 *lastport = first; 343 lport = htons(*lastport); 344 } while (in_baddynamic(*lastport, so->so_proto->pr_protocol) || 345 in_pcblookup(table, &zeroin_addr, 0, 346 &inp->inp_laddr, lport, wild, inp->inp_rdomain)); 347 } else { 348 /* 349 * counting up 350 */ 351 count = last - first; 352 if (count) 353 *lastport = first + arc4random_uniform(count); 354 355 do { 356 if (count-- < 0) /* completely used? */ 357 return (EADDRNOTAVAIL); 358 ++*lastport; 359 if (*lastport < first || *lastport > last) 360 *lastport = first; 361 lport = htons(*lastport); 362 } while (in_baddynamic(*lastport, so->so_proto->pr_protocol) || 363 in_pcblookup(table, &zeroin_addr, 0, 364 &inp->inp_laddr, lport, wild, inp->inp_rdomain)); 365 } 366 } 367 inp->inp_lport = lport; 368 in_pcbrehash(inp); 369 return (0); 370 } 371 372 /* 373 * Connect from a socket to a specified address. 374 * Both address and port must be specified in argument sin. 375 * If don't have a local address for this socket yet, 376 * then pick one. 377 */ 378 int 379 in_pcbconnect(v, nam) 380 void *v; 381 struct mbuf *nam; 382 { 383 struct inpcb *inp = v; 384 struct sockaddr_in *ifaddr = NULL; 385 struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *); 386 387 #ifdef INET6 388 if (sotopf(inp->inp_socket) == PF_INET6) 389 return (in6_pcbconnect(inp, nam)); 390 if ((inp->inp_flags & INP_IPV6) != 0) 391 panic("IPv6 pcb passed into in_pcbconnect"); 392 #endif /* INET6 */ 393 394 if (nam->m_len != sizeof (*sin)) 395 return (EINVAL); 396 if (sin->sin_family != AF_INET) 397 return (EAFNOSUPPORT); 398 if (sin->sin_port == 0) 399 return (EADDRNOTAVAIL); 400 if (!TAILQ_EMPTY(&in_ifaddr)) { 401 /* 402 * If the destination address is INADDR_ANY, 403 * use the primary local address. 404 * If the supplied address is INADDR_BROADCAST, 405 * and the primary interface supports broadcast, 406 * choose the broadcast address for that interface. 407 */ 408 if (sin->sin_addr.s_addr == INADDR_ANY) 409 sin->sin_addr = TAILQ_FIRST(&in_ifaddr)->ia_addr.sin_addr; 410 else if (sin->sin_addr.s_addr == INADDR_BROADCAST && 411 (TAILQ_FIRST(&in_ifaddr)->ia_ifp->if_flags & IFF_BROADCAST)) 412 sin->sin_addr = TAILQ_FIRST(&in_ifaddr)->ia_broadaddr.sin_addr; 413 } 414 if (inp->inp_laddr.s_addr == INADDR_ANY) { 415 int error; 416 ifaddr = in_selectsrc(sin, &inp->inp_route, 417 inp->inp_socket->so_options, inp->inp_moptions, &error, 418 inp->inp_rdomain); 419 if (ifaddr == NULL) { 420 if (error == 0) 421 error = EADDRNOTAVAIL; 422 return error; 423 } 424 } 425 if (in_pcbhashlookup(inp->inp_table, sin->sin_addr, sin->sin_port, 426 inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr->sin_addr, 427 inp->inp_lport, inp->inp_rdomain) != 0) 428 return (EADDRINUSE); 429 if (inp->inp_laddr.s_addr == INADDR_ANY) { 430 if (inp->inp_lport == 0 && 431 in_pcbbind(inp, NULL, curproc) == EADDRNOTAVAIL) 432 return (EADDRNOTAVAIL); 433 inp->inp_laddr = ifaddr->sin_addr; 434 } 435 inp->inp_faddr = sin->sin_addr; 436 inp->inp_fport = sin->sin_port; 437 in_pcbrehash(inp); 438 #ifdef IPSEC 439 { 440 int error; /* This is just ignored */ 441 442 /* Cause an IPsec SA to be established. */ 443 ipsp_spd_inp(NULL, AF_INET, 0, &error, IPSP_DIRECTION_OUT, 444 NULL, inp, NULL); 445 } 446 #endif 447 return (0); 448 } 449 450 void 451 in_pcbdisconnect(v) 452 void *v; 453 { 454 struct inpcb *inp = v; 455 456 switch (sotopf(inp->inp_socket)) { 457 #ifdef INET6 458 case PF_INET6: 459 inp->inp_faddr6 = in6addr_any; 460 break; 461 #endif 462 case PF_INET: 463 inp->inp_faddr.s_addr = INADDR_ANY; 464 break; 465 } 466 467 inp->inp_fport = 0; 468 in_pcbrehash(inp); 469 if (inp->inp_socket->so_state & SS_NOFDREF) 470 in_pcbdetach(inp); 471 } 472 473 void 474 in_pcbdetach(v) 475 void *v; 476 { 477 struct inpcb *inp = v; 478 struct socket *so = inp->inp_socket; 479 int s; 480 481 so->so_pcb = 0; 482 sofree(so); 483 if (inp->inp_options) 484 m_freem(inp->inp_options); 485 if (inp->inp_route.ro_rt) 486 rtfree(inp->inp_route.ro_rt); 487 #ifdef INET6 488 if (inp->inp_flags & INP_IPV6) { 489 ip6_freepcbopts(inp->inp_outputopts6); 490 ip6_freemoptions(inp->inp_moptions6); 491 } else 492 #endif 493 ip_freemoptions(inp->inp_moptions); 494 #ifdef IPSEC 495 /* IPsec cleanup here */ 496 s = spltdb(); 497 if (inp->inp_tdb_in) 498 TAILQ_REMOVE(&inp->inp_tdb_in->tdb_inp_in, 499 inp, inp_tdb_in_next); 500 if (inp->inp_tdb_out) 501 TAILQ_REMOVE(&inp->inp_tdb_out->tdb_inp_out, inp, 502 inp_tdb_out_next); 503 if (inp->inp_ipsec_remotecred) 504 ipsp_reffree(inp->inp_ipsec_remotecred); 505 if (inp->inp_ipsec_remoteauth) 506 ipsp_reffree(inp->inp_ipsec_remoteauth); 507 if (inp->inp_ipo) 508 ipsec_delete_policy(inp->inp_ipo); 509 splx(s); 510 #endif 511 #if NPF > 0 512 if (inp->inp_pf_sk) 513 ((struct pf_state_key *)inp->inp_pf_sk)->inp = NULL; 514 #endif 515 s = splnet(); 516 LIST_REMOVE(inp, inp_lhash); 517 LIST_REMOVE(inp, inp_hash); 518 CIRCLEQ_REMOVE(&inp->inp_table->inpt_queue, inp, inp_queue); 519 splx(s); 520 pool_put(&inpcb_pool, inp); 521 } 522 523 void 524 in_setsockaddr(inp, nam) 525 struct inpcb *inp; 526 struct mbuf *nam; 527 { 528 struct sockaddr_in *sin; 529 530 nam->m_len = sizeof (*sin); 531 sin = mtod(nam, struct sockaddr_in *); 532 bzero((caddr_t)sin, sizeof (*sin)); 533 sin->sin_family = AF_INET; 534 sin->sin_len = sizeof(*sin); 535 sin->sin_port = inp->inp_lport; 536 sin->sin_addr = inp->inp_laddr; 537 } 538 539 void 540 in_setpeeraddr(inp, nam) 541 struct inpcb *inp; 542 struct mbuf *nam; 543 { 544 struct sockaddr_in *sin; 545 546 #ifdef INET6 547 if (sotopf(inp->inp_socket) == PF_INET6) { 548 in6_setpeeraddr(inp, nam); 549 return; 550 } 551 #endif /* INET6 */ 552 553 nam->m_len = sizeof (*sin); 554 sin = mtod(nam, struct sockaddr_in *); 555 bzero((caddr_t)sin, sizeof (*sin)); 556 sin->sin_family = AF_INET; 557 sin->sin_len = sizeof(*sin); 558 sin->sin_port = inp->inp_fport; 559 sin->sin_addr = inp->inp_faddr; 560 } 561 562 /* 563 * Pass some notification to all connections of a protocol 564 * associated with address dst. The "usual action" will be 565 * taken, depending on the ctlinput cmd. The caller must filter any 566 * cmds that are uninteresting (e.g., no error in the map). 567 * Call the protocol specific routine (if any) to report 568 * any errors for each matching socket. 569 * 570 * Must be called at splsoftnet. 571 */ 572 void 573 in_pcbnotifyall(table, dst, rdomain, errno, notify) 574 struct inpcbtable *table; 575 struct sockaddr *dst; 576 u_int rdomain; 577 int errno; 578 void (*notify)(struct inpcb *, int); 579 { 580 struct inpcb *inp, *oinp; 581 struct in_addr faddr; 582 583 splsoftassert(IPL_SOFTNET); 584 585 #ifdef INET6 586 /* 587 * See in6_pcbnotify() for IPv6 codepath. By the time this 588 * gets called, the addresses passed are either definitely IPv4 or 589 * IPv6; *_pcbnotify() never gets called with v4-mapped v6 addresses. 590 */ 591 #endif /* INET6 */ 592 593 if (dst->sa_family != AF_INET) 594 return; 595 faddr = satosin(dst)->sin_addr; 596 if (faddr.s_addr == INADDR_ANY) 597 return; 598 599 for (inp = CIRCLEQ_FIRST(&table->inpt_queue); 600 inp != CIRCLEQ_END(&table->inpt_queue);) { 601 #ifdef INET6 602 if (inp->inp_flags & INP_IPV6) { 603 inp = CIRCLEQ_NEXT(inp, inp_queue); 604 continue; 605 } 606 #endif 607 if (inp->inp_faddr.s_addr != faddr.s_addr || 608 inp->inp_rdomain != rdomain || 609 inp->inp_socket == 0) { 610 inp = CIRCLEQ_NEXT(inp, inp_queue); 611 continue; 612 } 613 oinp = inp; 614 inp = CIRCLEQ_NEXT(inp, inp_queue); 615 if (notify) 616 (*notify)(oinp, errno); 617 } 618 } 619 620 /* 621 * Check for alternatives when higher level complains 622 * about service problems. For now, invalidate cached 623 * routing information. If the route was created dynamically 624 * (by a redirect), time to try a default gateway again. 625 */ 626 void 627 in_losing(inp) 628 struct inpcb *inp; 629 { 630 struct rtentry *rt; 631 struct rt_addrinfo info; 632 633 if ((rt = inp->inp_route.ro_rt)) { 634 inp->inp_route.ro_rt = 0; 635 bzero((caddr_t)&info, sizeof(info)); 636 info.rti_flags = rt->rt_flags; 637 info.rti_info[RTAX_DST] = &inp->inp_route.ro_dst; 638 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 639 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 640 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, rt->rt_ifp, 0, 641 inp->inp_rdomain); 642 if (rt->rt_flags & RTF_DYNAMIC) 643 (void)rtrequest1(RTM_DELETE, &info, rt->rt_priority, 644 (struct rtentry **)0, inp->inp_rdomain); 645 /* 646 * A new route can be allocated 647 * the next time output is attempted. 648 * rtfree() needs to be called in anycase because the inp 649 * is still holding a reference to rt. 650 */ 651 rtfree(rt); 652 } 653 } 654 655 /* 656 * After a routing change, flush old routing 657 * and allocate a (hopefully) better one. 658 */ 659 void 660 in_rtchange(inp, errno) 661 struct inpcb *inp; 662 int errno; 663 { 664 if (inp->inp_route.ro_rt) { 665 rtfree(inp->inp_route.ro_rt); 666 inp->inp_route.ro_rt = 0; 667 /* 668 * A new route can be allocated the next time 669 * output is attempted. 670 */ 671 } 672 } 673 674 struct inpcb * 675 in_pcblookup(struct inpcbtable *table, void *faddrp, u_int fport_arg, void *laddrp, u_int lport_arg, int flags, u_int rdomain) 676 { 677 struct inpcb *inp, *match = 0; 678 int matchwild = 3, wildcard; 679 u_int16_t fport = fport_arg, lport = lport_arg; 680 struct in_addr faddr = *(struct in_addr *)faddrp; 681 struct in_addr laddr = *(struct in_addr *)laddrp; 682 683 rdomain = rtable_l2(rdomain); 684 for (inp = LIST_FIRST(INPCBLHASH(table, lport, rdomain)); inp; 685 inp = LIST_NEXT(inp, inp_lhash)) { 686 if (inp->inp_rdomain != rdomain) 687 continue; 688 if (inp->inp_lport != lport) 689 continue; 690 wildcard = 0; 691 #ifdef INET6 692 if (flags & INPLOOKUP_IPV6) { 693 struct in6_addr *laddr6 = (struct in6_addr *)laddrp; 694 struct in6_addr *faddr6 = (struct in6_addr *)faddrp; 695 696 if (!(inp->inp_flags & INP_IPV6)) 697 continue; 698 699 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6)) { 700 if (IN6_IS_ADDR_UNSPECIFIED(laddr6)) 701 wildcard++; 702 else if (!IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, laddr6)) 703 continue; 704 } else { 705 if (!IN6_IS_ADDR_UNSPECIFIED(laddr6)) 706 wildcard++; 707 } 708 709 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) { 710 if (IN6_IS_ADDR_UNSPECIFIED(faddr6)) 711 wildcard++; 712 else if (!IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6, 713 faddr6) || inp->inp_fport != fport) 714 continue; 715 } else { 716 if (!IN6_IS_ADDR_UNSPECIFIED(faddr6)) 717 wildcard++; 718 } 719 } else 720 #endif /* INET6 */ 721 { 722 #ifdef INET6 723 if (inp->inp_flags & INP_IPV6) 724 continue; 725 #endif /* INET6 */ 726 727 if (inp->inp_faddr.s_addr != INADDR_ANY) { 728 if (faddr.s_addr == INADDR_ANY) 729 wildcard++; 730 else if (inp->inp_faddr.s_addr != faddr.s_addr || 731 inp->inp_fport != fport) 732 continue; 733 } else { 734 if (faddr.s_addr != INADDR_ANY) 735 wildcard++; 736 } 737 if (inp->inp_laddr.s_addr != INADDR_ANY) { 738 if (laddr.s_addr == INADDR_ANY) 739 wildcard++; 740 else if (inp->inp_laddr.s_addr != laddr.s_addr) 741 continue; 742 } else { 743 if (laddr.s_addr != INADDR_ANY) 744 wildcard++; 745 } 746 } 747 if ((!wildcard || (flags & INPLOOKUP_WILDCARD)) && 748 wildcard < matchwild) { 749 match = inp; 750 if ((matchwild = wildcard) == 0) 751 break; 752 } 753 } 754 return (match); 755 } 756 757 struct rtentry * 758 in_pcbrtentry(inp) 759 struct inpcb *inp; 760 { 761 struct route *ro; 762 763 ro = &inp->inp_route; 764 765 /* 766 * No route yet, so try to acquire one. 767 */ 768 if (ro->ro_rt == NULL) { 769 #ifdef INET6 770 bzero(ro, sizeof(struct route_in6)); 771 #else 772 bzero(ro, sizeof(struct route)); 773 #endif 774 775 switch(sotopf(inp->inp_socket)) { 776 #ifdef INET6 777 case PF_INET6: 778 if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) 779 break; 780 ro->ro_dst.sa_family = AF_INET6; 781 ro->ro_dst.sa_len = sizeof(struct sockaddr_in6); 782 ((struct sockaddr_in6 *) &ro->ro_dst)->sin6_addr = 783 inp->inp_faddr6; 784 rtalloc_mpath(ro, &inp->inp_laddr6.s6_addr32[0], 0); 785 break; 786 #endif /* INET6 */ 787 case PF_INET: 788 if (inp->inp_faddr.s_addr == INADDR_ANY) 789 break; 790 ro->ro_dst.sa_family = AF_INET; 791 ro->ro_dst.sa_len = sizeof(ro->ro_dst); 792 satosin(&ro->ro_dst)->sin_addr = inp->inp_faddr; 793 rtalloc_mpath(ro, &inp->inp_laddr.s_addr, 794 inp->inp_rdomain); 795 break; 796 } 797 } 798 return (ro->ro_rt); 799 } 800 801 struct sockaddr_in * 802 in_selectsrc(struct sockaddr_in *sin, struct route *ro, int soopts, 803 struct ip_moptions *mopts, int *errorp, u_int rdomain) 804 { 805 struct sockaddr_in *sin2; 806 struct in_ifaddr *ia; 807 808 ia = (struct in_ifaddr *)0; 809 /* 810 * If route is known or can be allocated now, 811 * our src addr is taken from the i/f, else punt. 812 */ 813 if (ro->ro_rt && 814 (satosin(&ro->ro_dst)->sin_addr.s_addr != 815 sin->sin_addr.s_addr || 816 soopts & SO_DONTROUTE)) { 817 RTFREE(ro->ro_rt); 818 ro->ro_rt = (struct rtentry *)0; 819 } 820 if ((soopts & SO_DONTROUTE) == 0 && /*XXX*/ 821 (ro->ro_rt == (struct rtentry *)0 || 822 ro->ro_rt->rt_ifp == (struct ifnet *)0)) { 823 /* No route yet, so try to acquire one */ 824 ro->ro_dst.sa_family = AF_INET; 825 ro->ro_dst.sa_len = sizeof(struct sockaddr_in); 826 satosin(&ro->ro_dst)->sin_addr = sin->sin_addr; 827 rtalloc_mpath(ro, NULL, rdomain); 828 829 /* 830 * It is important to bzero out the rest of the 831 * struct sockaddr_in when mixing v6 & v4! 832 */ 833 sin2 = (struct sockaddr_in *)&ro->ro_dst; 834 bzero(sin2->sin_zero, sizeof(sin2->sin_zero)); 835 } 836 /* 837 * If we found a route, use the address 838 * corresponding to the outgoing interface 839 * unless it is the loopback (in case a route 840 * to our address on another net goes to loopback). 841 */ 842 if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) 843 ia = ifatoia(ro->ro_rt->rt_ifa); 844 if (ia == 0) { 845 u_int16_t fport = sin->sin_port; 846 847 sin->sin_port = 0; 848 ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin), rdomain)); 849 if (ia == 0) 850 ia = ifatoia(ifa_ifwithnet(sintosa(sin), rdomain)); 851 sin->sin_port = fport; 852 if (ia == 0) 853 ia = TAILQ_FIRST(&in_ifaddr); 854 if (ia == 0) { 855 *errorp = EADDRNOTAVAIL; 856 return NULL; 857 } 858 } 859 /* 860 * If the destination address is multicast and an outgoing 861 * interface has been set as a multicast option, use the 862 * address of that interface as our source address. 863 */ 864 if (IN_MULTICAST(sin->sin_addr.s_addr) && mopts != NULL) { 865 struct ip_moptions *imo; 866 struct ifnet *ifp; 867 868 imo = mopts; 869 if (imo->imo_multicast_ifp != NULL) { 870 ifp = imo->imo_multicast_ifp; 871 TAILQ_FOREACH(ia, &in_ifaddr, ia_list) 872 if (ia->ia_ifp == ifp) 873 break; 874 if (ia == 0) { 875 *errorp = EADDRNOTAVAIL; 876 return NULL; 877 } 878 } 879 } 880 return satosin(&ia->ia_addr); 881 } 882 883 void 884 in_pcbrehash(inp) 885 struct inpcb *inp; 886 { 887 struct inpcbtable *table = inp->inp_table; 888 int s; 889 890 s = splnet(); 891 LIST_REMOVE(inp, inp_lhash); 892 LIST_INSERT_HEAD(INPCBLHASH(table, inp->inp_lport, inp->inp_rdomain), 893 inp, inp_lhash); 894 LIST_REMOVE(inp, inp_hash); 895 #ifdef INET6 896 if (inp->inp_flags & INP_IPV6) { 897 LIST_INSERT_HEAD(IN6PCBHASH(table, &inp->inp_faddr6, 898 inp->inp_fport, &inp->inp_laddr6, inp->inp_lport), 899 inp, inp_hash); 900 } else { 901 #endif /* INET6 */ 902 LIST_INSERT_HEAD(INPCBHASH(table, &inp->inp_faddr, 903 inp->inp_fport, &inp->inp_laddr, inp->inp_lport, 904 inp->inp_rdomain), inp, inp_hash); 905 #ifdef INET6 906 } 907 #endif /* INET6 */ 908 splx(s); 909 } 910 911 #ifdef DIAGNOSTIC 912 int in_pcbnotifymiss = 0; 913 #endif 914 915 /* 916 * The in(6)_pcbhashlookup functions are used to locate connected sockets 917 * quickly: 918 * faddr.fport <-> laddr.lport 919 * No wildcard matching is done so that listening sockets are not found. 920 * If the functions return NULL in(6)_pcblookup_listen can be used to 921 * find a listening/bound socket that may accept the connection. 922 * After those two lookups no other are necessary. 923 */ 924 struct inpcb * 925 in_pcbhashlookup(struct inpcbtable *table, struct in_addr faddr, 926 u_int fport_arg, struct in_addr laddr, u_int lport_arg, u_int rdomain) 927 { 928 struct inpcbhead *head; 929 struct inpcb *inp; 930 u_int16_t fport = fport_arg, lport = lport_arg; 931 932 rdomain = rtable_l2(rdomain); 933 head = INPCBHASH(table, &faddr, fport, &laddr, lport, rdomain); 934 LIST_FOREACH(inp, head, inp_hash) { 935 #ifdef INET6 936 if (inp->inp_flags & INP_IPV6) 937 continue; /*XXX*/ 938 #endif 939 if (inp->inp_faddr.s_addr == faddr.s_addr && 940 inp->inp_fport == fport && 941 inp->inp_lport == lport && 942 inp->inp_laddr.s_addr == laddr.s_addr && 943 inp->inp_rdomain == rdomain) { 944 /* 945 * Move this PCB to the head of hash chain so that 946 * repeated accesses are quicker. This is analogous to 947 * the historic single-entry PCB cache. 948 */ 949 if (inp != LIST_FIRST(head)) { 950 LIST_REMOVE(inp, inp_hash); 951 LIST_INSERT_HEAD(head, inp, inp_hash); 952 } 953 break; 954 } 955 } 956 #ifdef DIAGNOSTIC 957 if (inp == NULL && in_pcbnotifymiss) { 958 printf("in_pcbhashlookup: faddr=%08x fport=%d laddr=%08x lport=%d rdom=%d\n", 959 ntohl(faddr.s_addr), ntohs(fport), 960 ntohl(laddr.s_addr), ntohs(lport), rdomain); 961 } 962 #endif 963 return (inp); 964 } 965 966 #ifdef INET6 967 struct inpcb * 968 in6_pcbhashlookup(struct inpcbtable *table, struct in6_addr *faddr, 969 u_int fport_arg, struct in6_addr *laddr, u_int lport_arg) 970 { 971 struct inpcbhead *head; 972 struct inpcb *inp; 973 u_int16_t fport = fport_arg, lport = lport_arg; 974 975 head = IN6PCBHASH(table, faddr, fport, laddr, lport); 976 LIST_FOREACH(inp, head, inp_hash) { 977 if (!(inp->inp_flags & INP_IPV6)) 978 continue; 979 if (IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6, faddr) && 980 inp->inp_fport == fport && inp->inp_lport == lport && 981 IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, laddr)) { 982 /* 983 * Move this PCB to the head of hash chain so that 984 * repeated accesses are quicker. This is analogous to 985 * the historic single-entry PCB cache. 986 */ 987 if (inp != LIST_FIRST(head)) { 988 LIST_REMOVE(inp, inp_hash); 989 LIST_INSERT_HEAD(head, inp, inp_hash); 990 } 991 break; 992 } 993 } 994 #ifdef DIAGNOSTIC 995 if (inp == NULL && in_pcbnotifymiss) { 996 printf("in6_pcbhashlookup: faddr="); 997 printf(" fport=%d laddr=", ntohs(fport)); 998 printf(" lport=%d\n", ntohs(lport)); 999 } 1000 #endif 1001 return (inp); 1002 } 1003 #endif /* INET6 */ 1004 1005 /* 1006 * The in(6)_pcblookup_listen functions are used to locate listening 1007 * sockets quickly. This are sockets with unspecified foreign address 1008 * and port: 1009 * *.* <-> laddr.lport 1010 * *.* <-> *.lport 1011 */ 1012 struct inpcb * 1013 in_pcblookup_listen(struct inpcbtable *table, struct in_addr laddr, 1014 u_int lport_arg, int reverse, struct mbuf *m, u_int rdomain) 1015 { 1016 struct inpcbhead *head; 1017 struct in_addr *key1, *key2; 1018 struct inpcb *inp; 1019 u_int16_t lport = lport_arg; 1020 1021 rdomain = rtable_l2(rdomain); 1022 #if NPF > 0 1023 if (m && m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) { 1024 struct pf_divert *divert; 1025 /* XXX rdomain */ 1026 if ((divert = pf_find_divert(m)) == NULL) 1027 return (NULL); 1028 key1 = key2 = &divert->addr.ipv4; 1029 lport = divert->port; 1030 } else 1031 #endif 1032 if (reverse) { 1033 key1 = &zeroin_addr; 1034 key2 = &laddr; 1035 } else { 1036 key1 = &laddr; 1037 key2 = &zeroin_addr; 1038 } 1039 1040 head = INPCBHASH(table, &zeroin_addr, 0, key1, lport, rdomain); 1041 LIST_FOREACH(inp, head, inp_hash) { 1042 #ifdef INET6 1043 if (inp->inp_flags & INP_IPV6) 1044 continue; /*XXX*/ 1045 #endif 1046 if (inp->inp_lport == lport && inp->inp_fport == 0 && 1047 inp->inp_laddr.s_addr == key1->s_addr && 1048 inp->inp_faddr.s_addr == INADDR_ANY && 1049 inp->inp_rdomain == rdomain) 1050 break; 1051 } 1052 if (inp == NULL && key1->s_addr != key2->s_addr) { 1053 head = INPCBHASH(table, &zeroin_addr, 0, key2, lport, rdomain); 1054 LIST_FOREACH(inp, head, inp_hash) { 1055 #ifdef INET6 1056 if (inp->inp_flags & INP_IPV6) 1057 continue; /*XXX*/ 1058 #endif 1059 if (inp->inp_lport == lport && inp->inp_fport == 0 && 1060 inp->inp_laddr.s_addr == key2->s_addr && 1061 inp->inp_faddr.s_addr == INADDR_ANY && 1062 inp->inp_rdomain == rdomain) 1063 break; 1064 } 1065 } 1066 #ifdef DIAGNOSTIC 1067 if (inp == NULL && in_pcbnotifymiss) { 1068 printf("in_pcblookup_listen: laddr=%08x lport=%d\n", 1069 ntohl(laddr.s_addr), ntohs(lport)); 1070 } 1071 #endif 1072 /* 1073 * Move this PCB to the head of hash chain so that 1074 * repeated accesses are quicker. This is analogous to 1075 * the historic single-entry PCB cache. 1076 */ 1077 if (inp != NULL && inp != LIST_FIRST(head)) { 1078 LIST_REMOVE(inp, inp_hash); 1079 LIST_INSERT_HEAD(head, inp, inp_hash); 1080 } 1081 return (inp); 1082 } 1083 1084 #ifdef INET6 1085 struct inpcb * 1086 in6_pcblookup_listen(struct inpcbtable *table, struct in6_addr *laddr, 1087 u_int lport_arg, int reverse, struct mbuf *m) 1088 { 1089 struct inpcbhead *head; 1090 struct in6_addr *key1, *key2; 1091 struct inpcb *inp; 1092 u_int16_t lport = lport_arg; 1093 1094 #if NPF > 0 1095 if (m && m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) { 1096 struct pf_divert *divert; 1097 1098 if ((divert = pf_find_divert(m)) == NULL) 1099 return (NULL); 1100 key1 = key2 = &divert->addr.ipv6; 1101 lport = divert->port; 1102 } else 1103 #endif 1104 if (reverse) { 1105 key1 = &zeroin6_addr; 1106 key2 = laddr; 1107 } else { 1108 key1 = laddr; 1109 key2 = &zeroin6_addr; 1110 } 1111 1112 head = IN6PCBHASH(table, &zeroin6_addr, 0, key1, lport); 1113 LIST_FOREACH(inp, head, inp_hash) { 1114 if (!(inp->inp_flags & INP_IPV6)) 1115 continue; 1116 if (inp->inp_lport == lport && inp->inp_fport == 0 && 1117 IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, key1) && 1118 IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) 1119 break; 1120 } 1121 if (inp == NULL && ! IN6_ARE_ADDR_EQUAL(key1, key2)) { 1122 head = IN6PCBHASH(table, &zeroin6_addr, 0, key2, lport); 1123 LIST_FOREACH(inp, head, inp_hash) { 1124 if (!(inp->inp_flags & INP_IPV6)) 1125 continue; 1126 if (inp->inp_lport == lport && inp->inp_fport == 0 && 1127 IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, key2) && 1128 IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) 1129 break; 1130 } 1131 } 1132 #ifdef DIAGNOSTIC 1133 if (inp == NULL && in_pcbnotifymiss) { 1134 printf("in6_pcblookup_listen: laddr= lport=%d\n", 1135 ntohs(lport)); 1136 } 1137 #endif 1138 /* 1139 * Move this PCB to the head of hash chain so that 1140 * repeated accesses are quicker. This is analogous to 1141 * the historic single-entry PCB cache. 1142 */ 1143 if (inp != NULL && inp != LIST_FIRST(head)) { 1144 LIST_REMOVE(inp, inp_hash); 1145 LIST_INSERT_HEAD(head, inp, inp_hash); 1146 } 1147 return (inp); 1148 } 1149 #endif /* INET6 */ 1150