1 /* 2 * Copyright (c) 2004 Jeffrey M. Hsu. All rights reserved. 3 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to The DragonFly Project 6 * by Jeffrey M. Hsu. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of The DragonFly Project nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific, prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1982, 1986, 1991, 1993, 1995 36 * The Regents of the University of California. All rights reserved. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. All advertising materials mentioning features or use of this software 47 * must display the following acknowledgement: 48 * This product includes software developed by the University of 49 * California, Berkeley and its contributors. 50 * 4. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 67 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.27 2004/01/02 04:06:42 ambrisko Exp $ 68 * $DragonFly: src/sys/netinet/in_pcb.c,v 1.48 2008/11/08 03:38:23 sephe Exp $ 69 */ 70 71 #include "opt_ipsec.h" 72 #include "opt_inet6.h" 73 74 #include <sys/param.h> 75 #include <sys/systm.h> 76 #include <sys/malloc.h> 77 #include <sys/mbuf.h> 78 #include <sys/domain.h> 79 #include <sys/protosw.h> 80 #include <sys/socket.h> 81 #include <sys/socketvar.h> 82 #include <sys/proc.h> 83 #include <sys/priv.h> 84 #include <sys/jail.h> 85 #include <sys/kernel.h> 86 #include <sys/sysctl.h> 87 88 #include <sys/thread2.h> 89 #include <sys/socketvar2.h> 90 #include <sys/msgport2.h> 91 92 #include <machine/limits.h> 93 94 #include <net/if.h> 95 #include <net/if_types.h> 96 #include <net/route.h> 97 98 #include <netinet/in.h> 99 #include <netinet/in_pcb.h> 100 #include <netinet/in_var.h> 101 #include <netinet/ip_var.h> 102 #ifdef INET6 103 #include <netinet/ip6.h> 104 #include <netinet6/ip6_var.h> 105 #endif /* INET6 */ 106 107 #ifdef IPSEC 108 #include <netinet6/ipsec.h> 109 #include <netproto/key/key.h> 110 #include <netproto/ipsec/esp_var.h> 111 #endif 112 113 #ifdef FAST_IPSEC 114 #if defined(IPSEC) || defined(IPSEC_ESP) 115 #error "Bad idea: don't compile with both IPSEC and FAST_IPSEC!" 116 #endif 117 118 #include <netproto/ipsec/ipsec.h> 119 #include <netproto/ipsec/key.h> 120 #define IPSEC 121 #endif /* FAST_IPSEC */ 122 123 struct in_addr zeroin_addr; 124 125 /* 126 * These configure the range of local port addresses assigned to 127 * "unspecified" outgoing connections/packets/whatever. 128 */ 129 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */ 130 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */ 131 132 int ipport_firstauto = IPPORT_RESERVED; /* 1024 */ 133 int ipport_lastauto = IPPORT_USERRESERVED; /* 5000 */ 134 135 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ 136 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */ 137 138 #define RANGECHK(var, min, max) \ 139 if ((var) < (min)) { (var) = (min); } \ 140 else if ((var) > (max)) { (var) = (max); } 141 142 int udpencap_enable = 1; /* enabled by default */ 143 int udpencap_port = 4500; /* triggers decapsulation */ 144 145 static int 146 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS) 147 { 148 int error; 149 150 error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); 151 if (!error) { 152 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); 153 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); 154 155 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX); 156 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX); 157 158 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX); 159 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX); 160 } 161 return (error); 162 } 163 164 #undef RANGECHK 165 166 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports"); 167 168 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW, 169 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); 170 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW, 171 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); 172 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW, 173 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); 174 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW, 175 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); 176 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW, 177 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); 178 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW, 179 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); 180 181 /* 182 * in_pcb.c: manage the Protocol Control Blocks. 183 * 184 * NOTE: It is assumed that most of these functions will be called from 185 * a critical section. XXX - There are, unfortunately, a few exceptions 186 * to this rule that should be fixed. 187 * 188 * NOTE: The caller should initialize the cpu field to the cpu running the 189 * protocol stack associated with this inpcbinfo. 190 */ 191 192 void 193 in_pcbinfo_init(struct inpcbinfo *pcbinfo) 194 { 195 LIST_INIT(&pcbinfo->pcblisthead); 196 pcbinfo->cpu = -1; 197 pcbinfo->portsave = kmalloc(sizeof(*pcbinfo->portsave), M_PCB, 198 M_WAITOK | M_ZERO); 199 } 200 201 struct baddynamicports baddynamicports; 202 203 /* 204 * Check if the specified port is invalid for dynamic allocation. 205 */ 206 int 207 in_baddynamic(u_int16_t port, u_int16_t proto) 208 { 209 switch (proto) { 210 case IPPROTO_TCP: 211 return (DP_ISSET(baddynamicports.tcp, port)); 212 case IPPROTO_UDP: 213 #ifdef IPSEC 214 /* Cannot preset this as it is a sysctl */ 215 if (port == udpencap_port) 216 return (1); 217 #endif 218 return (DP_ISSET(baddynamicports.udp, port)); 219 default: 220 return (0); 221 } 222 } 223 224 225 /* 226 * Allocate a PCB and associate it with the socket. 227 */ 228 int 229 in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo) 230 { 231 struct inpcb *inp; 232 #ifdef IPSEC 233 int error; 234 #endif 235 236 inp = kmalloc(pcbinfo->ipi_size, M_PCB, M_WAITOK|M_ZERO); 237 inp->inp_gencnt = ++pcbinfo->ipi_gencnt; 238 inp->inp_pcbinfo = inp->inp_cpcbinfo = pcbinfo; 239 inp->inp_socket = so; 240 #ifdef IPSEC 241 error = ipsec_init_policy(so, &inp->inp_sp); 242 if (error != 0) { 243 kfree(inp, M_PCB); 244 return (error); 245 } 246 #endif 247 #ifdef INET6 248 if (INP_SOCKAF(so) == AF_INET6 && ip6_v6only) 249 inp->inp_flags |= IN6P_IPV6_V6ONLY; 250 if (ip6_auto_flowlabel) 251 inp->inp_flags |= IN6P_AUTOFLOWLABEL; 252 #endif 253 soreference(so); 254 so->so_pcb = inp; 255 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, inp, inp_list); 256 pcbinfo->ipi_count++; 257 return (0); 258 } 259 260 /* 261 * Unlink a pcb with the intention of moving it to another cpu with a 262 * different pcbinfo. While unlinked nothing should attempt to dereference 263 * inp_pcbinfo, NULL it out so we assert if it does. 264 */ 265 void 266 in_pcbunlink(struct inpcb *inp, struct inpcbinfo *pcbinfo) 267 { 268 KKASSERT(inp->inp_pcbinfo == pcbinfo); 269 270 LIST_REMOVE(inp, inp_list); 271 pcbinfo->ipi_count--; 272 inp->inp_pcbinfo = NULL; 273 } 274 275 /* 276 * Relink a pcb into a new pcbinfo. 277 */ 278 void 279 in_pcblink(struct inpcb *inp, struct inpcbinfo *pcbinfo) 280 { 281 KKASSERT(inp->inp_pcbinfo == NULL); 282 inp->inp_pcbinfo = pcbinfo; 283 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, inp, inp_list); 284 pcbinfo->ipi_count++; 285 } 286 287 int 288 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct thread *td) 289 { 290 struct socket *so = inp->inp_socket; 291 struct proc *p = td->td_proc; 292 unsigned short *lastport; 293 struct sockaddr_in *sin; 294 struct sockaddr_in jsin; 295 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 296 struct ucred *cred = NULL; 297 u_short lport = 0; 298 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); 299 int error; 300 301 KKASSERT(p); 302 303 if (TAILQ_EMPTY(&in_ifaddrheads[mycpuid])) /* XXX broken! */ 304 return (EADDRNOTAVAIL); 305 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) 306 return (EINVAL); /* already bound */ 307 308 if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT))) 309 wild = 1; /* neither SO_REUSEADDR nor SO_REUSEPORT is set */ 310 if (p) 311 cred = p->p_ucred; 312 313 /* 314 * This has to be atomic. If the porthash is shared across multiple 315 * protocol threads (aka tcp) then the token will be non-NULL. 316 */ 317 if (pcbinfo->porttoken) 318 lwkt_gettoken(pcbinfo->porttoken); 319 320 if (nam != NULL) { 321 sin = (struct sockaddr_in *)nam; 322 if (nam->sa_len != sizeof *sin) { 323 error = EINVAL; 324 goto done; 325 } 326 #ifdef notdef 327 /* 328 * We should check the family, but old programs 329 * incorrectly fail to initialize it. 330 */ 331 if (sin->sin_family != AF_INET) { 332 error = EAFNOSUPPORT; 333 goto done; 334 } 335 #endif 336 if (!prison_replace_wildcards(td, nam)) { 337 error = EINVAL; 338 goto done; 339 } 340 lport = sin->sin_port; 341 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 342 /* 343 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 344 * allow complete duplication of binding if 345 * SO_REUSEPORT is set, or if SO_REUSEADDR is set 346 * and a multicast address is bound on both 347 * new and duplicated sockets. 348 */ 349 if (so->so_options & SO_REUSEADDR) 350 reuseport = SO_REUSEADDR | SO_REUSEPORT; 351 } else if (sin->sin_addr.s_addr != INADDR_ANY) { 352 sin->sin_port = 0; /* yech... */ 353 bzero(&sin->sin_zero, sizeof sin->sin_zero); 354 if (ifa_ifwithaddr((struct sockaddr *)sin) == NULL) { 355 error = EADDRNOTAVAIL; 356 goto done; 357 } 358 } 359 if (lport != 0) { 360 struct inpcb *t; 361 362 /* GROSS */ 363 if (ntohs(lport) < IPPORT_RESERVED && 364 cred && 365 priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0)) { 366 error = EACCES; 367 goto done; 368 } 369 if (so->so_cred->cr_uid != 0 && 370 !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 371 t = in_pcblookup_local(pcbinfo, 372 sin->sin_addr, 373 lport, 374 INPLOOKUP_WILDCARD, 375 cred); 376 if (t && 377 (!in_nullhost(sin->sin_addr) || 378 !in_nullhost(t->inp_laddr) || 379 (t->inp_socket->so_options & 380 SO_REUSEPORT) == 0) && 381 (so->so_cred->cr_uid != 382 t->inp_socket->so_cred->cr_uid)) { 383 #ifdef INET6 384 if (!in_nullhost(sin->sin_addr) || 385 !in_nullhost(t->inp_laddr) || 386 INP_SOCKAF(so) == 387 INP_SOCKAF(t->inp_socket)) 388 #endif 389 { 390 error = EADDRINUSE; 391 goto done; 392 } 393 } 394 } 395 if (cred && !prison_replace_wildcards(td, nam)) { 396 error = EADDRNOTAVAIL; 397 goto done; 398 } 399 t = in_pcblookup_local(pcbinfo, sin->sin_addr, lport, 400 wild, cred); 401 if (t && !(reuseport & t->inp_socket->so_options)) { 402 #ifdef INET6 403 if (!in_nullhost(sin->sin_addr) || 404 !in_nullhost(t->inp_laddr) || 405 INP_SOCKAF(so) == INP_SOCKAF(t->inp_socket)) 406 #endif 407 { 408 error = EADDRINUSE; 409 goto done; 410 } 411 } 412 } 413 inp->inp_laddr = sin->sin_addr; 414 } 415 if (lport == 0) { 416 ushort first, last; 417 int count; 418 419 jsin.sin_family = AF_INET; 420 jsin.sin_addr.s_addr = inp->inp_laddr.s_addr; 421 if (!prison_replace_wildcards(td, (struct sockaddr *)&jsin)) { 422 inp->inp_laddr.s_addr = INADDR_ANY; 423 error = EINVAL; 424 goto done; 425 } 426 inp->inp_laddr.s_addr = jsin.sin_addr.s_addr; 427 428 inp->inp_flags |= INP_ANONPORT; 429 430 if (inp->inp_flags & INP_HIGHPORT) { 431 first = ipport_hifirstauto; /* sysctl */ 432 last = ipport_hilastauto; 433 lastport = &pcbinfo->lasthi; 434 } else if (inp->inp_flags & INP_LOWPORT) { 435 if (cred && 436 (error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0))) { 437 inp->inp_laddr.s_addr = INADDR_ANY; 438 goto done; 439 } 440 first = ipport_lowfirstauto; /* 1023 */ 441 last = ipport_lowlastauto; /* 600 */ 442 lastport = &pcbinfo->lastlow; 443 } else { 444 first = ipport_firstauto; /* sysctl */ 445 last = ipport_lastauto; 446 lastport = &pcbinfo->lastport; 447 } 448 /* 449 * Simple check to ensure all ports are not used up causing 450 * a deadlock here. 451 * 452 * We split the two cases (up and down) so that the direction 453 * is not being tested on each round of the loop. 454 */ 455 if (first > last) { 456 /* 457 * counting down 458 */ 459 count = first - last; 460 461 do { 462 if (count-- < 0) { /* completely used? */ 463 inp->inp_laddr.s_addr = INADDR_ANY; 464 error = EADDRNOTAVAIL; 465 goto done; 466 } 467 --*lastport; 468 if (*lastport > first || *lastport < last) 469 *lastport = first; 470 lport = htons(*lastport); 471 } while (in_pcblookup_local(pcbinfo, inp->inp_laddr, 472 lport, wild, cred)); 473 } else { 474 /* 475 * counting up 476 */ 477 count = last - first; 478 479 do { 480 if (count-- < 0) { /* completely used? */ 481 inp->inp_laddr.s_addr = INADDR_ANY; 482 error = EADDRNOTAVAIL; 483 goto done; 484 } 485 ++*lastport; 486 if (*lastport < first || *lastport > last) 487 *lastport = first; 488 lport = htons(*lastport); 489 } while (in_pcblookup_local(pcbinfo, inp->inp_laddr, 490 lport, wild, cred)); 491 } 492 } 493 inp->inp_lport = lport; 494 495 jsin.sin_family = AF_INET; 496 jsin.sin_addr.s_addr = inp->inp_laddr.s_addr; 497 if (!prison_replace_wildcards(td, (struct sockaddr*)&jsin)) { 498 inp->inp_laddr.s_addr = INADDR_ANY; 499 inp->inp_lport = 0; 500 error = EINVAL; 501 goto done; 502 } 503 inp->inp_laddr.s_addr = jsin.sin_addr.s_addr; 504 505 if (in_pcbinsporthash(inp) != 0) { 506 inp->inp_laddr.s_addr = INADDR_ANY; 507 inp->inp_lport = 0; 508 error = EAGAIN; 509 goto done; 510 } 511 error = 0; 512 done: 513 if (pcbinfo->porttoken) 514 lwkt_reltoken(pcbinfo->porttoken); 515 return error; 516 } 517 518 static struct inpcb * 519 in_pcblookup_addrport(struct inpcbinfo *pcbinfo, struct in_addr laddr, 520 u_short lport, struct in_addr faddr, u_short fport, struct ucred *cred) 521 { 522 struct inpcb *inp; 523 struct inpcbporthead *porthash; 524 struct inpcbport *phd; 525 struct inpcb *match = NULL; 526 527 /* 528 * If the porthashbase is shared across several cpus we need 529 * to lock. 530 */ 531 if (pcbinfo->porttoken) 532 lwkt_gettoken(pcbinfo->porttoken); 533 534 /* 535 * Best fit PCB lookup. 536 * 537 * First see if this local port is in use by looking on the 538 * port hash list. 539 */ 540 porthash = &pcbinfo->porthashbase[ 541 INP_PCBPORTHASH(lport, pcbinfo->porthashmask)]; 542 LIST_FOREACH(phd, porthash, phd_hash) { 543 if (phd->phd_port == lport) 544 break; 545 } 546 if (phd != NULL) { 547 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { 548 #ifdef INET6 549 if ((inp->inp_vflag & INP_IPV4) == 0) 550 continue; 551 #endif 552 if (inp->inp_laddr.s_addr != INADDR_ANY && 553 inp->inp_laddr.s_addr != laddr.s_addr) 554 continue; 555 556 if (inp->inp_faddr.s_addr != INADDR_ANY && 557 inp->inp_faddr.s_addr != faddr.s_addr) 558 continue; 559 560 if (inp->inp_fport != 0 && inp->inp_fport != fport) 561 continue; 562 563 if (cred == NULL || 564 cred->cr_prison == 565 inp->inp_socket->so_cred->cr_prison) { 566 match = inp; 567 break; 568 } 569 } 570 } 571 if (pcbinfo->porttoken) 572 lwkt_reltoken(pcbinfo->porttoken); 573 return (match); 574 } 575 576 int 577 in_pcbconn_bind(struct inpcb *inp, const struct sockaddr *nam, 578 struct thread *td) 579 { 580 struct proc *p = td->td_proc; 581 unsigned short *lastport; 582 const struct sockaddr_in *sin = (const struct sockaddr_in *)nam; 583 struct sockaddr_in jsin; 584 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 585 struct ucred *cred = NULL; 586 u_short lport = 0; 587 ushort first, last; 588 int count, error, dup = 0; 589 590 if (TAILQ_EMPTY(&in_ifaddrheads[mycpuid])) /* XXX broken! */ 591 return (EADDRNOTAVAIL); 592 593 KKASSERT(inp->inp_laddr.s_addr != INADDR_ANY); 594 if (inp->inp_lport != 0) 595 return (EINVAL); /* already bound */ 596 597 KKASSERT(p); 598 cred = p->p_ucred; 599 600 /* 601 * This has to be atomic. If the porthash is shared across multiple 602 * protocol threads (aka tcp) then the token will be non-NULL. 603 */ 604 if (pcbinfo->porttoken) 605 lwkt_gettoken(pcbinfo->porttoken); 606 607 jsin.sin_family = AF_INET; 608 jsin.sin_addr.s_addr = inp->inp_laddr.s_addr; 609 if (!prison_replace_wildcards(td, (struct sockaddr *)&jsin)) { 610 inp->inp_laddr.s_addr = INADDR_ANY; 611 error = EINVAL; 612 goto done; 613 } 614 inp->inp_laddr.s_addr = jsin.sin_addr.s_addr; 615 616 inp->inp_flags |= INP_ANONPORT; 617 618 if (inp->inp_flags & INP_HIGHPORT) { 619 first = ipport_hifirstauto; /* sysctl */ 620 last = ipport_hilastauto; 621 lastport = &pcbinfo->lasthi; 622 } else if (inp->inp_flags & INP_LOWPORT) { 623 if (cred && 624 (error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0))) { 625 inp->inp_laddr.s_addr = INADDR_ANY; 626 goto done; 627 } 628 first = ipport_lowfirstauto; /* 1023 */ 629 last = ipport_lowlastauto; /* 600 */ 630 lastport = &pcbinfo->lastlow; 631 } else { 632 first = ipport_firstauto; /* sysctl */ 633 last = ipport_lastauto; 634 lastport = &pcbinfo->lastport; 635 } 636 637 again: 638 /* 639 * Simple check to ensure all ports are not used up causing 640 * a deadlock here. 641 * 642 * We split the two cases (up and down) so that the direction 643 * is not being tested on each round of the loop. 644 */ 645 if (first > last) { 646 /* 647 * counting down 648 */ 649 count = first - last; 650 651 do { 652 if (count-- < 0) { /* completely used? */ 653 inp->inp_laddr.s_addr = INADDR_ANY; 654 error = EADDRNOTAVAIL; 655 goto done; 656 } 657 --*lastport; 658 if (*lastport > first || *lastport < last) 659 *lastport = first; 660 lport = htons(*lastport); 661 } while (in_pcblookup_addrport(pcbinfo, inp->inp_laddr, lport, 662 sin->sin_addr, sin->sin_port, cred)); 663 } else { 664 /* 665 * counting up 666 */ 667 count = last - first; 668 669 do { 670 if (count-- < 0) { /* completely used? */ 671 inp->inp_laddr.s_addr = INADDR_ANY; 672 error = EADDRNOTAVAIL; 673 goto done; 674 } 675 ++*lastport; 676 if (*lastport < first || *lastport > last) 677 *lastport = first; 678 lport = htons(*lastport); 679 } while (in_pcblookup_addrport(pcbinfo, inp->inp_laddr, lport, 680 sin->sin_addr, sin->sin_port, cred)); 681 } 682 683 /* This could happen on loopback interface */ 684 if (sin->sin_port == lport && 685 sin->sin_addr.s_addr == inp->inp_laddr.s_addr) { 686 if (dup) { 687 /* 688 * Duplicate again; give up 689 */ 690 inp->inp_laddr.s_addr = INADDR_ANY; 691 error = EADDRNOTAVAIL; 692 goto done; 693 } 694 dup = 1; 695 goto again; 696 } 697 inp->inp_lport = lport; 698 699 jsin.sin_family = AF_INET; 700 jsin.sin_addr.s_addr = inp->inp_laddr.s_addr; 701 if (!prison_replace_wildcards(td, (struct sockaddr*)&jsin)) { 702 inp->inp_laddr.s_addr = INADDR_ANY; 703 inp->inp_lport = 0; 704 error = EINVAL; 705 goto done; 706 } 707 inp->inp_laddr.s_addr = jsin.sin_addr.s_addr; 708 709 if (in_pcbinsporthash(inp) != 0) { 710 inp->inp_laddr.s_addr = INADDR_ANY; 711 inp->inp_lport = 0; 712 error = EAGAIN; 713 goto done; 714 } 715 error = 0; 716 done: 717 if (pcbinfo->porttoken) 718 lwkt_reltoken(pcbinfo->porttoken); 719 return error; 720 } 721 722 /* 723 * Transform old in_pcbconnect() into an inner subroutine for new 724 * in_pcbconnect(): Do some validity-checking on the remote 725 * address (in mbuf 'nam') and then determine local host address 726 * (i.e., which interface) to use to access that remote host. 727 * 728 * This preserves definition of in_pcbconnect(), while supporting a 729 * slightly different version for T/TCP. (This is more than 730 * a bit of a kludge, but cleaning up the internal interfaces would 731 * have forced minor changes in every protocol). 732 */ 733 int 734 in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, 735 struct sockaddr_in **plocal_sin, struct thread *td) 736 { 737 struct in_ifaddr *ia; 738 struct ucred *cred = NULL; 739 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 740 struct sockaddr *jsin; 741 int jailed = 0, alloc_route = 0; 742 743 if (nam->sa_len != sizeof *sin) 744 return (EINVAL); 745 if (sin->sin_family != AF_INET) 746 return (EAFNOSUPPORT); 747 if (sin->sin_port == 0) 748 return (EADDRNOTAVAIL); 749 if (td && td->td_proc && td->td_proc->p_ucred) 750 cred = td->td_proc->p_ucred; 751 if (cred && cred->cr_prison) 752 jailed = 1; 753 if (!TAILQ_EMPTY(&in_ifaddrheads[mycpuid])) { 754 ia = TAILQ_FIRST(&in_ifaddrheads[mycpuid])->ia; 755 /* 756 * If the destination address is INADDR_ANY, 757 * use the primary local address. 758 * If the supplied address is INADDR_BROADCAST, 759 * and the primary interface supports broadcast, 760 * choose the broadcast address for that interface. 761 */ 762 if (sin->sin_addr.s_addr == INADDR_ANY) 763 sin->sin_addr = IA_SIN(ia)->sin_addr; 764 else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST && 765 (ia->ia_ifp->if_flags & IFF_BROADCAST)) 766 sin->sin_addr = satosin(&ia->ia_broadaddr)->sin_addr; 767 } 768 if (inp->inp_laddr.s_addr == INADDR_ANY) { 769 struct route *ro; 770 771 ia = NULL; 772 /* 773 * If route is known or can be allocated now, 774 * our src addr is taken from the i/f, else punt. 775 * Note that we should check the address family of the cached 776 * destination, in case of sharing the cache with IPv6. 777 */ 778 ro = &inp->inp_route; 779 if (ro->ro_rt && 780 (!(ro->ro_rt->rt_flags & RTF_UP) || 781 ro->ro_dst.sa_family != AF_INET || 782 satosin(&ro->ro_dst)->sin_addr.s_addr != 783 sin->sin_addr.s_addr || 784 inp->inp_socket->so_options & SO_DONTROUTE)) { 785 RTFREE(ro->ro_rt); 786 ro->ro_rt = NULL; 787 } 788 if (!(inp->inp_socket->so_options & SO_DONTROUTE) && /*XXX*/ 789 (ro->ro_rt == NULL || 790 ro->ro_rt->rt_ifp == NULL)) { 791 /* No route yet, so try to acquire one */ 792 bzero(&ro->ro_dst, sizeof(struct sockaddr_in)); 793 ro->ro_dst.sa_family = AF_INET; 794 ro->ro_dst.sa_len = sizeof(struct sockaddr_in); 795 ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = 796 sin->sin_addr; 797 rtalloc(ro); 798 alloc_route = 1; 799 } 800 /* 801 * If we found a route, use the address 802 * corresponding to the outgoing interface 803 * unless it is the loopback (in case a route 804 * to our address on another net goes to loopback). 805 */ 806 if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) { 807 if (jailed) { 808 if (jailed_ip(cred->cr_prison, 809 ro->ro_rt->rt_ifa->ifa_addr)) { 810 ia = ifatoia(ro->ro_rt->rt_ifa); 811 } 812 } else { 813 ia = ifatoia(ro->ro_rt->rt_ifa); 814 } 815 } 816 if (ia == NULL) { 817 u_short fport = sin->sin_port; 818 819 sin->sin_port = 0; 820 ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin))); 821 if (ia && jailed && !jailed_ip(cred->cr_prison, 822 sintosa(&ia->ia_addr))) 823 ia = NULL; 824 if (ia == NULL) 825 ia = ifatoia(ifa_ifwithnet(sintosa(sin))); 826 if (ia && jailed && !jailed_ip(cred->cr_prison, 827 sintosa(&ia->ia_addr))) 828 ia = NULL; 829 sin->sin_port = fport; 830 if (ia == NULL && 831 !TAILQ_EMPTY(&in_ifaddrheads[mycpuid])) 832 ia = TAILQ_FIRST(&in_ifaddrheads[mycpuid])->ia; 833 if (ia && jailed && !jailed_ip(cred->cr_prison, 834 sintosa(&ia->ia_addr))) 835 ia = NULL; 836 837 if (!jailed && ia == NULL) 838 goto fail; 839 } 840 /* 841 * If the destination address is multicast and an outgoing 842 * interface has been set as a multicast option, use the 843 * address of that interface as our source address. 844 */ 845 if (!jailed && IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) && 846 inp->inp_moptions != NULL) { 847 struct ip_moptions *imo; 848 struct ifnet *ifp; 849 850 imo = inp->inp_moptions; 851 if (imo->imo_multicast_ifp != NULL) { 852 struct in_ifaddr_container *iac; 853 854 ifp = imo->imo_multicast_ifp; 855 ia = NULL; 856 TAILQ_FOREACH(iac, 857 &in_ifaddrheads[mycpuid], ia_link) { 858 if (iac->ia->ia_ifp == ifp) { 859 ia = iac->ia; 860 break; 861 } 862 } 863 if (ia == NULL) 864 goto fail; 865 } 866 } 867 /* 868 * Don't do pcblookup call here; return interface in plocal_sin 869 * and exit to caller, that will do the lookup. 870 */ 871 if (ia == NULL && jailed) { 872 if ((jsin = prison_get_nonlocal(cred->cr_prison, AF_INET, NULL)) != NULL || 873 (jsin = prison_get_local(cred->cr_prison, AF_INET, NULL)) != NULL) { 874 *plocal_sin = satosin(jsin); 875 } else { 876 /* IPv6 only Jail */ 877 goto fail; 878 } 879 } else { 880 *plocal_sin = &ia->ia_addr; 881 } 882 } 883 return (0); 884 fail: 885 if (alloc_route) { 886 struct route *ro = &inp->inp_route; 887 888 if (ro->ro_rt != NULL) 889 RTFREE(ro->ro_rt); 890 bzero(ro, sizeof(*ro)); 891 } 892 return (EADDRNOTAVAIL); 893 } 894 895 /* 896 * Outer subroutine: 897 * Connect from a socket to a specified address. 898 * Both address and port must be specified in argument sin. 899 * If don't have a local address for this socket yet, 900 * then pick one. 901 */ 902 int 903 in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct thread *td) 904 { 905 struct sockaddr_in *if_sin; 906 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 907 int error; 908 909 /* Call inner routine to assign local interface address. */ 910 if ((error = in_pcbladdr(inp, nam, &if_sin, td)) != 0) 911 return (error); 912 913 if (in_pcblookup_hash(inp->inp_cpcbinfo, sin->sin_addr, sin->sin_port, 914 inp->inp_laddr.s_addr ? 915 inp->inp_laddr : if_sin->sin_addr, 916 inp->inp_lport, FALSE, NULL) != NULL) { 917 return (EADDRINUSE); 918 } 919 if (inp->inp_laddr.s_addr == INADDR_ANY) { 920 if (inp->inp_lport == 0) { 921 error = in_pcbbind(inp, NULL, td); 922 if (error) 923 return (error); 924 } 925 inp->inp_laddr = if_sin->sin_addr; 926 } 927 inp->inp_faddr = sin->sin_addr; 928 inp->inp_fport = sin->sin_port; 929 in_pcbinsconnhash(inp); 930 return (0); 931 } 932 933 void 934 in_pcbdisconnect(struct inpcb *inp) 935 { 936 937 inp->inp_faddr.s_addr = INADDR_ANY; 938 inp->inp_fport = 0; 939 in_pcbremconnhash(inp); 940 if (inp->inp_socket->so_state & SS_NOFDREF) 941 in_pcbdetach(inp); 942 } 943 944 void 945 in_pcbdetach(struct inpcb *inp) 946 { 947 struct socket *so = inp->inp_socket; 948 struct inpcbinfo *ipi = inp->inp_pcbinfo; 949 950 #ifdef IPSEC 951 ipsec4_delete_pcbpolicy(inp); 952 #endif /*IPSEC*/ 953 inp->inp_gencnt = ++ipi->ipi_gencnt; 954 KKASSERT((so->so_state & SS_ASSERTINPROG) == 0); 955 in_pcbremlists(inp); 956 so->so_pcb = NULL; 957 sofree(so); /* remove pcb ref */ 958 if (inp->inp_options) 959 m_free(inp->inp_options); 960 if (inp->inp_route.ro_rt) 961 rtfree(inp->inp_route.ro_rt); 962 ip_freemoptions(inp->inp_moptions); 963 inp->inp_vflag = 0; 964 kfree(inp, M_PCB); 965 } 966 967 /* 968 * The calling convention of in_setsockaddr() and in_setpeeraddr() was 969 * modified to match the pru_sockaddr() and pru_peeraddr() entry points 970 * in struct pr_usrreqs, so that protocols can just reference then directly 971 * without the need for a wrapper function. The socket must have a valid 972 * (i.e., non-nil) PCB, but it should be impossible to get an invalid one 973 * except through a kernel programming error, so it is acceptable to panic 974 * (or in this case trap) if the PCB is invalid. (Actually, we don't trap 975 * because there actually /is/ a programming error somewhere... XXX) 976 */ 977 int 978 in_setsockaddr(struct socket *so, struct sockaddr **nam) 979 { 980 struct inpcb *inp; 981 struct sockaddr_in *sin; 982 983 /* 984 * Do the malloc first in case it blocks. 985 */ 986 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, 987 M_WAITOK | M_ZERO); 988 sin->sin_family = AF_INET; 989 sin->sin_len = sizeof *sin; 990 991 crit_enter(); 992 inp = so->so_pcb; 993 if (!inp) { 994 crit_exit(); 995 kfree(sin, M_SONAME); 996 return (ECONNRESET); 997 } 998 sin->sin_port = inp->inp_lport; 999 sin->sin_addr = inp->inp_laddr; 1000 crit_exit(); 1001 1002 *nam = (struct sockaddr *)sin; 1003 return (0); 1004 } 1005 1006 void 1007 in_setsockaddr_dispatch(netmsg_t msg) 1008 { 1009 int error; 1010 1011 error = in_setsockaddr(msg->base.nm_so, msg->peeraddr.nm_nam); 1012 lwkt_replymsg(&msg->lmsg, error); 1013 } 1014 1015 int 1016 in_setpeeraddr(struct socket *so, struct sockaddr **nam) 1017 { 1018 struct inpcb *inp; 1019 struct sockaddr_in *sin; 1020 1021 /* 1022 * Do the malloc first in case it blocks. 1023 */ 1024 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, 1025 M_WAITOK | M_ZERO); 1026 sin->sin_family = AF_INET; 1027 sin->sin_len = sizeof *sin; 1028 1029 crit_enter(); 1030 inp = so->so_pcb; 1031 if (!inp) { 1032 crit_exit(); 1033 kfree(sin, M_SONAME); 1034 return (ECONNRESET); 1035 } 1036 sin->sin_port = inp->inp_fport; 1037 sin->sin_addr = inp->inp_faddr; 1038 crit_exit(); 1039 1040 *nam = (struct sockaddr *)sin; 1041 return (0); 1042 } 1043 1044 void 1045 in_setpeeraddr_dispatch(netmsg_t msg) 1046 { 1047 int error; 1048 1049 error = in_setpeeraddr(msg->base.nm_so, msg->peeraddr.nm_nam); 1050 lwkt_replymsg(&msg->lmsg, error); 1051 } 1052 1053 void 1054 in_pcbnotifyall(struct inpcbhead *head, struct in_addr faddr, int err, 1055 void (*notify)(struct inpcb *, int)) 1056 { 1057 struct inpcb *inp, *ninp; 1058 1059 /* 1060 * note: if INP_PLACEMARKER is set we must ignore the rest of 1061 * the structure and skip it. 1062 */ 1063 crit_enter(); 1064 LIST_FOREACH_MUTABLE(inp, head, inp_list, ninp) { 1065 if (inp->inp_flags & INP_PLACEMARKER) 1066 continue; 1067 #ifdef INET6 1068 if (!(inp->inp_vflag & INP_IPV4)) 1069 continue; 1070 #endif 1071 if (inp->inp_faddr.s_addr != faddr.s_addr || 1072 inp->inp_socket == NULL) 1073 continue; 1074 (*notify)(inp, err); /* can remove inp from list! */ 1075 } 1076 crit_exit(); 1077 } 1078 1079 void 1080 in_pcbpurgeif0(struct inpcb *head, struct ifnet *ifp) 1081 { 1082 struct inpcb *inp; 1083 struct ip_moptions *imo; 1084 int i, gap; 1085 1086 for (inp = head; inp != NULL; inp = LIST_NEXT(inp, inp_list)) { 1087 if (inp->inp_flags & INP_PLACEMARKER) 1088 continue; 1089 imo = inp->inp_moptions; 1090 if ((inp->inp_vflag & INP_IPV4) && imo != NULL) { 1091 /* 1092 * Unselect the outgoing interface if it is being 1093 * detached. 1094 */ 1095 if (imo->imo_multicast_ifp == ifp) 1096 imo->imo_multicast_ifp = NULL; 1097 1098 /* 1099 * Drop multicast group membership if we joined 1100 * through the interface being detached. 1101 */ 1102 for (i = 0, gap = 0; i < imo->imo_num_memberships; 1103 i++) { 1104 if (imo->imo_membership[i]->inm_ifp == ifp) { 1105 in_delmulti(imo->imo_membership[i]); 1106 gap++; 1107 } else if (gap != 0) 1108 imo->imo_membership[i - gap] = 1109 imo->imo_membership[i]; 1110 } 1111 imo->imo_num_memberships -= gap; 1112 } 1113 } 1114 } 1115 1116 /* 1117 * Check for alternatives when higher level complains 1118 * about service problems. For now, invalidate cached 1119 * routing information. If the route was created dynamically 1120 * (by a redirect), time to try a default gateway again. 1121 */ 1122 void 1123 in_losing(struct inpcb *inp) 1124 { 1125 struct rtentry *rt; 1126 struct rt_addrinfo rtinfo; 1127 1128 if ((rt = inp->inp_route.ro_rt)) { 1129 bzero(&rtinfo, sizeof(struct rt_addrinfo)); 1130 rtinfo.rti_info[RTAX_DST] = rt_key(rt); 1131 rtinfo.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1132 rtinfo.rti_info[RTAX_NETMASK] = rt_mask(rt); 1133 rtinfo.rti_flags = rt->rt_flags; 1134 rt_missmsg(RTM_LOSING, &rtinfo, rt->rt_flags, 0); 1135 if (rt->rt_flags & RTF_DYNAMIC) 1136 rtrequest1_global(RTM_DELETE, &rtinfo, NULL, NULL); 1137 inp->inp_route.ro_rt = NULL; 1138 rtfree(rt); 1139 /* 1140 * A new route can be allocated 1141 * the next time output is attempted. 1142 */ 1143 } 1144 } 1145 1146 /* 1147 * After a routing change, flush old routing 1148 * and allocate a (hopefully) better one. 1149 */ 1150 void 1151 in_rtchange(struct inpcb *inp, int err) 1152 { 1153 if (inp->inp_route.ro_rt) { 1154 rtfree(inp->inp_route.ro_rt); 1155 inp->inp_route.ro_rt = NULL; 1156 /* 1157 * A new route can be allocated the next time 1158 * output is attempted. 1159 */ 1160 } 1161 } 1162 1163 /* 1164 * Lookup a PCB based on the local address and port. 1165 */ 1166 struct inpcb * 1167 in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, 1168 u_int lport_arg, int wild_okay, struct ucred *cred) 1169 { 1170 struct inpcb *inp; 1171 int matchwild = 3, wildcard; 1172 u_short lport = lport_arg; 1173 struct inpcbporthead *porthash; 1174 struct inpcbport *phd; 1175 struct inpcb *match = NULL; 1176 1177 /* 1178 * If the porthashbase is shared across several cpus we need 1179 * to lock. 1180 */ 1181 if (pcbinfo->porttoken) 1182 lwkt_gettoken(pcbinfo->porttoken); 1183 1184 /* 1185 * Best fit PCB lookup. 1186 * 1187 * First see if this local port is in use by looking on the 1188 * port hash list. 1189 */ 1190 porthash = &pcbinfo->porthashbase[ 1191 INP_PCBPORTHASH(lport, pcbinfo->porthashmask)]; 1192 LIST_FOREACH(phd, porthash, phd_hash) { 1193 if (phd->phd_port == lport) 1194 break; 1195 } 1196 if (phd != NULL) { 1197 /* 1198 * Port is in use by one or more PCBs. Look for best 1199 * fit. 1200 */ 1201 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { 1202 wildcard = 0; 1203 #ifdef INET6 1204 if ((inp->inp_vflag & INP_IPV4) == 0) 1205 continue; 1206 #endif 1207 if (inp->inp_faddr.s_addr != INADDR_ANY) 1208 wildcard++; 1209 if (inp->inp_laddr.s_addr != INADDR_ANY) { 1210 if (laddr.s_addr == INADDR_ANY) 1211 wildcard++; 1212 else if (inp->inp_laddr.s_addr != laddr.s_addr) 1213 continue; 1214 } else { 1215 if (laddr.s_addr != INADDR_ANY) 1216 wildcard++; 1217 } 1218 if (wildcard && !wild_okay) 1219 continue; 1220 if (wildcard < matchwild && 1221 (cred == NULL || 1222 cred->cr_prison == 1223 inp->inp_socket->so_cred->cr_prison)) { 1224 match = inp; 1225 matchwild = wildcard; 1226 if (matchwild == 0) { 1227 break; 1228 } 1229 } 1230 } 1231 } 1232 if (pcbinfo->porttoken) 1233 lwkt_reltoken(pcbinfo->porttoken); 1234 return (match); 1235 } 1236 1237 /* 1238 * Lookup PCB in hash list. 1239 */ 1240 struct inpcb * 1241 in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr, 1242 u_int fport_arg, struct in_addr laddr, u_int lport_arg, 1243 boolean_t wildcard, struct ifnet *ifp) 1244 { 1245 struct inpcbhead *head; 1246 struct inpcb *inp, *jinp=NULL; 1247 u_short fport = fport_arg, lport = lport_arg; 1248 1249 /* 1250 * First look for an exact match. 1251 */ 1252 head = &pcbinfo->hashbase[INP_PCBCONNHASH(faddr.s_addr, fport, 1253 laddr.s_addr, lport, pcbinfo->hashmask)]; 1254 LIST_FOREACH(inp, head, inp_hash) { 1255 #ifdef INET6 1256 if (!(inp->inp_vflag & INP_IPV4)) 1257 continue; 1258 #endif 1259 if (in_hosteq(inp->inp_faddr, faddr) && 1260 in_hosteq(inp->inp_laddr, laddr) && 1261 inp->inp_fport == fport && inp->inp_lport == lport) { 1262 /* found */ 1263 if (inp->inp_socket == NULL || 1264 inp->inp_socket->so_cred->cr_prison == NULL) { 1265 return (inp); 1266 } else { 1267 if (jinp == NULL) 1268 jinp = inp; 1269 } 1270 } 1271 } 1272 if (jinp != NULL) 1273 return (jinp); 1274 if (wildcard) { 1275 struct inpcb *local_wild = NULL; 1276 struct inpcb *jinp_wild = NULL; 1277 #ifdef INET6 1278 struct inpcb *local_wild_mapped = NULL; 1279 #endif 1280 struct inpcontainer *ic; 1281 struct inpcontainerhead *chead; 1282 struct sockaddr_in jsin; 1283 struct ucred *cred; 1284 1285 /* 1286 * Order of socket selection: 1287 * 1. non-jailed, non-wild. 1288 * 2. non-jailed, wild. 1289 * 3. jailed, non-wild. 1290 * 4. jailed, wild. 1291 */ 1292 jsin.sin_family = AF_INET; 1293 chead = &pcbinfo->wildcardhashbase[ 1294 INP_PCBWILDCARDHASH(lport, pcbinfo->wildcardhashmask)]; 1295 LIST_FOREACH(ic, chead, ic_list) { 1296 inp = ic->ic_inp; 1297 jsin.sin_addr.s_addr = laddr.s_addr; 1298 #ifdef INET6 1299 if (!(inp->inp_vflag & INP_IPV4)) 1300 continue; 1301 #endif 1302 if (inp->inp_socket != NULL) 1303 cred = inp->inp_socket->so_cred; 1304 else 1305 cred = NULL; 1306 if (cred != NULL && jailed(cred)) { 1307 if (jinp != NULL) 1308 continue; 1309 else 1310 if (!jailed_ip(cred->cr_prison, 1311 (struct sockaddr *)&jsin)) 1312 continue; 1313 } 1314 if (inp->inp_lport == lport) { 1315 if (ifp && ifp->if_type == IFT_FAITH && 1316 !(inp->inp_flags & INP_FAITH)) 1317 continue; 1318 if (inp->inp_laddr.s_addr == laddr.s_addr) { 1319 if (cred != NULL && jailed(cred)) 1320 jinp = inp; 1321 else 1322 return (inp); 1323 } 1324 if (inp->inp_laddr.s_addr == INADDR_ANY) { 1325 #ifdef INET6 1326 if (INP_CHECK_SOCKAF(inp->inp_socket, 1327 AF_INET6)) 1328 local_wild_mapped = inp; 1329 else 1330 #endif 1331 if (cred != NULL && 1332 jailed(cred)) 1333 jinp_wild = inp; 1334 else 1335 local_wild = inp; 1336 } 1337 } 1338 } 1339 if (local_wild != NULL) 1340 return (local_wild); 1341 #ifdef INET6 1342 if (local_wild_mapped != NULL) 1343 return (local_wild_mapped); 1344 #endif 1345 if (jinp != NULL) 1346 return (jinp); 1347 return (jinp_wild); 1348 } 1349 1350 /* 1351 * Not found. 1352 */ 1353 return (NULL); 1354 } 1355 1356 /* 1357 * Insert PCB into connection hash table. 1358 */ 1359 void 1360 in_pcbinsconnhash(struct inpcb *inp) 1361 { 1362 struct inpcbinfo *pcbinfo = inp->inp_cpcbinfo; 1363 struct inpcbhead *bucket; 1364 u_int32_t hashkey_faddr, hashkey_laddr; 1365 1366 #ifdef INET6 1367 if (inp->inp_vflag & INP_IPV6) { 1368 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX JH */; 1369 hashkey_laddr = inp->in6p_laddr.s6_addr32[3] /* XXX JH */; 1370 } else { 1371 #endif 1372 hashkey_faddr = inp->inp_faddr.s_addr; 1373 hashkey_laddr = inp->inp_laddr.s_addr; 1374 #ifdef INET6 1375 } 1376 #endif 1377 1378 KASSERT(!(inp->inp_flags & INP_WILDCARD), 1379 ("already on wildcardhash\n")); 1380 KASSERT(!(inp->inp_flags & INP_CONNECTED), 1381 ("already on connhash\n")); 1382 inp->inp_flags |= INP_CONNECTED; 1383 1384 /* 1385 * Insert into the connection hash table. 1386 */ 1387 bucket = &pcbinfo->hashbase[INP_PCBCONNHASH(hashkey_faddr, 1388 inp->inp_fport, hashkey_laddr, inp->inp_lport, pcbinfo->hashmask)]; 1389 LIST_INSERT_HEAD(bucket, inp, inp_hash); 1390 } 1391 1392 /* 1393 * Remove PCB from connection hash table. 1394 */ 1395 void 1396 in_pcbremconnhash(struct inpcb *inp) 1397 { 1398 KASSERT(inp->inp_flags & INP_CONNECTED, ("inp not connected")); 1399 LIST_REMOVE(inp, inp_hash); 1400 inp->inp_flags &= ~INP_CONNECTED; 1401 } 1402 1403 /* 1404 * Insert PCB into port hash table. 1405 */ 1406 int 1407 in_pcbinsporthash(struct inpcb *inp) 1408 { 1409 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1410 struct inpcbporthead *pcbporthash; 1411 struct inpcbport *phd; 1412 1413 /* 1414 * If the porthashbase is shared across several cpus we need 1415 * to lock. 1416 */ 1417 if (pcbinfo->porttoken) 1418 lwkt_gettoken(pcbinfo->porttoken); 1419 1420 /* 1421 * Insert into the port hash table. 1422 */ 1423 pcbporthash = &pcbinfo->porthashbase[ 1424 INP_PCBPORTHASH(inp->inp_lport, pcbinfo->porthashmask)]; 1425 1426 /* Go through port list and look for a head for this lport. */ 1427 LIST_FOREACH(phd, pcbporthash, phd_hash) { 1428 if (phd->phd_port == inp->inp_lport) 1429 break; 1430 } 1431 1432 /* If none exists, malloc one and tack it on. */ 1433 if (phd == NULL) { 1434 KKASSERT(pcbinfo->portsave != NULL); 1435 phd = pcbinfo->portsave; 1436 pcbinfo->portsave = NULL; 1437 phd->phd_port = inp->inp_lport; 1438 LIST_INIT(&phd->phd_pcblist); 1439 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); 1440 } 1441 1442 inp->inp_phd = phd; 1443 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); 1444 1445 if (pcbinfo->porttoken) 1446 lwkt_reltoken(pcbinfo->porttoken); 1447 if (pcbinfo->portsave == NULL) { 1448 pcbinfo->portsave = kmalloc(sizeof(*pcbinfo->portsave), 1449 M_PCB, M_INTWAIT | M_ZERO); 1450 } 1451 return (0); 1452 } 1453 1454 void 1455 in_pcbinswildcardhash_oncpu(struct inpcb *inp, struct inpcbinfo *pcbinfo) 1456 { 1457 struct inpcontainer *ic; 1458 struct inpcontainerhead *bucket; 1459 1460 bucket = &pcbinfo->wildcardhashbase[ 1461 INP_PCBWILDCARDHASH(inp->inp_lport, pcbinfo->wildcardhashmask)]; 1462 1463 ic = kmalloc(sizeof(struct inpcontainer), M_TEMP, M_INTWAIT); 1464 ic->ic_inp = inp; 1465 LIST_INSERT_HEAD(bucket, ic, ic_list); 1466 } 1467 1468 /* 1469 * Insert PCB into wildcard hash table. 1470 */ 1471 void 1472 in_pcbinswildcardhash(struct inpcb *inp) 1473 { 1474 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1475 1476 KASSERT(!(inp->inp_flags & INP_CONNECTED), 1477 ("already on connhash\n")); 1478 KASSERT(!(inp->inp_flags & INP_WILDCARD), 1479 ("already on wildcardhash\n")); 1480 inp->inp_flags |= INP_WILDCARD; 1481 1482 in_pcbinswildcardhash_oncpu(inp, pcbinfo); 1483 } 1484 1485 void 1486 in_pcbremwildcardhash_oncpu(struct inpcb *inp, struct inpcbinfo *pcbinfo) 1487 { 1488 struct inpcontainer *ic; 1489 struct inpcontainerhead *head; 1490 1491 /* find bucket */ 1492 head = &pcbinfo->wildcardhashbase[ 1493 INP_PCBWILDCARDHASH(inp->inp_lport, pcbinfo->wildcardhashmask)]; 1494 1495 LIST_FOREACH(ic, head, ic_list) { 1496 if (ic->ic_inp == inp) 1497 goto found; 1498 } 1499 return; /* not found! */ 1500 1501 found: 1502 LIST_REMOVE(ic, ic_list); /* remove container from bucket chain */ 1503 kfree(ic, M_TEMP); /* deallocate container */ 1504 } 1505 1506 /* 1507 * Remove PCB from wildcard hash table. 1508 */ 1509 void 1510 in_pcbremwildcardhash(struct inpcb *inp) 1511 { 1512 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1513 1514 KASSERT(inp->inp_flags & INP_WILDCARD, ("inp not wildcard")); 1515 in_pcbremwildcardhash_oncpu(inp, pcbinfo); 1516 inp->inp_flags &= ~INP_WILDCARD; 1517 } 1518 1519 /* 1520 * Remove PCB from various lists. 1521 */ 1522 void 1523 in_pcbremlists(struct inpcb *inp) 1524 { 1525 struct inpcbinfo *pcbinfo; 1526 1527 if (inp->inp_lport) { 1528 struct inpcbport *phd; 1529 1530 pcbinfo = inp->inp_pcbinfo; 1531 if (pcbinfo->porttoken) 1532 lwkt_gettoken(pcbinfo->porttoken); 1533 1534 phd = inp->inp_phd; 1535 LIST_REMOVE(inp, inp_portlist); 1536 if (LIST_FIRST(&phd->phd_pcblist) == NULL) { 1537 LIST_REMOVE(phd, phd_hash); 1538 kfree(phd, M_PCB); 1539 } 1540 if (pcbinfo->porttoken) 1541 lwkt_reltoken(pcbinfo->porttoken); 1542 } 1543 if (inp->inp_flags & INP_WILDCARD) { 1544 in_pcbremwildcardhash(inp); 1545 } else if (inp->inp_flags & INP_CONNECTED) { 1546 in_pcbremconnhash(inp); 1547 } 1548 LIST_REMOVE(inp, inp_list); 1549 inp->inp_pcbinfo->ipi_count--; 1550 } 1551 1552 int 1553 prison_xinpcb(struct thread *td, struct inpcb *inp) 1554 { 1555 struct ucred *cr; 1556 1557 if (td->td_proc == NULL) 1558 return (0); 1559 cr = td->td_proc->p_ucred; 1560 if (cr->cr_prison == NULL) 1561 return (0); 1562 if (inp->inp_socket && inp->inp_socket->so_cred && 1563 inp->inp_socket->so_cred->cr_prison && 1564 cr->cr_prison == inp->inp_socket->so_cred->cr_prison) 1565 return (0); 1566 return (1); 1567 } 1568 1569 int 1570 in_pcblist_global(SYSCTL_HANDLER_ARGS) 1571 { 1572 struct inpcbinfo *pcbinfo = arg1; 1573 struct inpcb *inp, *marker; 1574 struct xinpcb xi; 1575 int error, i, n; 1576 1577 /* 1578 * The process of preparing the TCB list is too time-consuming and 1579 * resource-intensive to repeat twice on every request. 1580 */ 1581 if (req->oldptr == NULL) { 1582 n = pcbinfo->ipi_count; 1583 req->oldidx = (n + n/8 + 10) * sizeof(struct xinpcb); 1584 return 0; 1585 } 1586 1587 if (req->newptr != NULL) 1588 return EPERM; 1589 1590 /* 1591 * OK, now we're committed to doing something. Re-fetch ipi_count 1592 * after obtaining the generation count. 1593 */ 1594 n = pcbinfo->ipi_count; 1595 1596 marker = kmalloc(sizeof(struct inpcb), M_TEMP, M_WAITOK|M_ZERO); 1597 marker->inp_flags |= INP_PLACEMARKER; 1598 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, marker, inp_list); 1599 1600 i = 0; 1601 error = 0; 1602 1603 while ((inp = LIST_NEXT(marker, inp_list)) != NULL && i < n) { 1604 LIST_REMOVE(marker, inp_list); 1605 LIST_INSERT_AFTER(inp, marker, inp_list); 1606 1607 if (inp->inp_flags & INP_PLACEMARKER) 1608 continue; 1609 if (prison_xinpcb(req->td, inp)) 1610 continue; 1611 bzero(&xi, sizeof xi); 1612 xi.xi_len = sizeof xi; 1613 bcopy(inp, &xi.xi_inp, sizeof *inp); 1614 if (inp->inp_socket) 1615 sotoxsocket(inp->inp_socket, &xi.xi_socket); 1616 if ((error = SYSCTL_OUT(req, &xi, sizeof xi)) != 0) 1617 break; 1618 ++i; 1619 } 1620 LIST_REMOVE(marker, inp_list); 1621 if (error == 0 && i < n) { 1622 bzero(&xi, sizeof xi); 1623 xi.xi_len = sizeof xi; 1624 while (i < n) { 1625 error = SYSCTL_OUT(req, &xi, sizeof xi); 1626 ++i; 1627 } 1628 } 1629 kfree(marker, M_TEMP); 1630 return(error); 1631 } 1632 1633 int 1634 in_pcblist_global_nomarker(SYSCTL_HANDLER_ARGS, struct xinpcb **xi0, int *nxi0) 1635 { 1636 struct inpcbinfo *pcbinfo = arg1; 1637 struct inpcb *inp; 1638 struct xinpcb *xi; 1639 int nxi; 1640 1641 *nxi0 = 0; 1642 *xi0 = NULL; 1643 1644 /* 1645 * The process of preparing the PCB list is too time-consuming and 1646 * resource-intensive to repeat twice on every request. 1647 */ 1648 if (req->oldptr == NULL) { 1649 int n = pcbinfo->ipi_count; 1650 1651 req->oldidx = (n + n/8 + 10) * sizeof(struct xinpcb); 1652 return 0; 1653 } 1654 1655 if (req->newptr != NULL) 1656 return EPERM; 1657 1658 if (pcbinfo->ipi_count == 0) 1659 return 0; 1660 1661 nxi = 0; 1662 xi = kmalloc(pcbinfo->ipi_count * sizeof(*xi), M_TEMP, 1663 M_WAITOK | M_ZERO | M_NULLOK); 1664 if (xi == NULL) 1665 return ENOMEM; 1666 1667 LIST_FOREACH(inp, &pcbinfo->pcblisthead, inp_list) { 1668 struct xinpcb *xi_ptr = &xi[nxi]; 1669 1670 if (prison_xinpcb(req->td, inp)) 1671 continue; 1672 1673 xi_ptr->xi_len = sizeof(*xi_ptr); 1674 bcopy(inp, &xi_ptr->xi_inp, sizeof(*inp)); 1675 if (inp->inp_socket) 1676 sotoxsocket(inp->inp_socket, &xi_ptr->xi_socket); 1677 ++nxi; 1678 } 1679 1680 if (nxi == 0) { 1681 kfree(xi, M_TEMP); 1682 return 0; 1683 } 1684 1685 *nxi0 = nxi; 1686 *xi0 = xi; 1687 1688 return 0; 1689 } 1690