1 /* 2 * Copyright (c) 2004 Jeffrey M. Hsu. All rights reserved. 3 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to The DragonFly Project 6 * by Jeffrey M. Hsu. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of The DragonFly Project nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific, prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1982, 1986, 1991, 1993, 1995 36 * The Regents of the University of California. All rights reserved. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. All advertising materials mentioning features or use of this software 47 * must display the following acknowledgement: 48 * This product includes software developed by the University of 49 * California, Berkeley and its contributors. 50 * 4. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 67 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.27 2004/01/02 04:06:42 ambrisko Exp $ 68 * $DragonFly: src/sys/netinet/in_pcb.c,v 1.48 2008/11/08 03:38:23 sephe Exp $ 69 */ 70 71 #include "opt_ipsec.h" 72 #include "opt_inet6.h" 73 74 #include <sys/param.h> 75 #include <sys/systm.h> 76 #include <sys/malloc.h> 77 #include <sys/mbuf.h> 78 #include <sys/domain.h> 79 #include <sys/protosw.h> 80 #include <sys/socket.h> 81 #include <sys/socketvar.h> 82 #include <sys/proc.h> 83 #include <sys/priv.h> 84 #include <sys/jail.h> 85 #include <sys/kernel.h> 86 #include <sys/sysctl.h> 87 88 #include <sys/thread2.h> 89 #include <sys/socketvar2.h> 90 91 #include <machine/limits.h> 92 93 #include <net/if.h> 94 #include <net/if_types.h> 95 #include <net/route.h> 96 97 #include <netinet/in.h> 98 #include <netinet/in_pcb.h> 99 #include <netinet/in_var.h> 100 #include <netinet/ip_var.h> 101 #ifdef INET6 102 #include <netinet/ip6.h> 103 #include <netinet6/ip6_var.h> 104 #endif /* INET6 */ 105 106 #ifdef IPSEC 107 #include <netinet6/ipsec.h> 108 #include <netproto/key/key.h> 109 #endif 110 111 #ifdef FAST_IPSEC 112 #if defined(IPSEC) || defined(IPSEC_ESP) 113 #error "Bad idea: don't compile with both IPSEC and FAST_IPSEC!" 114 #endif 115 116 #include <netproto/ipsec/ipsec.h> 117 #include <netproto/ipsec/key.h> 118 #define IPSEC 119 #endif /* FAST_IPSEC */ 120 121 struct in_addr zeroin_addr; 122 123 /* 124 * These configure the range of local port addresses assigned to 125 * "unspecified" outgoing connections/packets/whatever. 126 */ 127 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */ 128 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */ 129 130 int ipport_firstauto = IPPORT_RESERVED; /* 1024 */ 131 int ipport_lastauto = IPPORT_USERRESERVED; /* 5000 */ 132 133 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ 134 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */ 135 136 #define RANGECHK(var, min, max) \ 137 if ((var) < (min)) { (var) = (min); } \ 138 else if ((var) > (max)) { (var) = (max); } 139 140 static int 141 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS) 142 { 143 int error; 144 145 error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); 146 if (!error) { 147 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); 148 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); 149 150 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX); 151 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX); 152 153 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX); 154 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX); 155 } 156 return (error); 157 } 158 159 #undef RANGECHK 160 161 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports"); 162 163 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW, 164 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); 165 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW, 166 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); 167 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW, 168 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); 169 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW, 170 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); 171 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW, 172 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); 173 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW, 174 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); 175 176 /* 177 * in_pcb.c: manage the Protocol Control Blocks. 178 * 179 * NOTE: It is assumed that most of these functions will be called from 180 * a critical section. XXX - There are, unfortunately, a few exceptions 181 * to this rule that should be fixed. 182 * 183 * NOTE: The caller should initialize the cpu field to the cpu running the 184 * protocol stack associated with this inpcbinfo. 185 */ 186 187 void 188 in_pcbinfo_init(struct inpcbinfo *pcbinfo) 189 { 190 LIST_INIT(&pcbinfo->pcblisthead); 191 pcbinfo->cpu = -1; 192 pcbinfo->portsave = kmalloc(sizeof(*pcbinfo->portsave), M_PCB, 193 M_WAITOK | M_ZERO); 194 } 195 196 /* 197 * Allocate a PCB and associate it with the socket. 198 */ 199 int 200 in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo) 201 { 202 struct inpcb *inp; 203 #ifdef IPSEC 204 int error; 205 #endif 206 207 inp = kmalloc(pcbinfo->ipi_size, M_PCB, M_WAITOK|M_ZERO); 208 inp->inp_gencnt = ++pcbinfo->ipi_gencnt; 209 inp->inp_pcbinfo = inp->inp_cpcbinfo = pcbinfo; 210 inp->inp_socket = so; 211 #ifdef IPSEC 212 error = ipsec_init_policy(so, &inp->inp_sp); 213 if (error != 0) { 214 kfree(inp, M_PCB); 215 return (error); 216 } 217 #endif 218 #ifdef INET6 219 if (INP_SOCKAF(so) == AF_INET6 && ip6_v6only) 220 inp->inp_flags |= IN6P_IPV6_V6ONLY; 221 if (ip6_auto_flowlabel) 222 inp->inp_flags |= IN6P_AUTOFLOWLABEL; 223 #endif 224 soreference(so); 225 so->so_pcb = inp; 226 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, inp, inp_list); 227 pcbinfo->ipi_count++; 228 return (0); 229 } 230 231 /* 232 * Unlink a pcb with the intention of moving it to another cpu with a 233 * different pcbinfo. While unlinked nothing should attempt to dereference 234 * inp_pcbinfo, NULL it out so we assert if it does. 235 */ 236 void 237 in_pcbunlink(struct inpcb *inp, struct inpcbinfo *pcbinfo) 238 { 239 KKASSERT(inp->inp_pcbinfo == pcbinfo); 240 241 LIST_REMOVE(inp, inp_list); 242 pcbinfo->ipi_count--; 243 inp->inp_pcbinfo = NULL; 244 } 245 246 /* 247 * Relink a pcb into a new pcbinfo. 248 */ 249 void 250 in_pcblink(struct inpcb *inp, struct inpcbinfo *pcbinfo) 251 { 252 KKASSERT(inp->inp_pcbinfo == NULL); 253 inp->inp_pcbinfo = pcbinfo; 254 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, inp, inp_list); 255 pcbinfo->ipi_count++; 256 } 257 258 int 259 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct thread *td) 260 { 261 struct socket *so = inp->inp_socket; 262 struct proc *p = td->td_proc; 263 unsigned short *lastport; 264 struct sockaddr_in *sin; 265 struct sockaddr_in jsin; 266 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 267 struct ucred *cred = NULL; 268 u_short lport = 0; 269 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); 270 int error; 271 272 KKASSERT(p); 273 274 if (TAILQ_EMPTY(&in_ifaddrheads[mycpuid])) /* XXX broken! */ 275 return (EADDRNOTAVAIL); 276 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) 277 return (EINVAL); /* already bound */ 278 279 if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT))) 280 wild = 1; /* neither SO_REUSEADDR nor SO_REUSEPORT is set */ 281 if (p) 282 cred = p->p_ucred; 283 284 /* 285 * This has to be atomic. If the porthash is shared across multiple 286 * protocol threads (aka tcp) then the token will be non-NULL. 287 */ 288 if (pcbinfo->porttoken) 289 lwkt_gettoken(pcbinfo->porttoken); 290 291 if (nam != NULL) { 292 sin = (struct sockaddr_in *)nam; 293 if (nam->sa_len != sizeof *sin) { 294 error = EINVAL; 295 goto done; 296 } 297 #ifdef notdef 298 /* 299 * We should check the family, but old programs 300 * incorrectly fail to initialize it. 301 */ 302 if (sin->sin_family != AF_INET) { 303 error = EAFNOSUPPORT; 304 goto done; 305 } 306 #endif 307 if (!prison_replace_wildcards(td, nam)) { 308 error = EINVAL; 309 goto done; 310 } 311 lport = sin->sin_port; 312 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 313 /* 314 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 315 * allow complete duplication of binding if 316 * SO_REUSEPORT is set, or if SO_REUSEADDR is set 317 * and a multicast address is bound on both 318 * new and duplicated sockets. 319 */ 320 if (so->so_options & SO_REUSEADDR) 321 reuseport = SO_REUSEADDR | SO_REUSEPORT; 322 } else if (sin->sin_addr.s_addr != INADDR_ANY) { 323 sin->sin_port = 0; /* yech... */ 324 bzero(&sin->sin_zero, sizeof sin->sin_zero); 325 if (ifa_ifwithaddr((struct sockaddr *)sin) == NULL) { 326 error = EADDRNOTAVAIL; 327 goto done; 328 } 329 } 330 if (lport != 0) { 331 struct inpcb *t; 332 333 /* GROSS */ 334 if (ntohs(lport) < IPPORT_RESERVED && 335 cred && 336 priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0)) { 337 error = EACCES; 338 goto done; 339 } 340 if (so->so_cred->cr_uid != 0 && 341 !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 342 t = in_pcblookup_local(pcbinfo, 343 sin->sin_addr, 344 lport, 345 INPLOOKUP_WILDCARD, 346 cred); 347 if (t && 348 (!in_nullhost(sin->sin_addr) || 349 !in_nullhost(t->inp_laddr) || 350 (t->inp_socket->so_options & 351 SO_REUSEPORT) == 0) && 352 (so->so_cred->cr_uid != 353 t->inp_socket->so_cred->cr_uid)) { 354 #ifdef INET6 355 if (!in_nullhost(sin->sin_addr) || 356 !in_nullhost(t->inp_laddr) || 357 INP_SOCKAF(so) == 358 INP_SOCKAF(t->inp_socket)) 359 #endif 360 { 361 error = EADDRINUSE; 362 goto done; 363 } 364 } 365 } 366 if (cred && !prison_replace_wildcards(td, nam)) { 367 error = EADDRNOTAVAIL; 368 goto done; 369 } 370 t = in_pcblookup_local(pcbinfo, sin->sin_addr, lport, 371 wild, cred); 372 if (t && !(reuseport & t->inp_socket->so_options)) { 373 #ifdef INET6 374 if (!in_nullhost(sin->sin_addr) || 375 !in_nullhost(t->inp_laddr) || 376 INP_SOCKAF(so) == INP_SOCKAF(t->inp_socket)) 377 #endif 378 { 379 error = EADDRINUSE; 380 goto done; 381 } 382 } 383 } 384 inp->inp_laddr = sin->sin_addr; 385 } 386 if (lport == 0) { 387 ushort first, last; 388 int count; 389 390 jsin.sin_family = AF_INET; 391 jsin.sin_addr.s_addr = inp->inp_laddr.s_addr; 392 if (!prison_replace_wildcards(td, (struct sockaddr *)&jsin)) { 393 inp->inp_laddr.s_addr = INADDR_ANY; 394 error = EINVAL; 395 goto done; 396 } 397 inp->inp_laddr.s_addr = jsin.sin_addr.s_addr; 398 399 inp->inp_flags |= INP_ANONPORT; 400 401 if (inp->inp_flags & INP_HIGHPORT) { 402 first = ipport_hifirstauto; /* sysctl */ 403 last = ipport_hilastauto; 404 lastport = &pcbinfo->lasthi; 405 } else if (inp->inp_flags & INP_LOWPORT) { 406 if (cred && 407 (error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0))) { 408 inp->inp_laddr.s_addr = INADDR_ANY; 409 goto done; 410 } 411 first = ipport_lowfirstauto; /* 1023 */ 412 last = ipport_lowlastauto; /* 600 */ 413 lastport = &pcbinfo->lastlow; 414 } else { 415 first = ipport_firstauto; /* sysctl */ 416 last = ipport_lastauto; 417 lastport = &pcbinfo->lastport; 418 } 419 /* 420 * Simple check to ensure all ports are not used up causing 421 * a deadlock here. 422 * 423 * We split the two cases (up and down) so that the direction 424 * is not being tested on each round of the loop. 425 */ 426 if (first > last) { 427 /* 428 * counting down 429 */ 430 count = first - last; 431 432 do { 433 if (count-- < 0) { /* completely used? */ 434 inp->inp_laddr.s_addr = INADDR_ANY; 435 error = EADDRNOTAVAIL; 436 goto done; 437 } 438 --*lastport; 439 if (*lastport > first || *lastport < last) 440 *lastport = first; 441 lport = htons(*lastport); 442 } while (in_pcblookup_local(pcbinfo, inp->inp_laddr, 443 lport, wild, cred)); 444 } else { 445 /* 446 * counting up 447 */ 448 count = last - first; 449 450 do { 451 if (count-- < 0) { /* completely used? */ 452 inp->inp_laddr.s_addr = INADDR_ANY; 453 error = EADDRNOTAVAIL; 454 goto done; 455 } 456 ++*lastport; 457 if (*lastport < first || *lastport > last) 458 *lastport = first; 459 lport = htons(*lastport); 460 } while (in_pcblookup_local(pcbinfo, inp->inp_laddr, 461 lport, wild, cred)); 462 } 463 } 464 inp->inp_lport = lport; 465 466 jsin.sin_family = AF_INET; 467 jsin.sin_addr.s_addr = inp->inp_laddr.s_addr; 468 if (!prison_replace_wildcards(td, (struct sockaddr*)&jsin)) { 469 inp->inp_laddr.s_addr = INADDR_ANY; 470 inp->inp_lport = 0; 471 error = EINVAL; 472 goto done; 473 } 474 inp->inp_laddr.s_addr = jsin.sin_addr.s_addr; 475 476 if (in_pcbinsporthash(inp) != 0) { 477 inp->inp_laddr.s_addr = INADDR_ANY; 478 inp->inp_lport = 0; 479 error = EAGAIN; 480 goto done; 481 } 482 error = 0; 483 done: 484 if (pcbinfo->porttoken) 485 lwkt_reltoken(pcbinfo->porttoken); 486 return error; 487 } 488 489 /* 490 * Transform old in_pcbconnect() into an inner subroutine for new 491 * in_pcbconnect(): Do some validity-checking on the remote 492 * address (in mbuf 'nam') and then determine local host address 493 * (i.e., which interface) to use to access that remote host. 494 * 495 * This preserves definition of in_pcbconnect(), while supporting a 496 * slightly different version for T/TCP. (This is more than 497 * a bit of a kludge, but cleaning up the internal interfaces would 498 * have forced minor changes in every protocol). 499 */ 500 int 501 in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, 502 struct sockaddr_in **plocal_sin, struct thread *td) 503 { 504 struct in_ifaddr *ia; 505 struct ucred *cred = NULL; 506 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 507 struct sockaddr *jsin; 508 int jailed = 0, alloc_route = 0; 509 510 if (nam->sa_len != sizeof *sin) 511 return (EINVAL); 512 if (sin->sin_family != AF_INET) 513 return (EAFNOSUPPORT); 514 if (sin->sin_port == 0) 515 return (EADDRNOTAVAIL); 516 if (td && td->td_proc && td->td_proc->p_ucred) 517 cred = td->td_proc->p_ucred; 518 if (cred && cred->cr_prison) 519 jailed = 1; 520 if (!TAILQ_EMPTY(&in_ifaddrheads[mycpuid])) { 521 ia = TAILQ_FIRST(&in_ifaddrheads[mycpuid])->ia; 522 /* 523 * If the destination address is INADDR_ANY, 524 * use the primary local address. 525 * If the supplied address is INADDR_BROADCAST, 526 * and the primary interface supports broadcast, 527 * choose the broadcast address for that interface. 528 */ 529 if (sin->sin_addr.s_addr == INADDR_ANY) 530 sin->sin_addr = IA_SIN(ia)->sin_addr; 531 else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST && 532 (ia->ia_ifp->if_flags & IFF_BROADCAST)) 533 sin->sin_addr = satosin(&ia->ia_broadaddr)->sin_addr; 534 } 535 if (inp->inp_laddr.s_addr == INADDR_ANY) { 536 struct route *ro; 537 538 ia = NULL; 539 /* 540 * If route is known or can be allocated now, 541 * our src addr is taken from the i/f, else punt. 542 * Note that we should check the address family of the cached 543 * destination, in case of sharing the cache with IPv6. 544 */ 545 ro = &inp->inp_route; 546 if (ro->ro_rt && 547 (!(ro->ro_rt->rt_flags & RTF_UP) || 548 ro->ro_dst.sa_family != AF_INET || 549 satosin(&ro->ro_dst)->sin_addr.s_addr != 550 sin->sin_addr.s_addr || 551 inp->inp_socket->so_options & SO_DONTROUTE)) { 552 RTFREE(ro->ro_rt); 553 ro->ro_rt = NULL; 554 } 555 if (!(inp->inp_socket->so_options & SO_DONTROUTE) && /*XXX*/ 556 (ro->ro_rt == NULL || 557 ro->ro_rt->rt_ifp == NULL)) { 558 /* No route yet, so try to acquire one */ 559 bzero(&ro->ro_dst, sizeof(struct sockaddr_in)); 560 ro->ro_dst.sa_family = AF_INET; 561 ro->ro_dst.sa_len = sizeof(struct sockaddr_in); 562 ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = 563 sin->sin_addr; 564 rtalloc(ro); 565 alloc_route = 1; 566 } 567 /* 568 * If we found a route, use the address 569 * corresponding to the outgoing interface 570 * unless it is the loopback (in case a route 571 * to our address on another net goes to loopback). 572 */ 573 if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) { 574 if (jailed) { 575 if (jailed_ip(cred->cr_prison, 576 ro->ro_rt->rt_ifa->ifa_addr)) { 577 ia = ifatoia(ro->ro_rt->rt_ifa); 578 } 579 } else { 580 ia = ifatoia(ro->ro_rt->rt_ifa); 581 } 582 } 583 if (ia == NULL) { 584 u_short fport = sin->sin_port; 585 586 sin->sin_port = 0; 587 ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin))); 588 if (ia && jailed && !jailed_ip(cred->cr_prison, 589 sintosa(&ia->ia_addr))) 590 ia = NULL; 591 if (ia == NULL) 592 ia = ifatoia(ifa_ifwithnet(sintosa(sin))); 593 if (ia && jailed && !jailed_ip(cred->cr_prison, 594 sintosa(&ia->ia_addr))) 595 ia = NULL; 596 sin->sin_port = fport; 597 if (ia == NULL && 598 !TAILQ_EMPTY(&in_ifaddrheads[mycpuid])) 599 ia = TAILQ_FIRST(&in_ifaddrheads[mycpuid])->ia; 600 if (ia && jailed && !jailed_ip(cred->cr_prison, 601 sintosa(&ia->ia_addr))) 602 ia = NULL; 603 604 if (!jailed && ia == NULL) 605 goto fail; 606 } 607 /* 608 * If the destination address is multicast and an outgoing 609 * interface has been set as a multicast option, use the 610 * address of that interface as our source address. 611 */ 612 if (!jailed && IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) && 613 inp->inp_moptions != NULL) { 614 struct ip_moptions *imo; 615 struct ifnet *ifp; 616 617 imo = inp->inp_moptions; 618 if (imo->imo_multicast_ifp != NULL) { 619 struct in_ifaddr_container *iac; 620 621 ifp = imo->imo_multicast_ifp; 622 ia = NULL; 623 TAILQ_FOREACH(iac, 624 &in_ifaddrheads[mycpuid], ia_link) { 625 if (iac->ia->ia_ifp == ifp) { 626 ia = iac->ia; 627 break; 628 } 629 } 630 if (ia == NULL) 631 goto fail; 632 } 633 } 634 /* 635 * Don't do pcblookup call here; return interface in plocal_sin 636 * and exit to caller, that will do the lookup. 637 */ 638 if (ia == NULL && jailed) { 639 if ((jsin = prison_get_nonlocal(cred->cr_prison, AF_INET, NULL)) != NULL || 640 (jsin = prison_get_local(cred->cr_prison, AF_INET, NULL)) != NULL) { 641 *plocal_sin = satosin(jsin); 642 } else { 643 /* IPv6 only Jail */ 644 goto fail; 645 } 646 } else { 647 *plocal_sin = &ia->ia_addr; 648 } 649 } 650 return (0); 651 fail: 652 if (alloc_route) { 653 struct route *ro = &inp->inp_route; 654 655 if (ro->ro_rt != NULL) 656 RTFREE(ro->ro_rt); 657 bzero(ro, sizeof(*ro)); 658 } 659 return (EADDRNOTAVAIL); 660 } 661 662 /* 663 * Outer subroutine: 664 * Connect from a socket to a specified address. 665 * Both address and port must be specified in argument sin. 666 * If don't have a local address for this socket yet, 667 * then pick one. 668 */ 669 int 670 in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct thread *td) 671 { 672 struct sockaddr_in *if_sin; 673 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 674 int error; 675 676 /* Call inner routine to assign local interface address. */ 677 if ((error = in_pcbladdr(inp, nam, &if_sin, td)) != 0) 678 return (error); 679 680 if (in_pcblookup_hash(inp->inp_cpcbinfo, sin->sin_addr, sin->sin_port, 681 inp->inp_laddr.s_addr ? 682 inp->inp_laddr : if_sin->sin_addr, 683 inp->inp_lport, FALSE, NULL) != NULL) { 684 return (EADDRINUSE); 685 } 686 if (inp->inp_laddr.s_addr == INADDR_ANY) { 687 if (inp->inp_lport == 0) { 688 error = in_pcbbind(inp, NULL, td); 689 if (error) 690 return (error); 691 } 692 inp->inp_laddr = if_sin->sin_addr; 693 } 694 inp->inp_faddr = sin->sin_addr; 695 inp->inp_fport = sin->sin_port; 696 in_pcbinsconnhash(inp); 697 return (0); 698 } 699 700 void 701 in_pcbdisconnect(struct inpcb *inp) 702 { 703 704 inp->inp_faddr.s_addr = INADDR_ANY; 705 inp->inp_fport = 0; 706 in_pcbremconnhash(inp); 707 if (inp->inp_socket->so_state & SS_NOFDREF) 708 in_pcbdetach(inp); 709 } 710 711 void 712 in_pcbdetach(struct inpcb *inp) 713 { 714 struct socket *so = inp->inp_socket; 715 struct inpcbinfo *ipi = inp->inp_pcbinfo; 716 717 #ifdef IPSEC 718 ipsec4_delete_pcbpolicy(inp); 719 #endif /*IPSEC*/ 720 inp->inp_gencnt = ++ipi->ipi_gencnt; 721 KKASSERT((so->so_state & SS_ASSERTINPROG) == 0); 722 in_pcbremlists(inp); 723 so->so_pcb = NULL; 724 sofree(so); /* remove pcb ref */ 725 if (inp->inp_options) 726 m_free(inp->inp_options); 727 if (inp->inp_route.ro_rt) 728 rtfree(inp->inp_route.ro_rt); 729 ip_freemoptions(inp->inp_moptions); 730 inp->inp_vflag = 0; 731 kfree(inp, M_PCB); 732 } 733 734 /* 735 * The calling convention of in_setsockaddr() and in_setpeeraddr() was 736 * modified to match the pru_sockaddr() and pru_peeraddr() entry points 737 * in struct pr_usrreqs, so that protocols can just reference then directly 738 * without the need for a wrapper function. The socket must have a valid 739 * (i.e., non-nil) PCB, but it should be impossible to get an invalid one 740 * except through a kernel programming error, so it is acceptable to panic 741 * (or in this case trap) if the PCB is invalid. (Actually, we don't trap 742 * because there actually /is/ a programming error somewhere... XXX) 743 */ 744 int 745 in_setsockaddr(struct socket *so, struct sockaddr **nam) 746 { 747 struct inpcb *inp; 748 struct sockaddr_in *sin; 749 750 /* 751 * Do the malloc first in case it blocks. 752 */ 753 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, 754 M_WAITOK | M_ZERO); 755 sin->sin_family = AF_INET; 756 sin->sin_len = sizeof *sin; 757 758 crit_enter(); 759 inp = so->so_pcb; 760 if (!inp) { 761 crit_exit(); 762 kfree(sin, M_SONAME); 763 return (ECONNRESET); 764 } 765 sin->sin_port = inp->inp_lport; 766 sin->sin_addr = inp->inp_laddr; 767 crit_exit(); 768 769 *nam = (struct sockaddr *)sin; 770 return (0); 771 } 772 773 int 774 in_setpeeraddr(struct socket *so, struct sockaddr **nam) 775 { 776 struct inpcb *inp; 777 struct sockaddr_in *sin; 778 779 /* 780 * Do the malloc first in case it blocks. 781 */ 782 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, 783 M_WAITOK | M_ZERO); 784 sin->sin_family = AF_INET; 785 sin->sin_len = sizeof *sin; 786 787 crit_enter(); 788 inp = so->so_pcb; 789 if (!inp) { 790 crit_exit(); 791 kfree(sin, M_SONAME); 792 return (ECONNRESET); 793 } 794 sin->sin_port = inp->inp_fport; 795 sin->sin_addr = inp->inp_faddr; 796 crit_exit(); 797 798 *nam = (struct sockaddr *)sin; 799 return (0); 800 } 801 802 void 803 in_pcbnotifyall(struct inpcbhead *head, struct in_addr faddr, int err, 804 void (*notify)(struct inpcb *, int)) 805 { 806 struct inpcb *inp, *ninp; 807 808 /* 809 * note: if INP_PLACEMARKER is set we must ignore the rest of 810 * the structure and skip it. 811 */ 812 crit_enter(); 813 LIST_FOREACH_MUTABLE(inp, head, inp_list, ninp) { 814 if (inp->inp_flags & INP_PLACEMARKER) 815 continue; 816 #ifdef INET6 817 if (!(inp->inp_vflag & INP_IPV4)) 818 continue; 819 #endif 820 if (inp->inp_faddr.s_addr != faddr.s_addr || 821 inp->inp_socket == NULL) 822 continue; 823 (*notify)(inp, err); /* can remove inp from list! */ 824 } 825 crit_exit(); 826 } 827 828 void 829 in_pcbpurgeif0(struct inpcb *head, struct ifnet *ifp) 830 { 831 struct inpcb *inp; 832 struct ip_moptions *imo; 833 int i, gap; 834 835 for (inp = head; inp != NULL; inp = LIST_NEXT(inp, inp_list)) { 836 if (inp->inp_flags & INP_PLACEMARKER) 837 continue; 838 imo = inp->inp_moptions; 839 if ((inp->inp_vflag & INP_IPV4) && imo != NULL) { 840 /* 841 * Unselect the outgoing interface if it is being 842 * detached. 843 */ 844 if (imo->imo_multicast_ifp == ifp) 845 imo->imo_multicast_ifp = NULL; 846 847 /* 848 * Drop multicast group membership if we joined 849 * through the interface being detached. 850 */ 851 for (i = 0, gap = 0; i < imo->imo_num_memberships; 852 i++) { 853 if (imo->imo_membership[i]->inm_ifp == ifp) { 854 in_delmulti(imo->imo_membership[i]); 855 gap++; 856 } else if (gap != 0) 857 imo->imo_membership[i - gap] = 858 imo->imo_membership[i]; 859 } 860 imo->imo_num_memberships -= gap; 861 } 862 } 863 } 864 865 /* 866 * Check for alternatives when higher level complains 867 * about service problems. For now, invalidate cached 868 * routing information. If the route was created dynamically 869 * (by a redirect), time to try a default gateway again. 870 */ 871 void 872 in_losing(struct inpcb *inp) 873 { 874 struct rtentry *rt; 875 struct rt_addrinfo rtinfo; 876 877 if ((rt = inp->inp_route.ro_rt)) { 878 bzero(&rtinfo, sizeof(struct rt_addrinfo)); 879 rtinfo.rti_info[RTAX_DST] = rt_key(rt); 880 rtinfo.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 881 rtinfo.rti_info[RTAX_NETMASK] = rt_mask(rt); 882 rtinfo.rti_flags = rt->rt_flags; 883 rt_missmsg(RTM_LOSING, &rtinfo, rt->rt_flags, 0); 884 if (rt->rt_flags & RTF_DYNAMIC) 885 rtrequest1_global(RTM_DELETE, &rtinfo, NULL, NULL); 886 inp->inp_route.ro_rt = NULL; 887 rtfree(rt); 888 /* 889 * A new route can be allocated 890 * the next time output is attempted. 891 */ 892 } 893 } 894 895 /* 896 * After a routing change, flush old routing 897 * and allocate a (hopefully) better one. 898 */ 899 void 900 in_rtchange(struct inpcb *inp, int err) 901 { 902 if (inp->inp_route.ro_rt) { 903 rtfree(inp->inp_route.ro_rt); 904 inp->inp_route.ro_rt = NULL; 905 /* 906 * A new route can be allocated the next time 907 * output is attempted. 908 */ 909 } 910 } 911 912 /* 913 * Lookup a PCB based on the local address and port. 914 */ 915 struct inpcb * 916 in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, 917 u_int lport_arg, int wild_okay, struct ucred *cred) 918 { 919 struct inpcb *inp; 920 int matchwild = 3, wildcard; 921 u_short lport = lport_arg; 922 struct inpcbporthead *porthash; 923 struct inpcbport *phd; 924 struct inpcb *match = NULL; 925 926 /* 927 * If the porthashbase is shared across several cpus we need 928 * to lock. 929 */ 930 if (pcbinfo->porttoken) 931 lwkt_gettoken(pcbinfo->porttoken); 932 933 /* 934 * Best fit PCB lookup. 935 * 936 * First see if this local port is in use by looking on the 937 * port hash list. 938 */ 939 porthash = &pcbinfo->porthashbase[ 940 INP_PCBPORTHASH(lport, pcbinfo->porthashmask)]; 941 LIST_FOREACH(phd, porthash, phd_hash) { 942 if (phd->phd_port == lport) 943 break; 944 } 945 if (phd != NULL) { 946 /* 947 * Port is in use by one or more PCBs. Look for best 948 * fit. 949 */ 950 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { 951 wildcard = 0; 952 #ifdef INET6 953 if ((inp->inp_vflag & INP_IPV4) == 0) 954 continue; 955 #endif 956 if (inp->inp_faddr.s_addr != INADDR_ANY) 957 wildcard++; 958 if (inp->inp_laddr.s_addr != INADDR_ANY) { 959 if (laddr.s_addr == INADDR_ANY) 960 wildcard++; 961 else if (inp->inp_laddr.s_addr != laddr.s_addr) 962 continue; 963 } else { 964 if (laddr.s_addr != INADDR_ANY) 965 wildcard++; 966 } 967 if (wildcard && !wild_okay) 968 continue; 969 if (wildcard < matchwild && 970 (cred == NULL || 971 cred->cr_prison == 972 inp->inp_socket->so_cred->cr_prison)) { 973 match = inp; 974 matchwild = wildcard; 975 if (matchwild == 0) { 976 break; 977 } 978 } 979 } 980 } 981 if (pcbinfo->porttoken) 982 lwkt_reltoken(pcbinfo->porttoken); 983 return (match); 984 } 985 986 /* 987 * Lookup PCB in hash list. 988 */ 989 struct inpcb * 990 in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr, 991 u_int fport_arg, struct in_addr laddr, u_int lport_arg, 992 boolean_t wildcard, struct ifnet *ifp) 993 { 994 struct inpcbhead *head; 995 struct inpcb *inp, *jinp=NULL; 996 u_short fport = fport_arg, lport = lport_arg; 997 998 /* 999 * First look for an exact match. 1000 */ 1001 head = &pcbinfo->hashbase[INP_PCBCONNHASH(faddr.s_addr, fport, 1002 laddr.s_addr, lport, pcbinfo->hashmask)]; 1003 LIST_FOREACH(inp, head, inp_hash) { 1004 #ifdef INET6 1005 if (!(inp->inp_vflag & INP_IPV4)) 1006 continue; 1007 #endif 1008 if (in_hosteq(inp->inp_faddr, faddr) && 1009 in_hosteq(inp->inp_laddr, laddr) && 1010 inp->inp_fport == fport && inp->inp_lport == lport) { 1011 /* found */ 1012 if (inp->inp_socket == NULL || 1013 inp->inp_socket->so_cred->cr_prison == NULL) { 1014 return (inp); 1015 } else { 1016 if (jinp == NULL) 1017 jinp = inp; 1018 } 1019 } 1020 } 1021 if (jinp != NULL) 1022 return (jinp); 1023 if (wildcard) { 1024 struct inpcb *local_wild = NULL; 1025 struct inpcb *jinp_wild = NULL; 1026 #ifdef INET6 1027 struct inpcb *local_wild_mapped = NULL; 1028 #endif 1029 struct inpcontainer *ic; 1030 struct inpcontainerhead *chead; 1031 struct sockaddr_in jsin; 1032 struct ucred *cred; 1033 1034 /* 1035 * Order of socket selection: 1036 * 1. non-jailed, non-wild. 1037 * 2. non-jailed, wild. 1038 * 3. jailed, non-wild. 1039 * 4. jailed, wild. 1040 */ 1041 jsin.sin_family = AF_INET; 1042 chead = &pcbinfo->wildcardhashbase[ 1043 INP_PCBWILDCARDHASH(lport, pcbinfo->wildcardhashmask)]; 1044 LIST_FOREACH(ic, chead, ic_list) { 1045 inp = ic->ic_inp; 1046 jsin.sin_addr.s_addr = laddr.s_addr; 1047 #ifdef INET6 1048 if (!(inp->inp_vflag & INP_IPV4)) 1049 continue; 1050 #endif 1051 if (inp->inp_socket != NULL) 1052 cred = inp->inp_socket->so_cred; 1053 else 1054 cred = NULL; 1055 if (cred != NULL && jailed(cred)) { 1056 if (jinp != NULL) 1057 continue; 1058 else 1059 if (!jailed_ip(cred->cr_prison, 1060 (struct sockaddr *)&jsin)) 1061 continue; 1062 } 1063 if (inp->inp_lport == lport) { 1064 if (ifp && ifp->if_type == IFT_FAITH && 1065 !(inp->inp_flags & INP_FAITH)) 1066 continue; 1067 if (inp->inp_laddr.s_addr == laddr.s_addr) { 1068 if (cred != NULL && jailed(cred)) 1069 jinp = inp; 1070 else 1071 return (inp); 1072 } 1073 if (inp->inp_laddr.s_addr == INADDR_ANY) { 1074 #ifdef INET6 1075 if (INP_CHECK_SOCKAF(inp->inp_socket, 1076 AF_INET6)) 1077 local_wild_mapped = inp; 1078 else 1079 #endif 1080 if (cred != NULL && 1081 jailed(cred)) 1082 jinp_wild = inp; 1083 else 1084 local_wild = inp; 1085 } 1086 } 1087 } 1088 if (local_wild != NULL) 1089 return (local_wild); 1090 #ifdef INET6 1091 if (local_wild_mapped != NULL) 1092 return (local_wild_mapped); 1093 #endif 1094 if (jinp != NULL) 1095 return (jinp); 1096 return (jinp_wild); 1097 } 1098 1099 /* 1100 * Not found. 1101 */ 1102 return (NULL); 1103 } 1104 1105 /* 1106 * Insert PCB into connection hash table. 1107 */ 1108 void 1109 in_pcbinsconnhash(struct inpcb *inp) 1110 { 1111 struct inpcbinfo *pcbinfo = inp->inp_cpcbinfo; 1112 struct inpcbhead *bucket; 1113 u_int32_t hashkey_faddr, hashkey_laddr; 1114 1115 #ifdef INET6 1116 if (inp->inp_vflag & INP_IPV6) { 1117 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX JH */; 1118 hashkey_laddr = inp->in6p_laddr.s6_addr32[3] /* XXX JH */; 1119 } else { 1120 #endif 1121 hashkey_faddr = inp->inp_faddr.s_addr; 1122 hashkey_laddr = inp->inp_laddr.s_addr; 1123 #ifdef INET6 1124 } 1125 #endif 1126 1127 KASSERT(!(inp->inp_flags & INP_CONNECTED), ("already on hash list")); 1128 inp->inp_flags |= INP_CONNECTED; 1129 1130 /* 1131 * Insert into the connection hash table. 1132 */ 1133 bucket = &pcbinfo->hashbase[INP_PCBCONNHASH(hashkey_faddr, 1134 inp->inp_fport, hashkey_laddr, inp->inp_lport, pcbinfo->hashmask)]; 1135 LIST_INSERT_HEAD(bucket, inp, inp_hash); 1136 } 1137 1138 /* 1139 * Remove PCB from connection hash table. 1140 */ 1141 void 1142 in_pcbremconnhash(struct inpcb *inp) 1143 { 1144 KASSERT(inp->inp_flags & INP_CONNECTED, ("inp not connected")); 1145 LIST_REMOVE(inp, inp_hash); 1146 inp->inp_flags &= ~INP_CONNECTED; 1147 } 1148 1149 /* 1150 * Insert PCB into port hash table. 1151 */ 1152 int 1153 in_pcbinsporthash(struct inpcb *inp) 1154 { 1155 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1156 struct inpcbporthead *pcbporthash; 1157 struct inpcbport *phd; 1158 1159 /* 1160 * If the porthashbase is shared across several cpus we need 1161 * to lock. 1162 */ 1163 if (pcbinfo->porttoken) 1164 lwkt_gettoken(pcbinfo->porttoken); 1165 1166 /* 1167 * Insert into the port hash table. 1168 */ 1169 pcbporthash = &pcbinfo->porthashbase[ 1170 INP_PCBPORTHASH(inp->inp_lport, pcbinfo->porthashmask)]; 1171 1172 /* Go through port list and look for a head for this lport. */ 1173 LIST_FOREACH(phd, pcbporthash, phd_hash) { 1174 if (phd->phd_port == inp->inp_lport) 1175 break; 1176 } 1177 1178 /* If none exists, malloc one and tack it on. */ 1179 if (phd == NULL) { 1180 KKASSERT(pcbinfo->portsave != NULL); 1181 phd = pcbinfo->portsave; 1182 pcbinfo->portsave = NULL; 1183 phd->phd_port = inp->inp_lport; 1184 LIST_INIT(&phd->phd_pcblist); 1185 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); 1186 } 1187 1188 inp->inp_phd = phd; 1189 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); 1190 1191 if (pcbinfo->porttoken) 1192 lwkt_reltoken(pcbinfo->porttoken); 1193 if (pcbinfo->portsave == NULL) { 1194 pcbinfo->portsave = kmalloc(sizeof(*pcbinfo->portsave), 1195 M_PCB, M_INTWAIT | M_ZERO); 1196 } 1197 return (0); 1198 } 1199 1200 void 1201 in_pcbinswildcardhash_oncpu(struct inpcb *inp, struct inpcbinfo *pcbinfo) 1202 { 1203 struct inpcontainer *ic; 1204 struct inpcontainerhead *bucket; 1205 1206 bucket = &pcbinfo->wildcardhashbase[ 1207 INP_PCBWILDCARDHASH(inp->inp_lport, pcbinfo->wildcardhashmask)]; 1208 1209 ic = kmalloc(sizeof(struct inpcontainer), M_TEMP, M_INTWAIT); 1210 ic->ic_inp = inp; 1211 LIST_INSERT_HEAD(bucket, ic, ic_list); 1212 } 1213 1214 /* 1215 * Insert PCB into wildcard hash table. 1216 */ 1217 void 1218 in_pcbinswildcardhash(struct inpcb *inp) 1219 { 1220 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1221 1222 KKASSERT(pcbinfo != NULL); 1223 1224 in_pcbinswildcardhash_oncpu(inp, pcbinfo); 1225 inp->inp_flags |= INP_WILDCARD; 1226 } 1227 1228 void 1229 in_pcbremwildcardhash_oncpu(struct inpcb *inp, struct inpcbinfo *pcbinfo) 1230 { 1231 struct inpcontainer *ic; 1232 struct inpcontainerhead *head; 1233 1234 /* find bucket */ 1235 head = &pcbinfo->wildcardhashbase[ 1236 INP_PCBWILDCARDHASH(inp->inp_lport, pcbinfo->wildcardhashmask)]; 1237 1238 LIST_FOREACH(ic, head, ic_list) { 1239 if (ic->ic_inp == inp) 1240 goto found; 1241 } 1242 return; /* not found! */ 1243 1244 found: 1245 LIST_REMOVE(ic, ic_list); /* remove container from bucket chain */ 1246 kfree(ic, M_TEMP); /* deallocate container */ 1247 } 1248 1249 /* 1250 * Remove PCB from wildcard hash table. 1251 */ 1252 void 1253 in_pcbremwildcardhash(struct inpcb *inp) 1254 { 1255 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1256 1257 KASSERT(inp->inp_flags & INP_WILDCARD, ("inp not wildcard")); 1258 in_pcbremwildcardhash_oncpu(inp, pcbinfo); 1259 inp->inp_flags &= ~INP_WILDCARD; 1260 } 1261 1262 /* 1263 * Remove PCB from various lists. 1264 */ 1265 void 1266 in_pcbremlists(struct inpcb *inp) 1267 { 1268 struct inpcbinfo *pcbinfo; 1269 1270 if (inp->inp_lport) { 1271 struct inpcbport *phd; 1272 1273 pcbinfo = inp->inp_pcbinfo; 1274 if (pcbinfo->porttoken) 1275 lwkt_gettoken(pcbinfo->porttoken); 1276 1277 phd = inp->inp_phd; 1278 LIST_REMOVE(inp, inp_portlist); 1279 if (LIST_FIRST(&phd->phd_pcblist) == NULL) { 1280 LIST_REMOVE(phd, phd_hash); 1281 kfree(phd, M_PCB); 1282 } 1283 if (pcbinfo->porttoken) 1284 lwkt_reltoken(pcbinfo->porttoken); 1285 } 1286 if (inp->inp_flags & INP_WILDCARD) { 1287 in_pcbremwildcardhash(inp); 1288 } else if (inp->inp_flags & INP_CONNECTED) { 1289 in_pcbremconnhash(inp); 1290 } 1291 LIST_REMOVE(inp, inp_list); 1292 inp->inp_pcbinfo->ipi_count--; 1293 } 1294 1295 int 1296 prison_xinpcb(struct thread *td, struct inpcb *inp) 1297 { 1298 struct ucred *cr; 1299 1300 if (td->td_proc == NULL) 1301 return (0); 1302 cr = td->td_proc->p_ucred; 1303 if (cr->cr_prison == NULL) 1304 return (0); 1305 if (inp->inp_socket && inp->inp_socket->so_cred && 1306 inp->inp_socket->so_cred->cr_prison && 1307 cr->cr_prison == inp->inp_socket->so_cred->cr_prison) 1308 return (0); 1309 return (1); 1310 } 1311 1312 int 1313 in_pcblist_global(SYSCTL_HANDLER_ARGS) 1314 { 1315 struct inpcbinfo *pcbinfo = arg1; 1316 struct inpcb *inp, *marker; 1317 struct xinpcb xi; 1318 int error, i, n; 1319 inp_gen_t gencnt; 1320 1321 /* 1322 * The process of preparing the TCB list is too time-consuming and 1323 * resource-intensive to repeat twice on every request. 1324 */ 1325 if (req->oldptr == NULL) { 1326 n = pcbinfo->ipi_count; 1327 req->oldidx = (n + n/8 + 10) * sizeof(struct xinpcb); 1328 return 0; 1329 } 1330 1331 if (req->newptr != NULL) 1332 return EPERM; 1333 1334 /* 1335 * OK, now we're committed to doing something. Re-fetch ipi_count 1336 * after obtaining the generation count. 1337 */ 1338 gencnt = pcbinfo->ipi_gencnt; 1339 n = pcbinfo->ipi_count; 1340 1341 marker = kmalloc(sizeof(struct inpcb), M_TEMP, M_WAITOK|M_ZERO); 1342 marker->inp_flags |= INP_PLACEMARKER; 1343 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, marker, inp_list); 1344 1345 i = 0; 1346 error = 0; 1347 1348 while ((inp = LIST_NEXT(marker, inp_list)) != NULL && i < n) { 1349 LIST_REMOVE(marker, inp_list); 1350 LIST_INSERT_AFTER(inp, marker, inp_list); 1351 1352 if (inp->inp_flags & INP_PLACEMARKER) 1353 continue; 1354 if (inp->inp_gencnt > gencnt) 1355 continue; 1356 if (prison_xinpcb(req->td, inp)) 1357 continue; 1358 bzero(&xi, sizeof xi); 1359 xi.xi_len = sizeof xi; 1360 bcopy(inp, &xi.xi_inp, sizeof *inp); 1361 if (inp->inp_socket) 1362 sotoxsocket(inp->inp_socket, &xi.xi_socket); 1363 if ((error = SYSCTL_OUT(req, &xi, sizeof xi)) != 0) 1364 break; 1365 ++i; 1366 } 1367 LIST_REMOVE(marker, inp_list); 1368 if (error == 0 && i < n) { 1369 bzero(&xi, sizeof xi); 1370 xi.xi_len = sizeof xi; 1371 while (i < n) { 1372 error = SYSCTL_OUT(req, &xi, sizeof xi); 1373 ++i; 1374 } 1375 } 1376 kfree(marker, M_TEMP); 1377 return(error); 1378 } 1379