1 /* 2 * Copyright (c) 2004 Jeffrey M. Hsu. All rights reserved. 3 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to The DragonFly Project 6 * by Jeffrey M. Hsu. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of The DragonFly Project nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific, prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1982, 1986, 1991, 1993, 1995 36 * The Regents of the University of California. All rights reserved. 
37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 
61 * 62 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 63 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.27 2004/01/02 04:06:42 ambrisko Exp $ 64 */ 65 66 #include "opt_ipsec.h" 67 #include "opt_inet6.h" 68 69 #include <sys/param.h> 70 #include <sys/systm.h> 71 #include <sys/malloc.h> 72 #include <sys/mbuf.h> 73 #include <sys/domain.h> 74 #include <sys/protosw.h> 75 #include <sys/socket.h> 76 #include <sys/socketvar.h> 77 #include <sys/proc.h> 78 #include <sys/priv.h> 79 #include <sys/jail.h> 80 #include <sys/kernel.h> 81 #include <sys/sysctl.h> 82 83 #include <sys/thread2.h> 84 #include <sys/socketvar2.h> 85 #include <sys/msgport2.h> 86 87 #include <machine/limits.h> 88 89 #include <net/if.h> 90 #include <net/if_types.h> 91 #include <net/route.h> 92 93 #include <netinet/in.h> 94 #include <netinet/in_pcb.h> 95 #include <netinet/in_var.h> 96 #include <netinet/ip_var.h> 97 #ifdef INET6 98 #include <netinet/ip6.h> 99 #include <netinet6/ip6_var.h> 100 #endif /* INET6 */ 101 102 #ifdef IPSEC 103 #include <netinet6/ipsec.h> 104 #include <netproto/key/key.h> 105 #include <netproto/ipsec/esp_var.h> 106 #endif 107 108 #ifdef FAST_IPSEC 109 #if defined(IPSEC) || defined(IPSEC_ESP) 110 #error "Bad idea: don't compile with both IPSEC and FAST_IPSEC!" 111 #endif 112 113 #include <netproto/ipsec/ipsec.h> 114 #include <netproto/ipsec/key.h> 115 #define IPSEC 116 #endif /* FAST_IPSEC */ 117 118 #define INP_LOCALGROUP_SIZMIN 8 119 #define INP_LOCALGROUP_SIZMAX 256 120 121 struct in_addr zeroin_addr; 122 123 /* 124 * These configure the range of local port addresses assigned to 125 * "unspecified" outgoing connections/packets/whatever. 
126 */ 127 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */ 128 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */ 129 130 int ipport_firstauto = IPPORT_RESERVED; /* 1024 */ 131 int ipport_lastauto = IPPORT_USERRESERVED; /* 5000 */ 132 133 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ 134 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */ 135 136 #define RANGECHK(var, min, max) \ 137 if ((var) < (min)) { (var) = (min); } \ 138 else if ((var) > (max)) { (var) = (max); } 139 140 int udpencap_enable = 1; /* enabled by default */ 141 int udpencap_port = 4500; /* triggers decapsulation */ 142 143 static int 144 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS) 145 { 146 int error; 147 148 error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); 149 if (!error) { 150 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); 151 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); 152 153 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX); 154 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX); 155 156 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX); 157 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX); 158 } 159 return (error); 160 } 161 162 #undef RANGECHK 163 164 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports"); 165 166 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW, 167 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); 168 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW, 169 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); 170 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW, 171 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); 172 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW, 173 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); 174 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW, 175 
&ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); 176 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW, 177 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); 178 179 /* 180 * in_pcb.c: manage the Protocol Control Blocks. 181 * 182 * NOTE: It is assumed that most of these functions will be called from 183 * a critical section. XXX - There are, unfortunately, a few exceptions 184 * to this rule that should be fixed. 185 * 186 * NOTE: The caller should initialize the cpu field to the cpu running the 187 * protocol stack associated with this inpcbinfo. 188 */ 189 190 void 191 in_pcbinfo_init(struct inpcbinfo *pcbinfo) 192 { 193 LIST_INIT(&pcbinfo->pcblisthead); 194 pcbinfo->cpu = -1; 195 pcbinfo->portsave = kmalloc(sizeof(*pcbinfo->portsave), M_PCB, 196 M_WAITOK | M_ZERO); 197 } 198 199 struct baddynamicports baddynamicports; 200 201 /* 202 * Check if the specified port is invalid for dynamic allocation. 203 */ 204 int 205 in_baddynamic(u_int16_t port, u_int16_t proto) 206 { 207 switch (proto) { 208 case IPPROTO_TCP: 209 return (DP_ISSET(baddynamicports.tcp, port)); 210 case IPPROTO_UDP: 211 #ifdef IPSEC 212 /* Cannot preset this as it is a sysctl */ 213 if (port == udpencap_port) 214 return (1); 215 #endif 216 return (DP_ISSET(baddynamicports.udp, port)); 217 default: 218 return (0); 219 } 220 } 221 222 223 /* 224 * Allocate a PCB and associate it with the socket. 
225 */ 226 int 227 in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo) 228 { 229 struct inpcb *inp; 230 #ifdef IPSEC 231 int error; 232 #endif 233 234 inp = kmalloc(pcbinfo->ipi_size, M_PCB, M_WAITOK|M_ZERO|M_NULLOK); 235 if (inp == NULL) 236 return (ENOMEM); 237 inp->inp_gencnt = ++pcbinfo->ipi_gencnt; 238 inp->inp_pcbinfo = inp->inp_cpcbinfo = pcbinfo; 239 inp->inp_socket = so; 240 #ifdef IPSEC 241 error = ipsec_init_policy(so, &inp->inp_sp); 242 if (error != 0) { 243 kfree(inp, M_PCB); 244 return (error); 245 } 246 #endif 247 #ifdef INET6 248 if (INP_SOCKAF(so) == AF_INET6 && ip6_v6only) 249 inp->inp_flags |= IN6P_IPV6_V6ONLY; 250 if (ip6_auto_flowlabel) 251 inp->inp_flags |= IN6P_AUTOFLOWLABEL; 252 #endif 253 soreference(so); 254 so->so_pcb = inp; 255 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, inp, inp_list); 256 pcbinfo->ipi_count++; 257 return (0); 258 } 259 260 /* 261 * Unlink a pcb with the intention of moving it to another cpu with a 262 * different pcbinfo. While unlinked nothing should attempt to dereference 263 * inp_pcbinfo, NULL it out so we assert if it does. 264 */ 265 void 266 in_pcbunlink(struct inpcb *inp, struct inpcbinfo *pcbinfo) 267 { 268 KASSERT(inp->inp_pcbinfo == pcbinfo, ("pcbinfo mismatch")); 269 KASSERT(inp->inp_cpcbinfo == pcbinfo, ("cpcbinfo mismatch")); 270 KASSERT((inp->inp_flags & (INP_WILDCARD | INP_CONNECTED)) == 0, 271 ("already linked")); 272 273 LIST_REMOVE(inp, inp_list); 274 pcbinfo->ipi_count--; 275 inp->inp_pcbinfo = NULL; 276 inp->inp_cpcbinfo = NULL; 277 } 278 279 /* 280 * Relink a pcb into a new pcbinfo. 
281 */ 282 void 283 in_pcblink(struct inpcb *inp, struct inpcbinfo *pcbinfo) 284 { 285 KASSERT(inp->inp_pcbinfo == NULL, ("has pcbinfo")); 286 KASSERT(inp->inp_cpcbinfo == NULL, ("has cpcbinfo")); 287 KASSERT((inp->inp_flags & (INP_WILDCARD | INP_CONNECTED)) == 0, 288 ("already linked")); 289 290 inp->inp_cpcbinfo = pcbinfo; 291 inp->inp_pcbinfo = pcbinfo; 292 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, inp, inp_list); 293 pcbinfo->ipi_count++; 294 } 295 296 static int 297 in_pcbsetlport(struct inpcb *inp, int wild, struct ucred *cred) 298 { 299 struct inpcbportinfo *portinfo = inp->inp_pcbinfo->portinfo; 300 u_short first, last, lport; 301 u_short *lastport; 302 int count, error; 303 304 inp->inp_flags |= INP_ANONPORT; 305 306 if (inp->inp_flags & INP_HIGHPORT) { 307 first = ipport_hifirstauto; /* sysctl */ 308 last = ipport_hilastauto; 309 lastport = &portinfo->lasthi; 310 } else if (inp->inp_flags & INP_LOWPORT) { 311 if (cred && 312 (error = 313 priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0))) { 314 inp->inp_laddr.s_addr = INADDR_ANY; 315 return error; 316 } 317 first = ipport_lowfirstauto; /* 1023 */ 318 last = ipport_lowlastauto; /* 600 */ 319 lastport = &portinfo->lastlow; 320 } else { 321 first = ipport_firstauto; /* sysctl */ 322 last = ipport_lastauto; 323 lastport = &portinfo->lastport; 324 } 325 326 /* 327 * This has to be atomic. If the porthash is shared across multiple 328 * protocol threads (aka tcp) then the token will be non-NULL. 329 */ 330 if (portinfo->porttoken) 331 lwkt_gettoken(portinfo->porttoken); 332 333 /* 334 * Simple check to ensure all ports are not used up causing 335 * a deadlock here. 336 * 337 * We split the two cases (up and down) so that the direction 338 * is not being tested on each round of the loop. 339 */ 340 if (first > last) { 341 /* 342 * counting down 343 */ 344 count = first - last; 345 346 do { 347 if (count-- < 0) { /* completely used? 
*/ 348 inp->inp_laddr.s_addr = INADDR_ANY; 349 error = EADDRNOTAVAIL; 350 goto done; 351 } 352 --*lastport; 353 if (*lastport > first || *lastport < last) 354 *lastport = first; 355 lport = htons(*lastport); 356 } while (in_pcblookup_local(portinfo, inp->inp_laddr, lport, 357 wild, cred)); 358 } else { 359 /* 360 * counting up 361 */ 362 count = last - first; 363 364 do { 365 if (count-- < 0) { /* completely used? */ 366 inp->inp_laddr.s_addr = INADDR_ANY; 367 error = EADDRNOTAVAIL; 368 goto done; 369 } 370 ++*lastport; 371 if (*lastport < first || *lastport > last) 372 *lastport = first; 373 lport = htons(*lastport); 374 } while (in_pcblookup_local(portinfo, inp->inp_laddr, lport, 375 wild, cred)); 376 } 377 inp->inp_lport = lport; 378 in_pcbinsporthash(portinfo, inp); 379 error = 0; 380 done: 381 if (portinfo->porttoken) 382 lwkt_reltoken(portinfo->porttoken); 383 return error; 384 } 385 386 int 387 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct thread *td) 388 { 389 struct socket *so = inp->inp_socket; 390 struct sockaddr_in jsin; 391 struct ucred *cred = NULL; 392 int wild = 0; 393 394 if (TAILQ_EMPTY(&in_ifaddrheads[mycpuid])) /* XXX broken! */ 395 return (EADDRNOTAVAIL); 396 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) 397 return (EINVAL); /* already bound */ 398 399 if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT))) 400 wild = 1; /* neither SO_REUSEADDR nor SO_REUSEPORT is set */ 401 if (td->td_proc) 402 cred = td->td_proc->p_ucred; 403 404 if (nam != NULL) { 405 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 406 struct inpcbportinfo *portinfo; 407 struct inpcb *t; 408 u_short lport; 409 int reuseport = (so->so_options & SO_REUSEPORT); 410 int error; 411 412 if (nam->sa_len != sizeof *sin) 413 return (EINVAL); 414 #ifdef notdef 415 /* 416 * We should check the family, but old programs 417 * incorrectly fail to initialize it. 
418 */ 419 if (sin->sin_family != AF_INET) 420 return (EAFNOSUPPORT); 421 #endif 422 if (!prison_replace_wildcards(td, nam)) 423 return (EINVAL); 424 425 lport = sin->sin_port; 426 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 427 /* 428 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 429 * allow complete duplication of binding if 430 * SO_REUSEPORT is set, or if SO_REUSEADDR is set 431 * and a multicast address is bound on both 432 * new and duplicated sockets. 433 */ 434 if (so->so_options & SO_REUSEADDR) 435 reuseport = SO_REUSEADDR | SO_REUSEPORT; 436 } else if (sin->sin_addr.s_addr != INADDR_ANY) { 437 sin->sin_port = 0; /* yech... */ 438 bzero(&sin->sin_zero, sizeof sin->sin_zero); 439 if (ifa_ifwithaddr((struct sockaddr *)sin) == NULL) 440 return (EADDRNOTAVAIL); 441 } 442 443 inp->inp_laddr = sin->sin_addr; 444 445 jsin.sin_family = AF_INET; 446 jsin.sin_addr.s_addr = inp->inp_laddr.s_addr; 447 if (!prison_replace_wildcards(td, (struct sockaddr *)&jsin)) { 448 inp->inp_laddr.s_addr = INADDR_ANY; 449 return (EINVAL); 450 } 451 inp->inp_laddr.s_addr = jsin.sin_addr.s_addr; 452 453 if (lport == 0) { 454 /* Auto-select local port */ 455 return in_pcbsetlport(inp, wild, cred); 456 } 457 458 /* GROSS */ 459 if (ntohs(lport) < IPPORT_RESERVED && cred && 460 (error = 461 priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0))) { 462 inp->inp_laddr.s_addr = INADDR_ANY; 463 return (error); 464 } 465 466 portinfo = inp->inp_pcbinfo->portinfo; 467 468 /* 469 * This has to be atomic. If the porthash is shared across 470 * multiple protocol threads (aka tcp) then the token will 471 * be non-NULL. 
472 */ 473 if (portinfo->porttoken) 474 lwkt_gettoken(portinfo->porttoken); 475 476 if (so->so_cred->cr_uid != 0 && 477 !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 478 t = in_pcblookup_local(portinfo, sin->sin_addr, lport, 479 INPLOOKUP_WILDCARD, cred); 480 if (t && 481 (!in_nullhost(sin->sin_addr) || 482 !in_nullhost(t->inp_laddr) || 483 (t->inp_socket->so_options & SO_REUSEPORT) == 0) && 484 (so->so_cred->cr_uid != 485 t->inp_socket->so_cred->cr_uid)) { 486 #ifdef INET6 487 if (!in_nullhost(sin->sin_addr) || 488 !in_nullhost(t->inp_laddr) || 489 INP_SOCKAF(so) == INP_SOCKAF(t->inp_socket)) 490 #endif 491 { 492 inp->inp_laddr.s_addr = INADDR_ANY; 493 error = EADDRINUSE; 494 goto done; 495 } 496 } 497 } 498 if (cred && !prison_replace_wildcards(td, nam)) { 499 inp->inp_laddr.s_addr = INADDR_ANY; 500 error = EADDRNOTAVAIL; 501 goto done; 502 } 503 t = in_pcblookup_local(portinfo, sin->sin_addr, lport, 504 wild, cred); 505 if (t && !(reuseport & t->inp_socket->so_options)) { 506 #ifdef INET6 507 if (!in_nullhost(sin->sin_addr) || 508 !in_nullhost(t->inp_laddr) || 509 INP_SOCKAF(so) == INP_SOCKAF(t->inp_socket)) 510 #endif 511 { 512 inp->inp_laddr.s_addr = INADDR_ANY; 513 error = EADDRINUSE; 514 goto done; 515 } 516 } 517 inp->inp_lport = lport; 518 in_pcbinsporthash(portinfo, inp); 519 error = 0; 520 done: 521 if (portinfo->porttoken) 522 lwkt_reltoken(portinfo->porttoken); 523 return (error); 524 } else { 525 jsin.sin_family = AF_INET; 526 jsin.sin_addr.s_addr = inp->inp_laddr.s_addr; 527 if (!prison_replace_wildcards(td, (struct sockaddr *)&jsin)) { 528 inp->inp_laddr.s_addr = INADDR_ANY; 529 return (EINVAL); 530 } 531 inp->inp_laddr.s_addr = jsin.sin_addr.s_addr; 532 533 return in_pcbsetlport(inp, wild, cred); 534 } 535 } 536 537 static struct inpcb * 538 in_pcblookup_localremote(struct inpcbportinfo *portinfo, struct in_addr laddr, 539 u_short lport, struct in_addr faddr, u_short fport, struct ucred *cred) 540 { 541 struct inpcb *inp; 542 struct inpcbporthead 
*porthash; 543 struct inpcbport *phd; 544 struct inpcb *match = NULL; 545 546 /* 547 * If the porthashbase is shared across several cpus, it must 548 * have been locked. 549 */ 550 if (portinfo->porttoken) 551 ASSERT_LWKT_TOKEN_HELD(portinfo->porttoken); 552 553 /* 554 * Best fit PCB lookup. 555 * 556 * First see if this local port is in use by looking on the 557 * port hash list. 558 */ 559 porthash = &portinfo->porthashbase[ 560 INP_PCBPORTHASH(lport, portinfo->porthashmask)]; 561 LIST_FOREACH(phd, porthash, phd_hash) { 562 if (phd->phd_port == lport) 563 break; 564 } 565 if (phd != NULL) { 566 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { 567 #ifdef INET6 568 if ((inp->inp_vflag & INP_IPV4) == 0) 569 continue; 570 #endif 571 if (inp->inp_laddr.s_addr != INADDR_ANY && 572 inp->inp_laddr.s_addr != laddr.s_addr) 573 continue; 574 575 if (inp->inp_faddr.s_addr != INADDR_ANY && 576 inp->inp_faddr.s_addr != faddr.s_addr) 577 continue; 578 579 if (inp->inp_fport != 0 && inp->inp_fport != fport) 580 continue; 581 582 if (cred == NULL || 583 cred->cr_prison == 584 inp->inp_socket->so_cred->cr_prison) { 585 match = inp; 586 break; 587 } 588 } 589 } 590 return (match); 591 } 592 593 int 594 in_pcbsetlport_remote(struct inpcb *inp, const struct sockaddr *remote, 595 struct thread *td) 596 { 597 struct proc *p = td->td_proc; 598 unsigned short *lastport; 599 const struct sockaddr_in *sin = (const struct sockaddr_in *)remote; 600 struct sockaddr_in jsin; 601 struct inpcbportinfo *portinfo = inp->inp_pcbinfo->portinfo; 602 struct ucred *cred = NULL; 603 u_short lport = 0; 604 ushort first, last; 605 int count, error, dup = 0; 606 607 if (TAILQ_EMPTY(&in_ifaddrheads[mycpuid])) /* XXX broken! 
*/ 608 return (EADDRNOTAVAIL); 609 610 KKASSERT(inp->inp_laddr.s_addr != INADDR_ANY); 611 if (inp->inp_lport != 0) 612 return (EINVAL); /* already bound */ 613 614 KKASSERT(p); 615 cred = p->p_ucred; 616 617 jsin.sin_family = AF_INET; 618 jsin.sin_addr.s_addr = inp->inp_laddr.s_addr; 619 if (!prison_replace_wildcards(td, (struct sockaddr *)&jsin)) { 620 inp->inp_laddr.s_addr = INADDR_ANY; 621 return (EINVAL); 622 } 623 inp->inp_laddr.s_addr = jsin.sin_addr.s_addr; 624 625 inp->inp_flags |= INP_ANONPORT; 626 627 if (inp->inp_flags & INP_HIGHPORT) { 628 first = ipport_hifirstauto; /* sysctl */ 629 last = ipport_hilastauto; 630 lastport = &portinfo->lasthi; 631 } else if (inp->inp_flags & INP_LOWPORT) { 632 if (cred && 633 (error = 634 priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0))) { 635 inp->inp_laddr.s_addr = INADDR_ANY; 636 return (error); 637 } 638 first = ipport_lowfirstauto; /* 1023 */ 639 last = ipport_lowlastauto; /* 600 */ 640 lastport = &portinfo->lastlow; 641 } else { 642 first = ipport_firstauto; /* sysctl */ 643 last = ipport_lastauto; 644 lastport = &portinfo->lastport; 645 } 646 647 /* 648 * This has to be atomic. If the porthash is shared across multiple 649 * protocol threads (aka tcp) then the token will be non-NULL. 650 */ 651 if (portinfo->porttoken) 652 lwkt_gettoken(portinfo->porttoken); 653 654 again: 655 /* 656 * Simple check to ensure all ports are not used up causing 657 * a deadlock here. 658 * 659 * We split the two cases (up and down) so that the direction 660 * is not being tested on each round of the loop. 661 */ 662 if (first > last) { 663 /* 664 * counting down 665 */ 666 count = first - last; 667 668 do { 669 if (count-- < 0) { /* completely used? 
*/ 670 inp->inp_laddr.s_addr = INADDR_ANY; 671 error = EADDRNOTAVAIL; 672 goto done; 673 } 674 --*lastport; 675 if (*lastport > first || *lastport < last) 676 *lastport = first; 677 lport = htons(*lastport); 678 } while (in_pcblookup_localremote(portinfo, inp->inp_laddr, 679 lport, sin->sin_addr, sin->sin_port, cred)); 680 } else { 681 /* 682 * counting up 683 */ 684 count = last - first; 685 686 do { 687 if (count-- < 0) { /* completely used? */ 688 inp->inp_laddr.s_addr = INADDR_ANY; 689 error = EADDRNOTAVAIL; 690 goto done; 691 } 692 ++*lastport; 693 if (*lastport < first || *lastport > last) 694 *lastport = first; 695 lport = htons(*lastport); 696 } while (in_pcblookup_localremote(portinfo, inp->inp_laddr, 697 lport, sin->sin_addr, sin->sin_port, cred)); 698 } 699 700 /* This could happen on loopback interface */ 701 if (sin->sin_port == lport && 702 sin->sin_addr.s_addr == inp->inp_laddr.s_addr) { 703 if (dup) { 704 /* 705 * Duplicate again; give up 706 */ 707 inp->inp_laddr.s_addr = INADDR_ANY; 708 error = EADDRNOTAVAIL; 709 goto done; 710 } 711 dup = 1; 712 goto again; 713 } 714 inp->inp_lport = lport; 715 in_pcbinsporthash(portinfo, inp); 716 error = 0; 717 done: 718 if (portinfo->porttoken) 719 lwkt_reltoken(portinfo->porttoken); 720 return error; 721 } 722 723 /* 724 * Transform old in_pcbconnect() into an inner subroutine for new 725 * in_pcbconnect(): Do some validity-checking on the remote 726 * address (in mbuf 'nam') and then determine local host address 727 * (i.e., which interface) to use to access that remote host. 728 * 729 * This preserves definition of in_pcbconnect(), while supporting a 730 * slightly different version for T/TCP. (This is more than 731 * a bit of a kludge, but cleaning up the internal interfaces would 732 * have forced minor changes in every protocol). 
733 */ 734 int 735 in_pcbladdr_find(struct inpcb *inp, struct sockaddr *nam, 736 struct sockaddr_in **plocal_sin, struct thread *td, int find) 737 { 738 struct in_ifaddr *ia; 739 struct ucred *cred = NULL; 740 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 741 struct sockaddr *jsin; 742 int jailed = 0, alloc_route = 0; 743 744 if (nam->sa_len != sizeof *sin) 745 return (EINVAL); 746 if (sin->sin_family != AF_INET) 747 return (EAFNOSUPPORT); 748 if (sin->sin_port == 0) 749 return (EADDRNOTAVAIL); 750 if (td && td->td_proc && td->td_proc->p_ucred) 751 cred = td->td_proc->p_ucred; 752 if (cred && cred->cr_prison) 753 jailed = 1; 754 if (!TAILQ_EMPTY(&in_ifaddrheads[mycpuid])) { 755 ia = TAILQ_FIRST(&in_ifaddrheads[mycpuid])->ia; 756 /* 757 * If the destination address is INADDR_ANY, 758 * use the primary local address. 759 * If the supplied address is INADDR_BROADCAST, 760 * and the primary interface supports broadcast, 761 * choose the broadcast address for that interface. 762 */ 763 if (sin->sin_addr.s_addr == INADDR_ANY) 764 sin->sin_addr = IA_SIN(ia)->sin_addr; 765 else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST && 766 (ia->ia_ifp->if_flags & IFF_BROADCAST)) 767 sin->sin_addr = satosin(&ia->ia_broadaddr)->sin_addr; 768 } 769 if (find) { 770 struct route *ro; 771 772 ia = NULL; 773 /* 774 * If route is known or can be allocated now, 775 * our src addr is taken from the i/f, else punt. 776 * Note that we should check the address family of the cached 777 * destination, in case of sharing the cache with IPv6. 
778 */ 779 ro = &inp->inp_route; 780 if (ro->ro_rt && 781 (!(ro->ro_rt->rt_flags & RTF_UP) || 782 ro->ro_dst.sa_family != AF_INET || 783 satosin(&ro->ro_dst)->sin_addr.s_addr != 784 sin->sin_addr.s_addr || 785 inp->inp_socket->so_options & SO_DONTROUTE)) { 786 RTFREE(ro->ro_rt); 787 ro->ro_rt = NULL; 788 } 789 if (!(inp->inp_socket->so_options & SO_DONTROUTE) && /*XXX*/ 790 (ro->ro_rt == NULL || 791 ro->ro_rt->rt_ifp == NULL)) { 792 /* No route yet, so try to acquire one */ 793 bzero(&ro->ro_dst, sizeof(struct sockaddr_in)); 794 ro->ro_dst.sa_family = AF_INET; 795 ro->ro_dst.sa_len = sizeof(struct sockaddr_in); 796 ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = 797 sin->sin_addr; 798 rtalloc(ro); 799 alloc_route = 1; 800 } 801 /* 802 * If we found a route, use the address 803 * corresponding to the outgoing interface 804 * unless it is the loopback (in case a route 805 * to our address on another net goes to loopback). 806 */ 807 if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) { 808 if (jailed) { 809 if (jailed_ip(cred->cr_prison, 810 ro->ro_rt->rt_ifa->ifa_addr)) { 811 ia = ifatoia(ro->ro_rt->rt_ifa); 812 } 813 } else { 814 ia = ifatoia(ro->ro_rt->rt_ifa); 815 } 816 } 817 if (ia == NULL) { 818 u_short fport = sin->sin_port; 819 820 sin->sin_port = 0; 821 ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin))); 822 if (ia && jailed && !jailed_ip(cred->cr_prison, 823 sintosa(&ia->ia_addr))) 824 ia = NULL; 825 if (ia == NULL) 826 ia = ifatoia(ifa_ifwithnet(sintosa(sin))); 827 if (ia && jailed && !jailed_ip(cred->cr_prison, 828 sintosa(&ia->ia_addr))) 829 ia = NULL; 830 sin->sin_port = fport; 831 if (ia == NULL && 832 !TAILQ_EMPTY(&in_ifaddrheads[mycpuid])) 833 ia = TAILQ_FIRST(&in_ifaddrheads[mycpuid])->ia; 834 if (ia && jailed && !jailed_ip(cred->cr_prison, 835 sintosa(&ia->ia_addr))) 836 ia = NULL; 837 838 if (!jailed && ia == NULL) 839 goto fail; 840 } 841 /* 842 * If the destination address is multicast and an outgoing 843 * interface has been set as a 
multicast option, use the 844 * address of that interface as our source address. 845 */ 846 if (!jailed && IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) && 847 inp->inp_moptions != NULL) { 848 struct ip_moptions *imo; 849 struct ifnet *ifp; 850 851 imo = inp->inp_moptions; 852 if (imo->imo_multicast_ifp != NULL) { 853 struct in_ifaddr_container *iac; 854 855 ifp = imo->imo_multicast_ifp; 856 ia = NULL; 857 TAILQ_FOREACH(iac, 858 &in_ifaddrheads[mycpuid], ia_link) { 859 if (iac->ia->ia_ifp == ifp) { 860 ia = iac->ia; 861 break; 862 } 863 } 864 if (ia == NULL) 865 goto fail; 866 } 867 } 868 /* 869 * Don't do pcblookup call here; return interface in plocal_sin 870 * and exit to caller, that will do the lookup. 871 */ 872 if (ia == NULL && jailed) { 873 if ((jsin = prison_get_nonlocal(cred->cr_prison, AF_INET, NULL)) != NULL || 874 (jsin = prison_get_local(cred->cr_prison, AF_INET, NULL)) != NULL) { 875 *plocal_sin = satosin(jsin); 876 } else { 877 /* IPv6 only Jail */ 878 goto fail; 879 } 880 } else { 881 *plocal_sin = &ia->ia_addr; 882 } 883 } 884 return (0); 885 fail: 886 if (alloc_route) { 887 struct route *ro = &inp->inp_route; 888 889 if (ro->ro_rt != NULL) 890 RTFREE(ro->ro_rt); 891 bzero(ro, sizeof(*ro)); 892 } 893 return (EADDRNOTAVAIL); 894 } 895 896 int 897 in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, 898 struct sockaddr_in **plocal_sin, struct thread *td) 899 { 900 return in_pcbladdr_find(inp, nam, plocal_sin, td, 901 (inp->inp_laddr.s_addr == INADDR_ANY)); 902 } 903 904 /* 905 * Outer subroutine: 906 * Connect from a socket to a specified address. 907 * Both address and port must be specified in argument sin. 908 * If don't have a local address for this socket yet, 909 * then pick one. 910 */ 911 int 912 in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct thread *td) 913 { 914 struct sockaddr_in *if_sin; 915 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 916 int error; 917 918 /* Call inner routine to assign local interface address. 
*/ 919 if ((error = in_pcbladdr(inp, nam, &if_sin, td)) != 0) 920 return (error); 921 922 if (in_pcblookup_hash(inp->inp_cpcbinfo, sin->sin_addr, sin->sin_port, 923 inp->inp_laddr.s_addr ? 924 inp->inp_laddr : if_sin->sin_addr, 925 inp->inp_lport, FALSE, NULL) != NULL) { 926 return (EADDRINUSE); 927 } 928 if (inp->inp_laddr.s_addr == INADDR_ANY) { 929 if (inp->inp_lport == 0) { 930 error = in_pcbbind(inp, NULL, td); 931 if (error) 932 return (error); 933 } 934 inp->inp_laddr = if_sin->sin_addr; 935 } 936 inp->inp_faddr = sin->sin_addr; 937 inp->inp_fport = sin->sin_port; 938 in_pcbinsconnhash(inp); 939 return (0); 940 } 941 942 void 943 in_pcbdisconnect(struct inpcb *inp) 944 { 945 946 inp->inp_faddr.s_addr = INADDR_ANY; 947 inp->inp_fport = 0; 948 in_pcbremconnhash(inp); 949 if (inp->inp_socket->so_state & SS_NOFDREF) 950 in_pcbdetach(inp); 951 } 952 953 void 954 in_pcbdetach(struct inpcb *inp) 955 { 956 struct socket *so = inp->inp_socket; 957 struct inpcbinfo *ipi = inp->inp_pcbinfo; 958 959 #ifdef IPSEC 960 ipsec4_delete_pcbpolicy(inp); 961 #endif /*IPSEC*/ 962 inp->inp_gencnt = ++ipi->ipi_gencnt; 963 KKASSERT((so->so_state & SS_ASSERTINPROG) == 0); 964 in_pcbremlists(inp); 965 so->so_pcb = NULL; 966 sofree(so); /* remove pcb ref */ 967 if (inp->inp_options) 968 m_free(inp->inp_options); 969 if (inp->inp_route.ro_rt) 970 rtfree(inp->inp_route.ro_rt); 971 ip_freemoptions(inp->inp_moptions); 972 inp->inp_vflag = 0; 973 kfree(inp, M_PCB); 974 } 975 976 /* 977 * The calling convention of in_setsockaddr() and in_setpeeraddr() was 978 * modified to match the pru_sockaddr() and pru_peeraddr() entry points 979 * in struct pr_usrreqs, so that protocols can just reference then directly 980 * without the need for a wrapper function. The socket must have a valid 981 * (i.e., non-nil) PCB, but it should be impossible to get an invalid one 982 * except through a kernel programming error, so it is acceptable to panic 983 * (or in this case trap) if the PCB is invalid. 
(Actually, we don't trap 984 * because there actually /is/ a programming error somewhere... XXX) 985 */ 986 int 987 in_setsockaddr(struct socket *so, struct sockaddr **nam) 988 { 989 struct inpcb *inp; 990 struct sockaddr_in *sin; 991 992 /* 993 * Do the malloc first in case it blocks. 994 */ 995 sin = kmalloc(sizeof *sin, M_SONAME, M_WAITOK | M_ZERO); 996 sin->sin_family = AF_INET; 997 sin->sin_len = sizeof *sin; 998 999 crit_enter(); 1000 inp = so->so_pcb; 1001 if (!inp) { 1002 crit_exit(); 1003 kfree(sin, M_SONAME); 1004 return (ECONNRESET); 1005 } 1006 sin->sin_port = inp->inp_lport; 1007 sin->sin_addr = inp->inp_laddr; 1008 crit_exit(); 1009 1010 *nam = (struct sockaddr *)sin; 1011 return (0); 1012 } 1013 1014 void 1015 in_setsockaddr_dispatch(netmsg_t msg) 1016 { 1017 int error; 1018 1019 error = in_setsockaddr(msg->base.nm_so, msg->peeraddr.nm_nam); 1020 lwkt_replymsg(&msg->lmsg, error); 1021 } 1022 1023 int 1024 in_setpeeraddr(struct socket *so, struct sockaddr **nam) 1025 { 1026 struct inpcb *inp; 1027 struct sockaddr_in *sin; 1028 1029 /* 1030 * Do the malloc first in case it blocks. 
1031 */ 1032 sin = kmalloc(sizeof *sin, M_SONAME, M_WAITOK | M_ZERO); 1033 sin->sin_family = AF_INET; 1034 sin->sin_len = sizeof *sin; 1035 1036 crit_enter(); 1037 inp = so->so_pcb; 1038 if (!inp) { 1039 crit_exit(); 1040 kfree(sin, M_SONAME); 1041 return (ECONNRESET); 1042 } 1043 sin->sin_port = inp->inp_fport; 1044 sin->sin_addr = inp->inp_faddr; 1045 crit_exit(); 1046 1047 *nam = (struct sockaddr *)sin; 1048 return (0); 1049 } 1050 1051 void 1052 in_setpeeraddr_dispatch(netmsg_t msg) 1053 { 1054 int error; 1055 1056 error = in_setpeeraddr(msg->base.nm_so, msg->peeraddr.nm_nam); 1057 lwkt_replymsg(&msg->lmsg, error); 1058 } 1059 1060 void 1061 in_pcbnotifyall(struct inpcbhead *head, struct in_addr faddr, int err, 1062 void (*notify)(struct inpcb *, int)) 1063 { 1064 struct inpcb *inp, *ninp; 1065 1066 /* 1067 * note: if INP_PLACEMARKER is set we must ignore the rest of 1068 * the structure and skip it. 1069 */ 1070 crit_enter(); 1071 LIST_FOREACH_MUTABLE(inp, head, inp_list, ninp) { 1072 if (inp->inp_flags & INP_PLACEMARKER) 1073 continue; 1074 #ifdef INET6 1075 if (!(inp->inp_vflag & INP_IPV4)) 1076 continue; 1077 #endif 1078 if (inp->inp_faddr.s_addr != faddr.s_addr || 1079 inp->inp_socket == NULL) 1080 continue; 1081 (*notify)(inp, err); /* can remove inp from list! */ 1082 } 1083 crit_exit(); 1084 } 1085 1086 void 1087 in_pcbpurgeif0(struct inpcb *head, struct ifnet *ifp) 1088 { 1089 struct inpcb *inp; 1090 struct ip_moptions *imo; 1091 int i, gap; 1092 1093 for (inp = head; inp != NULL; inp = LIST_NEXT(inp, inp_list)) { 1094 if (inp->inp_flags & INP_PLACEMARKER) 1095 continue; 1096 imo = inp->inp_moptions; 1097 if ((inp->inp_vflag & INP_IPV4) && imo != NULL) { 1098 /* 1099 * Unselect the outgoing interface if it is being 1100 * detached. 1101 */ 1102 if (imo->imo_multicast_ifp == ifp) 1103 imo->imo_multicast_ifp = NULL; 1104 1105 /* 1106 * Drop multicast group membership if we joined 1107 * through the interface being detached. 
1108 */ 1109 for (i = 0, gap = 0; i < imo->imo_num_memberships; 1110 i++) { 1111 if (imo->imo_membership[i]->inm_ifp == ifp) { 1112 in_delmulti(imo->imo_membership[i]); 1113 gap++; 1114 } else if (gap != 0) 1115 imo->imo_membership[i - gap] = 1116 imo->imo_membership[i]; 1117 } 1118 imo->imo_num_memberships -= gap; 1119 } 1120 } 1121 } 1122 1123 /* 1124 * Check for alternatives when higher level complains 1125 * about service problems. For now, invalidate cached 1126 * routing information. If the route was created dynamically 1127 * (by a redirect), time to try a default gateway again. 1128 */ 1129 void 1130 in_losing(struct inpcb *inp) 1131 { 1132 struct rtentry *rt; 1133 struct rt_addrinfo rtinfo; 1134 1135 if ((rt = inp->inp_route.ro_rt)) { 1136 bzero(&rtinfo, sizeof(struct rt_addrinfo)); 1137 rtinfo.rti_info[RTAX_DST] = rt_key(rt); 1138 rtinfo.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1139 rtinfo.rti_info[RTAX_NETMASK] = rt_mask(rt); 1140 rtinfo.rti_flags = rt->rt_flags; 1141 rt_missmsg(RTM_LOSING, &rtinfo, rt->rt_flags, 0); 1142 if (rt->rt_flags & RTF_DYNAMIC) { 1143 rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, 1144 rt_mask(rt), rt->rt_flags, NULL); 1145 } 1146 inp->inp_route.ro_rt = NULL; 1147 rtfree(rt); 1148 /* 1149 * A new route can be allocated 1150 * the next time output is attempted. 1151 */ 1152 } 1153 } 1154 1155 /* 1156 * After a routing change, flush old routing 1157 * and allocate a (hopefully) better one. 1158 */ 1159 void 1160 in_rtchange(struct inpcb *inp, int err) 1161 { 1162 if (inp->inp_route.ro_rt) { 1163 rtfree(inp->inp_route.ro_rt); 1164 inp->inp_route.ro_rt = NULL; 1165 /* 1166 * A new route can be allocated the next time 1167 * output is attempted. 1168 */ 1169 } 1170 } 1171 1172 /* 1173 * Lookup a PCB based on the local address and port. 
*/
/*
 * Best-fit search of the port hash for a PCB bound to lport.
 *
 * Each candidate is scored by a "wildcard" count (0 is an exact fit):
 *   +1 if the PCB has a foreign address set,
 *   +1 if exactly one of laddr and the PCB's local address is INADDR_ANY.
 * A PCB whose specific local address differs from a specific laddr is
 * rejected.  Candidates with a non-zero score are rejected unless
 * wild_okay is set.  When cred is non-NULL only PCBs whose socket
 * credential has the same cr_prison are accepted.
 *
 * Returns the lowest-scoring accepted PCB (the first one found on
 * ties), or NULL if none.
 */
struct inpcb *
in_pcblookup_local(struct inpcbportinfo *portinfo, struct in_addr laddr,
		   u_int lport_arg, int wild_okay, struct ucred *cred)
{
	struct inpcb *inp;
	/* matchwild starts above the largest reachable score (2). */
	int matchwild = 3, wildcard;
	u_short lport = lport_arg;
	struct inpcbporthead *porthash;
	struct inpcbport *phd;
	struct inpcb *match = NULL;

	/*
	 * If the porthashbase is shared across several cpus, it must
	 * have been locked.
	 */
	if (portinfo->porttoken)
		ASSERT_LWKT_TOKEN_HELD(portinfo->porttoken);

	/*
	 * Best fit PCB lookup.
	 *
	 * First see if this local port is in use by looking on the
	 * port hash list.
	 */
	porthash = &portinfo->porthashbase[
			INP_PCBPORTHASH(lport, portinfo->porthashmask)];
	LIST_FOREACH(phd, porthash, phd_hash) {
		if (phd->phd_port == lport)
			break;
	}
	if (phd != NULL) {
		/*
		 * Port is in use by one or more PCBs. Look for best
		 * fit.
		 */
		LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
			wildcard = 0;
#ifdef INET6
			if ((inp->inp_vflag & INP_IPV4) == 0)
				continue;
#endif
			/* A connected PCB is a looser fit for a local lookup. */
			if (inp->inp_faddr.s_addr != INADDR_ANY)
				wildcard++;
			if (inp->inp_laddr.s_addr != INADDR_ANY) {
				if (laddr.s_addr == INADDR_ANY)
					wildcard++;
				else if (inp->inp_laddr.s_addr != laddr.s_addr)
					continue;
			} else {
				if (laddr.s_addr != INADDR_ANY)
					wildcard++;
			}
			if (wildcard && !wild_okay)
				continue;
			/*
			 * NOTE(review): inp_socket is dereferenced here
			 * without a NULL check whenever cred is non-NULL.
			 */
			if (wildcard < matchwild &&
			    (cred == NULL ||
			     cred->cr_prison ==
					inp->inp_socket->so_cred->cr_prison)) {
				match = inp;
				matchwild = wildcard;
				/* An exact fit cannot be improved upon. */
				if (matchwild == 0) {
					break;
				}
			}
		}
	}
	return (match);
}

/*
 * Locate the local group that inp belongs to (same vflag, local port
 * and local address) and return the last PCB stored in that group, or
 * the second to last when inp itself is the last one.
 *
 * Returns NULL when the pcbinfo has no local group hash, when no group
 * matches, when the group holds a single PCB, or when inp is not found
 * in the matching group.
 */
struct inpcb *
in_pcblocalgroup_last(const struct inpcbinfo *pcbinfo,
    const struct inpcb *inp)
{
	const struct inp_localgrphead *hdr;
	const struct inp_localgroup *grp;
	int i;

if (pcbinfo->localgrphashbase == NULL)
		return NULL;	/* this pcbinfo keeps no local group hash */

	hdr = &pcbinfo->localgrphashbase[
	    INP_PCBLOCALGRPHASH(inp->inp_lport, pcbinfo->localgrphashmask)];

	/* Find the group keyed by inp's vflag, local port and local address. */
	LIST_FOREACH(grp, hdr, il_list) {
		if (grp->il_vflag == inp->inp_vflag &&
		    grp->il_lport == inp->inp_lport &&
		    memcmp(&grp->il_dependladdr,
			&inp->inp_inc.inc_ie.ie_dependladdr,
			sizeof(grp->il_dependladdr)) == 0) {
			break;
		}
	}
	/* No matching group, or inp would be its only possible member. */
	if (grp == NULL || grp->il_inpcnt == 1)
		return NULL;

	KASSERT(grp->il_inpcnt >= 2,
	    ("invalid localgroup inp count %d", grp->il_inpcnt));
	for (i = 0; i < grp->il_inpcnt; ++i) {
		if (grp->il_inp[i] == inp) {
			int last = grp->il_inpcnt - 1;

			/* Never hand inp back to the caller: step back one. */
			if (i == last)
				last = grp->il_inpcnt - 2;
			return grp->il_inp[last];
		}
	}
	/* inp is not a member of the matching group. */
	return NULL;
}

/*
 * Pick a PCB from the local groups bound to lport, using pkt_hash to
 * choose the slot inside a group.  The first group whose local address
 * equals laddr wins; otherwise the PCB picked from a group bound to
 * INADDR_ANY is returned; otherwise NULL.
 *
 * The hash base is used without a NULL check here; the caller visible
 * in this file tests pcbinfo->localgrphashbase before calling.
 */
static struct inpcb *
inp_localgroup_lookup(const struct inpcbinfo *pcbinfo,
    struct in_addr laddr, uint16_t lport, uint32_t pkt_hash)
{
	struct inpcb *local_wild = NULL;
	const struct inp_localgrphead *hdr;
	const struct inp_localgroup *grp;

	hdr = &pcbinfo->localgrphashbase[
	    INP_PCBLOCALGRPHASH(lport, pcbinfo->localgrphashmask)];
#ifdef INP_LOCALGROUP_HASHTHR
	/*
	 * Drop the low ncpus2_shift bits of the hash before it is divided
	 * by the group's il_factor below.
	 */
	pkt_hash >>= ncpus2_shift;
#endif

	/*
	 * Order of socket selection:
	 * 1. non-wild.
	 * 2. wild.
*
	 * NOTE:
	 * - Local group does not contain jailed sockets
	 * - Local group does not contain IPv4 mapped INET6 wild sockets
	 */
	LIST_FOREACH(grp, hdr, il_list) {
#ifdef INET6
		if (!(grp->il_vflag & INP_IPV4))
			continue;
#endif
		if (grp->il_lport == lport) {
			int idx;

#ifdef INP_LOCALGROUP_HASHTHR
			/* Threshold mapping: slot = hash / per-group factor. */
			idx = pkt_hash / grp->il_factor;
			KASSERT(idx < grp->il_inpcnt && idx >= 0,
			    ("invalid hash %04x, cnt %d or fact %d",
			     pkt_hash, grp->il_inpcnt, grp->il_factor));
#else
			/*
			 * Modulo-N is used here, which greatly reduces
			 * completion queue token contention, thus more
			 * cpu time is saved.
			 */
			/* assumes il_inpcnt is non-zero for a linked group -- TODO confirm */
			idx = pkt_hash % grp->il_inpcnt;
#endif

			/* Exact local address wins at once; a wild group is only remembered. */
			if (grp->il_laddr.s_addr == laddr.s_addr)
				return grp->il_inp[idx];
			else if (grp->il_laddr.s_addr == INADDR_ANY)
				local_wild = grp->il_inp[idx];
		}
	}
	if (local_wild != NULL)
		return local_wild;
	return NULL;
}

/*
 * Lookup PCB in hash list.
 *
 * An exact match on the four-tuple (faddr, fport, laddr, lport) in the
 * connection hash is tried first.  Only if that fails and wildcard is
 * set are the local groups and the wildcard hash consulted for a PCB
 * bound to lport.  ifp and m may be NULL; m supplies the packet hash
 * for the local group lookup when it carries M_HASH.
 */
struct inpcb *
in_pcblookup_pkthash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    u_int fport_arg, struct in_addr laddr, u_int lport_arg,
    boolean_t wildcard, struct ifnet *ifp, const struct mbuf *m)
{
	struct inpcbhead *head;
	struct inpcb *inp, *jinp=NULL;
	u_short fport = fport_arg, lport = lport_arg;

	/*
	 * First look for an exact match.
*/
	head = &pcbinfo->hashbase[INP_PCBCONNHASH(faddr.s_addr, fport,
	    laddr.s_addr, lport, pcbinfo->hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
#ifdef INET6
		if (!(inp->inp_vflag & INP_IPV4))
			continue;
#endif
		if (in_hosteq(inp->inp_faddr, faddr) &&
		    in_hosteq(inp->inp_laddr, laddr) &&
		    inp->inp_fport == fport && inp->inp_lport == lport) {
			/* found */
			/*
			 * A PCB with no socket, or whose credential has
			 * no prison, is returned immediately.  Otherwise
			 * only the first such (jailed) match is
			 * remembered as a fallback.
			 */
			if (inp->inp_socket == NULL ||
			    inp->inp_socket->so_cred->cr_prison == NULL) {
				return (inp);
			} else {
				if (jinp == NULL)
					jinp = inp;
			}
		}
	}
	/* Only jailed exact matches were seen: use the first one. */
	if (jinp != NULL)
		return (jinp);
	if (wildcard) {
		/* Best candidate found so far in each priority class. */
		struct inpcb *local_wild = NULL;
		struct inpcb *jinp_wild = NULL;
#ifdef INET6
		struct inpcb *local_wild_mapped = NULL;
#endif
		struct inpcontainer *ic;
		struct inpcontainerhead *chead;
		struct sockaddr_in jsin;
		struct ucred *cred;

		/*
		 * Check local group first
		 */
		/*
		 * Only possible when a local group hash exists and the
		 * mbuf carries a packet hash; skipped for packets that
		 * arrived on an IFT_FAITH interface.
		 */
		if (pcbinfo->localgrphashbase != NULL &&
		    m != NULL && (m->m_flags & M_HASH) &&
		    !(ifp && ifp->if_type == IFT_FAITH)) {
			inp = inp_localgroup_lookup(pcbinfo,
			    laddr, lport, m->m_pkthdr.hash);
			if (inp != NULL)
				return inp;
		}

		/*
		 * Order of socket selection:
		 * 1. non-jailed, non-wild.
		 * 2. non-jailed, wild.
		 * 3. jailed, non-wild.
		 * 4. jailed, wild.
*/
		/*
		 * jsin is a scratch sockaddr carrying laddr for the
		 * jailed_ip() check; only its family and address are set.
		 */
		jsin.sin_family = AF_INET;
		chead = &pcbinfo->wildcardhashbase[
		    INP_PCBWILDCARDHASH(lport, pcbinfo->wildcardhashmask)];
		LIST_FOREACH(ic, chead, ic_list) {
			inp = ic->ic_inp;
			/*
			 * NOTE(review): reassigned on every iteration; this
			 * looks loop-invariant unless jailed_ip() rewrites
			 * the address -- confirm before hoisting.
			 */
			jsin.sin_addr.s_addr = laddr.s_addr;
#ifdef INET6
			if (!(inp->inp_vflag & INP_IPV4))
				continue;
#endif
			if (inp->inp_socket != NULL)
				cred = inp->inp_socket->so_cred;
			else
				cred = NULL;
			/*
			 * Jailed PCB: skip it once a jailed exact-address
			 * match is already held, or when jailed_ip()
			 * rejects laddr for its prison.
			 */
			if (cred != NULL && jailed(cred)) {
				if (jinp != NULL)
					continue;
				else
					if (!jailed_ip(cred->cr_prison,
					    (struct sockaddr *)&jsin))
						continue;
			}
			if (inp->inp_lport == lport) {
				/* FAITH interfaces only match PCBs flagged INP_FAITH. */
				if (ifp && ifp->if_type == IFT_FAITH &&
				    !(inp->inp_flags & INP_FAITH))
					continue;
				if (inp->inp_laddr.s_addr == laddr.s_addr) {
					if (cred != NULL && jailed(cred))
						jinp = inp;
					else
						return (inp);
				}
				if (inp->inp_laddr.s_addr == INADDR_ANY) {
#ifdef INET6
					if (INP_CHECK_SOCKAF(inp->inp_socket,
							     AF_INET6))
						local_wild_mapped = inp;
					else
#endif
						if (cred != NULL &&
						    jailed(cred))
							jinp_wild = inp;
						else
							local_wild = inp;
				}
			}
		}
		/*
		 * No non-jailed exact match was found (it would have been
		 * returned inside the loop).  Fall back in priority order;
		 * with INET6 an AF_INET6 wild socket ranks between the
		 * non-jailed wild match and the jailed matches.
		 */
		if (local_wild != NULL)
			return (local_wild);
#ifdef INET6
		if (local_wild_mapped != NULL)
			return (local_wild_mapped);
#endif
		if (jinp != NULL)
			return (jinp);
		return (jinp_wild);
	}

	/*
	 * Not found.
	 */
	return (NULL);
}

/*
 * Same lookup as in_pcblookup_pkthash() but without an mbuf, i.e. with
 * no packet hash available for the local group lookup.
 */
struct inpcb *
in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    u_int fport_arg, struct in_addr laddr, u_int lport_arg,
    boolean_t wildcard, struct ifnet *ifp)
{
	return in_pcblookup_pkthash(pcbinfo, faddr, fport_arg,
	    laddr, lport_arg, wildcard, ifp, NULL);
}

/*
 * Insert PCB into connection hash table.
1482 */ 1483 void 1484 in_pcbinsconnhash(struct inpcb *inp) 1485 { 1486 struct inpcbinfo *pcbinfo = inp->inp_cpcbinfo; 1487 struct inpcbhead *bucket; 1488 u_int32_t hashkey_faddr, hashkey_laddr; 1489 1490 #ifdef INET6 1491 if (inp->inp_vflag & INP_IPV6) { 1492 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX JH */; 1493 hashkey_laddr = inp->in6p_laddr.s6_addr32[3] /* XXX JH */; 1494 } else { 1495 #endif 1496 hashkey_faddr = inp->inp_faddr.s_addr; 1497 hashkey_laddr = inp->inp_laddr.s_addr; 1498 #ifdef INET6 1499 } 1500 #endif 1501 1502 KASSERT(!(inp->inp_flags & INP_WILDCARD), 1503 ("already on wildcardhash")); 1504 KASSERT(!(inp->inp_flags & INP_CONNECTED), 1505 ("already on connhash")); 1506 inp->inp_flags |= INP_CONNECTED; 1507 1508 /* 1509 * Insert into the connection hash table. 1510 */ 1511 bucket = &pcbinfo->hashbase[INP_PCBCONNHASH(hashkey_faddr, 1512 inp->inp_fport, hashkey_laddr, inp->inp_lport, pcbinfo->hashmask)]; 1513 LIST_INSERT_HEAD(bucket, inp, inp_hash); 1514 } 1515 1516 /* 1517 * Remove PCB from connection hash table. 1518 */ 1519 void 1520 in_pcbremconnhash(struct inpcb *inp) 1521 { 1522 KASSERT(inp->inp_flags & INP_CONNECTED, ("inp not connected")); 1523 LIST_REMOVE(inp, inp_hash); 1524 inp->inp_flags &= ~INP_CONNECTED; 1525 } 1526 1527 /* 1528 * Insert PCB into port hash table. 1529 */ 1530 void 1531 in_pcbinsporthash(struct inpcbportinfo *portinfo, struct inpcb *inp) 1532 { 1533 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1534 struct inpcbporthead *pcbporthash; 1535 struct inpcbport *phd; 1536 1537 /* 1538 * If the porthashbase is shared across several cpus, it must 1539 * have been locked. 1540 */ 1541 if (portinfo->porttoken) 1542 ASSERT_LWKT_TOKEN_HELD(portinfo->porttoken); 1543 1544 /* 1545 * Insert into the port hash table. 1546 */ 1547 pcbporthash = &portinfo->porthashbase[ 1548 INP_PCBPORTHASH(inp->inp_lport, portinfo->porthashmask)]; 1549 1550 /* Go through port list and look for a head for this lport. 
*/ 1551 LIST_FOREACH(phd, pcbporthash, phd_hash) { 1552 if (phd->phd_port == inp->inp_lport) 1553 break; 1554 } 1555 1556 /* If none exists, malloc one and tack it on. */ 1557 if (phd == NULL) { 1558 KKASSERT(pcbinfo->portsave != NULL); 1559 phd = pcbinfo->portsave; 1560 pcbinfo->portsave = NULL; 1561 phd->phd_port = inp->inp_lport; 1562 LIST_INIT(&phd->phd_pcblist); 1563 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); 1564 } 1565 1566 inp->inp_portinfo = portinfo; 1567 inp->inp_phd = phd; 1568 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); 1569 1570 if (pcbinfo->portsave == NULL) { 1571 pcbinfo->portsave = kmalloc(sizeof(*pcbinfo->portsave), 1572 M_PCB, M_INTWAIT | M_ZERO); 1573 } 1574 } 1575 1576 void 1577 in_pcbinsporthash_lport(struct inpcb *inp) 1578 { 1579 struct inpcbportinfo *portinfo = inp->inp_pcbinfo->portinfo; 1580 1581 if (portinfo->porttoken) 1582 lwkt_gettoken(portinfo->porttoken); 1583 1584 in_pcbinsporthash(portinfo, inp); 1585 1586 if (portinfo->porttoken) 1587 lwkt_reltoken(portinfo->porttoken); 1588 } 1589 1590 static struct inp_localgroup * 1591 inp_localgroup_alloc(struct inp_localgrphead *hdr, u_char vflag, 1592 uint16_t port, const union in_dependaddr *addr, int size) 1593 { 1594 struct inp_localgroup *grp; 1595 1596 grp = kmalloc(__offsetof(struct inp_localgroup, il_inp[size]), 1597 M_TEMP, M_INTWAIT | M_ZERO); 1598 grp->il_vflag = vflag; 1599 grp->il_lport = port; 1600 grp->il_dependladdr = *addr; 1601 grp->il_inpsiz = size; 1602 1603 LIST_INSERT_HEAD(hdr, grp, il_list); 1604 1605 return grp; 1606 } 1607 1608 static void 1609 inp_localgroup_free(struct inp_localgroup *grp) 1610 { 1611 LIST_REMOVE(grp, il_list); 1612 kfree(grp, M_TEMP); 1613 } 1614 1615 static struct inp_localgroup * 1616 inp_localgroup_resize(struct inp_localgrphead *hdr, 1617 struct inp_localgroup *old_grp, int size) 1618 { 1619 struct inp_localgroup *grp; 1620 int i; 1621 1622 grp = inp_localgroup_alloc(hdr, old_grp->il_vflag, 1623 old_grp->il_lport, 
&old_grp->il_dependladdr, size);

	/* The replacement must have room for every existing member. */
	KASSERT(old_grp->il_inpcnt < grp->il_inpsiz,
	    ("invalid new local group size %d and old local group count %d",
	     grp->il_inpsiz, old_grp->il_inpcnt));
	/* Carry members, count and factor over to the new group. */
	for (i = 0; i < old_grp->il_inpcnt; ++i)
		grp->il_inp[i] = old_grp->il_inp[i];
	grp->il_inpcnt = old_grp->il_inpcnt;
	grp->il_factor = old_grp->il_factor;

	/* The new group is already linked on hdr; unlink and free the old. */
	inp_localgroup_free(old_grp);

	return grp;
}

/*
 * Recompute il_factor from the current member count:
 *   (0xffff >> ncpus2_shift) / il_inpcnt + 1
 * It is the divisor applied to the shifted packet hash when a slot is
 * selected under INP_LOCALGROUP_HASHTHR.
 * assumes il_inpcnt is non-zero (it is used as a divisor) -- TODO confirm
 */
static void
inp_localgroup_factor(struct inp_localgroup *grp)
{
	grp->il_factor =
	    ((uint32_t)(0xffff >> ncpus2_shift) / grp->il_inpcnt) + 1;
	KASSERT(grp->il_factor != 0, ("invalid local group factor, "
	    "ncpus2_shift %d, inpcnt %d", ncpus2_shift, grp->il_inpcnt));
}

/*
 * Add inp to the local group matching its (vflag, local port, local
 * address), creating the group or doubling its capacity as needed.
 * Silently does nothing when the pcbinfo has no local group hash, when
 * the socket is jailed, when (INET6) the PCB is an IPv4-mapped INET6
 * wild socket, or when the group already holds INP_LOCALGROUP_SIZMAX
 * slots.
 */
static void
in_pcbinslocalgrphash_oncpu(struct inpcb *inp, struct inpcbinfo *pcbinfo)
{
	struct inp_localgrphead *hdr;
	struct inp_localgroup *grp;
	struct ucred *cred;

	if (pcbinfo->localgrphashbase == NULL)
		return;

	/*
	 * XXX don't allow jailed socket to join local group
	 */
	if (inp->inp_socket != NULL)
		cred = inp->inp_socket->so_cred;
	else
		cred = NULL;
	if (cred != NULL && jailed(cred))
		return;

#ifdef INET6
	/*
	 * XXX don't allow IPv4 mapped INET6 wild socket
	 */
	if ((inp->inp_vflag & INP_IPV4) &&
	    inp->inp_laddr.s_addr == INADDR_ANY &&
	    INP_CHECK_SOCKAF(inp->inp_socket, AF_INET6))
		return;
#endif

	hdr = &pcbinfo->localgrphashbase[
	    INP_PCBLOCALGRPHASH(inp->inp_lport, pcbinfo->localgrphashmask)];

	/* Look for an existing group with the same key as inp. */
	LIST_FOREACH(grp, hdr, il_list) {
		if (grp->il_vflag == inp->inp_vflag &&
		    grp->il_lport == inp->inp_lport &&
		    memcmp(&grp->il_dependladdr,
			&inp->inp_inc.inc_ie.ie_dependladdr,
			sizeof(grp->il_dependladdr)) == 0) {
			break;
		}
	}
	if (grp == NULL) {
		/* Create new local group */
		grp = inp_localgroup_alloc(hdr, inp->inp_vflag,
inp->inp_lport, &inp->inp_inc.inc_ie.ie_dependladdr, 1693 INP_LOCALGROUP_SIZMIN); 1694 } else if (grp->il_inpcnt == grp->il_inpsiz) { 1695 if (grp->il_inpsiz >= INP_LOCALGROUP_SIZMAX) { 1696 static int limit_logged = 0; 1697 1698 if (!limit_logged) { 1699 limit_logged = 1; 1700 kprintf("local group port %d, " 1701 "limit reached\n", ntohs(grp->il_lport)); 1702 } 1703 return; 1704 } 1705 1706 /* Expand this local group */ 1707 grp = inp_localgroup_resize(hdr, grp, grp->il_inpsiz * 2); 1708 } 1709 1710 KASSERT(grp->il_inpcnt < grp->il_inpsiz, 1711 ("invalid local group size %d and count %d", 1712 grp->il_inpsiz, grp->il_inpcnt)); 1713 grp->il_inp[grp->il_inpcnt] = inp; 1714 grp->il_inpcnt++; 1715 inp_localgroup_factor(grp); 1716 } 1717 1718 void 1719 in_pcbinswildcardhash_oncpu(struct inpcb *inp, struct inpcbinfo *pcbinfo) 1720 { 1721 struct inpcontainer *ic; 1722 struct inpcontainerhead *bucket; 1723 1724 in_pcbinslocalgrphash_oncpu(inp, pcbinfo); 1725 1726 bucket = &pcbinfo->wildcardhashbase[ 1727 INP_PCBWILDCARDHASH(inp->inp_lport, pcbinfo->wildcardhashmask)]; 1728 1729 ic = kmalloc(sizeof(struct inpcontainer), M_TEMP, M_INTWAIT); 1730 ic->ic_inp = inp; 1731 LIST_INSERT_HEAD(bucket, ic, ic_list); 1732 } 1733 1734 /* 1735 * Insert PCB into wildcard hash table. 
*/
/*
 * The PCB must be on neither the connection hash nor the wildcard
 * hash.  It is flagged INP_WILDCARD and entered into the wildcard
 * structures of its own pcbinfo.
 */
void
in_pcbinswildcardhash(struct inpcb *inp)
{
	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;

	KASSERT(!(inp->inp_flags & INP_CONNECTED),
	    ("already on connhash"));
	KASSERT(!(inp->inp_flags & INP_WILDCARD),
	    ("already on wildcardhash"));
	inp->inp_flags |= INP_WILDCARD;

	in_pcbinswildcardhash_oncpu(inp, pcbinfo);
}

/*
 * Remove inp from whichever local group in its port's bucket contains
 * it.  The group is freed when inp was its only member; otherwise the
 * remaining members are pulled up, the factor is recomputed, and the
 * group is halved once it is at most a quarter full (never below
 * INP_LOCALGROUP_SIZMIN).  Does nothing if inp is in no group or the
 * pcbinfo has no local group hash.
 */
static void
in_pcbremlocalgrphash_oncpu(struct inpcb *inp, struct inpcbinfo *pcbinfo)
{
	struct inp_localgrphead *hdr;
	struct inp_localgroup *grp;

	if (pcbinfo->localgrphashbase == NULL)
		return;

	hdr = &pcbinfo->localgrphashbase[
	    INP_PCBLOCALGRPHASH(inp->inp_lport, pcbinfo->localgrphashmask)];

	/* Groups are matched by membership (pointer), not by key. */
	LIST_FOREACH(grp, hdr, il_list) {
		int i;

		for (i = 0; i < grp->il_inpcnt; ++i) {
			if (grp->il_inp[i] != inp)
				continue;

			if (grp->il_inpcnt == 1) {
				/* Free this local group */
				inp_localgroup_free(grp);
			} else {
				/* Pull up inpcbs */
				/* i keeps running from the slot inp occupied. */
				for (; i + 1 < grp->il_inpcnt; ++i)
					grp->il_inp[i] = grp->il_inp[i + 1];
				grp->il_inpcnt--;
				inp_localgroup_factor(grp);

				if (grp->il_inpsiz > INP_LOCALGROUP_SIZMIN &&
				    grp->il_inpcnt <= (grp->il_inpsiz / 4)) {
					/* Shrink this local group */
					grp = inp_localgroup_resize(hdr, grp,
					    grp->il_inpsiz / 2);
				}
			}
			/*
			 * Done after the first hit; grp may have been
			 * freed or replaced above, so the walk must not
			 * continue.
			 */
			return;
		}
	}
}

/*
 * Undo in_pcbinswildcardhash_oncpu(): take inp out of its local group
 * (if any) and drop its container from the wildcard hash bucket for
 * its local port.  The PCB's flags are not touched here.
 */
void
in_pcbremwildcardhash_oncpu(struct inpcb *inp, struct inpcbinfo *pcbinfo)
{
	struct inpcontainer *ic;
	struct inpcontainerhead *head;

	in_pcbremlocalgrphash_oncpu(inp, pcbinfo);

	/* find bucket */
	head = &pcbinfo->wildcardhashbase[
	    INP_PCBWILDCARDHASH(inp->inp_lport, pcbinfo->wildcardhashmask)];

	LIST_FOREACH(ic, head, ic_list) {
		if (ic->ic_inp == inp)
			goto found;
	}
	return;			/* not found!
*/ 1809 1810 found: 1811 LIST_REMOVE(ic, ic_list); /* remove container from bucket chain */ 1812 kfree(ic, M_TEMP); /* deallocate container */ 1813 } 1814 1815 /* 1816 * Remove PCB from wildcard hash table. 1817 */ 1818 void 1819 in_pcbremwildcardhash(struct inpcb *inp) 1820 { 1821 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1822 1823 KASSERT(inp->inp_flags & INP_WILDCARD, ("inp not wildcard")); 1824 in_pcbremwildcardhash_oncpu(inp, pcbinfo); 1825 inp->inp_flags &= ~INP_WILDCARD; 1826 } 1827 1828 /* 1829 * Remove PCB from various lists. 1830 */ 1831 void 1832 in_pcbremlists(struct inpcb *inp) 1833 { 1834 if (inp->inp_lport) { 1835 struct inpcbportinfo *portinfo; 1836 struct inpcbport *phd; 1837 1838 /* 1839 * NOTE: 1840 * inp->inp_portinfo is _not_ necessary same as 1841 * inp->inp_pcbinfo->portinfo. 1842 */ 1843 portinfo = inp->inp_portinfo; 1844 if (portinfo->porttoken) 1845 lwkt_gettoken(portinfo->porttoken); 1846 1847 phd = inp->inp_phd; 1848 LIST_REMOVE(inp, inp_portlist); 1849 if (LIST_FIRST(&phd->phd_pcblist) == NULL) { 1850 LIST_REMOVE(phd, phd_hash); 1851 kfree(phd, M_PCB); 1852 } 1853 if (portinfo->porttoken) 1854 lwkt_reltoken(portinfo->porttoken); 1855 } 1856 if (inp->inp_flags & INP_WILDCARD) { 1857 in_pcbremwildcardhash(inp); 1858 } else if (inp->inp_flags & INP_CONNECTED) { 1859 in_pcbremconnhash(inp); 1860 } 1861 LIST_REMOVE(inp, inp_list); 1862 inp->inp_pcbinfo->ipi_count--; 1863 } 1864 1865 int 1866 prison_xinpcb(struct thread *td, struct inpcb *inp) 1867 { 1868 struct ucred *cr; 1869 1870 if (td->td_proc == NULL) 1871 return (0); 1872 cr = td->td_proc->p_ucred; 1873 if (cr->cr_prison == NULL) 1874 return (0); 1875 if (inp->inp_socket && inp->inp_socket->so_cred && 1876 inp->inp_socket->so_cred->cr_prison && 1877 cr->cr_prison == inp->inp_socket->so_cred->cr_prison) 1878 return (0); 1879 return (1); 1880 } 1881 1882 int 1883 in_pcblist_global(SYSCTL_HANDLER_ARGS) 1884 { 1885 struct inpcbinfo *pcbinfo = arg1; 1886 struct inpcb *inp, *marker; 1887 
struct xinpcb xi; 1888 int error, i, n; 1889 1890 /* 1891 * The process of preparing the TCB list is too time-consuming and 1892 * resource-intensive to repeat twice on every request. 1893 */ 1894 if (req->oldptr == NULL) { 1895 n = pcbinfo->ipi_count; 1896 req->oldidx = (n + n/8 + 10) * sizeof(struct xinpcb); 1897 return 0; 1898 } 1899 1900 if (req->newptr != NULL) 1901 return EPERM; 1902 1903 /* 1904 * OK, now we're committed to doing something. Re-fetch ipi_count 1905 * after obtaining the generation count. 1906 */ 1907 n = pcbinfo->ipi_count; 1908 1909 marker = kmalloc(sizeof(struct inpcb), M_TEMP, M_WAITOK|M_ZERO); 1910 marker->inp_flags |= INP_PLACEMARKER; 1911 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, marker, inp_list); 1912 1913 i = 0; 1914 error = 0; 1915 1916 while ((inp = LIST_NEXT(marker, inp_list)) != NULL && i < n) { 1917 LIST_REMOVE(marker, inp_list); 1918 LIST_INSERT_AFTER(inp, marker, inp_list); 1919 1920 if (inp->inp_flags & INP_PLACEMARKER) 1921 continue; 1922 if (prison_xinpcb(req->td, inp)) 1923 continue; 1924 bzero(&xi, sizeof xi); 1925 xi.xi_len = sizeof xi; 1926 bcopy(inp, &xi.xi_inp, sizeof *inp); 1927 if (inp->inp_socket) 1928 sotoxsocket(inp->inp_socket, &xi.xi_socket); 1929 if ((error = SYSCTL_OUT(req, &xi, sizeof xi)) != 0) 1930 break; 1931 ++i; 1932 } 1933 LIST_REMOVE(marker, inp_list); 1934 if (error == 0 && i < n) { 1935 bzero(&xi, sizeof xi); 1936 xi.xi_len = sizeof xi; 1937 while (i < n) { 1938 error = SYSCTL_OUT(req, &xi, sizeof xi); 1939 ++i; 1940 } 1941 } 1942 kfree(marker, M_TEMP); 1943 return(error); 1944 } 1945 1946 int 1947 in_pcblist_global_nomarker(SYSCTL_HANDLER_ARGS, struct xinpcb **xi0, int *nxi0) 1948 { 1949 struct inpcbinfo *pcbinfo = arg1; 1950 struct inpcb *inp; 1951 struct xinpcb *xi; 1952 int nxi; 1953 1954 *nxi0 = 0; 1955 *xi0 = NULL; 1956 1957 /* 1958 * The process of preparing the PCB list is too time-consuming and 1959 * resource-intensive to repeat twice on every request. 
1960 */ 1961 if (req->oldptr == NULL) { 1962 int n = pcbinfo->ipi_count; 1963 1964 req->oldidx = (n + n/8 + 10) * sizeof(struct xinpcb); 1965 return 0; 1966 } 1967 1968 if (req->newptr != NULL) 1969 return EPERM; 1970 1971 if (pcbinfo->ipi_count == 0) 1972 return 0; 1973 1974 nxi = 0; 1975 xi = kmalloc(pcbinfo->ipi_count * sizeof(*xi), M_TEMP, 1976 M_WAITOK | M_ZERO | M_NULLOK); 1977 if (xi == NULL) 1978 return ENOMEM; 1979 1980 LIST_FOREACH(inp, &pcbinfo->pcblisthead, inp_list) { 1981 struct xinpcb *xi_ptr = &xi[nxi]; 1982 1983 if (prison_xinpcb(req->td, inp)) 1984 continue; 1985 1986 xi_ptr->xi_len = sizeof(*xi_ptr); 1987 bcopy(inp, &xi_ptr->xi_inp, sizeof(*inp)); 1988 if (inp->inp_socket) 1989 sotoxsocket(inp->inp_socket, &xi_ptr->xi_socket); 1990 ++nxi; 1991 } 1992 1993 if (nxi == 0) { 1994 kfree(xi, M_TEMP); 1995 return 0; 1996 } 1997 1998 *nxi0 = nxi; 1999 *xi0 = xi; 2000 2001 return 0; 2002 } 2003 2004 void 2005 in_savefaddr(struct socket *so, const struct sockaddr *faddr) 2006 { 2007 struct sockaddr_in *sin; 2008 2009 KASSERT(faddr->sa_family == AF_INET, 2010 ("not AF_INET faddr %d", faddr->sa_family)); 2011 2012 sin = kmalloc(sizeof(*sin), M_SONAME, M_WAITOK | M_ZERO); 2013 sin->sin_family = AF_INET; 2014 sin->sin_len = sizeof(*sin); 2015 sin->sin_port = ((const struct sockaddr_in *)faddr)->sin_port; 2016 sin->sin_addr = ((const struct sockaddr_in *)faddr)->sin_addr; 2017 2018 so->so_faddr = (struct sockaddr *)sin; 2019 } 2020 2021 void 2022 in_pcbportinfo_init(struct inpcbportinfo *portinfo, int hashsize, 2023 boolean_t shared) 2024 { 2025 memset(portinfo, 0, sizeof(*portinfo)); 2026 portinfo->porthashbase = hashinit(hashsize, M_PCB, 2027 &portinfo->porthashmask); 2028 if (shared) { 2029 portinfo->porttoken = kmalloc(sizeof(struct lwkt_token), 2030 M_PCB, M_WAITOK); 2031 lwkt_token_init(portinfo->porttoken, "porttoken"); 2032 } 2033 } 2034