1 /* $OpenBSD: tcp_usrreq.c,v 1.118 2014/04/06 16:49:40 chrisz Exp $ */ 2 /* $NetBSD: tcp_usrreq.c,v 1.20 1996/02/13 23:44:16 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 33 * 34 * NRL grants permission for redistribution and use in source and binary 35 * forms, with or without modification, of the software and documentation 36 * created at NRL provided that the following conditions are met: 37 * 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgements: 45 * This product includes software developed by the University of 46 * California, Berkeley and its contributors. 47 * This product includes software developed at the Information 48 * Technology Division, US Naval Research Laboratory. 49 * 4. Neither the name of the NRL nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 64 * 65 * The views and conclusions contained in the software and documentation 66 * are those of the authors and should not be interpreted as representing 67 * official policies, either expressed or implied, of the US Naval 68 * Research Laboratory (NRL). 69 */ 70 71 #include <sys/param.h> 72 #include <sys/systm.h> 73 #include <sys/mbuf.h> 74 #include <sys/socket.h> 75 #include <sys/socketvar.h> 76 #include <sys/protosw.h> 77 #include <sys/stat.h> 78 #include <sys/proc.h> 79 #include <sys/sysctl.h> 80 #include <sys/domain.h> 81 #include <sys/kernel.h> 82 #include <sys/pool.h> 83 84 #include <dev/rndvar.h> 85 86 #include <net/if.h> 87 #include <net/route.h> 88 89 #include <netinet/in.h> 90 #include <netinet/in_systm.h> 91 #include <netinet/in_var.h> 92 #include <netinet/ip.h> 93 #include <netinet/in_pcb.h> 94 #include <netinet/ip_var.h> 95 #include <netinet/tcp.h> 96 #include <netinet/tcp_fsm.h> 97 #include <netinet/tcp_seq.h> 98 #include <netinet/tcp_timer.h> 99 #include <netinet/tcp_var.h> 100 #include <netinet/tcpip.h> 101 #include <netinet/tcp_debug.h> 102 103 #ifdef INET6 104 #include <netinet6/in6_var.h> 105 #endif 106 107 #ifndef TCP_SENDSPACE 108 #define TCP_SENDSPACE 1024*16 109 #endif 110 u_int tcp_sendspace = TCP_SENDSPACE; 111 #ifndef TCP_RECVSPACE 112 #define TCP_RECVSPACE 1024*16 113 #endif 114 u_int tcp_recvspace = TCP_RECVSPACE; 115 u_int tcp_autorcvbuf_inc = 16 * 1024; 116 117 int *tcpctl_vars[TCPCTL_MAXID] = TCPCTL_VARS; 118 119 struct inpcbtable tcbtable; 120 121 int tcp_ident(void *, size_t *, void *, size_t, int); 122 123 /* 124 * Process a TCP user request for TCP tb. If this is a send request 125 * then m is the mbuf chain of send data. If this is a timer expiration 126 * (called from the software clock routine), then timertype tells which timer. 127 */ 128 /*ARGSUSED*/ 129 int 130 tcp_usrreq(so, req, m, nam, control, p) 131 struct socket *so; 132 int req; 133 struct mbuf *m, *nam, *control; 134 struct proc *p; 135 { 136 struct sockaddr_in *sin; 137 struct inpcb *inp; 138 struct tcpcb *tp = NULL; 139 int s; 140 int error = 0; 141 short ostate; 142 143 if (req == PRU_CONTROL) { 144 #ifdef INET6 145 if (sotopf(so) == PF_INET6) 146 return in6_control(so, (u_long)m, (caddr_t)nam, 147 (struct ifnet *)control); 148 else 149 #endif /* INET6 */ 150 return (in_control(so, (u_long)m, (caddr_t)nam, 151 (struct ifnet *)control)); 152 } 153 if (control && control->m_len) { 154 m_freem(control); 155 if (m) 156 m_freem(m); 157 return (EINVAL); 158 } 159 160 s = splsoftnet(); 161 inp = sotoinpcb(so); 162 /* 163 * When a TCP is attached to a socket, then there will be 164 * a (struct inpcb) pointed at by the socket, and this 165 * structure will point at a subsidiary (struct tcpcb). 166 */ 167 if (inp == 0 && req != PRU_ATTACH) { 168 error = so->so_error; 169 if (error == 0) 170 error = EINVAL; 171 splx(s); 172 /* 173 * The following corrects an mbuf leak under rare 174 * circumstances 175 */ 176 if (m && (req == PRU_SEND || req == PRU_SENDOOB)) 177 m_freem(m); 178 return (error); 179 } 180 if (inp) { 181 tp = intotcpcb(inp); 182 /* tp might get 0 when using socket splicing */ 183 if (tp == NULL) { 184 splx(s); 185 return (0); 186 } 187 #ifdef KPROF 188 tcp_acounts[tp->t_state][req]++; 189 #endif 190 ostate = tp->t_state; 191 } else 192 ostate = 0; 193 switch (req) { 194 195 /* 196 * TCP attaches to socket via PRU_ATTACH, reserving space, 197 * and an internet control block. 198 */ 199 case PRU_ATTACH: 200 if (inp) { 201 error = EISCONN; 202 break; 203 } 204 error = tcp_attach(so); 205 if (error) 206 break; 207 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 208 so->so_linger = TCP_LINGERTIME; 209 tp = sototcpcb(so); 210 break; 211 212 /* 213 * PRU_DETACH detaches the TCP protocol from the socket. 214 * If the protocol state is non-embryonic, then can't 215 * do this directly: have to initiate a PRU_DISCONNECT, 216 * which may finish later; embryonic TCB's can just 217 * be discarded here. 218 */ 219 case PRU_DETACH: 220 tp = tcp_disconnect(tp); 221 break; 222 223 /* 224 * Give the socket an address. 225 */ 226 case PRU_BIND: 227 #ifdef INET6 228 if (inp->inp_flags & INP_IPV6) 229 error = in6_pcbbind(inp, nam, p); 230 else 231 #endif 232 error = in_pcbbind(inp, nam, p); 233 if (error) 234 break; 235 break; 236 237 /* 238 * Prepare to accept connections. 239 */ 240 case PRU_LISTEN: 241 if (inp->inp_lport == 0) { 242 #ifdef INET6 243 if (inp->inp_flags & INP_IPV6) 244 error = in6_pcbbind(inp, NULL, p); 245 else 246 #endif 247 error = in_pcbbind(inp, NULL, p); 248 } 249 /* If the in_pcbbind() above is called, the tp->pf 250 should still be whatever it was before. */ 251 if (error == 0) 252 tp->t_state = TCPS_LISTEN; 253 break; 254 255 /* 256 * Initiate connection to peer. 257 * Create a template for use in transmissions on this connection. 258 * Enter SYN_SENT state, and mark socket as connecting. 259 * Start keep-alive timer, and seed output sequence space. 260 * Send initial segment on connection. 261 */ 262 case PRU_CONNECT: 263 sin = mtod(nam, struct sockaddr_in *); 264 265 #ifdef INET6 266 if (sin->sin_family == AF_INET6) { 267 struct in6_addr *in6_addr = &mtod(nam, 268 struct sockaddr_in6 *)->sin6_addr; 269 270 if (IN6_IS_ADDR_UNSPECIFIED(in6_addr) || 271 IN6_IS_ADDR_MULTICAST(in6_addr) || 272 IN6_IS_ADDR_V4MAPPED(in6_addr)) { 273 error = EINVAL; 274 break; 275 } 276 277 error = in6_pcbconnect(inp, nam); 278 } else if (sin->sin_family == AF_INET) 279 #endif /* INET6 */ 280 { 281 if ((sin->sin_addr.s_addr == INADDR_ANY) || 282 IN_MULTICAST(sin->sin_addr.s_addr) || 283 in_broadcast(sin->sin_addr, NULL, 284 inp->inp_rtableid)) { 285 error = EINVAL; 286 break; 287 } 288 289 error = in_pcbconnect(inp, nam); 290 } 291 292 if (error) 293 break; 294 295 tp->t_template = tcp_template(tp); 296 if (tp->t_template == 0) { 297 in_pcbdisconnect(inp); 298 error = ENOBUFS; 299 break; 300 } 301 302 so->so_state |= SS_CONNECTOUT; 303 304 /* Compute window scaling to request. */ 305 tcp_rscale(tp, sb_max); 306 307 soisconnecting(so); 308 tcpstat.tcps_connattempt++; 309 tp->t_state = TCPS_SYN_SENT; 310 TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init); 311 tcp_set_iss_tsm(tp); 312 tcp_sendseqinit(tp); 313 #if defined(TCP_SACK) 314 tp->snd_last = tp->snd_una; 315 #endif 316 #if defined(TCP_SACK) && defined(TCP_FACK) 317 tp->snd_fack = tp->snd_una; 318 tp->retran_data = 0; 319 tp->snd_awnd = 0; 320 #endif 321 error = tcp_output(tp); 322 break; 323 324 /* 325 * Create a TCP connection between two sockets. 326 */ 327 case PRU_CONNECT2: 328 error = EOPNOTSUPP; 329 break; 330 331 /* 332 * Initiate disconnect from peer. 333 * If connection never passed embryonic stage, just drop; 334 * else if don't need to let data drain, then can just drop anyways, 335 * else have to begin TCP shutdown process: mark socket disconnecting, 336 * drain unread data, state switch to reflect user close, and 337 * send segment (e.g. FIN) to peer. Socket will be really disconnected 338 * when peer sends FIN and acks ours. 339 * 340 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 341 */ 342 case PRU_DISCONNECT: 343 tp = tcp_disconnect(tp); 344 break; 345 346 /* 347 * Accept a connection. Essentially all the work is 348 * done at higher levels; just return the address 349 * of the peer, storing through addr. 350 */ 351 case PRU_ACCEPT: 352 #ifdef INET6 353 if (inp->inp_flags & INP_IPV6) 354 in6_setpeeraddr(inp, nam); 355 else 356 #endif 357 in_setpeeraddr(inp, nam); 358 break; 359 360 /* 361 * Mark the connection as being incapable of further output. 362 */ 363 case PRU_SHUTDOWN: 364 if (so->so_state & SS_CANTSENDMORE) 365 break; 366 socantsendmore(so); 367 tp = tcp_usrclosed(tp); 368 if (tp) 369 error = tcp_output(tp); 370 break; 371 372 /* 373 * After a receive, possibly send window update to peer. 374 */ 375 case PRU_RCVD: 376 /* 377 * soreceive() calls this function when a user receives 378 * ancillary data on a listening socket. We don't call 379 * tcp_output in such a case, since there is no header 380 * template for a listening socket and hence the kernel 381 * will panic. 382 */ 383 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0) 384 (void) tcp_output(tp); 385 break; 386 387 /* 388 * Do a send by putting data in output queue and updating urgent 389 * marker if URG set. Possibly send more data. 390 */ 391 case PRU_SEND: 392 sbappendstream(&so->so_snd, m); 393 error = tcp_output(tp); 394 break; 395 396 /* 397 * Abort the TCP. 398 */ 399 case PRU_ABORT: 400 tp = tcp_drop(tp, ECONNABORTED); 401 break; 402 403 case PRU_SENSE: 404 ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; 405 splx(s); 406 return (0); 407 408 case PRU_RCVOOB: 409 if ((so->so_oobmark == 0 && 410 (so->so_state & SS_RCVATMARK) == 0) || 411 so->so_options & SO_OOBINLINE || 412 tp->t_oobflags & TCPOOB_HADDATA) { 413 error = EINVAL; 414 break; 415 } 416 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 417 error = EWOULDBLOCK; 418 break; 419 } 420 m->m_len = 1; 421 *mtod(m, caddr_t) = tp->t_iobc; 422 if (((long)nam & MSG_PEEK) == 0) 423 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 424 break; 425 426 case PRU_SENDOOB: 427 if (sbspace(&so->so_snd) < -512) { 428 m_freem(m); 429 error = ENOBUFS; 430 break; 431 } 432 /* 433 * According to RFC961 (Assigned Protocols), 434 * the urgent pointer points to the last octet 435 * of urgent data. We continue, however, 436 * to consider it to indicate the first octet 437 * of data past the urgent section. 438 * Otherwise, snd_up should be one lower. 439 */ 440 sbappendstream(&so->so_snd, m); 441 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 442 tp->t_force = 1; 443 error = tcp_output(tp); 444 tp->t_force = 0; 445 break; 446 447 case PRU_SOCKADDR: 448 #ifdef INET6 449 if (inp->inp_flags & INP_IPV6) 450 in6_setsockaddr(inp, nam); 451 else 452 #endif 453 in_setsockaddr(inp, nam); 454 break; 455 456 case PRU_PEERADDR: 457 #ifdef INET6 458 if (inp->inp_flags & INP_IPV6) 459 in6_setpeeraddr(inp, nam); 460 else 461 #endif 462 in_setpeeraddr(inp, nam); 463 break; 464 465 default: 466 panic("tcp_usrreq"); 467 } 468 if (tp && (so->so_options & SO_DEBUG)) 469 tcp_trace(TA_USER, ostate, tp, (caddr_t)0, req, 0); 470 splx(s); 471 return (error); 472 } 473 474 int 475 tcp_ctloutput(op, so, level, optname, mp) 476 int op; 477 struct socket *so; 478 int level, optname; 479 struct mbuf **mp; 480 { 481 int error = 0, s; 482 struct inpcb *inp; 483 struct tcpcb *tp; 484 struct mbuf *m; 485 int i; 486 487 s = splsoftnet(); 488 inp = sotoinpcb(so); 489 if (inp == NULL) { 490 splx(s); 491 if (op == PRCO_SETOPT && *mp) 492 (void) m_free(*mp); 493 return (ECONNRESET); 494 } 495 if (level != IPPROTO_TCP) { 496 switch (so->so_proto->pr_domain->dom_family) { 497 #ifdef INET6 498 case PF_INET6: 499 error = ip6_ctloutput(op, so, level, optname, mp); 500 break; 501 #endif /* INET6 */ 502 case PF_INET: 503 error = ip_ctloutput(op, so, level, optname, mp); 504 break; 505 default: 506 error = EAFNOSUPPORT; /*?*/ 507 break; 508 } 509 splx(s); 510 return (error); 511 } 512 tp = intotcpcb(inp); 513 514 switch (op) { 515 516 case PRCO_SETOPT: 517 m = *mp; 518 switch (optname) { 519 520 case TCP_NODELAY: 521 if (m == NULL || m->m_len < sizeof (int)) 522 error = EINVAL; 523 else if (*mtod(m, int *)) 524 tp->t_flags |= TF_NODELAY; 525 else 526 tp->t_flags &= ~TF_NODELAY; 527 break; 528 529 case TCP_NOPUSH: 530 if (m == NULL || m->m_len < sizeof (int)) 531 error = EINVAL; 532 else if (*mtod(m, int *)) 533 tp->t_flags |= TF_NOPUSH; 534 else if (tp->t_flags & TF_NOPUSH) { 535 tp->t_flags &= ~TF_NOPUSH; 536 if (TCPS_HAVEESTABLISHED(tp->t_state)) 537 error = tcp_output(tp); 538 } 539 break; 540 541 case TCP_MAXSEG: 542 if (m == NULL || m->m_len < sizeof (int)) { 543 error = EINVAL; 544 break; 545 } 546 547 i = *mtod(m, int *); 548 if (i > 0 && i <= tp->t_maxseg) 549 tp->t_maxseg = i; 550 else 551 error = EINVAL; 552 break; 553 554 #ifdef TCP_SACK 555 case TCP_SACK_ENABLE: 556 if (m == NULL || m->m_len < sizeof (int)) { 557 error = EINVAL; 558 break; 559 } 560 561 if (TCPS_HAVEESTABLISHED(tp->t_state)) { 562 error = EPERM; 563 break; 564 } 565 566 if (tp->t_flags & TF_SIGNATURE) { 567 error = EPERM; 568 break; 569 } 570 571 if (*mtod(m, int *)) 572 tp->sack_enable = 1; 573 else 574 tp->sack_enable = 0; 575 break; 576 #endif 577 #ifdef TCP_SIGNATURE 578 case TCP_MD5SIG: 579 if (m == NULL || m->m_len < sizeof (int)) { 580 error = EINVAL; 581 break; 582 } 583 584 if (TCPS_HAVEESTABLISHED(tp->t_state)) { 585 error = EPERM; 586 break; 587 } 588 589 if (*mtod(m, int *)) { 590 tp->t_flags |= TF_SIGNATURE; 591 #ifdef TCP_SACK 592 tp->sack_enable = 0; 593 #endif /* TCP_SACK */ 594 } else 595 tp->t_flags &= ~TF_SIGNATURE; 596 break; 597 #endif /* TCP_SIGNATURE */ 598 default: 599 error = ENOPROTOOPT; 600 break; 601 } 602 if (m) 603 (void) m_free(m); 604 break; 605 606 case PRCO_GETOPT: 607 *mp = m = m_get(M_WAIT, MT_SOOPTS); 608 m->m_len = sizeof(int); 609 610 switch (optname) { 611 case TCP_NODELAY: 612 *mtod(m, int *) = tp->t_flags & TF_NODELAY; 613 break; 614 case TCP_NOPUSH: 615 *mtod(m, int *) = tp->t_flags & TF_NOPUSH; 616 break; 617 case TCP_MAXSEG: 618 *mtod(m, int *) = tp->t_maxseg; 619 break; 620 #ifdef TCP_SACK 621 case TCP_SACK_ENABLE: 622 *mtod(m, int *) = tp->sack_enable; 623 break; 624 #endif 625 #ifdef TCP_SIGNATURE 626 case TCP_MD5SIG: 627 *mtod(m, int *) = tp->t_flags & TF_SIGNATURE; 628 break; 629 #endif 630 default: 631 error = ENOPROTOOPT; 632 break; 633 } 634 break; 635 } 636 splx(s); 637 return (error); 638 } 639 640 /* 641 * Attach TCP protocol to socket, allocating 642 * internet protocol control block, tcp control block, 643 * bufer space, and entering LISTEN state if to accept connections. 644 */ 645 int 646 tcp_attach(so) 647 struct socket *so; 648 { 649 struct tcpcb *tp; 650 struct inpcb *inp; 651 int error; 652 653 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0 || 654 sbcheckreserve(so->so_snd.sb_wat, tcp_sendspace) || 655 sbcheckreserve(so->so_rcv.sb_wat, tcp_recvspace)) { 656 error = soreserve(so, tcp_sendspace, tcp_recvspace); 657 if (error) 658 return (error); 659 } 660 661 error = in_pcballoc(so, &tcbtable); 662 if (error) 663 return (error); 664 inp = sotoinpcb(so); 665 tp = tcp_newtcpcb(inp); 666 if (tp == NULL) { 667 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 668 669 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 670 in_pcbdetach(inp); 671 so->so_state |= nofd; 672 return (ENOBUFS); 673 } 674 tp->t_state = TCPS_CLOSED; 675 #ifdef INET6 676 /* we disallow IPv4 mapped address completely. */ 677 if (inp->inp_flags & INP_IPV6) 678 tp->pf = PF_INET6; 679 else 680 tp->pf = PF_INET; 681 #else 682 tp->pf = PF_INET; 683 #endif 684 return (0); 685 } 686 687 /* 688 * Initiate (or continue) disconnect. 689 * If embryonic state, just send reset (once). 690 * If in ``let data drain'' option and linger null, just drop. 691 * Otherwise (hard), mark socket disconnecting and drop 692 * current input data; switch states based on user close, and 693 * send segment to peer (with FIN). 694 */ 695 struct tcpcb * 696 tcp_disconnect(tp) 697 struct tcpcb *tp; 698 { 699 struct socket *so = tp->t_inpcb->inp_socket; 700 701 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 702 tp = tcp_close(tp); 703 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 704 tp = tcp_drop(tp, 0); 705 else { 706 soisdisconnecting(so); 707 sbflush(&so->so_rcv); 708 tp = tcp_usrclosed(tp); 709 if (tp) 710 (void) tcp_output(tp); 711 } 712 return (tp); 713 } 714 715 /* 716 * User issued close, and wish to trail through shutdown states: 717 * if never received SYN, just forget it. If got a SYN from peer, 718 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 719 * If already got a FIN from peer, then almost done; go to LAST_ACK 720 * state. In all other cases, have already sent FIN to peer (e.g. 721 * after PRU_SHUTDOWN), and just have to play tedious game waiting 722 * for peer to send FIN or not respond to keep-alives, etc. 723 * We can let the user exit from the close as soon as the FIN is acked. 724 */ 725 struct tcpcb * 726 tcp_usrclosed(tp) 727 struct tcpcb *tp; 728 { 729 730 switch (tp->t_state) { 731 732 case TCPS_CLOSED: 733 case TCPS_LISTEN: 734 case TCPS_SYN_SENT: 735 tp->t_state = TCPS_CLOSED; 736 tp = tcp_close(tp); 737 break; 738 739 case TCPS_SYN_RECEIVED: 740 case TCPS_ESTABLISHED: 741 tp->t_state = TCPS_FIN_WAIT_1; 742 break; 743 744 case TCPS_CLOSE_WAIT: 745 tp->t_state = TCPS_LAST_ACK; 746 break; 747 } 748 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 749 soisdisconnected(tp->t_inpcb->inp_socket); 750 /* 751 * If we are in FIN_WAIT_2, we arrived here because the 752 * application did a shutdown of the send side. Like the 753 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after 754 * a full close, we start a timer to make sure sockets are 755 * not left in FIN_WAIT_2 forever. 756 */ 757 if (tp->t_state == TCPS_FIN_WAIT_2) 758 TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle); 759 } 760 return (tp); 761 } 762 763 /* 764 * Look up a socket for ident or tcpdrop, ... 765 */ 766 int 767 tcp_ident(void *oldp, size_t *oldlenp, void *newp, size_t newlen, int dodrop) 768 { 769 int error = 0, s; 770 struct tcp_ident_mapping tir; 771 struct inpcb *inp; 772 struct tcpcb *tp = NULL; 773 struct sockaddr_in *fin, *lin; 774 #ifdef INET6 775 struct sockaddr_in6 *fin6, *lin6; 776 struct in6_addr f6, l6; 777 #endif 778 if (dodrop) { 779 if (oldp != NULL || *oldlenp != 0) 780 return (EINVAL); 781 if (newp == NULL) 782 return (EPERM); 783 if (newlen < sizeof(tir)) 784 return (ENOMEM); 785 if ((error = copyin(newp, &tir, sizeof (tir))) != 0 ) 786 return (error); 787 } else { 788 if (oldp == NULL) 789 return (EINVAL); 790 if (*oldlenp < sizeof(tir)) 791 return (ENOMEM); 792 if (newp != NULL || newlen != 0) 793 return (EINVAL); 794 if ((error = copyin(oldp, &tir, sizeof (tir))) != 0 ) 795 return (error); 796 } 797 switch (tir.faddr.ss_family) { 798 #ifdef INET6 799 case AF_INET6: 800 fin6 = (struct sockaddr_in6 *)&tir.faddr; 801 error = in6_embedscope(&f6, fin6, NULL, NULL); 802 if (error) 803 return EINVAL; /*?*/ 804 lin6 = (struct sockaddr_in6 *)&tir.laddr; 805 error = in6_embedscope(&l6, lin6, NULL, NULL); 806 if (error) 807 return EINVAL; /*?*/ 808 break; 809 #endif 810 case AF_INET: 811 fin = (struct sockaddr_in *)&tir.faddr; 812 lin = (struct sockaddr_in *)&tir.laddr; 813 break; 814 default: 815 return (EINVAL); 816 } 817 818 s = splsoftnet(); 819 switch (tir.faddr.ss_family) { 820 #ifdef INET6 821 case AF_INET6: 822 inp = in6_pcbhashlookup(&tcbtable, &f6, 823 fin6->sin6_port, &l6, lin6->sin6_port, tir.rdomain); 824 break; 825 #endif 826 case AF_INET: 827 inp = in_pcbhashlookup(&tcbtable, fin->sin_addr, 828 fin->sin_port, lin->sin_addr, lin->sin_port, tir.rdomain); 829 break; 830 } 831 832 if (dodrop) { 833 if (inp && (tp = intotcpcb(inp)) && 834 ((inp->inp_socket->so_options & SO_ACCEPTCONN) == 0)) 835 tp = tcp_drop(tp, ECONNABORTED); 836 else 837 error = ESRCH; 838 splx(s); 839 return (error); 840 } 841 842 if (inp == NULL) { 843 ++tcpstat.tcps_pcbhashmiss; 844 switch (tir.faddr.ss_family) { 845 #ifdef INET6 846 case AF_INET6: 847 inp = in6_pcblookup_listen(&tcbtable, 848 &l6, lin6->sin6_port, 0, NULL, tir.rdomain); 849 break; 850 #endif 851 case AF_INET: 852 inp = in_pcblookup_listen(&tcbtable, 853 lin->sin_addr, lin->sin_port, 0, NULL, tir.rdomain); 854 break; 855 } 856 } 857 858 if (inp != NULL && (inp->inp_socket->so_state & SS_CONNECTOUT)) { 859 tir.ruid = inp->inp_socket->so_ruid; 860 tir.euid = inp->inp_socket->so_euid; 861 } else { 862 tir.ruid = -1; 863 tir.euid = -1; 864 } 865 splx(s); 866 867 *oldlenp = sizeof (tir); 868 error = copyout((void *)&tir, oldp, sizeof (tir)); 869 return (error); 870 } 871 872 /* 873 * Sysctl for tcp variables. 874 */ 875 int 876 tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen) 877 int *name; 878 u_int namelen; 879 void *oldp; 880 size_t *oldlenp; 881 void *newp; 882 size_t newlen; 883 { 884 int error, nval; 885 886 /* All sysctl names at this level are terminal. */ 887 if (namelen != 1) 888 return (ENOTDIR); 889 890 switch (name[0]) { 891 #ifdef TCP_SACK 892 case TCPCTL_SACK: 893 return (sysctl_int(oldp, oldlenp, newp, newlen, 894 &tcp_do_sack)); 895 #endif 896 case TCPCTL_SLOWHZ: 897 return (sysctl_rdint(oldp, oldlenp, newp, PR_SLOWHZ)); 898 899 case TCPCTL_BADDYNAMIC: 900 return (sysctl_struct(oldp, oldlenp, newp, newlen, 901 baddynamicports.tcp, sizeof(baddynamicports.tcp))); 902 903 case TCPCTL_IDENT: 904 return (tcp_ident(oldp, oldlenp, newp, newlen, 0)); 905 906 case TCPCTL_DROP: 907 return (tcp_ident(oldp, oldlenp, newp, newlen, 1)); 908 909 case TCPCTL_ALWAYS_KEEPALIVE: 910 return (sysctl_int(oldp, oldlenp, newp, newlen, 911 &tcp_always_keepalive)); 912 913 #ifdef TCP_ECN 914 case TCPCTL_ECN: 915 return (sysctl_int(oldp, oldlenp, newp, newlen, 916 &tcp_do_ecn)); 917 #endif 918 case TCPCTL_REASS_LIMIT: 919 nval = tcp_reass_limit; 920 error = sysctl_int(oldp, oldlenp, newp, newlen, &nval); 921 if (error) 922 return (error); 923 if (nval != tcp_reass_limit) { 924 error = pool_sethardlimit(&tcpqe_pool, nval, NULL, 0); 925 if (error) 926 return (error); 927 tcp_reass_limit = nval; 928 } 929 return (0); 930 #ifdef TCP_SACK 931 case TCPCTL_SACKHOLE_LIMIT: 932 nval = tcp_sackhole_limit; 933 error = sysctl_int(oldp, oldlenp, newp, newlen, &nval); 934 if (error) 935 return (error); 936 if (nval != tcp_sackhole_limit) { 937 error = pool_sethardlimit(&sackhl_pool, nval, NULL, 0); 938 if (error) 939 return (error); 940 tcp_sackhole_limit = nval; 941 } 942 return (0); 943 #endif 944 945 case TCPCTL_STATS: 946 if (newp != NULL) 947 return (EPERM); 948 return (sysctl_struct(oldp, oldlenp, newp, newlen, 949 &tcpstat, sizeof(tcpstat))); 950 951 default: 952 if (name[0] < TCPCTL_MAXID) 953 return (sysctl_int_arr(tcpctl_vars, name, namelen, 954 oldp, oldlenp, newp, newlen)); 955 return (ENOPROTOOPT); 956 } 957 /* NOTREACHED */ 958 } 959 960 /* 961 * Scale the send buffer so that inflight data is not accounted against 962 * the limit. The buffer will scale with the congestion window, if the 963 * the receiver stops acking data the window will shrink and therefor 964 * the buffer size will shrink as well. 965 * In low memory situation try to shrink the buffer to the initial size 966 * disabling the send buffer scaling as long as the situation persists. 967 */ 968 void 969 tcp_update_sndspace(struct tcpcb *tp) 970 { 971 struct socket *so = tp->t_inpcb->inp_socket; 972 u_long nmax; 973 974 if (sbchecklowmem()) 975 /* low on memory try to get rid of some */ 976 nmax = tcp_sendspace; 977 else if (so->so_snd.sb_wat != tcp_sendspace) 978 /* user requested buffer size, auto-scaling disabled */ 979 nmax = so->so_snd.sb_wat; 980 else 981 /* automatic buffer scaling */ 982 nmax = MIN(sb_max, so->so_snd.sb_wat + tp->snd_max - 983 tp->snd_una); 984 985 /* round to MSS boundary */ 986 nmax = roundup(nmax, tp->t_maxseg); 987 988 if (nmax != so->so_snd.sb_hiwat) 989 sbreserve(&so->so_snd, nmax); 990 } 991 992 /* 993 * Scale the recv buffer by looking at how much data was transferred in 994 * on approximated RTT. If more then a big part of the recv buffer was 995 * transferred during that time we increase the buffer by a constant. 996 * In low memory situation try to shrink the buffer to the initial size. 997 */ 998 void 999 tcp_update_rcvspace(struct tcpcb *tp) 1000 { 1001 struct socket *so = tp->t_inpcb->inp_socket; 1002 u_long nmax = so->so_rcv.sb_hiwat; 1003 1004 if (sbchecklowmem()) 1005 /* low on memory try to get rid of some */ 1006 nmax = tcp_recvspace; 1007 else if (so->so_rcv.sb_wat != tcp_recvspace) 1008 /* user requested buffer size, auto-scaling disabled */ 1009 nmax = so->so_rcv.sb_wat; 1010 else { 1011 /* automatic buffer scaling */ 1012 if (tp->rfbuf_cnt > so->so_rcv.sb_hiwat / 8 * 7) 1013 nmax = MIN(sb_max, so->so_rcv.sb_hiwat + 1014 tcp_autorcvbuf_inc); 1015 } 1016 1017 if (nmax == so->so_rcv.sb_hiwat) 1018 return; 1019 1020 /* round to MSS boundary */ 1021 nmax = roundup(nmax, tp->t_maxseg); 1022 sbreserve(&so->so_rcv, nmax); 1023 } 1024