1 /* $OpenBSD: tcp_usrreq.c,v 1.112 2013/05/17 09:04:30 mpi Exp $ */ 2 /* $NetBSD: tcp_usrreq.c,v 1.20 1996/02/13 23:44:16 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 33 * 34 * NRL grants permission for redistribution and use in source and binary 35 * forms, with or without modification, of the software and documentation 36 * created at NRL provided that the following conditions are met: 37 * 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgements: 45 * This product includes software developed by the University of 46 * California, Berkeley and its contributors. 47 * This product includes software developed at the Information 48 * Technology Division, US Naval Research Laboratory. 49 * 4. Neither the name of the NRL nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 64 * 65 * The views and conclusions contained in the software and documentation 66 * are those of the authors and should not be interpreted as representing 67 * official policies, either expressed or implied, of the US Naval 68 * Research Laboratory (NRL). 69 */ 70 71 #include <sys/param.h> 72 #include <sys/systm.h> 73 #include <sys/mbuf.h> 74 #include <sys/socket.h> 75 #include <sys/socketvar.h> 76 #include <sys/protosw.h> 77 #include <sys/stat.h> 78 #include <sys/proc.h> 79 #include <sys/sysctl.h> 80 #include <sys/domain.h> 81 #include <sys/kernel.h> 82 #include <sys/pool.h> 83 84 #include <dev/rndvar.h> 85 86 #include <net/if.h> 87 #include <net/route.h> 88 89 #include <netinet/in.h> 90 #include <netinet/in_systm.h> 91 #include <netinet/in_var.h> 92 #include <netinet/ip.h> 93 #include <netinet/in_pcb.h> 94 #include <netinet/ip_var.h> 95 #include <netinet/tcp.h> 96 #include <netinet/tcp_fsm.h> 97 #include <netinet/tcp_seq.h> 98 #include <netinet/tcp_timer.h> 99 #include <netinet/tcp_var.h> 100 #include <netinet/tcpip.h> 101 #include <netinet/tcp_debug.h> 102 103 #ifndef TCP_SENDSPACE 104 #define TCP_SENDSPACE 1024*16 105 #endif 106 u_int tcp_sendspace = TCP_SENDSPACE; 107 #ifndef TCP_RECVSPACE 108 #define TCP_RECVSPACE 1024*16 109 #endif 110 u_int tcp_recvspace = TCP_RECVSPACE; 111 u_int tcp_autorcvbuf_inc = 16 * 1024; 112 113 int *tcpctl_vars[TCPCTL_MAXID] = TCPCTL_VARS; 114 115 struct inpcbtable tcbtable; 116 117 int tcp_ident(void *, size_t *, void *, size_t, int); 118 119 /* 120 * Process a TCP user request for TCP tb. If this is a send request 121 * then m is the mbuf chain of send data. If this is a timer expiration 122 * (called from the software clock routine), then timertype tells which timer. 123 */ 124 /*ARGSUSED*/ 125 int 126 tcp_usrreq(so, req, m, nam, control, p) 127 struct socket *so; 128 int req; 129 struct mbuf *m, *nam, *control; 130 struct proc *p; 131 { 132 struct sockaddr_in *sin; 133 struct inpcb *inp; 134 struct tcpcb *tp = NULL; 135 int s; 136 int error = 0; 137 short ostate; 138 139 if (req == PRU_CONTROL) { 140 #ifdef INET6 141 if (sotopf(so) == PF_INET6) 142 return in6_control(so, (u_long)m, (caddr_t)nam, 143 (struct ifnet *)control, 0); 144 else 145 #endif /* INET6 */ 146 return (in_control(so, (u_long)m, (caddr_t)nam, 147 (struct ifnet *)control)); 148 } 149 if (control && control->m_len) { 150 m_freem(control); 151 if (m) 152 m_freem(m); 153 return (EINVAL); 154 } 155 156 s = splsoftnet(); 157 inp = sotoinpcb(so); 158 /* 159 * When a TCP is attached to a socket, then there will be 160 * a (struct inpcb) pointed at by the socket, and this 161 * structure will point at a subsidiary (struct tcpcb). 162 */ 163 if (inp == 0 && req != PRU_ATTACH) { 164 error = so->so_error; 165 if (error == 0) 166 error = EINVAL; 167 splx(s); 168 /* 169 * The following corrects an mbuf leak under rare 170 * circumstances 171 */ 172 if (m && (req == PRU_SEND || req == PRU_SENDOOB)) 173 m_freem(m); 174 return (error); 175 } 176 if (inp) { 177 tp = intotcpcb(inp); 178 /* tp might get 0 when using socket splicing */ 179 if (tp == NULL) { 180 splx(s); 181 return (0); 182 } 183 #ifdef KPROF 184 tcp_acounts[tp->t_state][req]++; 185 #endif 186 ostate = tp->t_state; 187 } else 188 ostate = 0; 189 switch (req) { 190 191 /* 192 * TCP attaches to socket via PRU_ATTACH, reserving space, 193 * and an internet control block. 194 */ 195 case PRU_ATTACH: 196 if (inp) { 197 error = EISCONN; 198 break; 199 } 200 error = tcp_attach(so); 201 if (error) 202 break; 203 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 204 so->so_linger = TCP_LINGERTIME; 205 tp = sototcpcb(so); 206 break; 207 208 /* 209 * PRU_DETACH detaches the TCP protocol from the socket. 210 * If the protocol state is non-embryonic, then can't 211 * do this directly: have to initiate a PRU_DISCONNECT, 212 * which may finish later; embryonic TCB's can just 213 * be discarded here. 214 */ 215 case PRU_DETACH: 216 tp = tcp_disconnect(tp); 217 break; 218 219 /* 220 * Give the socket an address. 221 */ 222 case PRU_BIND: 223 #ifdef INET6 224 if (inp->inp_flags & INP_IPV6) 225 error = in6_pcbbind(inp, nam, p); 226 else 227 #endif 228 error = in_pcbbind(inp, nam, p); 229 if (error) 230 break; 231 break; 232 233 /* 234 * Prepare to accept connections. 235 */ 236 case PRU_LISTEN: 237 if (inp->inp_lport == 0) { 238 #ifdef INET6 239 if (inp->inp_flags & INP_IPV6) 240 error = in6_pcbbind(inp, NULL, p); 241 else 242 #endif 243 error = in_pcbbind(inp, NULL, p); 244 } 245 /* If the in_pcbbind() above is called, the tp->pf 246 should still be whatever it was before. */ 247 if (error == 0) 248 tp->t_state = TCPS_LISTEN; 249 break; 250 251 /* 252 * Initiate connection to peer. 253 * Create a template for use in transmissions on this connection. 254 * Enter SYN_SENT state, and mark socket as connecting. 255 * Start keep-alive timer, and seed output sequence space. 256 * Send initial segment on connection. 257 */ 258 case PRU_CONNECT: 259 sin = mtod(nam, struct sockaddr_in *); 260 261 #ifdef INET6 262 if (sin->sin_family == AF_INET6) { 263 struct in6_addr *in6_addr = &mtod(nam, 264 struct sockaddr_in6 *)->sin6_addr; 265 266 if (IN6_IS_ADDR_UNSPECIFIED(in6_addr) || 267 IN6_IS_ADDR_MULTICAST(in6_addr) || 268 IN6_IS_ADDR_V4MAPPED(in6_addr)) { 269 error = EINVAL; 270 break; 271 } 272 273 if (inp->inp_lport == 0) { 274 error = in6_pcbbind(inp, NULL, p); 275 if (error) 276 break; 277 } 278 error = in6_pcbconnect(inp, nam); 279 } else if (sin->sin_family == AF_INET) 280 #endif /* INET6 */ 281 { 282 if ((sin->sin_addr.s_addr == INADDR_ANY) || 283 IN_MULTICAST(sin->sin_addr.s_addr) || 284 in_broadcast(sin->sin_addr, NULL, 285 inp->inp_rtableid)) { 286 error = EINVAL; 287 break; 288 } 289 290 if (inp->inp_lport == 0) { 291 error = in_pcbbind(inp, NULL, p); 292 if (error) 293 break; 294 } 295 error = in_pcbconnect(inp, nam); 296 } 297 298 if (error) 299 break; 300 301 tp->t_template = tcp_template(tp); 302 if (tp->t_template == 0) { 303 in_pcbdisconnect(inp); 304 error = ENOBUFS; 305 break; 306 } 307 308 so->so_state |= SS_CONNECTOUT; 309 310 /* Compute window scaling to request. */ 311 tcp_rscale(tp, sb_max); 312 313 soisconnecting(so); 314 tcpstat.tcps_connattempt++; 315 tp->t_state = TCPS_SYN_SENT; 316 TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init); 317 tcp_set_iss_tsm(tp); 318 tcp_sendseqinit(tp); 319 #if defined(TCP_SACK) 320 tp->snd_last = tp->snd_una; 321 #endif 322 #if defined(TCP_SACK) && defined(TCP_FACK) 323 tp->snd_fack = tp->snd_una; 324 tp->retran_data = 0; 325 tp->snd_awnd = 0; 326 #endif 327 error = tcp_output(tp); 328 break; 329 330 /* 331 * Create a TCP connection between two sockets. 332 */ 333 case PRU_CONNECT2: 334 error = EOPNOTSUPP; 335 break; 336 337 /* 338 * Initiate disconnect from peer. 339 * If connection never passed embryonic stage, just drop; 340 * else if don't need to let data drain, then can just drop anyways, 341 * else have to begin TCP shutdown process: mark socket disconnecting, 342 * drain unread data, state switch to reflect user close, and 343 * send segment (e.g. FIN) to peer. Socket will be really disconnected 344 * when peer sends FIN and acks ours. 345 * 346 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 347 */ 348 case PRU_DISCONNECT: 349 tp = tcp_disconnect(tp); 350 break; 351 352 /* 353 * Accept a connection. Essentially all the work is 354 * done at higher levels; just return the address 355 * of the peer, storing through addr. 356 */ 357 case PRU_ACCEPT: 358 #ifdef INET6 359 if (inp->inp_flags & INP_IPV6) 360 in6_setpeeraddr(inp, nam); 361 else 362 #endif 363 in_setpeeraddr(inp, nam); 364 break; 365 366 /* 367 * Mark the connection as being incapable of further output. 368 */ 369 case PRU_SHUTDOWN: 370 if (so->so_state & SS_CANTSENDMORE) 371 break; 372 socantsendmore(so); 373 tp = tcp_usrclosed(tp); 374 if (tp) 375 error = tcp_output(tp); 376 break; 377 378 /* 379 * After a receive, possibly send window update to peer. 380 */ 381 case PRU_RCVD: 382 /* 383 * soreceive() calls this function when a user receives 384 * ancillary data on a listening socket. We don't call 385 * tcp_output in such a case, since there is no header 386 * template for a listening socket and hence the kernel 387 * will panic. 388 */ 389 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0) 390 (void) tcp_output(tp); 391 break; 392 393 /* 394 * Do a send by putting data in output queue and updating urgent 395 * marker if URG set. Possibly send more data. 396 */ 397 case PRU_SEND: 398 sbappendstream(&so->so_snd, m); 399 error = tcp_output(tp); 400 break; 401 402 /* 403 * Abort the TCP. 404 */ 405 case PRU_ABORT: 406 tp = tcp_drop(tp, ECONNABORTED); 407 break; 408 409 case PRU_SENSE: 410 ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; 411 splx(s); 412 return (0); 413 414 case PRU_RCVOOB: 415 if ((so->so_oobmark == 0 && 416 (so->so_state & SS_RCVATMARK) == 0) || 417 so->so_options & SO_OOBINLINE || 418 tp->t_oobflags & TCPOOB_HADDATA) { 419 error = EINVAL; 420 break; 421 } 422 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 423 error = EWOULDBLOCK; 424 break; 425 } 426 m->m_len = 1; 427 *mtod(m, caddr_t) = tp->t_iobc; 428 if (((long)nam & MSG_PEEK) == 0) 429 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 430 break; 431 432 case PRU_SENDOOB: 433 if (sbspace(&so->so_snd) < -512) { 434 m_freem(m); 435 error = ENOBUFS; 436 break; 437 } 438 /* 439 * According to RFC961 (Assigned Protocols), 440 * the urgent pointer points to the last octet 441 * of urgent data. We continue, however, 442 * to consider it to indicate the first octet 443 * of data past the urgent section. 444 * Otherwise, snd_up should be one lower. 445 */ 446 sbappendstream(&so->so_snd, m); 447 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 448 tp->t_force = 1; 449 error = tcp_output(tp); 450 tp->t_force = 0; 451 break; 452 453 case PRU_SOCKADDR: 454 #ifdef INET6 455 if (inp->inp_flags & INP_IPV6) 456 in6_setsockaddr(inp, nam); 457 else 458 #endif 459 in_setsockaddr(inp, nam); 460 break; 461 462 case PRU_PEERADDR: 463 #ifdef INET6 464 if (inp->inp_flags & INP_IPV6) 465 in6_setpeeraddr(inp, nam); 466 else 467 #endif 468 in_setpeeraddr(inp, nam); 469 break; 470 471 default: 472 panic("tcp_usrreq"); 473 } 474 if (tp && (so->so_options & SO_DEBUG)) 475 tcp_trace(TA_USER, ostate, tp, (caddr_t)0, req, 0); 476 splx(s); 477 return (error); 478 } 479 480 int 481 tcp_ctloutput(op, so, level, optname, mp) 482 int op; 483 struct socket *so; 484 int level, optname; 485 struct mbuf **mp; 486 { 487 int error = 0, s; 488 struct inpcb *inp; 489 struct tcpcb *tp; 490 struct mbuf *m; 491 int i; 492 493 s = splsoftnet(); 494 inp = sotoinpcb(so); 495 if (inp == NULL) { 496 splx(s); 497 if (op == PRCO_SETOPT && *mp) 498 (void) m_free(*mp); 499 return (ECONNRESET); 500 } 501 #ifdef INET6 502 tp = intotcpcb(inp); 503 #endif /* INET6 */ 504 if (level != IPPROTO_TCP) { 505 switch (so->so_proto->pr_domain->dom_family) { 506 #ifdef INET6 507 case PF_INET6: 508 error = ip6_ctloutput(op, so, level, optname, mp); 509 break; 510 #endif /* INET6 */ 511 case PF_INET: 512 error = ip_ctloutput(op, so, level, optname, mp); 513 break; 514 default: 515 error = EAFNOSUPPORT; /*?*/ 516 break; 517 } 518 splx(s); 519 return (error); 520 } 521 #ifndef INET6 522 tp = intotcpcb(inp); 523 #endif /* !INET6 */ 524 525 switch (op) { 526 527 case PRCO_SETOPT: 528 m = *mp; 529 switch (optname) { 530 531 case TCP_NODELAY: 532 if (m == NULL || m->m_len < sizeof (int)) 533 error = EINVAL; 534 else if (*mtod(m, int *)) 535 tp->t_flags |= TF_NODELAY; 536 else 537 tp->t_flags &= ~TF_NODELAY; 538 break; 539 540 case TCP_MAXSEG: 541 if (m == NULL || m->m_len < sizeof (int)) { 542 error = EINVAL; 543 break; 544 } 545 546 i = *mtod(m, int *); 547 if (i > 0 && i <= tp->t_maxseg) 548 tp->t_maxseg = i; 549 else 550 error = EINVAL; 551 break; 552 553 #ifdef TCP_SACK 554 case TCP_SACK_ENABLE: 555 if (m == NULL || m->m_len < sizeof (int)) { 556 error = EINVAL; 557 break; 558 } 559 560 if (TCPS_HAVEESTABLISHED(tp->t_state)) { 561 error = EPERM; 562 break; 563 } 564 565 if (tp->t_flags & TF_SIGNATURE) { 566 error = EPERM; 567 break; 568 } 569 570 if (*mtod(m, int *)) 571 tp->sack_enable = 1; 572 else 573 tp->sack_enable = 0; 574 break; 575 #endif 576 #ifdef TCP_SIGNATURE 577 case TCP_MD5SIG: 578 if (m == NULL || m->m_len < sizeof (int)) { 579 error = EINVAL; 580 break; 581 } 582 583 if (TCPS_HAVEESTABLISHED(tp->t_state)) { 584 error = EPERM; 585 break; 586 } 587 588 if (*mtod(m, int *)) { 589 tp->t_flags |= TF_SIGNATURE; 590 #ifdef TCP_SACK 591 tp->sack_enable = 0; 592 #endif /* TCP_SACK */ 593 } else 594 tp->t_flags &= ~TF_SIGNATURE; 595 break; 596 #endif /* TCP_SIGNATURE */ 597 default: 598 error = ENOPROTOOPT; 599 break; 600 } 601 if (m) 602 (void) m_free(m); 603 break; 604 605 case PRCO_GETOPT: 606 *mp = m = m_get(M_WAIT, MT_SOOPTS); 607 m->m_len = sizeof(int); 608 609 switch (optname) { 610 case TCP_NODELAY: 611 *mtod(m, int *) = tp->t_flags & TF_NODELAY; 612 break; 613 case TCP_MAXSEG: 614 *mtod(m, int *) = tp->t_maxseg; 615 break; 616 #ifdef TCP_SACK 617 case TCP_SACK_ENABLE: 618 *mtod(m, int *) = tp->sack_enable; 619 break; 620 #endif 621 #ifdef TCP_SIGNATURE 622 case TCP_MD5SIG: 623 *mtod(m, int *) = tp->t_flags & TF_SIGNATURE; 624 break; 625 #endif 626 default: 627 error = ENOPROTOOPT; 628 break; 629 } 630 break; 631 } 632 splx(s); 633 return (error); 634 } 635 636 /* 637 * Attach TCP protocol to socket, allocating 638 * internet protocol control block, tcp control block, 639 * bufer space, and entering LISTEN state if to accept connections. 640 */ 641 int 642 tcp_attach(so) 643 struct socket *so; 644 { 645 struct tcpcb *tp; 646 struct inpcb *inp; 647 int error; 648 649 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0 || 650 sbcheckreserve(so->so_snd.sb_wat, tcp_sendspace) || 651 sbcheckreserve(so->so_rcv.sb_wat, tcp_recvspace)) { 652 error = soreserve(so, tcp_sendspace, tcp_recvspace); 653 if (error) 654 return (error); 655 } 656 657 error = in_pcballoc(so, &tcbtable); 658 if (error) 659 return (error); 660 inp = sotoinpcb(so); 661 tp = tcp_newtcpcb(inp); 662 if (tp == NULL) { 663 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 664 665 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 666 in_pcbdetach(inp); 667 so->so_state |= nofd; 668 return (ENOBUFS); 669 } 670 tp->t_state = TCPS_CLOSED; 671 #ifdef INET6 672 /* we disallow IPv4 mapped address completely. */ 673 if (inp->inp_flags & INP_IPV6) 674 tp->pf = PF_INET6; 675 else 676 tp->pf = PF_INET; 677 #else 678 tp->pf = PF_INET; 679 #endif 680 return (0); 681 } 682 683 /* 684 * Initiate (or continue) disconnect. 685 * If embryonic state, just send reset (once). 686 * If in ``let data drain'' option and linger null, just drop. 687 * Otherwise (hard), mark socket disconnecting and drop 688 * current input data; switch states based on user close, and 689 * send segment to peer (with FIN). 690 */ 691 struct tcpcb * 692 tcp_disconnect(tp) 693 struct tcpcb *tp; 694 { 695 struct socket *so = tp->t_inpcb->inp_socket; 696 697 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 698 tp = tcp_close(tp); 699 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 700 tp = tcp_drop(tp, 0); 701 else { 702 soisdisconnecting(so); 703 sbflush(&so->so_rcv); 704 tp = tcp_usrclosed(tp); 705 if (tp) 706 (void) tcp_output(tp); 707 } 708 return (tp); 709 } 710 711 /* 712 * User issued close, and wish to trail through shutdown states: 713 * if never received SYN, just forget it. If got a SYN from peer, 714 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 715 * If already got a FIN from peer, then almost done; go to LAST_ACK 716 * state. In all other cases, have already sent FIN to peer (e.g. 717 * after PRU_SHUTDOWN), and just have to play tedious game waiting 718 * for peer to send FIN or not respond to keep-alives, etc. 719 * We can let the user exit from the close as soon as the FIN is acked. 720 */ 721 struct tcpcb * 722 tcp_usrclosed(tp) 723 struct tcpcb *tp; 724 { 725 726 switch (tp->t_state) { 727 728 case TCPS_CLOSED: 729 case TCPS_LISTEN: 730 case TCPS_SYN_SENT: 731 tp->t_state = TCPS_CLOSED; 732 tp = tcp_close(tp); 733 break; 734 735 case TCPS_SYN_RECEIVED: 736 case TCPS_ESTABLISHED: 737 tp->t_state = TCPS_FIN_WAIT_1; 738 break; 739 740 case TCPS_CLOSE_WAIT: 741 tp->t_state = TCPS_LAST_ACK; 742 break; 743 } 744 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 745 soisdisconnected(tp->t_inpcb->inp_socket); 746 /* 747 * If we are in FIN_WAIT_2, we arrived here because the 748 * application did a shutdown of the send side. Like the 749 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after 750 * a full close, we start a timer to make sure sockets are 751 * not left in FIN_WAIT_2 forever. 752 */ 753 if (tp->t_state == TCPS_FIN_WAIT_2) 754 TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle); 755 } 756 return (tp); 757 } 758 759 /* 760 * Look up a socket for ident or tcpdrop, ... 761 */ 762 int 763 tcp_ident(void *oldp, size_t *oldlenp, void *newp, size_t newlen, int dodrop) 764 { 765 int error = 0, s; 766 struct tcp_ident_mapping tir; 767 struct inpcb *inp; 768 struct tcpcb *tp = NULL; 769 struct sockaddr_in *fin, *lin; 770 #ifdef INET6 771 struct sockaddr_in6 *fin6, *lin6; 772 struct in6_addr f6, l6; 773 #endif 774 if (dodrop) { 775 if (oldp != NULL || *oldlenp != 0) 776 return (EINVAL); 777 if (newp == NULL) 778 return (EPERM); 779 if (newlen < sizeof(tir)) 780 return (ENOMEM); 781 if ((error = copyin(newp, &tir, sizeof (tir))) != 0 ) 782 return (error); 783 } else { 784 if (oldp == NULL) 785 return (EINVAL); 786 if (*oldlenp < sizeof(tir)) 787 return (ENOMEM); 788 if (newp != NULL || newlen != 0) 789 return (EINVAL); 790 if ((error = copyin(oldp, &tir, sizeof (tir))) != 0 ) 791 return (error); 792 } 793 switch (tir.faddr.ss_family) { 794 #ifdef INET6 795 case AF_INET6: 796 fin6 = (struct sockaddr_in6 *)&tir.faddr; 797 error = in6_embedscope(&f6, fin6, NULL, NULL); 798 if (error) 799 return EINVAL; /*?*/ 800 lin6 = (struct sockaddr_in6 *)&tir.laddr; 801 error = in6_embedscope(&l6, lin6, NULL, NULL); 802 if (error) 803 return EINVAL; /*?*/ 804 break; 805 #endif 806 case AF_INET: 807 fin = (struct sockaddr_in *)&tir.faddr; 808 lin = (struct sockaddr_in *)&tir.laddr; 809 break; 810 default: 811 return (EINVAL); 812 } 813 814 s = splsoftnet(); 815 switch (tir.faddr.ss_family) { 816 #ifdef INET6 817 case AF_INET6: 818 inp = in6_pcbhashlookup(&tcbtable, &f6, 819 fin6->sin6_port, &l6, lin6->sin6_port); 820 break; 821 #endif 822 case AF_INET: 823 inp = in_pcbhashlookup(&tcbtable, fin->sin_addr, 824 fin->sin_port, lin->sin_addr, lin->sin_port , tir.rdomain); 825 break; 826 } 827 828 if (dodrop) { 829 if (inp && (tp = intotcpcb(inp)) && 830 ((inp->inp_socket->so_options & SO_ACCEPTCONN) == 0)) 831 tp = tcp_drop(tp, ECONNABORTED); 832 else 833 error = ESRCH; 834 splx(s); 835 return (error); 836 } 837 838 if (inp == NULL) { 839 ++tcpstat.tcps_pcbhashmiss; 840 switch (tir.faddr.ss_family) { 841 #ifdef INET6 842 case AF_INET6: 843 inp = in6_pcblookup_listen(&tcbtable, 844 &l6, lin6->sin6_port, 0, NULL); 845 break; 846 #endif 847 case AF_INET: 848 inp = in_pcblookup_listen(&tcbtable, 849 lin->sin_addr, lin->sin_port, 0, NULL, tir.rdomain); 850 break; 851 } 852 } 853 854 if (inp != NULL && (inp->inp_socket->so_state & SS_CONNECTOUT)) { 855 tir.ruid = inp->inp_socket->so_ruid; 856 tir.euid = inp->inp_socket->so_euid; 857 } else { 858 tir.ruid = -1; 859 tir.euid = -1; 860 } 861 splx(s); 862 863 *oldlenp = sizeof (tir); 864 error = copyout((void *)&tir, oldp, sizeof (tir)); 865 return (error); 866 } 867 868 /* 869 * Sysctl for tcp variables. 870 */ 871 int 872 tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen) 873 int *name; 874 u_int namelen; 875 void *oldp; 876 size_t *oldlenp; 877 void *newp; 878 size_t newlen; 879 { 880 int error, nval; 881 882 /* All sysctl names at this level are terminal. */ 883 if (namelen != 1) 884 return (ENOTDIR); 885 886 switch (name[0]) { 887 #ifdef TCP_SACK 888 case TCPCTL_SACK: 889 return (sysctl_int(oldp, oldlenp, newp, newlen, 890 &tcp_do_sack)); 891 #endif 892 case TCPCTL_SLOWHZ: 893 return (sysctl_rdint(oldp, oldlenp, newp, PR_SLOWHZ)); 894 895 case TCPCTL_BADDYNAMIC: 896 return (sysctl_struct(oldp, oldlenp, newp, newlen, 897 baddynamicports.tcp, sizeof(baddynamicports.tcp))); 898 899 case TCPCTL_IDENT: 900 return (tcp_ident(oldp, oldlenp, newp, newlen, 0)); 901 902 case TCPCTL_DROP: 903 return (tcp_ident(oldp, oldlenp, newp, newlen, 1)); 904 905 case TCPCTL_ALWAYS_KEEPALIVE: 906 return (sysctl_int(oldp, oldlenp, newp, newlen, 907 &tcp_always_keepalive)); 908 909 #ifdef TCP_ECN 910 case TCPCTL_ECN: 911 return (sysctl_int(oldp, oldlenp, newp, newlen, 912 &tcp_do_ecn)); 913 #endif 914 case TCPCTL_REASS_LIMIT: 915 nval = tcp_reass_limit; 916 error = sysctl_int(oldp, oldlenp, newp, newlen, &nval); 917 if (error) 918 return (error); 919 if (nval != tcp_reass_limit) { 920 error = pool_sethardlimit(&tcpqe_pool, nval, NULL, 0); 921 if (error) 922 return (error); 923 tcp_reass_limit = nval; 924 } 925 return (0); 926 #ifdef TCP_SACK 927 case TCPCTL_SACKHOLE_LIMIT: 928 nval = tcp_sackhole_limit; 929 error = sysctl_int(oldp, oldlenp, newp, newlen, &nval); 930 if (error) 931 return (error); 932 if (nval != tcp_sackhole_limit) { 933 error = pool_sethardlimit(&sackhl_pool, nval, NULL, 0); 934 if (error) 935 return (error); 936 tcp_sackhole_limit = nval; 937 } 938 return (0); 939 #endif 940 941 case TCPCTL_STATS: 942 if (newp != NULL) 943 return (EPERM); 944 return (sysctl_struct(oldp, oldlenp, newp, newlen, 945 &tcpstat, sizeof(tcpstat))); 946 947 default: 948 if (name[0] < TCPCTL_MAXID) 949 return (sysctl_int_arr(tcpctl_vars, name, namelen, 950 oldp, oldlenp, newp, newlen)); 951 return (ENOPROTOOPT); 952 } 953 /* NOTREACHED */ 954 } 955 956 /* 957 * Scale the send buffer so that inflight data is not accounted against 958 * the limit. The buffer will scale with the congestion window, if the 959 * the receiver stops acking data the window will shrink and therefor 960 * the buffer size will shrink as well. 961 * In low memory situation try to shrink the buffer to the initial size 962 * disabling the send buffer scaling as long as the situation persists. 963 */ 964 void 965 tcp_update_sndspace(struct tcpcb *tp) 966 { 967 struct socket *so = tp->t_inpcb->inp_socket; 968 u_long nmax; 969 970 if (sbchecklowmem()) 971 /* low on memory try to get rid of some */ 972 nmax = tcp_sendspace; 973 else if (so->so_snd.sb_wat != tcp_sendspace) 974 /* user requested buffer size, auto-scaling disabled */ 975 nmax = so->so_snd.sb_wat; 976 else 977 /* automatic buffer scaling */ 978 nmax = MIN(sb_max, so->so_snd.sb_wat + tp->snd_max - 979 tp->snd_una); 980 981 /* round to MSS boundary */ 982 nmax = roundup(nmax, tp->t_maxseg); 983 984 if (nmax != so->so_snd.sb_hiwat) 985 sbreserve(&so->so_snd, nmax); 986 } 987 988 /* 989 * Scale the recv buffer by looking at how much data was transferred in 990 * on approximated RTT. If more then a big part of the recv buffer was 991 * transferred during that time we increase the buffer by a constant. 992 * In low memory situation try to shrink the buffer to the initial size. 993 */ 994 void 995 tcp_update_rcvspace(struct tcpcb *tp) 996 { 997 struct socket *so = tp->t_inpcb->inp_socket; 998 u_long nmax = so->so_rcv.sb_hiwat; 999 1000 if (sbchecklowmem()) 1001 /* low on memory try to get rid of some */ 1002 nmax = tcp_recvspace; 1003 else if (so->so_rcv.sb_wat != tcp_recvspace) 1004 /* user requested buffer size, auto-scaling disabled */ 1005 nmax = so->so_rcv.sb_wat; 1006 else { 1007 /* automatic buffer scaling */ 1008 if (tp->rfbuf_cnt > so->so_rcv.sb_hiwat / 8 * 7) 1009 nmax = MIN(sb_max, so->so_rcv.sb_hiwat + 1010 tcp_autorcvbuf_inc); 1011 } 1012 1013 if (nmax == so->so_rcv.sb_hiwat) 1014 return; 1015 1016 /* round to MSS boundary */ 1017 nmax = roundup(nmax, tp->t_maxseg); 1018 sbreserve(&so->so_rcv, nmax); 1019 } 1020