1 /* $OpenBSD: tcp_usrreq.c,v 1.123 2014/12/05 15:50:04 mpi Exp $ */ 2 /* $NetBSD: tcp_usrreq.c,v 1.20 1996/02/13 23:44:16 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 33 * 34 * NRL grants permission for redistribution and use in source and binary 35 * forms, with or without modification, of the software and documentation 36 * created at NRL provided that the following conditions are met: 37 * 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgements: 45 * This product includes software developed by the University of 46 * California, Berkeley and its contributors. 47 * This product includes software developed at the Information 48 * Technology Division, US Naval Research Laboratory. 49 * 4. Neither the name of the NRL nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 64 * 65 * The views and conclusions contained in the software and documentation 66 * are those of the authors and should not be interpreted as representing 67 * official policies, either expressed or implied, of the US Naval 68 * Research Laboratory (NRL). 69 */ 70 71 #include <sys/param.h> 72 #include <sys/systm.h> 73 #include <sys/mbuf.h> 74 #include <sys/socket.h> 75 #include <sys/socketvar.h> 76 #include <sys/protosw.h> 77 #include <sys/stat.h> 78 #include <sys/sysctl.h> 79 #include <sys/domain.h> 80 #include <sys/kernel.h> 81 #include <sys/pool.h> 82 83 #include <net/if.h> 84 #include <net/if_var.h> 85 #include <net/route.h> 86 87 #include <netinet/in.h> 88 #include <netinet/in_var.h> 89 #include <netinet/ip.h> 90 #include <netinet/in_pcb.h> 91 #include <netinet/ip_var.h> 92 #include <netinet/tcp.h> 93 #include <netinet/tcp_fsm.h> 94 #include <netinet/tcp_seq.h> 95 #include <netinet/tcp_timer.h> 96 #include <netinet/tcp_var.h> 97 #include <netinet/tcpip.h> 98 #include <netinet/tcp_debug.h> 99 100 #ifdef INET6 101 #include <netinet6/in6_var.h> 102 #endif 103 104 #ifndef TCP_SENDSPACE 105 #define TCP_SENDSPACE 1024*16 106 #endif 107 u_int tcp_sendspace = TCP_SENDSPACE; 108 #ifndef TCP_RECVSPACE 109 #define TCP_RECVSPACE 1024*16 110 #endif 111 u_int tcp_recvspace = TCP_RECVSPACE; 112 u_int tcp_autorcvbuf_inc = 16 * 1024; 113 114 int *tcpctl_vars[TCPCTL_MAXID] = TCPCTL_VARS; 115 116 struct inpcbtable tcbtable; 117 118 int tcp_ident(void *, size_t *, void *, size_t, int); 119 120 /* 121 * Process a TCP user request for TCP tb. If this is a send request 122 * then m is the mbuf chain of send data. If this is a timer expiration 123 * (called from the software clock routine), then timertype tells which timer. 124 */ 125 /*ARGSUSED*/ 126 int 127 tcp_usrreq(so, req, m, nam, control, p) 128 struct socket *so; 129 int req; 130 struct mbuf *m, *nam, *control; 131 struct proc *p; 132 { 133 struct sockaddr_in *sin; 134 struct inpcb *inp; 135 struct tcpcb *tp = NULL; 136 int s; 137 int error = 0; 138 short ostate; 139 140 if (req == PRU_CONTROL) { 141 #ifdef INET6 142 if (sotopf(so) == PF_INET6) 143 return in6_control(so, (u_long)m, (caddr_t)nam, 144 (struct ifnet *)control); 145 else 146 #endif /* INET6 */ 147 return (in_control(so, (u_long)m, (caddr_t)nam, 148 (struct ifnet *)control)); 149 } 150 if (control && control->m_len) { 151 m_freem(control); 152 if (m) 153 m_freem(m); 154 return (EINVAL); 155 } 156 157 s = splsoftnet(); 158 inp = sotoinpcb(so); 159 /* 160 * When a TCP is attached to a socket, then there will be 161 * a (struct inpcb) pointed at by the socket, and this 162 * structure will point at a subsidiary (struct tcpcb). 163 */ 164 if (inp == 0 && req != PRU_ATTACH) { 165 error = so->so_error; 166 if (error == 0) 167 error = EINVAL; 168 splx(s); 169 /* 170 * The following corrects an mbuf leak under rare 171 * circumstances 172 */ 173 if (m && (req == PRU_SEND || req == PRU_SENDOOB)) 174 m_freem(m); 175 return (error); 176 } 177 if (inp) { 178 tp = intotcpcb(inp); 179 /* tp might get 0 when using socket splicing */ 180 if (tp == NULL) { 181 splx(s); 182 return (0); 183 } 184 #ifdef KPROF 185 tcp_acounts[tp->t_state][req]++; 186 #endif 187 ostate = tp->t_state; 188 } else 189 ostate = 0; 190 switch (req) { 191 192 /* 193 * TCP attaches to socket via PRU_ATTACH, reserving space, 194 * and an internet control block. 195 */ 196 case PRU_ATTACH: 197 if (inp) { 198 error = EISCONN; 199 break; 200 } 201 error = tcp_attach(so); 202 if (error) 203 break; 204 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 205 so->so_linger = TCP_LINGERTIME; 206 tp = sototcpcb(so); 207 break; 208 209 /* 210 * PRU_DETACH detaches the TCP protocol from the socket. 211 * If the protocol state is non-embryonic, then can't 212 * do this directly: have to initiate a PRU_DISCONNECT, 213 * which may finish later; embryonic TCB's can just 214 * be discarded here. 215 */ 216 case PRU_DETACH: 217 tp = tcp_disconnect(tp); 218 break; 219 220 /* 221 * Give the socket an address. 222 */ 223 case PRU_BIND: 224 #ifdef INET6 225 if (inp->inp_flags & INP_IPV6) 226 error = in6_pcbbind(inp, nam, p); 227 else 228 #endif 229 error = in_pcbbind(inp, nam, p); 230 if (error) 231 break; 232 break; 233 234 /* 235 * Prepare to accept connections. 236 */ 237 case PRU_LISTEN: 238 if (inp->inp_lport == 0) { 239 #ifdef INET6 240 if (inp->inp_flags & INP_IPV6) 241 error = in6_pcbbind(inp, NULL, p); 242 else 243 #endif 244 error = in_pcbbind(inp, NULL, p); 245 } 246 /* If the in_pcbbind() above is called, the tp->pf 247 should still be whatever it was before. */ 248 if (error == 0) 249 tp->t_state = TCPS_LISTEN; 250 break; 251 252 /* 253 * Initiate connection to peer. 254 * Create a template for use in transmissions on this connection. 255 * Enter SYN_SENT state, and mark socket as connecting. 256 * Start keep-alive timer, and seed output sequence space. 257 * Send initial segment on connection. 258 */ 259 case PRU_CONNECT: 260 sin = mtod(nam, struct sockaddr_in *); 261 262 #ifdef INET6 263 if (sin->sin_family == AF_INET6) { 264 struct in6_addr *in6_addr = &mtod(nam, 265 struct sockaddr_in6 *)->sin6_addr; 266 267 if (IN6_IS_ADDR_UNSPECIFIED(in6_addr) || 268 IN6_IS_ADDR_MULTICAST(in6_addr) || 269 IN6_IS_ADDR_V4MAPPED(in6_addr)) { 270 error = EINVAL; 271 break; 272 } 273 274 error = in6_pcbconnect(inp, nam); 275 } else if (sin->sin_family == AF_INET) 276 #endif /* INET6 */ 277 { 278 if ((sin->sin_addr.s_addr == INADDR_ANY) || 279 (sin->sin_addr.s_addr == INADDR_BROADCAST) || 280 IN_MULTICAST(sin->sin_addr.s_addr) || 281 in_broadcast(sin->sin_addr, inp->inp_rtableid)) { 282 error = EINVAL; 283 break; 284 } 285 286 error = in_pcbconnect(inp, nam); 287 } 288 289 if (error) 290 break; 291 292 tp->t_template = tcp_template(tp); 293 if (tp->t_template == 0) { 294 in_pcbdisconnect(inp); 295 error = ENOBUFS; 296 break; 297 } 298 299 so->so_state |= SS_CONNECTOUT; 300 301 /* Compute window scaling to request. */ 302 tcp_rscale(tp, sb_max); 303 304 soisconnecting(so); 305 tcpstat.tcps_connattempt++; 306 tp->t_state = TCPS_SYN_SENT; 307 TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init); 308 tcp_set_iss_tsm(tp); 309 tcp_sendseqinit(tp); 310 #if defined(TCP_SACK) 311 tp->snd_last = tp->snd_una; 312 #endif 313 #if defined(TCP_SACK) && defined(TCP_FACK) 314 tp->snd_fack = tp->snd_una; 315 tp->retran_data = 0; 316 tp->snd_awnd = 0; 317 #endif 318 error = tcp_output(tp); 319 break; 320 321 /* 322 * Create a TCP connection between two sockets. 323 */ 324 case PRU_CONNECT2: 325 error = EOPNOTSUPP; 326 break; 327 328 /* 329 * Initiate disconnect from peer. 330 * If connection never passed embryonic stage, just drop; 331 * else if don't need to let data drain, then can just drop anyways, 332 * else have to begin TCP shutdown process: mark socket disconnecting, 333 * drain unread data, state switch to reflect user close, and 334 * send segment (e.g. FIN) to peer. Socket will be really disconnected 335 * when peer sends FIN and acks ours. 336 * 337 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 338 */ 339 case PRU_DISCONNECT: 340 tp = tcp_disconnect(tp); 341 break; 342 343 /* 344 * Accept a connection. Essentially all the work is 345 * done at higher levels; just return the address 346 * of the peer, storing through addr. 347 */ 348 case PRU_ACCEPT: 349 #ifdef INET6 350 if (inp->inp_flags & INP_IPV6) 351 in6_setpeeraddr(inp, nam); 352 else 353 #endif 354 in_setpeeraddr(inp, nam); 355 break; 356 357 /* 358 * Mark the connection as being incapable of further output. 359 */ 360 case PRU_SHUTDOWN: 361 if (so->so_state & SS_CANTSENDMORE) 362 break; 363 socantsendmore(so); 364 tp = tcp_usrclosed(tp); 365 if (tp) 366 error = tcp_output(tp); 367 break; 368 369 /* 370 * After a receive, possibly send window update to peer. 371 */ 372 case PRU_RCVD: 373 /* 374 * soreceive() calls this function when a user receives 375 * ancillary data on a listening socket. We don't call 376 * tcp_output in such a case, since there is no header 377 * template for a listening socket and hence the kernel 378 * will panic. 379 */ 380 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0) 381 (void) tcp_output(tp); 382 break; 383 384 /* 385 * Do a send by putting data in output queue and updating urgent 386 * marker if URG set. Possibly send more data. 387 */ 388 case PRU_SEND: 389 sbappendstream(&so->so_snd, m); 390 error = tcp_output(tp); 391 break; 392 393 /* 394 * Abort the TCP. 395 */ 396 case PRU_ABORT: 397 tp = tcp_drop(tp, ECONNABORTED); 398 break; 399 400 case PRU_SENSE: 401 ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; 402 splx(s); 403 return (0); 404 405 case PRU_RCVOOB: 406 if ((so->so_oobmark == 0 && 407 (so->so_state & SS_RCVATMARK) == 0) || 408 so->so_options & SO_OOBINLINE || 409 tp->t_oobflags & TCPOOB_HADDATA) { 410 error = EINVAL; 411 break; 412 } 413 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 414 error = EWOULDBLOCK; 415 break; 416 } 417 m->m_len = 1; 418 *mtod(m, caddr_t) = tp->t_iobc; 419 if (((long)nam & MSG_PEEK) == 0) 420 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 421 break; 422 423 case PRU_SENDOOB: 424 if (sbspace(&so->so_snd) < -512) { 425 m_freem(m); 426 error = ENOBUFS; 427 break; 428 } 429 /* 430 * According to RFC961 (Assigned Protocols), 431 * the urgent pointer points to the last octet 432 * of urgent data. We continue, however, 433 * to consider it to indicate the first octet 434 * of data past the urgent section. 435 * Otherwise, snd_up should be one lower. 436 */ 437 sbappendstream(&so->so_snd, m); 438 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 439 tp->t_force = 1; 440 error = tcp_output(tp); 441 tp->t_force = 0; 442 break; 443 444 case PRU_SOCKADDR: 445 #ifdef INET6 446 if (inp->inp_flags & INP_IPV6) 447 in6_setsockaddr(inp, nam); 448 else 449 #endif 450 in_setsockaddr(inp, nam); 451 break; 452 453 case PRU_PEERADDR: 454 #ifdef INET6 455 if (inp->inp_flags & INP_IPV6) 456 in6_setpeeraddr(inp, nam); 457 else 458 #endif 459 in_setpeeraddr(inp, nam); 460 break; 461 462 default: 463 panic("tcp_usrreq"); 464 } 465 if (tp && (so->so_options & SO_DEBUG)) 466 tcp_trace(TA_USER, ostate, tp, (caddr_t)0, req, 0); 467 splx(s); 468 return (error); 469 } 470 471 int 472 tcp_ctloutput(op, so, level, optname, mp) 473 int op; 474 struct socket *so; 475 int level, optname; 476 struct mbuf **mp; 477 { 478 int error = 0, s; 479 struct inpcb *inp; 480 struct tcpcb *tp; 481 struct mbuf *m; 482 int i; 483 484 s = splsoftnet(); 485 inp = sotoinpcb(so); 486 if (inp == NULL) { 487 splx(s); 488 if (op == PRCO_SETOPT && *mp) 489 (void) m_free(*mp); 490 return (ECONNRESET); 491 } 492 if (level != IPPROTO_TCP) { 493 switch (so->so_proto->pr_domain->dom_family) { 494 #ifdef INET6 495 case PF_INET6: 496 error = ip6_ctloutput(op, so, level, optname, mp); 497 break; 498 #endif /* INET6 */ 499 case PF_INET: 500 error = ip_ctloutput(op, so, level, optname, mp); 501 break; 502 default: 503 error = EAFNOSUPPORT; /*?*/ 504 break; 505 } 506 splx(s); 507 return (error); 508 } 509 tp = intotcpcb(inp); 510 511 switch (op) { 512 513 case PRCO_SETOPT: 514 m = *mp; 515 switch (optname) { 516 517 case TCP_NODELAY: 518 if (m == NULL || m->m_len < sizeof (int)) 519 error = EINVAL; 520 else if (*mtod(m, int *)) 521 tp->t_flags |= TF_NODELAY; 522 else 523 tp->t_flags &= ~TF_NODELAY; 524 break; 525 526 case TCP_NOPUSH: 527 if (m == NULL || m->m_len < sizeof (int)) 528 error = EINVAL; 529 else if (*mtod(m, int *)) 530 tp->t_flags |= TF_NOPUSH; 531 else if (tp->t_flags & TF_NOPUSH) { 532 tp->t_flags &= ~TF_NOPUSH; 533 if (TCPS_HAVEESTABLISHED(tp->t_state)) 534 error = tcp_output(tp); 535 } 536 break; 537 538 case TCP_MAXSEG: 539 if (m == NULL || m->m_len < sizeof (int)) { 540 error = EINVAL; 541 break; 542 } 543 544 i = *mtod(m, int *); 545 if (i > 0 && i <= tp->t_maxseg) 546 tp->t_maxseg = i; 547 else 548 error = EINVAL; 549 break; 550 551 #ifdef TCP_SACK 552 case TCP_SACK_ENABLE: 553 if (m == NULL || m->m_len < sizeof (int)) { 554 error = EINVAL; 555 break; 556 } 557 558 if (TCPS_HAVEESTABLISHED(tp->t_state)) { 559 error = EPERM; 560 break; 561 } 562 563 if (tp->t_flags & TF_SIGNATURE) { 564 error = EPERM; 565 break; 566 } 567 568 if (*mtod(m, int *)) 569 tp->sack_enable = 1; 570 else 571 tp->sack_enable = 0; 572 break; 573 #endif 574 #ifdef TCP_SIGNATURE 575 case TCP_MD5SIG: 576 if (m == NULL || m->m_len < sizeof (int)) { 577 error = EINVAL; 578 break; 579 } 580 581 if (TCPS_HAVEESTABLISHED(tp->t_state)) { 582 error = EPERM; 583 break; 584 } 585 586 if (*mtod(m, int *)) { 587 tp->t_flags |= TF_SIGNATURE; 588 #ifdef TCP_SACK 589 tp->sack_enable = 0; 590 #endif /* TCP_SACK */ 591 } else 592 tp->t_flags &= ~TF_SIGNATURE; 593 break; 594 #endif /* TCP_SIGNATURE */ 595 default: 596 error = ENOPROTOOPT; 597 break; 598 } 599 if (m) 600 (void) m_free(m); 601 break; 602 603 case PRCO_GETOPT: 604 *mp = m = m_get(M_WAIT, MT_SOOPTS); 605 m->m_len = sizeof(int); 606 607 switch (optname) { 608 case TCP_NODELAY: 609 *mtod(m, int *) = tp->t_flags & TF_NODELAY; 610 break; 611 case TCP_NOPUSH: 612 *mtod(m, int *) = tp->t_flags & TF_NOPUSH; 613 break; 614 case TCP_MAXSEG: 615 *mtod(m, int *) = tp->t_maxseg; 616 break; 617 #ifdef TCP_SACK 618 case TCP_SACK_ENABLE: 619 *mtod(m, int *) = tp->sack_enable; 620 break; 621 #endif 622 #ifdef TCP_SIGNATURE 623 case TCP_MD5SIG: 624 *mtod(m, int *) = tp->t_flags & TF_SIGNATURE; 625 break; 626 #endif 627 default: 628 error = ENOPROTOOPT; 629 break; 630 } 631 break; 632 } 633 splx(s); 634 return (error); 635 } 636 637 /* 638 * Attach TCP protocol to socket, allocating 639 * internet protocol control block, tcp control block, 640 * bufer space, and entering LISTEN state if to accept connections. 641 */ 642 int 643 tcp_attach(so) 644 struct socket *so; 645 { 646 struct tcpcb *tp; 647 struct inpcb *inp; 648 int error; 649 650 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0 || 651 sbcheckreserve(so->so_snd.sb_wat, tcp_sendspace) || 652 sbcheckreserve(so->so_rcv.sb_wat, tcp_recvspace)) { 653 error = soreserve(so, tcp_sendspace, tcp_recvspace); 654 if (error) 655 return (error); 656 } 657 658 error = in_pcballoc(so, &tcbtable); 659 if (error) 660 return (error); 661 inp = sotoinpcb(so); 662 tp = tcp_newtcpcb(inp); 663 if (tp == NULL) { 664 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 665 666 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 667 in_pcbdetach(inp); 668 so->so_state |= nofd; 669 return (ENOBUFS); 670 } 671 tp->t_state = TCPS_CLOSED; 672 #ifdef INET6 673 /* we disallow IPv4 mapped address completely. */ 674 if (inp->inp_flags & INP_IPV6) 675 tp->pf = PF_INET6; 676 else 677 tp->pf = PF_INET; 678 #else 679 tp->pf = PF_INET; 680 #endif 681 return (0); 682 } 683 684 /* 685 * Initiate (or continue) disconnect. 686 * If embryonic state, just send reset (once). 687 * If in ``let data drain'' option and linger null, just drop. 688 * Otherwise (hard), mark socket disconnecting and drop 689 * current input data; switch states based on user close, and 690 * send segment to peer (with FIN). 691 */ 692 struct tcpcb * 693 tcp_disconnect(tp) 694 struct tcpcb *tp; 695 { 696 struct socket *so = tp->t_inpcb->inp_socket; 697 698 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 699 tp = tcp_close(tp); 700 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 701 tp = tcp_drop(tp, 0); 702 else { 703 soisdisconnecting(so); 704 sbflush(&so->so_rcv); 705 tp = tcp_usrclosed(tp); 706 if (tp) 707 (void) tcp_output(tp); 708 } 709 return (tp); 710 } 711 712 /* 713 * User issued close, and wish to trail through shutdown states: 714 * if never received SYN, just forget it. If got a SYN from peer, 715 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 716 * If already got a FIN from peer, then almost done; go to LAST_ACK 717 * state. In all other cases, have already sent FIN to peer (e.g. 718 * after PRU_SHUTDOWN), and just have to play tedious game waiting 719 * for peer to send FIN or not respond to keep-alives, etc. 720 * We can let the user exit from the close as soon as the FIN is acked. 721 */ 722 struct tcpcb * 723 tcp_usrclosed(tp) 724 struct tcpcb *tp; 725 { 726 727 switch (tp->t_state) { 728 729 case TCPS_CLOSED: 730 case TCPS_LISTEN: 731 case TCPS_SYN_SENT: 732 tp->t_state = TCPS_CLOSED; 733 tp = tcp_close(tp); 734 break; 735 736 case TCPS_SYN_RECEIVED: 737 case TCPS_ESTABLISHED: 738 tp->t_state = TCPS_FIN_WAIT_1; 739 break; 740 741 case TCPS_CLOSE_WAIT: 742 tp->t_state = TCPS_LAST_ACK; 743 break; 744 } 745 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 746 soisdisconnected(tp->t_inpcb->inp_socket); 747 /* 748 * If we are in FIN_WAIT_2, we arrived here because the 749 * application did a shutdown of the send side. Like the 750 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after 751 * a full close, we start a timer to make sure sockets are 752 * not left in FIN_WAIT_2 forever. 753 */ 754 if (tp->t_state == TCPS_FIN_WAIT_2) 755 TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle); 756 } 757 return (tp); 758 } 759 760 /* 761 * Look up a socket for ident or tcpdrop, ... 762 */ 763 int 764 tcp_ident(void *oldp, size_t *oldlenp, void *newp, size_t newlen, int dodrop) 765 { 766 int error = 0, s; 767 struct tcp_ident_mapping tir; 768 struct inpcb *inp; 769 struct tcpcb *tp = NULL; 770 struct sockaddr_in *fin, *lin; 771 #ifdef INET6 772 struct sockaddr_in6 *fin6, *lin6; 773 struct in6_addr f6, l6; 774 #endif 775 if (dodrop) { 776 if (oldp != NULL || *oldlenp != 0) 777 return (EINVAL); 778 if (newp == NULL) 779 return (EPERM); 780 if (newlen < sizeof(tir)) 781 return (ENOMEM); 782 if ((error = copyin(newp, &tir, sizeof (tir))) != 0 ) 783 return (error); 784 } else { 785 if (oldp == NULL) 786 return (EINVAL); 787 if (*oldlenp < sizeof(tir)) 788 return (ENOMEM); 789 if (newp != NULL || newlen != 0) 790 return (EINVAL); 791 if ((error = copyin(oldp, &tir, sizeof (tir))) != 0 ) 792 return (error); 793 } 794 switch (tir.faddr.ss_family) { 795 #ifdef INET6 796 case AF_INET6: 797 fin6 = (struct sockaddr_in6 *)&tir.faddr; 798 error = in6_embedscope(&f6, fin6, NULL, NULL); 799 if (error) 800 return EINVAL; /*?*/ 801 lin6 = (struct sockaddr_in6 *)&tir.laddr; 802 error = in6_embedscope(&l6, lin6, NULL, NULL); 803 if (error) 804 return EINVAL; /*?*/ 805 break; 806 #endif 807 case AF_INET: 808 fin = (struct sockaddr_in *)&tir.faddr; 809 lin = (struct sockaddr_in *)&tir.laddr; 810 break; 811 default: 812 return (EINVAL); 813 } 814 815 s = splsoftnet(); 816 switch (tir.faddr.ss_family) { 817 #ifdef INET6 818 case AF_INET6: 819 inp = in6_pcbhashlookup(&tcbtable, &f6, 820 fin6->sin6_port, &l6, lin6->sin6_port, tir.rdomain); 821 break; 822 #endif 823 case AF_INET: 824 inp = in_pcbhashlookup(&tcbtable, fin->sin_addr, 825 fin->sin_port, lin->sin_addr, lin->sin_port, tir.rdomain); 826 break; 827 } 828 829 if (dodrop) { 830 if (inp && (tp = intotcpcb(inp)) && 831 ((inp->inp_socket->so_options & SO_ACCEPTCONN) == 0)) 832 tp = tcp_drop(tp, ECONNABORTED); 833 else 834 error = ESRCH; 835 splx(s); 836 return (error); 837 } 838 839 if (inp == NULL) { 840 ++tcpstat.tcps_pcbhashmiss; 841 switch (tir.faddr.ss_family) { 842 #ifdef INET6 843 case AF_INET6: 844 inp = in6_pcblookup_listen(&tcbtable, 845 &l6, lin6->sin6_port, 0, NULL, tir.rdomain); 846 break; 847 #endif 848 case AF_INET: 849 inp = in_pcblookup_listen(&tcbtable, 850 lin->sin_addr, lin->sin_port, 0, NULL, tir.rdomain); 851 break; 852 } 853 } 854 855 if (inp != NULL && (inp->inp_socket->so_state & SS_CONNECTOUT)) { 856 tir.ruid = inp->inp_socket->so_ruid; 857 tir.euid = inp->inp_socket->so_euid; 858 } else { 859 tir.ruid = -1; 860 tir.euid = -1; 861 } 862 splx(s); 863 864 *oldlenp = sizeof (tir); 865 error = copyout((void *)&tir, oldp, sizeof (tir)); 866 return (error); 867 } 868 869 /* 870 * Sysctl for tcp variables. 871 */ 872 int 873 tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen) 874 int *name; 875 u_int namelen; 876 void *oldp; 877 size_t *oldlenp; 878 void *newp; 879 size_t newlen; 880 { 881 int error, nval; 882 883 /* All sysctl names at this level are terminal. */ 884 if (namelen != 1) 885 return (ENOTDIR); 886 887 switch (name[0]) { 888 #ifdef TCP_SACK 889 case TCPCTL_SACK: 890 return (sysctl_int(oldp, oldlenp, newp, newlen, 891 &tcp_do_sack)); 892 #endif 893 case TCPCTL_SLOWHZ: 894 return (sysctl_rdint(oldp, oldlenp, newp, PR_SLOWHZ)); 895 896 case TCPCTL_BADDYNAMIC: 897 return (sysctl_struct(oldp, oldlenp, newp, newlen, 898 baddynamicports.tcp, sizeof(baddynamicports.tcp))); 899 900 case TCPCTL_IDENT: 901 return (tcp_ident(oldp, oldlenp, newp, newlen, 0)); 902 903 case TCPCTL_DROP: 904 return (tcp_ident(oldp, oldlenp, newp, newlen, 1)); 905 906 case TCPCTL_ALWAYS_KEEPALIVE: 907 return (sysctl_int(oldp, oldlenp, newp, newlen, 908 &tcp_always_keepalive)); 909 910 #ifdef TCP_ECN 911 case TCPCTL_ECN: 912 return (sysctl_int(oldp, oldlenp, newp, newlen, 913 &tcp_do_ecn)); 914 #endif 915 case TCPCTL_REASS_LIMIT: 916 nval = tcp_reass_limit; 917 error = sysctl_int(oldp, oldlenp, newp, newlen, &nval); 918 if (error) 919 return (error); 920 if (nval != tcp_reass_limit) { 921 error = pool_sethardlimit(&tcpqe_pool, nval, NULL, 0); 922 if (error) 923 return (error); 924 tcp_reass_limit = nval; 925 } 926 return (0); 927 #ifdef TCP_SACK 928 case TCPCTL_SACKHOLE_LIMIT: 929 nval = tcp_sackhole_limit; 930 error = sysctl_int(oldp, oldlenp, newp, newlen, &nval); 931 if (error) 932 return (error); 933 if (nval != tcp_sackhole_limit) { 934 error = pool_sethardlimit(&sackhl_pool, nval, NULL, 0); 935 if (error) 936 return (error); 937 tcp_sackhole_limit = nval; 938 } 939 return (0); 940 #endif 941 942 case TCPCTL_STATS: 943 if (newp != NULL) 944 return (EPERM); 945 return (sysctl_struct(oldp, oldlenp, newp, newlen, 946 &tcpstat, sizeof(tcpstat))); 947 948 default: 949 if (name[0] < TCPCTL_MAXID) 950 return (sysctl_int_arr(tcpctl_vars, name, namelen, 951 oldp, oldlenp, newp, newlen)); 952 return (ENOPROTOOPT); 953 } 954 /* NOTREACHED */ 955 } 956 957 /* 958 * Scale the send buffer so that inflight data is not accounted against 959 * the limit. The buffer will scale with the congestion window, if the 960 * the receiver stops acking data the window will shrink and therefor 961 * the buffer size will shrink as well. 962 * In low memory situation try to shrink the buffer to the initial size 963 * disabling the send buffer scaling as long as the situation persists. 964 */ 965 void 966 tcp_update_sndspace(struct tcpcb *tp) 967 { 968 struct socket *so = tp->t_inpcb->inp_socket; 969 u_long nmax; 970 971 if (sbchecklowmem()) 972 /* low on memory try to get rid of some */ 973 nmax = tcp_sendspace; 974 else if (so->so_snd.sb_wat != tcp_sendspace) 975 /* user requested buffer size, auto-scaling disabled */ 976 nmax = so->so_snd.sb_wat; 977 else 978 /* automatic buffer scaling */ 979 nmax = MIN(sb_max, so->so_snd.sb_wat + tp->snd_max - 980 tp->snd_una); 981 982 /* round to MSS boundary */ 983 nmax = roundup(nmax, tp->t_maxseg); 984 985 if (nmax != so->so_snd.sb_hiwat) 986 sbreserve(&so->so_snd, nmax); 987 } 988 989 /* 990 * Scale the recv buffer by looking at how much data was transferred in 991 * on approximated RTT. If more then a big part of the recv buffer was 992 * transferred during that time we increase the buffer by a constant. 993 * In low memory situation try to shrink the buffer to the initial size. 994 */ 995 void 996 tcp_update_rcvspace(struct tcpcb *tp) 997 { 998 struct socket *so = tp->t_inpcb->inp_socket; 999 u_long nmax = so->so_rcv.sb_hiwat; 1000 1001 if (sbchecklowmem()) 1002 /* low on memory try to get rid of some */ 1003 nmax = tcp_recvspace; 1004 else if (so->so_rcv.sb_wat != tcp_recvspace) 1005 /* user requested buffer size, auto-scaling disabled */ 1006 nmax = so->so_rcv.sb_wat; 1007 else { 1008 /* automatic buffer scaling */ 1009 if (tp->rfbuf_cnt > so->so_rcv.sb_hiwat / 8 * 7) 1010 nmax = MIN(sb_max, so->so_rcv.sb_hiwat + 1011 tcp_autorcvbuf_inc); 1012 } 1013 1014 if (nmax == so->so_rcv.sb_hiwat) 1015 return; 1016 1017 /* round to MSS boundary */ 1018 nmax = roundup(nmax, tp->t_maxseg); 1019 sbreserve(&so->so_rcv, nmax); 1020 } 1021