1 /* $OpenBSD: tcp_usrreq.c,v 1.110 2012/02/24 06:19:00 guenther Exp $ */ 2 /* $NetBSD: tcp_usrreq.c,v 1.20 1996/02/13 23:44:16 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 33 * 34 * NRL grants permission for redistribution and use in source and binary 35 * forms, with or without modification, of the software and documentation 36 * created at NRL provided that the following conditions are met: 37 * 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgements: 45 * This product includes software developed by the University of 46 * California, Berkeley and its contributors. 47 * This product includes software developed at the Information 48 * Technology Division, US Naval Research Laboratory. 49 * 4. Neither the name of the NRL nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 64 * 65 * The views and conclusions contained in the software and documentation 66 * are those of the authors and should not be interpreted as representing 67 * official policies, either expressed or implied, of the US Naval 68 * Research Laboratory (NRL). 69 */ 70 71 #include <sys/param.h> 72 #include <sys/systm.h> 73 #include <sys/mbuf.h> 74 #include <sys/socket.h> 75 #include <sys/socketvar.h> 76 #include <sys/protosw.h> 77 #include <sys/stat.h> 78 #include <sys/proc.h> 79 #include <sys/sysctl.h> 80 #include <sys/domain.h> 81 #include <sys/kernel.h> 82 #include <sys/pool.h> 83 84 #include <dev/rndvar.h> 85 86 #include <net/if.h> 87 #include <net/route.h> 88 89 #include <netinet/in.h> 90 #include <netinet/in_systm.h> 91 #include <netinet/in_var.h> 92 #include <netinet/ip.h> 93 #include <netinet/in_pcb.h> 94 #include <netinet/ip_var.h> 95 #include <netinet/tcp.h> 96 #include <netinet/tcp_fsm.h> 97 #include <netinet/tcp_seq.h> 98 #include <netinet/tcp_timer.h> 99 #include <netinet/tcp_var.h> 100 #include <netinet/tcpip.h> 101 #include <netinet/tcp_debug.h> 102 103 /* 104 * TCP protocol interface to socket abstraction. 105 */ 106 extern char *tcpstates[]; 107 extern int tcptv_keep_init; 108 109 extern int tcp_rst_ppslim; 110 111 /* from in_pcb.c */ 112 extern struct baddynamicports baddynamicports; 113 114 #ifndef TCP_SENDSPACE 115 #define TCP_SENDSPACE 1024*16 116 #endif 117 u_int tcp_sendspace = TCP_SENDSPACE; 118 #ifndef TCP_RECVSPACE 119 #define TCP_RECVSPACE 1024*16 120 #endif 121 u_int tcp_recvspace = TCP_RECVSPACE; 122 u_int tcp_autorcvbuf_inc = 16 * 1024; 123 124 int *tcpctl_vars[TCPCTL_MAXID] = TCPCTL_VARS; 125 126 struct inpcbtable tcbtable; 127 128 int tcp_ident(void *, size_t *, void *, size_t, int); 129 130 /* 131 * Process a TCP user request for TCP tb. If this is a send request 132 * then m is the mbuf chain of send data. If this is a timer expiration 133 * (called from the software clock routine), then timertype tells which timer. 134 */ 135 /*ARGSUSED*/ 136 int 137 tcp_usrreq(so, req, m, nam, control, p) 138 struct socket *so; 139 int req; 140 struct mbuf *m, *nam, *control; 141 struct proc *p; 142 { 143 struct sockaddr_in *sin; 144 struct inpcb *inp; 145 struct tcpcb *tp = NULL; 146 int s; 147 int error = 0; 148 short ostate; 149 150 if (req == PRU_CONTROL) { 151 #ifdef INET6 152 if (sotopf(so) == PF_INET6) 153 return in6_control(so, (u_long)m, (caddr_t)nam, 154 (struct ifnet *)control, 0); 155 else 156 #endif /* INET6 */ 157 return (in_control(so, (u_long)m, (caddr_t)nam, 158 (struct ifnet *)control)); 159 } 160 if (control && control->m_len) { 161 m_freem(control); 162 if (m) 163 m_freem(m); 164 return (EINVAL); 165 } 166 167 s = splsoftnet(); 168 inp = sotoinpcb(so); 169 /* 170 * When a TCP is attached to a socket, then there will be 171 * a (struct inpcb) pointed at by the socket, and this 172 * structure will point at a subsidiary (struct tcpcb). 173 */ 174 if (inp == 0 && req != PRU_ATTACH) { 175 error = so->so_error; 176 if (error == 0) 177 error = EINVAL; 178 splx(s); 179 /* 180 * The following corrects an mbuf leak under rare 181 * circumstances 182 */ 183 if (m && (req == PRU_SEND || req == PRU_SENDOOB)) 184 m_freem(m); 185 return (error); 186 } 187 if (inp) { 188 tp = intotcpcb(inp); 189 /* tp might get 0 when using socket splicing */ 190 if (tp == NULL) { 191 splx(s); 192 return (0); 193 } 194 #ifdef KPROF 195 tcp_acounts[tp->t_state][req]++; 196 #endif 197 ostate = tp->t_state; 198 } else 199 ostate = 0; 200 switch (req) { 201 202 /* 203 * TCP attaches to socket via PRU_ATTACH, reserving space, 204 * and an internet control block. 205 */ 206 case PRU_ATTACH: 207 if (inp) { 208 error = EISCONN; 209 break; 210 } 211 error = tcp_attach(so); 212 if (error) 213 break; 214 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 215 so->so_linger = TCP_LINGERTIME; 216 tp = sototcpcb(so); 217 break; 218 219 /* 220 * PRU_DETACH detaches the TCP protocol from the socket. 221 * If the protocol state is non-embryonic, then can't 222 * do this directly: have to initiate a PRU_DISCONNECT, 223 * which may finish later; embryonic TCB's can just 224 * be discarded here. 225 */ 226 case PRU_DETACH: 227 tp = tcp_disconnect(tp); 228 break; 229 230 /* 231 * Give the socket an address. 232 */ 233 case PRU_BIND: 234 #ifdef INET6 235 if (inp->inp_flags & INP_IPV6) 236 error = in6_pcbbind(inp, nam, p); 237 else 238 #endif 239 error = in_pcbbind(inp, nam, p); 240 if (error) 241 break; 242 break; 243 244 /* 245 * Prepare to accept connections. 246 */ 247 case PRU_LISTEN: 248 if (inp->inp_lport == 0) { 249 #ifdef INET6 250 if (inp->inp_flags & INP_IPV6) 251 error = in6_pcbbind(inp, NULL, p); 252 else 253 #endif 254 error = in_pcbbind(inp, NULL, p); 255 } 256 /* If the in_pcbbind() above is called, the tp->pf 257 should still be whatever it was before. */ 258 if (error == 0) 259 tp->t_state = TCPS_LISTEN; 260 break; 261 262 /* 263 * Initiate connection to peer. 264 * Create a template for use in transmissions on this connection. 265 * Enter SYN_SENT state, and mark socket as connecting. 266 * Start keep-alive timer, and seed output sequence space. 267 * Send initial segment on connection. 268 */ 269 case PRU_CONNECT: 270 sin = mtod(nam, struct sockaddr_in *); 271 272 #ifdef INET6 273 if (sin->sin_family == AF_INET6) { 274 struct in6_addr *in6_addr = &mtod(nam, 275 struct sockaddr_in6 *)->sin6_addr; 276 277 if (IN6_IS_ADDR_UNSPECIFIED(in6_addr) || 278 IN6_IS_ADDR_MULTICAST(in6_addr) || 279 IN6_IS_ADDR_V4MAPPED(in6_addr)) { 280 error = EINVAL; 281 break; 282 } 283 284 if (inp->inp_lport == 0) { 285 error = in6_pcbbind(inp, NULL, p); 286 if (error) 287 break; 288 } 289 error = in6_pcbconnect(inp, nam); 290 } else if (sin->sin_family == AF_INET) 291 #endif /* INET6 */ 292 { 293 if ((sin->sin_addr.s_addr == INADDR_ANY) || 294 IN_MULTICAST(sin->sin_addr.s_addr) || 295 in_broadcast(sin->sin_addr, NULL, 296 inp->inp_rtableid)) { 297 error = EINVAL; 298 break; 299 } 300 301 if (inp->inp_lport == 0) { 302 error = in_pcbbind(inp, NULL, p); 303 if (error) 304 break; 305 } 306 error = in_pcbconnect(inp, nam); 307 } 308 309 if (error) 310 break; 311 312 tp->t_template = tcp_template(tp); 313 if (tp->t_template == 0) { 314 in_pcbdisconnect(inp); 315 error = ENOBUFS; 316 break; 317 } 318 319 so->so_state |= SS_CONNECTOUT; 320 321 /* Compute window scaling to request. */ 322 tcp_rscale(tp, sb_max); 323 324 soisconnecting(so); 325 tcpstat.tcps_connattempt++; 326 tp->t_state = TCPS_SYN_SENT; 327 TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init); 328 tcp_set_iss_tsm(tp); 329 tcp_sendseqinit(tp); 330 #if defined(TCP_SACK) 331 tp->snd_last = tp->snd_una; 332 #endif 333 #if defined(TCP_SACK) && defined(TCP_FACK) 334 tp->snd_fack = tp->snd_una; 335 tp->retran_data = 0; 336 tp->snd_awnd = 0; 337 #endif 338 error = tcp_output(tp); 339 break; 340 341 /* 342 * Create a TCP connection between two sockets. 343 */ 344 case PRU_CONNECT2: 345 error = EOPNOTSUPP; 346 break; 347 348 /* 349 * Initiate disconnect from peer. 350 * If connection never passed embryonic stage, just drop; 351 * else if don't need to let data drain, then can just drop anyways, 352 * else have to begin TCP shutdown process: mark socket disconnecting, 353 * drain unread data, state switch to reflect user close, and 354 * send segment (e.g. FIN) to peer. Socket will be really disconnected 355 * when peer sends FIN and acks ours. 356 * 357 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 358 */ 359 case PRU_DISCONNECT: 360 tp = tcp_disconnect(tp); 361 break; 362 363 /* 364 * Accept a connection. Essentially all the work is 365 * done at higher levels; just return the address 366 * of the peer, storing through addr. 367 */ 368 case PRU_ACCEPT: 369 #ifdef INET6 370 if (inp->inp_flags & INP_IPV6) 371 in6_setpeeraddr(inp, nam); 372 else 373 #endif 374 in_setpeeraddr(inp, nam); 375 break; 376 377 /* 378 * Mark the connection as being incapable of further output. 379 */ 380 case PRU_SHUTDOWN: 381 if (so->so_state & SS_CANTSENDMORE) 382 break; 383 socantsendmore(so); 384 tp = tcp_usrclosed(tp); 385 if (tp) 386 error = tcp_output(tp); 387 break; 388 389 /* 390 * After a receive, possibly send window update to peer. 391 */ 392 case PRU_RCVD: 393 /* 394 * soreceive() calls this function when a user receives 395 * ancillary data on a listening socket. We don't call 396 * tcp_output in such a case, since there is no header 397 * template for a listening socket and hence the kernel 398 * will panic. 399 */ 400 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0) 401 (void) tcp_output(tp); 402 break; 403 404 /* 405 * Do a send by putting data in output queue and updating urgent 406 * marker if URG set. Possibly send more data. 407 */ 408 case PRU_SEND: 409 sbappendstream(&so->so_snd, m); 410 error = tcp_output(tp); 411 break; 412 413 /* 414 * Abort the TCP. 415 */ 416 case PRU_ABORT: 417 tp = tcp_drop(tp, ECONNABORTED); 418 break; 419 420 case PRU_SENSE: 421 ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; 422 splx(s); 423 return (0); 424 425 case PRU_RCVOOB: 426 if ((so->so_oobmark == 0 && 427 (so->so_state & SS_RCVATMARK) == 0) || 428 so->so_options & SO_OOBINLINE || 429 tp->t_oobflags & TCPOOB_HADDATA) { 430 error = EINVAL; 431 break; 432 } 433 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 434 error = EWOULDBLOCK; 435 break; 436 } 437 m->m_len = 1; 438 *mtod(m, caddr_t) = tp->t_iobc; 439 if (((long)nam & MSG_PEEK) == 0) 440 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 441 break; 442 443 case PRU_SENDOOB: 444 if (sbspace(&so->so_snd) < -512) { 445 m_freem(m); 446 error = ENOBUFS; 447 break; 448 } 449 /* 450 * According to RFC961 (Assigned Protocols), 451 * the urgent pointer points to the last octet 452 * of urgent data. We continue, however, 453 * to consider it to indicate the first octet 454 * of data past the urgent section. 455 * Otherwise, snd_up should be one lower. 456 */ 457 sbappendstream(&so->so_snd, m); 458 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 459 tp->t_force = 1; 460 error = tcp_output(tp); 461 tp->t_force = 0; 462 break; 463 464 case PRU_SOCKADDR: 465 #ifdef INET6 466 if (inp->inp_flags & INP_IPV6) 467 in6_setsockaddr(inp, nam); 468 else 469 #endif 470 in_setsockaddr(inp, nam); 471 break; 472 473 case PRU_PEERADDR: 474 #ifdef INET6 475 if (inp->inp_flags & INP_IPV6) 476 in6_setpeeraddr(inp, nam); 477 else 478 #endif 479 in_setpeeraddr(inp, nam); 480 break; 481 482 default: 483 panic("tcp_usrreq"); 484 } 485 if (tp && (so->so_options & SO_DEBUG)) 486 tcp_trace(TA_USER, ostate, tp, (caddr_t)0, req, 0); 487 splx(s); 488 return (error); 489 } 490 491 int 492 tcp_ctloutput(op, so, level, optname, mp) 493 int op; 494 struct socket *so; 495 int level, optname; 496 struct mbuf **mp; 497 { 498 int error = 0, s; 499 struct inpcb *inp; 500 struct tcpcb *tp; 501 struct mbuf *m; 502 int i; 503 504 s = splsoftnet(); 505 inp = sotoinpcb(so); 506 if (inp == NULL) { 507 splx(s); 508 if (op == PRCO_SETOPT && *mp) 509 (void) m_free(*mp); 510 return (ECONNRESET); 511 } 512 #ifdef INET6 513 tp = intotcpcb(inp); 514 #endif /* INET6 */ 515 if (level != IPPROTO_TCP) { 516 switch (so->so_proto->pr_domain->dom_family) { 517 #ifdef INET6 518 case PF_INET6: 519 error = ip6_ctloutput(op, so, level, optname, mp); 520 break; 521 #endif /* INET6 */ 522 case PF_INET: 523 error = ip_ctloutput(op, so, level, optname, mp); 524 break; 525 default: 526 error = EAFNOSUPPORT; /*?*/ 527 break; 528 } 529 splx(s); 530 return (error); 531 } 532 #ifndef INET6 533 tp = intotcpcb(inp); 534 #endif /* !INET6 */ 535 536 switch (op) { 537 538 case PRCO_SETOPT: 539 m = *mp; 540 switch (optname) { 541 542 case TCP_NODELAY: 543 if (m == NULL || m->m_len < sizeof (int)) 544 error = EINVAL; 545 else if (*mtod(m, int *)) 546 tp->t_flags |= TF_NODELAY; 547 else 548 tp->t_flags &= ~TF_NODELAY; 549 break; 550 551 case TCP_MAXSEG: 552 if (m == NULL || m->m_len < sizeof (int)) { 553 error = EINVAL; 554 break; 555 } 556 557 i = *mtod(m, int *); 558 if (i > 0 && i <= tp->t_maxseg) 559 tp->t_maxseg = i; 560 else 561 error = EINVAL; 562 break; 563 564 #ifdef TCP_SACK 565 case TCP_SACK_ENABLE: 566 if (m == NULL || m->m_len < sizeof (int)) { 567 error = EINVAL; 568 break; 569 } 570 571 if (TCPS_HAVEESTABLISHED(tp->t_state)) { 572 error = EPERM; 573 break; 574 } 575 576 if (tp->t_flags & TF_SIGNATURE) { 577 error = EPERM; 578 break; 579 } 580 581 if (*mtod(m, int *)) 582 tp->sack_enable = 1; 583 else 584 tp->sack_enable = 0; 585 break; 586 #endif 587 #ifdef TCP_SIGNATURE 588 case TCP_MD5SIG: 589 if (m == NULL || m->m_len < sizeof (int)) { 590 error = EINVAL; 591 break; 592 } 593 594 if (TCPS_HAVEESTABLISHED(tp->t_state)) { 595 error = EPERM; 596 break; 597 } 598 599 if (*mtod(m, int *)) { 600 tp->t_flags |= TF_SIGNATURE; 601 #ifdef TCP_SACK 602 tp->sack_enable = 0; 603 #endif /* TCP_SACK */ 604 } else 605 tp->t_flags &= ~TF_SIGNATURE; 606 break; 607 #endif /* TCP_SIGNATURE */ 608 default: 609 error = ENOPROTOOPT; 610 break; 611 } 612 if (m) 613 (void) m_free(m); 614 break; 615 616 case PRCO_GETOPT: 617 *mp = m = m_get(M_WAIT, MT_SOOPTS); 618 m->m_len = sizeof(int); 619 620 switch (optname) { 621 case TCP_NODELAY: 622 *mtod(m, int *) = tp->t_flags & TF_NODELAY; 623 break; 624 case TCP_MAXSEG: 625 *mtod(m, int *) = tp->t_maxseg; 626 break; 627 #ifdef TCP_SACK 628 case TCP_SACK_ENABLE: 629 *mtod(m, int *) = tp->sack_enable; 630 break; 631 #endif 632 #ifdef TCP_SIGNATURE 633 case TCP_MD5SIG: 634 *mtod(m, int *) = tp->t_flags & TF_SIGNATURE; 635 break; 636 #endif 637 default: 638 error = ENOPROTOOPT; 639 break; 640 } 641 break; 642 } 643 splx(s); 644 return (error); 645 } 646 647 /* 648 * Attach TCP protocol to socket, allocating 649 * internet protocol control block, tcp control block, 650 * bufer space, and entering LISTEN state if to accept connections. 651 */ 652 int 653 tcp_attach(so) 654 struct socket *so; 655 { 656 struct tcpcb *tp; 657 struct inpcb *inp; 658 int error; 659 660 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0 || 661 sbcheckreserve(so->so_snd.sb_wat, tcp_sendspace) || 662 sbcheckreserve(so->so_rcv.sb_wat, tcp_recvspace)) { 663 error = soreserve(so, tcp_sendspace, tcp_recvspace); 664 if (error) 665 return (error); 666 } 667 668 error = in_pcballoc(so, &tcbtable); 669 if (error) 670 return (error); 671 inp = sotoinpcb(so); 672 tp = tcp_newtcpcb(inp); 673 if (tp == NULL) { 674 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 675 676 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 677 in_pcbdetach(inp); 678 so->so_state |= nofd; 679 return (ENOBUFS); 680 } 681 tp->t_state = TCPS_CLOSED; 682 #ifdef INET6 683 /* we disallow IPv4 mapped address completely. */ 684 if (inp->inp_flags & INP_IPV6) 685 tp->pf = PF_INET6; 686 else 687 tp->pf = PF_INET; 688 #else 689 tp->pf = PF_INET; 690 #endif 691 return (0); 692 } 693 694 /* 695 * Initiate (or continue) disconnect. 696 * If embryonic state, just send reset (once). 697 * If in ``let data drain'' option and linger null, just drop. 698 * Otherwise (hard), mark socket disconnecting and drop 699 * current input data; switch states based on user close, and 700 * send segment to peer (with FIN). 701 */ 702 struct tcpcb * 703 tcp_disconnect(tp) 704 struct tcpcb *tp; 705 { 706 struct socket *so = tp->t_inpcb->inp_socket; 707 708 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 709 tp = tcp_close(tp); 710 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 711 tp = tcp_drop(tp, 0); 712 else { 713 soisdisconnecting(so); 714 sbflush(&so->so_rcv); 715 tp = tcp_usrclosed(tp); 716 if (tp) 717 (void) tcp_output(tp); 718 } 719 return (tp); 720 } 721 722 /* 723 * User issued close, and wish to trail through shutdown states: 724 * if never received SYN, just forget it. If got a SYN from peer, 725 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 726 * If already got a FIN from peer, then almost done; go to LAST_ACK 727 * state. In all other cases, have already sent FIN to peer (e.g. 728 * after PRU_SHUTDOWN), and just have to play tedious game waiting 729 * for peer to send FIN or not respond to keep-alives, etc. 730 * We can let the user exit from the close as soon as the FIN is acked. 731 */ 732 struct tcpcb * 733 tcp_usrclosed(tp) 734 struct tcpcb *tp; 735 { 736 737 switch (tp->t_state) { 738 739 case TCPS_CLOSED: 740 case TCPS_LISTEN: 741 case TCPS_SYN_SENT: 742 tp->t_state = TCPS_CLOSED; 743 tp = tcp_close(tp); 744 break; 745 746 case TCPS_SYN_RECEIVED: 747 case TCPS_ESTABLISHED: 748 tp->t_state = TCPS_FIN_WAIT_1; 749 break; 750 751 case TCPS_CLOSE_WAIT: 752 tp->t_state = TCPS_LAST_ACK; 753 break; 754 } 755 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 756 soisdisconnected(tp->t_inpcb->inp_socket); 757 /* 758 * If we are in FIN_WAIT_2, we arrived here because the 759 * application did a shutdown of the send side. Like the 760 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after 761 * a full close, we start a timer to make sure sockets are 762 * not left in FIN_WAIT_2 forever. 763 */ 764 if (tp->t_state == TCPS_FIN_WAIT_2) 765 TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle); 766 } 767 return (tp); 768 } 769 770 /* 771 * Look up a socket for ident or tcpdrop, ... 772 */ 773 int 774 tcp_ident(void *oldp, size_t *oldlenp, void *newp, size_t newlen, int dodrop) 775 { 776 int error = 0, s; 777 struct tcp_ident_mapping tir; 778 struct inpcb *inp; 779 struct tcpcb *tp = NULL; 780 struct sockaddr_in *fin, *lin; 781 #ifdef INET6 782 struct sockaddr_in6 *fin6, *lin6; 783 struct in6_addr f6, l6; 784 #endif 785 if (dodrop) { 786 if (oldp != NULL || *oldlenp != 0) 787 return (EINVAL); 788 if (newp == NULL) 789 return (EPERM); 790 if (newlen < sizeof(tir)) 791 return (ENOMEM); 792 if ((error = copyin(newp, &tir, sizeof (tir))) != 0 ) 793 return (error); 794 } else { 795 if (oldp == NULL) 796 return (EINVAL); 797 if (*oldlenp < sizeof(tir)) 798 return (ENOMEM); 799 if (newp != NULL || newlen != 0) 800 return (EINVAL); 801 if ((error = copyin(oldp, &tir, sizeof (tir))) != 0 ) 802 return (error); 803 } 804 switch (tir.faddr.ss_family) { 805 #ifdef INET6 806 case AF_INET6: 807 fin6 = (struct sockaddr_in6 *)&tir.faddr; 808 error = in6_embedscope(&f6, fin6, NULL, NULL); 809 if (error) 810 return EINVAL; /*?*/ 811 lin6 = (struct sockaddr_in6 *)&tir.laddr; 812 error = in6_embedscope(&l6, lin6, NULL, NULL); 813 if (error) 814 return EINVAL; /*?*/ 815 break; 816 #endif 817 case AF_INET: 818 fin = (struct sockaddr_in *)&tir.faddr; 819 lin = (struct sockaddr_in *)&tir.laddr; 820 break; 821 default: 822 return (EINVAL); 823 } 824 825 s = splsoftnet(); 826 switch (tir.faddr.ss_family) { 827 #ifdef INET6 828 case AF_INET6: 829 inp = in6_pcbhashlookup(&tcbtable, &f6, 830 fin6->sin6_port, &l6, lin6->sin6_port); 831 break; 832 #endif 833 case AF_INET: 834 inp = in_pcbhashlookup(&tcbtable, fin->sin_addr, 835 fin->sin_port, lin->sin_addr, lin->sin_port , tir.rdomain); 836 break; 837 } 838 839 if (dodrop) { 840 if (inp && (tp = intotcpcb(inp)) && 841 ((inp->inp_socket->so_options & SO_ACCEPTCONN) == 0)) 842 tp = tcp_drop(tp, ECONNABORTED); 843 else 844 error = ESRCH; 845 splx(s); 846 return (error); 847 } 848 849 if (inp == NULL) { 850 ++tcpstat.tcps_pcbhashmiss; 851 switch (tir.faddr.ss_family) { 852 #ifdef INET6 853 case AF_INET6: 854 inp = in6_pcblookup_listen(&tcbtable, 855 &l6, lin6->sin6_port, 0, NULL); 856 break; 857 #endif 858 case AF_INET: 859 inp = in_pcblookup_listen(&tcbtable, 860 lin->sin_addr, lin->sin_port, 0, NULL, tir.rdomain); 861 break; 862 } 863 } 864 865 if (inp != NULL && (inp->inp_socket->so_state & SS_CONNECTOUT)) { 866 tir.ruid = inp->inp_socket->so_ruid; 867 tir.euid = inp->inp_socket->so_euid; 868 } else { 869 tir.ruid = -1; 870 tir.euid = -1; 871 } 872 splx(s); 873 874 *oldlenp = sizeof (tir); 875 error = copyout((void *)&tir, oldp, sizeof (tir)); 876 return (error); 877 } 878 879 /* 880 * Sysctl for tcp variables. 881 */ 882 int 883 tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen) 884 int *name; 885 u_int namelen; 886 void *oldp; 887 size_t *oldlenp; 888 void *newp; 889 size_t newlen; 890 { 891 int error, nval; 892 893 /* All sysctl names at this level are terminal. */ 894 if (namelen != 1) 895 return (ENOTDIR); 896 897 switch (name[0]) { 898 #ifdef TCP_SACK 899 case TCPCTL_SACK: 900 return (sysctl_int(oldp, oldlenp, newp, newlen, 901 &tcp_do_sack)); 902 #endif 903 case TCPCTL_SLOWHZ: 904 return (sysctl_rdint(oldp, oldlenp, newp, PR_SLOWHZ)); 905 906 case TCPCTL_BADDYNAMIC: 907 return (sysctl_struct(oldp, oldlenp, newp, newlen, 908 baddynamicports.tcp, sizeof(baddynamicports.tcp))); 909 910 case TCPCTL_IDENT: 911 return (tcp_ident(oldp, oldlenp, newp, newlen, 0)); 912 913 case TCPCTL_DROP: 914 return (tcp_ident(oldp, oldlenp, newp, newlen, 1)); 915 916 case TCPCTL_ALWAYS_KEEPALIVE: 917 return (sysctl_int(oldp, oldlenp, newp, newlen, 918 &tcp_always_keepalive)); 919 920 #ifdef TCP_ECN 921 case TCPCTL_ECN: 922 return (sysctl_int(oldp, oldlenp, newp, newlen, 923 &tcp_do_ecn)); 924 #endif 925 case TCPCTL_REASS_LIMIT: 926 nval = tcp_reass_limit; 927 error = sysctl_int(oldp, oldlenp, newp, newlen, &nval); 928 if (error) 929 return (error); 930 if (nval != tcp_reass_limit) { 931 error = pool_sethardlimit(&tcpqe_pool, nval, NULL, 0); 932 if (error) 933 return (error); 934 tcp_reass_limit = nval; 935 } 936 return (0); 937 #ifdef TCP_SACK 938 case TCPCTL_SACKHOLE_LIMIT: 939 nval = tcp_sackhole_limit; 940 error = sysctl_int(oldp, oldlenp, newp, newlen, &nval); 941 if (error) 942 return (error); 943 if (nval != tcp_sackhole_limit) { 944 error = pool_sethardlimit(&sackhl_pool, nval, NULL, 0); 945 if (error) 946 return (error); 947 tcp_sackhole_limit = nval; 948 } 949 return (0); 950 #endif 951 952 case TCPCTL_STATS: 953 if (newp != NULL) 954 return (EPERM); 955 return (sysctl_struct(oldp, oldlenp, newp, newlen, 956 &tcpstat, sizeof(tcpstat))); 957 958 default: 959 if (name[0] < TCPCTL_MAXID) 960 return (sysctl_int_arr(tcpctl_vars, name, namelen, 961 oldp, oldlenp, newp, newlen)); 962 return (ENOPROTOOPT); 963 } 964 /* NOTREACHED */ 965 } 966 967 /* 968 * Scale the send buffer so that inflight data is not accounted against 969 * the limit. The buffer will scale with the congestion window, if the 970 * the receiver stops acking data the window will shrink and therefor 971 * the buffer size will shrink as well. 972 * In low memory situation try to shrink the buffer to the initial size 973 * disabling the send buffer scaling as long as the situation persists. 974 */ 975 void 976 tcp_update_sndspace(struct tcpcb *tp) 977 { 978 struct socket *so = tp->t_inpcb->inp_socket; 979 u_long nmax; 980 981 if (sbchecklowmem()) 982 /* low on memory try to get rid of some */ 983 nmax = tcp_sendspace; 984 else if (so->so_snd.sb_wat != tcp_sendspace) 985 /* user requested buffer size, auto-scaling disabled */ 986 nmax = so->so_snd.sb_wat; 987 else 988 /* automatic buffer scaling */ 989 nmax = MIN(sb_max, so->so_snd.sb_wat + tp->snd_max - 990 tp->snd_una); 991 992 /* round to MSS boundary */ 993 nmax = roundup(nmax, tp->t_maxseg); 994 995 if (nmax != so->so_snd.sb_hiwat) 996 sbreserve(&so->so_snd, nmax); 997 } 998 999 /* 1000 * Scale the recv buffer by looking at how much data was transferred in 1001 * on approximated RTT. If more then a big part of the recv buffer was 1002 * transferred during that time we increase the buffer by a constant. 1003 * In low memory situation try to shrink the buffer to the initial size. 1004 */ 1005 void 1006 tcp_update_rcvspace(struct tcpcb *tp) 1007 { 1008 struct socket *so = tp->t_inpcb->inp_socket; 1009 u_long nmax = so->so_rcv.sb_hiwat; 1010 1011 if (sbchecklowmem()) 1012 /* low on memory try to get rid of some */ 1013 nmax = tcp_recvspace; 1014 else if (so->so_rcv.sb_wat != tcp_recvspace) 1015 /* user requested buffer size, auto-scaling disabled */ 1016 nmax = so->so_rcv.sb_wat; 1017 else { 1018 /* automatic buffer scaling */ 1019 if (tp->rfbuf_cnt > so->so_rcv.sb_hiwat / 8 * 7) 1020 nmax = MIN(sb_max, so->so_rcv.sb_hiwat + 1021 tcp_autorcvbuf_inc); 1022 } 1023 1024 if (nmax == so->so_rcv.sb_hiwat) 1025 return; 1026 1027 /* round to MSS boundary */ 1028 nmax = roundup(nmax, tp->t_maxseg); 1029 sbreserve(&so->so_rcv, nmax); 1030 } 1031