1 /* $OpenBSD: tcp_usrreq.c,v 1.101 2010/04/20 22:05:43 tedu Exp $ */ 2 /* $NetBSD: tcp_usrreq.c,v 1.20 1996/02/13 23:44:16 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 33 * 34 * NRL grants permission for redistribution and use in source and binary 35 * forms, with or without modification, of the software and documentation 36 * created at NRL provided that the following conditions are met: 37 * 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgements: 45 * This product includes software developed by the University of 46 * California, Berkeley and its contributors. 47 * This product includes software developed at the Information 48 * Technology Division, US Naval Research Laboratory. 49 * 4. Neither the name of the NRL nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 64 * 65 * The views and conclusions contained in the software and documentation 66 * are those of the authors and should not be interpreted as representing 67 * official policies, either expressed or implied, of the US Naval 68 * Research Laboratory (NRL). 69 */ 70 71 #include <sys/param.h> 72 #include <sys/systm.h> 73 #include <sys/mbuf.h> 74 #include <sys/socket.h> 75 #include <sys/socketvar.h> 76 #include <sys/protosw.h> 77 #include <sys/stat.h> 78 #include <sys/proc.h> 79 #include <sys/sysctl.h> 80 #include <sys/domain.h> 81 #include <sys/kernel.h> 82 #include <sys/pool.h> 83 84 #include <dev/rndvar.h> 85 86 #include <net/if.h> 87 #include <net/route.h> 88 89 #include <netinet/in.h> 90 #include <netinet/in_systm.h> 91 #include <netinet/in_var.h> 92 #include <netinet/ip.h> 93 #include <netinet/in_pcb.h> 94 #include <netinet/ip_var.h> 95 #include <netinet/tcp.h> 96 #include <netinet/tcp_fsm.h> 97 #include <netinet/tcp_seq.h> 98 #include <netinet/tcp_timer.h> 99 #include <netinet/tcp_var.h> 100 #include <netinet/tcpip.h> 101 #include <netinet/tcp_debug.h> 102 103 /* 104 * TCP protocol interface to socket abstraction. 105 */ 106 extern char *tcpstates[]; 107 extern int tcptv_keep_init; 108 109 extern int tcp_rst_ppslim; 110 111 /* from in_pcb.c */ 112 extern struct baddynamicports baddynamicports; 113 114 #ifndef TCP_SENDSPACE 115 #define TCP_SENDSPACE 1024*16 116 #endif 117 u_int tcp_sendspace = TCP_SENDSPACE; 118 #ifndef TCP_RECVSPACE 119 #define TCP_RECVSPACE 1024*16 120 #endif 121 u_int tcp_recvspace = TCP_RECVSPACE; 122 123 int *tcpctl_vars[TCPCTL_MAXID] = TCPCTL_VARS; 124 125 struct inpcbtable tcbtable; 126 127 int tcp_ident(void *, size_t *, void *, size_t, int); 128 129 /* 130 * Process a TCP user request for TCP tb. If this is a send request 131 * then m is the mbuf chain of send data. If this is a timer expiration 132 * (called from the software clock routine), then timertype tells which timer. 133 */ 134 /*ARGSUSED*/ 135 int 136 tcp_usrreq(so, req, m, nam, control, p) 137 struct socket *so; 138 int req; 139 struct mbuf *m, *nam, *control; 140 struct proc *p; 141 { 142 struct sockaddr_in *sin; 143 struct inpcb *inp; 144 struct tcpcb *tp = NULL; 145 int s; 146 int error = 0; 147 short ostate; 148 149 if (req == PRU_CONTROL) { 150 #ifdef INET6 151 if (sotopf(so) == PF_INET6) 152 return in6_control(so, (u_long)m, (caddr_t)nam, 153 (struct ifnet *)control, 0); 154 else 155 #endif /* INET6 */ 156 return (in_control(so, (u_long)m, (caddr_t)nam, 157 (struct ifnet *)control)); 158 } 159 if (control && control->m_len) { 160 m_freem(control); 161 if (m) 162 m_freem(m); 163 return (EINVAL); 164 } 165 166 s = splsoftnet(); 167 inp = sotoinpcb(so); 168 /* 169 * When a TCP is attached to a socket, then there will be 170 * a (struct inpcb) pointed at by the socket, and this 171 * structure will point at a subsidiary (struct tcpcb). 172 */ 173 if (inp == 0 && req != PRU_ATTACH) { 174 error = so->so_error; 175 if (error == 0) 176 error = EINVAL; 177 splx(s); 178 /* 179 * The following corrects an mbuf leak under rare 180 * circumstances 181 */ 182 if (m && (req == PRU_SEND || req == PRU_SENDOOB)) 183 m_freem(m); 184 return (error); 185 } 186 if (inp) { 187 tp = intotcpcb(inp); 188 /* WHAT IF TP IS 0? */ 189 #ifdef KPROF 190 tcp_acounts[tp->t_state][req]++; 191 #endif 192 ostate = tp->t_state; 193 } else 194 ostate = 0; 195 switch (req) { 196 197 /* 198 * TCP attaches to socket via PRU_ATTACH, reserving space, 199 * and an internet control block. 200 */ 201 case PRU_ATTACH: 202 if (inp) { 203 error = EISCONN; 204 break; 205 } 206 error = tcp_attach(so); 207 if (error) 208 break; 209 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 210 so->so_linger = TCP_LINGERTIME; 211 tp = sototcpcb(so); 212 break; 213 214 /* 215 * PRU_DETACH detaches the TCP protocol from the socket. 216 * If the protocol state is non-embryonic, then can't 217 * do this directly: have to initiate a PRU_DISCONNECT, 218 * which may finish later; embryonic TCB's can just 219 * be discarded here. 220 */ 221 case PRU_DETACH: 222 tp = tcp_disconnect(tp); 223 break; 224 225 /* 226 * Give the socket an address. 227 */ 228 case PRU_BIND: 229 #ifdef INET6 230 if (inp->inp_flags & INP_IPV6) 231 error = in6_pcbbind(inp, nam, p); 232 else 233 #endif 234 error = in_pcbbind(inp, nam, p); 235 if (error) 236 break; 237 break; 238 239 /* 240 * Prepare to accept connections. 241 */ 242 case PRU_LISTEN: 243 if (inp->inp_lport == 0) { 244 #ifdef INET6 245 if (inp->inp_flags & INP_IPV6) 246 error = in6_pcbbind(inp, NULL, p); 247 else 248 #endif 249 error = in_pcbbind(inp, NULL, p); 250 } 251 /* If the in_pcbbind() above is called, the tp->pf 252 should still be whatever it was before. */ 253 if (error == 0) 254 tp->t_state = TCPS_LISTEN; 255 break; 256 257 /* 258 * Initiate connection to peer. 259 * Create a template for use in transmissions on this connection. 260 * Enter SYN_SENT state, and mark socket as connecting. 261 * Start keep-alive timer, and seed output sequence space. 262 * Send initial segment on connection. 263 */ 264 case PRU_CONNECT: 265 sin = mtod(nam, struct sockaddr_in *); 266 267 #ifdef INET6 268 if (sin->sin_family == AF_INET6) { 269 struct in6_addr *in6_addr = &mtod(nam, 270 struct sockaddr_in6 *)->sin6_addr; 271 272 if (IN6_IS_ADDR_UNSPECIFIED(in6_addr) || 273 IN6_IS_ADDR_MULTICAST(in6_addr) || 274 (IN6_IS_ADDR_V4MAPPED(in6_addr) && 275 ((in6_addr->s6_addr32[3] == INADDR_ANY) || 276 IN_MULTICAST(in6_addr->s6_addr32[3]) || 277 in_broadcast(sin->sin_addr, NULL)))) { 278 error = EINVAL; 279 break; 280 } 281 282 if (inp->inp_lport == 0) { 283 error = in6_pcbbind(inp, NULL, p); 284 if (error) 285 break; 286 } 287 error = in6_pcbconnect(inp, nam); 288 } else if (sin->sin_family == AF_INET) 289 #endif /* INET6 */ 290 { 291 if ((sin->sin_addr.s_addr == INADDR_ANY) || 292 IN_MULTICAST(sin->sin_addr.s_addr) || 293 in_broadcast(sin->sin_addr, NULL)) { 294 error = EINVAL; 295 break; 296 } 297 298 if (inp->inp_lport == 0) { 299 error = in_pcbbind(inp, NULL, p); 300 if (error) 301 break; 302 } 303 error = in_pcbconnect(inp, nam); 304 } 305 306 if (error) 307 break; 308 309 tp->t_template = tcp_template(tp); 310 if (tp->t_template == 0) { 311 in_pcbdisconnect(inp); 312 error = ENOBUFS; 313 break; 314 } 315 316 so->so_state |= SS_CONNECTOUT; 317 318 /* Compute window scaling to request. */ 319 tcp_rscale(tp, so->so_rcv.sb_hiwat); 320 321 soisconnecting(so); 322 tcpstat.tcps_connattempt++; 323 tp->t_state = TCPS_SYN_SENT; 324 TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init); 325 tcp_set_iss_tsm(tp); 326 tcp_sendseqinit(tp); 327 #if defined(TCP_SACK) 328 tp->snd_last = tp->snd_una; 329 #endif 330 #if defined(TCP_SACK) && defined(TCP_FACK) 331 tp->snd_fack = tp->snd_una; 332 tp->retran_data = 0; 333 tp->snd_awnd = 0; 334 #endif 335 error = tcp_output(tp); 336 break; 337 338 /* 339 * Create a TCP connection between two sockets. 340 */ 341 case PRU_CONNECT2: 342 error = EOPNOTSUPP; 343 break; 344 345 /* 346 * Initiate disconnect from peer. 347 * If connection never passed embryonic stage, just drop; 348 * else if don't need to let data drain, then can just drop anyways, 349 * else have to begin TCP shutdown process: mark socket disconnecting, 350 * drain unread data, state switch to reflect user close, and 351 * send segment (e.g. FIN) to peer. Socket will be really disconnected 352 * when peer sends FIN and acks ours. 353 * 354 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 355 */ 356 case PRU_DISCONNECT: 357 tp = tcp_disconnect(tp); 358 break; 359 360 /* 361 * Accept a connection. Essentially all the work is 362 * done at higher levels; just return the address 363 * of the peer, storing through addr. 364 */ 365 case PRU_ACCEPT: 366 #ifdef INET6 367 if (inp->inp_flags & INP_IPV6) 368 in6_setpeeraddr(inp, nam); 369 else 370 #endif 371 in_setpeeraddr(inp, nam); 372 break; 373 374 /* 375 * Mark the connection as being incapable of further output. 376 */ 377 case PRU_SHUTDOWN: 378 if (so->so_state & SS_CANTSENDMORE) 379 break; 380 socantsendmore(so); 381 tp = tcp_usrclosed(tp); 382 if (tp) 383 error = tcp_output(tp); 384 break; 385 386 /* 387 * After a receive, possibly send window update to peer. 388 */ 389 case PRU_RCVD: 390 /* 391 * soreceive() calls this function when a user receives 392 * ancillary data on a listening socket. We don't call 393 * tcp_output in such a case, since there is no header 394 * template for a listening socket and hence the kernel 395 * will panic. 396 */ 397 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0) 398 (void) tcp_output(tp); 399 break; 400 401 /* 402 * Do a send by putting data in output queue and updating urgent 403 * marker if URG set. Possibly send more data. 404 */ 405 case PRU_SEND: 406 sbappendstream(&so->so_snd, m); 407 error = tcp_output(tp); 408 break; 409 410 /* 411 * Abort the TCP. 412 */ 413 case PRU_ABORT: 414 tp = tcp_drop(tp, ECONNABORTED); 415 break; 416 417 case PRU_SENSE: 418 ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; 419 splx(s); 420 return (0); 421 422 case PRU_RCVOOB: 423 if ((so->so_oobmark == 0 && 424 (so->so_state & SS_RCVATMARK) == 0) || 425 so->so_options & SO_OOBINLINE || 426 tp->t_oobflags & TCPOOB_HADDATA) { 427 error = EINVAL; 428 break; 429 } 430 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 431 error = EWOULDBLOCK; 432 break; 433 } 434 m->m_len = 1; 435 *mtod(m, caddr_t) = tp->t_iobc; 436 if (((long)nam & MSG_PEEK) == 0) 437 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 438 break; 439 440 case PRU_SENDOOB: 441 if (sbspace(&so->so_snd) < -512) { 442 m_freem(m); 443 error = ENOBUFS; 444 break; 445 } 446 /* 447 * According to RFC961 (Assigned Protocols), 448 * the urgent pointer points to the last octet 449 * of urgent data. We continue, however, 450 * to consider it to indicate the first octet 451 * of data past the urgent section. 452 * Otherwise, snd_up should be one lower. 453 */ 454 sbappendstream(&so->so_snd, m); 455 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 456 tp->t_force = 1; 457 error = tcp_output(tp); 458 tp->t_force = 0; 459 break; 460 461 case PRU_SOCKADDR: 462 #ifdef INET6 463 if (inp->inp_flags & INP_IPV6) 464 in6_setsockaddr(inp, nam); 465 else 466 #endif 467 in_setsockaddr(inp, nam); 468 break; 469 470 case PRU_PEERADDR: 471 #ifdef INET6 472 if (inp->inp_flags & INP_IPV6) 473 in6_setpeeraddr(inp, nam); 474 else 475 #endif 476 in_setpeeraddr(inp, nam); 477 break; 478 479 default: 480 panic("tcp_usrreq"); 481 } 482 if (tp && (so->so_options & SO_DEBUG)) 483 tcp_trace(TA_USER, ostate, tp, (caddr_t)0, req, 0); 484 splx(s); 485 return (error); 486 } 487 488 int 489 tcp_ctloutput(op, so, level, optname, mp) 490 int op; 491 struct socket *so; 492 int level, optname; 493 struct mbuf **mp; 494 { 495 int error = 0, s; 496 struct inpcb *inp; 497 struct tcpcb *tp; 498 struct mbuf *m; 499 int i; 500 501 s = splsoftnet(); 502 inp = sotoinpcb(so); 503 if (inp == NULL) { 504 splx(s); 505 if (op == PRCO_SETOPT && *mp) 506 (void) m_free(*mp); 507 return (ECONNRESET); 508 } 509 #ifdef INET6 510 tp = intotcpcb(inp); 511 #endif /* INET6 */ 512 if (level != IPPROTO_TCP) { 513 switch (so->so_proto->pr_domain->dom_family) { 514 #ifdef INET6 515 case PF_INET6: 516 error = ip6_ctloutput(op, so, level, optname, mp); 517 break; 518 #endif /* INET6 */ 519 case PF_INET: 520 error = ip_ctloutput(op, so, level, optname, mp); 521 break; 522 default: 523 error = EAFNOSUPPORT; /*?*/ 524 break; 525 } 526 splx(s); 527 return (error); 528 } 529 #ifndef INET6 530 tp = intotcpcb(inp); 531 #endif /* !INET6 */ 532 533 switch (op) { 534 535 case PRCO_SETOPT: 536 m = *mp; 537 switch (optname) { 538 539 case TCP_NODELAY: 540 if (m == NULL || m->m_len < sizeof (int)) 541 error = EINVAL; 542 else if (*mtod(m, int *)) 543 tp->t_flags |= TF_NODELAY; 544 else 545 tp->t_flags &= ~TF_NODELAY; 546 break; 547 548 case TCP_MAXSEG: 549 if (m == NULL || m->m_len < sizeof (int)) { 550 error = EINVAL; 551 break; 552 } 553 554 i = *mtod(m, int *); 555 if (i > 0 && i <= tp->t_maxseg) 556 tp->t_maxseg = i; 557 else 558 error = EINVAL; 559 break; 560 561 #ifdef TCP_SACK 562 case TCP_SACK_ENABLE: 563 if (m == NULL || m->m_len < sizeof (int)) { 564 error = EINVAL; 565 break; 566 } 567 568 if (TCPS_HAVEESTABLISHED(tp->t_state)) { 569 error = EPERM; 570 break; 571 } 572 573 if (tp->t_flags & TF_SIGNATURE) { 574 error = EPERM; 575 break; 576 } 577 578 if (*mtod(m, int *)) 579 tp->sack_enable = 1; 580 else 581 tp->sack_enable = 0; 582 break; 583 #endif 584 #ifdef TCP_SIGNATURE 585 case TCP_MD5SIG: 586 if (m == NULL || m->m_len < sizeof (int)) { 587 error = EINVAL; 588 break; 589 } 590 591 if (TCPS_HAVEESTABLISHED(tp->t_state)) { 592 error = EPERM; 593 break; 594 } 595 596 if (*mtod(m, int *)) { 597 tp->t_flags |= TF_SIGNATURE; 598 #ifdef TCP_SACK 599 tp->sack_enable = 0; 600 #endif /* TCP_SACK */ 601 } else 602 tp->t_flags &= ~TF_SIGNATURE; 603 break; 604 #endif /* TCP_SIGNATURE */ 605 default: 606 error = ENOPROTOOPT; 607 break; 608 } 609 if (m) 610 (void) m_free(m); 611 break; 612 613 case PRCO_GETOPT: 614 *mp = m = m_get(M_WAIT, MT_SOOPTS); 615 m->m_len = sizeof(int); 616 617 switch (optname) { 618 case TCP_NODELAY: 619 *mtod(m, int *) = tp->t_flags & TF_NODELAY; 620 break; 621 case TCP_MAXSEG: 622 *mtod(m, int *) = tp->t_maxseg; 623 break; 624 #ifdef TCP_SACK 625 case TCP_SACK_ENABLE: 626 *mtod(m, int *) = tp->sack_enable; 627 break; 628 #endif 629 #ifdef TCP_SIGNATURE 630 case TCP_MD5SIG: 631 *mtod(m, int *) = tp->t_flags & TF_SIGNATURE; 632 break; 633 #endif 634 default: 635 error = ENOPROTOOPT; 636 break; 637 } 638 break; 639 } 640 splx(s); 641 return (error); 642 } 643 644 /* 645 * Attach TCP protocol to socket, allocating 646 * internet protocol control block, tcp control block, 647 * bufer space, and entering LISTEN state if to accept connections. 648 */ 649 int 650 tcp_attach(so) 651 struct socket *so; 652 { 653 struct tcpcb *tp; 654 struct inpcb *inp; 655 int error; 656 657 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 658 error = soreserve(so, tcp_sendspace, tcp_recvspace); 659 if (error) 660 return (error); 661 } 662 error = in_pcballoc(so, &tcbtable); 663 if (error) 664 return (error); 665 inp = sotoinpcb(so); 666 tp = tcp_newtcpcb(inp); 667 if (tp == NULL) { 668 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 669 670 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 671 in_pcbdetach(inp); 672 so->so_state |= nofd; 673 return (ENOBUFS); 674 } 675 tp->t_state = TCPS_CLOSED; 676 #ifdef INET6 677 /* we disallow IPv4 mapped address completely. */ 678 if (inp->inp_flags & INP_IPV6) 679 tp->pf = PF_INET6; 680 else 681 tp->pf = PF_INET; 682 #else 683 tp->pf = PF_INET; 684 #endif 685 return (0); 686 } 687 688 /* 689 * Initiate (or continue) disconnect. 690 * If embryonic state, just send reset (once). 691 * If in ``let data drain'' option and linger null, just drop. 692 * Otherwise (hard), mark socket disconnecting and drop 693 * current input data; switch states based on user close, and 694 * send segment to peer (with FIN). 695 */ 696 struct tcpcb * 697 tcp_disconnect(tp) 698 struct tcpcb *tp; 699 { 700 struct socket *so = tp->t_inpcb->inp_socket; 701 702 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 703 tp = tcp_close(tp); 704 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 705 tp = tcp_drop(tp, 0); 706 else { 707 soisdisconnecting(so); 708 sbflush(&so->so_rcv); 709 tp = tcp_usrclosed(tp); 710 if (tp) 711 (void) tcp_output(tp); 712 } 713 return (tp); 714 } 715 716 /* 717 * User issued close, and wish to trail through shutdown states: 718 * if never received SYN, just forget it. If got a SYN from peer, 719 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 720 * If already got a FIN from peer, then almost done; go to LAST_ACK 721 * state. In all other cases, have already sent FIN to peer (e.g. 722 * after PRU_SHUTDOWN), and just have to play tedious game waiting 723 * for peer to send FIN or not respond to keep-alives, etc. 724 * We can let the user exit from the close as soon as the FIN is acked. 725 */ 726 struct tcpcb * 727 tcp_usrclosed(tp) 728 struct tcpcb *tp; 729 { 730 731 switch (tp->t_state) { 732 733 case TCPS_CLOSED: 734 case TCPS_LISTEN: 735 case TCPS_SYN_SENT: 736 tp->t_state = TCPS_CLOSED; 737 tp = tcp_close(tp); 738 break; 739 740 case TCPS_SYN_RECEIVED: 741 case TCPS_ESTABLISHED: 742 tp->t_state = TCPS_FIN_WAIT_1; 743 break; 744 745 case TCPS_CLOSE_WAIT: 746 tp->t_state = TCPS_LAST_ACK; 747 break; 748 } 749 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 750 soisdisconnected(tp->t_inpcb->inp_socket); 751 /* 752 * If we are in FIN_WAIT_2, we arrived here because the 753 * application did a shutdown of the send side. Like the 754 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after 755 * a full close, we start a timer to make sure sockets are 756 * not left in FIN_WAIT_2 forever. 757 */ 758 if (tp->t_state == TCPS_FIN_WAIT_2) 759 TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle); 760 } 761 return (tp); 762 } 763 764 /* 765 * Look up a socket for ident or tcpdrop, ... 766 */ 767 int 768 tcp_ident(void *oldp, size_t *oldlenp, void *newp, size_t newlen, int dodrop) 769 { 770 int error = 0, s; 771 struct tcp_ident_mapping tir; 772 struct inpcb *inp; 773 struct tcpcb *tp = NULL; 774 struct sockaddr_in *fin, *lin; 775 #ifdef INET6 776 struct sockaddr_in6 *fin6, *lin6; 777 struct in6_addr f6, l6; 778 #endif 779 if (dodrop) { 780 if (oldp != NULL || *oldlenp != 0) 781 return (EINVAL); 782 if (newp == NULL) 783 return (EPERM); 784 if (newlen < sizeof(tir)) 785 return (ENOMEM); 786 if ((error = copyin(newp, &tir, sizeof (tir))) != 0 ) 787 return (error); 788 } else { 789 if (oldp == NULL) 790 return (EINVAL); 791 if (*oldlenp < sizeof(tir)) 792 return (ENOMEM); 793 if (newp != NULL || newlen != 0) 794 return (EINVAL); 795 if ((error = copyin(oldp, &tir, sizeof (tir))) != 0 ) 796 return (error); 797 } 798 switch (tir.faddr.ss_family) { 799 #ifdef INET6 800 case AF_INET6: 801 fin6 = (struct sockaddr_in6 *)&tir.faddr; 802 error = in6_embedscope(&f6, fin6, NULL, NULL); 803 if (error) 804 return EINVAL; /*?*/ 805 lin6 = (struct sockaddr_in6 *)&tir.laddr; 806 error = in6_embedscope(&l6, lin6, NULL, NULL); 807 if (error) 808 return EINVAL; /*?*/ 809 break; 810 #endif 811 case AF_INET: 812 fin = (struct sockaddr_in *)&tir.faddr; 813 lin = (struct sockaddr_in *)&tir.laddr; 814 break; 815 default: 816 return (EINVAL); 817 } 818 819 s = splsoftnet(); 820 switch (tir.faddr.ss_family) { 821 #ifdef INET6 822 case AF_INET6: 823 inp = in6_pcbhashlookup(&tcbtable, &f6, 824 fin6->sin6_port, &l6, lin6->sin6_port); 825 break; 826 #endif 827 case AF_INET: 828 inp = in_pcbhashlookup(&tcbtable, fin->sin_addr, 829 fin->sin_port, lin->sin_addr, lin->sin_port , tir.rdomain); 830 break; 831 } 832 833 if (dodrop) { 834 if (inp && (tp = intotcpcb(inp)) && 835 ((inp->inp_socket->so_options & SO_ACCEPTCONN) == 0)) 836 tp = tcp_drop(tp, ECONNABORTED); 837 else 838 error = ESRCH; 839 splx(s); 840 return (error); 841 } 842 843 if (inp == NULL) { 844 ++tcpstat.tcps_pcbhashmiss; 845 switch (tir.faddr.ss_family) { 846 #ifdef INET6 847 case AF_INET6: 848 inp = in6_pcblookup_listen(&tcbtable, 849 &l6, lin6->sin6_port, 0, NULL); 850 break; 851 #endif 852 case AF_INET: 853 inp = in_pcblookup_listen(&tcbtable, 854 lin->sin_addr, lin->sin_port, 0, NULL, tir.rdomain); 855 break; 856 } 857 } 858 859 if (inp != NULL && (inp->inp_socket->so_state & SS_CONNECTOUT)) { 860 tir.ruid = inp->inp_socket->so_ruid; 861 tir.euid = inp->inp_socket->so_euid; 862 } else { 863 tir.ruid = -1; 864 tir.euid = -1; 865 } 866 splx(s); 867 868 *oldlenp = sizeof (tir); 869 error = copyout((void *)&tir, oldp, sizeof (tir)); 870 return (error); 871 } 872 873 /* 874 * Sysctl for tcp variables. 875 */ 876 int 877 tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen) 878 int *name; 879 u_int namelen; 880 void *oldp; 881 size_t *oldlenp; 882 void *newp; 883 size_t newlen; 884 { 885 int error, nval; 886 887 /* All sysctl names at this level are terminal. */ 888 if (namelen != 1) 889 return (ENOTDIR); 890 891 switch (name[0]) { 892 #ifdef TCP_SACK 893 case TCPCTL_SACK: 894 return (sysctl_int(oldp, oldlenp, newp, newlen, 895 &tcp_do_sack)); 896 #endif 897 case TCPCTL_SLOWHZ: 898 return (sysctl_rdint(oldp, oldlenp, newp, PR_SLOWHZ)); 899 900 case TCPCTL_BADDYNAMIC: 901 return (sysctl_struct(oldp, oldlenp, newp, newlen, 902 baddynamicports.tcp, sizeof(baddynamicports.tcp))); 903 904 case TCPCTL_IDENT: 905 return (tcp_ident(oldp, oldlenp, newp, newlen, 0)); 906 907 case TCPCTL_DROP: 908 return (tcp_ident(oldp, oldlenp, newp, newlen, 1)); 909 910 #ifdef TCP_ECN 911 case TCPCTL_ECN: 912 return (sysctl_int(oldp, oldlenp, newp, newlen, 913 &tcp_do_ecn)); 914 #endif 915 case TCPCTL_REASS_LIMIT: 916 nval = tcp_reass_limit; 917 error = sysctl_int(oldp, oldlenp, newp, newlen, &nval); 918 if (error) 919 return (error); 920 if (nval != tcp_reass_limit) { 921 error = pool_sethardlimit(&tcpqe_pool, nval, NULL, 0); 922 if (error) 923 return (error); 924 tcp_reass_limit = nval; 925 } 926 return (0); 927 #ifdef TCP_SACK 928 case TCPCTL_SACKHOLE_LIMIT: 929 nval = tcp_sackhole_limit; 930 error = sysctl_int(oldp, oldlenp, newp, newlen, &nval); 931 if (error) 932 return (error); 933 if (nval != tcp_sackhole_limit) { 934 error = pool_sethardlimit(&sackhl_pool, nval, NULL, 0); 935 if (error) 936 return (error); 937 tcp_sackhole_limit = nval; 938 } 939 return (0); 940 #endif 941 942 case TCPCTL_STATS: 943 if (newp != NULL) 944 return (EPERM); 945 return (sysctl_struct(oldp, oldlenp, newp, newlen, 946 &tcpstat, sizeof(tcpstat))); 947 948 default: 949 if (name[0] < TCPCTL_MAXID) 950 return (sysctl_int_arr(tcpctl_vars, name, namelen, 951 oldp, oldlenp, newp, newlen)); 952 return (ENOPROTOOPT); 953 } 954 /* NOTREACHED */ 955 } 956