1 /* $OpenBSD: tcp_usrreq.c,v 1.89 2005/03/04 13:21:42 markus Exp $ */ 2 /* $NetBSD: tcp_usrreq.c,v 1.20 1996/02/13 23:44:16 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 33 * 34 * NRL grants permission for redistribution and use in source and binary 35 * forms, with or without modification, of the software and documentation 36 * created at NRL provided that the following conditions are met: 37 * 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgements: 45 * This product includes software developed by the University of 46 * California, Berkeley and its contributors. 47 * This product includes software developed at the Information 48 * Technology Division, US Naval Research Laboratory. 49 * 4. Neither the name of the NRL nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 64 * 65 * The views and conclusions contained in the software and documentation 66 * are those of the authors and should not be interpreted as representing 67 * official policies, either expressed or implied, of the US Naval 68 * Research Laboratory (NRL). 69 */ 70 71 #include <sys/param.h> 72 #include <sys/systm.h> 73 #include <sys/mbuf.h> 74 #include <sys/socket.h> 75 #include <sys/socketvar.h> 76 #include <sys/protosw.h> 77 #include <sys/stat.h> 78 #include <sys/sysctl.h> 79 #include <sys/domain.h> 80 #include <sys/kernel.h> 81 82 #include <dev/rndvar.h> 83 84 #include <net/if.h> 85 #include <net/route.h> 86 87 #include <netinet/in.h> 88 #include <netinet/in_systm.h> 89 #include <netinet/in_var.h> 90 #include <netinet/ip.h> 91 #include <netinet/in_pcb.h> 92 #include <netinet/ip_var.h> 93 #include <netinet/tcp.h> 94 #include <netinet/tcp_fsm.h> 95 #include <netinet/tcp_seq.h> 96 #include <netinet/tcp_timer.h> 97 #include <netinet/tcp_var.h> 98 #include <netinet/tcpip.h> 99 #include <netinet/tcp_debug.h> 100 101 /* 102 * TCP protocol interface to socket abstraction. 103 */ 104 extern char *tcpstates[]; 105 extern int tcptv_keep_init; 106 107 extern int tcp_rst_ppslim; 108 109 /* from in_pcb.c */ 110 extern struct baddynamicports baddynamicports; 111 112 #ifndef TCP_SENDSPACE 113 #define TCP_SENDSPACE 1024*16 114 #endif 115 u_int tcp_sendspace = TCP_SENDSPACE; 116 #ifndef TCP_RECVSPACE 117 #define TCP_RECVSPACE 1024*16 118 #endif 119 u_int tcp_recvspace = TCP_RECVSPACE; 120 121 int *tcpctl_vars[TCPCTL_MAXID] = TCPCTL_VARS; 122 123 struct inpcbtable tcbtable; 124 125 int tcp_ident(void *, size_t *, void *, size_t, int); 126 127 #ifdef INET6 128 int 129 tcp6_usrreq(so, req, m, nam, control, p) 130 struct socket *so; 131 int req; 132 struct mbuf *m, *nam, *control; 133 struct proc *p; 134 { 135 136 return tcp_usrreq(so, req, m, nam, control); 137 } 138 #endif 139 140 /* 141 * Process a TCP user request for TCP tb. If this is a send request 142 * then m is the mbuf chain of send data. If this is a timer expiration 143 * (called from the software clock routine), then timertype tells which timer. 144 */ 145 /*ARGSUSED*/ 146 int 147 tcp_usrreq(so, req, m, nam, control) 148 struct socket *so; 149 int req; 150 struct mbuf *m, *nam, *control; 151 { 152 struct sockaddr_in *sin; 153 struct inpcb *inp; 154 struct tcpcb *tp = NULL; 155 int s; 156 int error = 0; 157 short ostate; 158 159 if (req == PRU_CONTROL) { 160 #ifdef INET6 161 if (sotopf(so) == PF_INET6) 162 return in6_control(so, (u_long)m, (caddr_t)nam, 163 (struct ifnet *)control, 0); 164 else 165 #endif /* INET6 */ 166 return (in_control(so, (u_long)m, (caddr_t)nam, 167 (struct ifnet *)control)); 168 } 169 if (control && control->m_len) { 170 m_freem(control); 171 if (m) 172 m_freem(m); 173 return (EINVAL); 174 } 175 176 s = splsoftnet(); 177 inp = sotoinpcb(so); 178 /* 179 * When a TCP is attached to a socket, then there will be 180 * a (struct inpcb) pointed at by the socket, and this 181 * structure will point at a subsidiary (struct tcpcb). 182 */ 183 if (inp == 0 && req != PRU_ATTACH) { 184 splx(s); 185 /* 186 * The following corrects an mbuf leak under rare 187 * circumstances 188 */ 189 if (m && (req == PRU_SEND || req == PRU_SENDOOB)) 190 m_freem(m); 191 return (EINVAL); /* XXX */ 192 } 193 if (inp) { 194 tp = intotcpcb(inp); 195 /* WHAT IF TP IS 0? */ 196 #ifdef KPROF 197 tcp_acounts[tp->t_state][req]++; 198 #endif 199 ostate = tp->t_state; 200 } else 201 ostate = 0; 202 switch (req) { 203 204 /* 205 * TCP attaches to socket via PRU_ATTACH, reserving space, 206 * and an internet control block. 207 */ 208 case PRU_ATTACH: 209 if (inp) { 210 error = EISCONN; 211 break; 212 } 213 error = tcp_attach(so); 214 if (error) 215 break; 216 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 217 so->so_linger = TCP_LINGERTIME; 218 tp = sototcpcb(so); 219 break; 220 221 /* 222 * PRU_DETACH detaches the TCP protocol from the socket. 223 * If the protocol state is non-embryonic, then can't 224 * do this directly: have to initiate a PRU_DISCONNECT, 225 * which may finish later; embryonic TCB's can just 226 * be discarded here. 227 */ 228 case PRU_DETACH: 229 tp = tcp_disconnect(tp); 230 break; 231 232 /* 233 * Give the socket an address. 234 */ 235 case PRU_BIND: 236 #ifdef INET6 237 if (inp->inp_flags & INP_IPV6) 238 error = in6_pcbbind(inp, nam); 239 else 240 #endif 241 error = in_pcbbind(inp, nam); 242 if (error) 243 break; 244 break; 245 246 /* 247 * Prepare to accept connections. 248 */ 249 case PRU_LISTEN: 250 if (inp->inp_lport == 0) { 251 #ifdef INET6 252 if (inp->inp_flags & INP_IPV6) 253 error = in6_pcbbind(inp, NULL); 254 else 255 #endif 256 error = in_pcbbind(inp, NULL); 257 } 258 /* If the in_pcbbind() above is called, the tp->pf 259 should still be whatever it was before. */ 260 if (error == 0) 261 tp->t_state = TCPS_LISTEN; 262 break; 263 264 /* 265 * Initiate connection to peer. 266 * Create a template for use in transmissions on this connection. 267 * Enter SYN_SENT state, and mark socket as connecting. 268 * Start keep-alive timer, and seed output sequence space. 269 * Send initial segment on connection. 270 */ 271 case PRU_CONNECT: 272 sin = mtod(nam, struct sockaddr_in *); 273 274 #ifdef INET6 275 if (sin->sin_family == AF_INET6) { 276 struct in6_addr *in6_addr = &mtod(nam, 277 struct sockaddr_in6 *)->sin6_addr; 278 279 if (IN6_IS_ADDR_UNSPECIFIED(in6_addr) || 280 IN6_IS_ADDR_MULTICAST(in6_addr) || 281 (IN6_IS_ADDR_V4MAPPED(in6_addr) && 282 ((in6_addr->s6_addr32[3] == INADDR_ANY) || 283 IN_MULTICAST(in6_addr->s6_addr32[3]) || 284 in_broadcast(sin->sin_addr, NULL)))) { 285 error = EINVAL; 286 break; 287 } 288 289 if (inp->inp_lport == 0) { 290 error = in6_pcbbind(inp, NULL); 291 if (error) 292 break; 293 } 294 error = in6_pcbconnect(inp, nam); 295 } else if (sin->sin_family == AF_INET) 296 #endif /* INET6 */ 297 { 298 if ((sin->sin_addr.s_addr == INADDR_ANY) || 299 IN_MULTICAST(sin->sin_addr.s_addr) || 300 in_broadcast(sin->sin_addr, NULL)) { 301 error = EINVAL; 302 break; 303 } 304 305 if (inp->inp_lport == 0) { 306 error = in_pcbbind(inp, NULL); 307 if (error) 308 break; 309 } 310 error = in_pcbconnect(inp, nam); 311 } 312 313 if (error) 314 break; 315 316 tp->t_template = tcp_template(tp); 317 if (tp->t_template == 0) { 318 in_pcbdisconnect(inp); 319 error = ENOBUFS; 320 break; 321 } 322 323 so->so_state |= SS_CONNECTOUT; 324 325 /* initialise the timestamp modulator */ 326 if (tp->t_flags & TF_REQ_TSTMP) 327 tp->ts_modulate = arc4random(); 328 329 /* Compute window scaling to request. */ 330 tcp_rscale(tp, so->so_rcv.sb_hiwat); 331 332 soisconnecting(so); 333 tcpstat.tcps_connattempt++; 334 tp->t_state = TCPS_SYN_SENT; 335 TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init); 336 #ifdef TCP_COMPAT_42 337 tp->iss = tcp_iss; 338 tcp_iss += TCP_ISSINCR/2; 339 #else /* TCP_COMPAT_42 */ 340 tp->iss = tcp_rndiss_next(); 341 #endif /* !TCP_COMPAT_42 */ 342 tcp_sendseqinit(tp); 343 #if defined(TCP_SACK) 344 tp->snd_last = tp->snd_una; 345 #endif 346 #if defined(TCP_SACK) && defined(TCP_FACK) 347 tp->snd_fack = tp->snd_una; 348 tp->retran_data = 0; 349 tp->snd_awnd = 0; 350 #endif 351 error = tcp_output(tp); 352 break; 353 354 /* 355 * Create a TCP connection between two sockets. 356 */ 357 case PRU_CONNECT2: 358 error = EOPNOTSUPP; 359 break; 360 361 /* 362 * Initiate disconnect from peer. 363 * If connection never passed embryonic stage, just drop; 364 * else if don't need to let data drain, then can just drop anyways, 365 * else have to begin TCP shutdown process: mark socket disconnecting, 366 * drain unread data, state switch to reflect user close, and 367 * send segment (e.g. FIN) to peer. Socket will be really disconnected 368 * when peer sends FIN and acks ours. 369 * 370 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 371 */ 372 case PRU_DISCONNECT: 373 tp = tcp_disconnect(tp); 374 break; 375 376 /* 377 * Accept a connection. Essentially all the work is 378 * done at higher levels; just return the address 379 * of the peer, storing through addr. 380 */ 381 case PRU_ACCEPT: 382 #ifdef INET6 383 if (inp->inp_flags & INP_IPV6) 384 in6_setpeeraddr(inp, nam); 385 else 386 #endif 387 in_setpeeraddr(inp, nam); 388 break; 389 390 /* 391 * Mark the connection as being incapable of further output. 392 */ 393 case PRU_SHUTDOWN: 394 if (so->so_state & SS_CANTSENDMORE) 395 break; 396 socantsendmore(so); 397 tp = tcp_usrclosed(tp); 398 if (tp) 399 error = tcp_output(tp); 400 break; 401 402 /* 403 * After a receive, possibly send window update to peer. 404 */ 405 case PRU_RCVD: 406 /* 407 * soreceive() calls this function when a user receives 408 * ancillary data on a listening socket. We don't call 409 * tcp_output in such a case, since there is no header 410 * template for a listening socket and hence the kernel 411 * will panic. 412 */ 413 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0) 414 (void) tcp_output(tp); 415 break; 416 417 /* 418 * Do a send by putting data in output queue and updating urgent 419 * marker if URG set. Possibly send more data. 420 */ 421 case PRU_SEND: 422 sbappendstream(&so->so_snd, m); 423 error = tcp_output(tp); 424 break; 425 426 /* 427 * Abort the TCP. 428 */ 429 case PRU_ABORT: 430 tp = tcp_drop(tp, ECONNABORTED); 431 break; 432 433 case PRU_SENSE: 434 ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; 435 splx(s); 436 return (0); 437 438 case PRU_RCVOOB: 439 if ((so->so_oobmark == 0 && 440 (so->so_state & SS_RCVATMARK) == 0) || 441 so->so_options & SO_OOBINLINE || 442 tp->t_oobflags & TCPOOB_HADDATA) { 443 error = EINVAL; 444 break; 445 } 446 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 447 error = EWOULDBLOCK; 448 break; 449 } 450 m->m_len = 1; 451 *mtod(m, caddr_t) = tp->t_iobc; 452 if (((long)nam & MSG_PEEK) == 0) 453 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 454 break; 455 456 case PRU_SENDOOB: 457 if (sbspace(&so->so_snd) < -512) { 458 m_freem(m); 459 error = ENOBUFS; 460 break; 461 } 462 /* 463 * According to RFC961 (Assigned Protocols), 464 * the urgent pointer points to the last octet 465 * of urgent data. We continue, however, 466 * to consider it to indicate the first octet 467 * of data past the urgent section. 468 * Otherwise, snd_up should be one lower. 469 */ 470 sbappendstream(&so->so_snd, m); 471 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 472 tp->t_force = 1; 473 error = tcp_output(tp); 474 tp->t_force = 0; 475 break; 476 477 case PRU_SOCKADDR: 478 #ifdef INET6 479 if (inp->inp_flags & INP_IPV6) 480 in6_setsockaddr(inp, nam); 481 else 482 #endif 483 in_setsockaddr(inp, nam); 484 break; 485 486 case PRU_PEERADDR: 487 #ifdef INET6 488 if (inp->inp_flags & INP_IPV6) 489 in6_setpeeraddr(inp, nam); 490 else 491 #endif 492 in_setpeeraddr(inp, nam); 493 break; 494 495 default: 496 panic("tcp_usrreq"); 497 } 498 if (tp && (so->so_options & SO_DEBUG)) 499 tcp_trace(TA_USER, ostate, tp, (caddr_t)0, req, 0); 500 splx(s); 501 return (error); 502 } 503 504 int 505 tcp_ctloutput(op, so, level, optname, mp) 506 int op; 507 struct socket *so; 508 int level, optname; 509 struct mbuf **mp; 510 { 511 int error = 0, s; 512 struct inpcb *inp; 513 struct tcpcb *tp; 514 struct mbuf *m; 515 int i; 516 517 s = splsoftnet(); 518 inp = sotoinpcb(so); 519 if (inp == NULL) { 520 splx(s); 521 if (op == PRCO_SETOPT && *mp) 522 (void) m_free(*mp); 523 return (ECONNRESET); 524 } 525 #ifdef INET6 526 tp = intotcpcb(inp); 527 #endif /* INET6 */ 528 if (level != IPPROTO_TCP) { 529 switch (so->so_proto->pr_domain->dom_family) { 530 #ifdef INET6 531 case PF_INET6: 532 error = ip6_ctloutput(op, so, level, optname, mp); 533 break; 534 #endif /* INET6 */ 535 case PF_INET: 536 error = ip_ctloutput(op, so, level, optname, mp); 537 break; 538 default: 539 error = EAFNOSUPPORT; /*?*/ 540 break; 541 } 542 splx(s); 543 return (error); 544 } 545 #ifndef INET6 546 tp = intotcpcb(inp); 547 #endif /* !INET6 */ 548 549 switch (op) { 550 551 case PRCO_SETOPT: 552 m = *mp; 553 switch (optname) { 554 555 case TCP_NODELAY: 556 if (m == NULL || m->m_len < sizeof (int)) 557 error = EINVAL; 558 else if (*mtod(m, int *)) 559 tp->t_flags |= TF_NODELAY; 560 else 561 tp->t_flags &= ~TF_NODELAY; 562 break; 563 564 case TCP_MAXSEG: 565 if (m == NULL || m->m_len < sizeof (int)) { 566 error = EINVAL; 567 break; 568 } 569 570 i = *mtod(m, int *); 571 if (i > 0 && i <= tp->t_maxseg) 572 tp->t_maxseg = i; 573 else 574 error = EINVAL; 575 break; 576 577 #ifdef TCP_SACK 578 case TCP_SACK_ENABLE: 579 if (m == NULL || m->m_len < sizeof (int)) { 580 error = EINVAL; 581 break; 582 } 583 584 if (TCPS_HAVEESTABLISHED(tp->t_state)) { 585 error = EPERM; 586 break; 587 } 588 589 if (tp->t_flags & TF_SIGNATURE) { 590 error = EPERM; 591 break; 592 } 593 594 if (*mtod(m, int *)) 595 tp->sack_enable = 1; 596 else 597 tp->sack_enable = 0; 598 break; 599 #endif 600 #ifdef TCP_SIGNATURE 601 case TCP_MD5SIG: 602 if (m == NULL || m->m_len < sizeof (int)) { 603 error = EINVAL; 604 break; 605 } 606 607 if (TCPS_HAVEESTABLISHED(tp->t_state)) { 608 error = EPERM; 609 break; 610 } 611 612 if (*mtod(m, int *)) { 613 tp->t_flags |= TF_SIGNATURE; 614 #ifdef TCP_SACK 615 tp->sack_enable = 0; 616 #endif /* TCP_SACK */ 617 } else 618 tp->t_flags &= ~TF_SIGNATURE; 619 break; 620 #endif /* TCP_SIGNATURE */ 621 default: 622 error = ENOPROTOOPT; 623 break; 624 } 625 if (m) 626 (void) m_free(m); 627 break; 628 629 case PRCO_GETOPT: 630 *mp = m = m_get(M_WAIT, MT_SOOPTS); 631 m->m_len = sizeof(int); 632 633 switch (optname) { 634 case TCP_NODELAY: 635 *mtod(m, int *) = tp->t_flags & TF_NODELAY; 636 break; 637 case TCP_MAXSEG: 638 *mtod(m, int *) = tp->t_maxseg; 639 break; 640 #ifdef TCP_SACK 641 case TCP_SACK_ENABLE: 642 *mtod(m, int *) = tp->sack_enable; 643 break; 644 #endif 645 #ifdef TCP_SIGNATURE 646 case TCP_MD5SIG: 647 *mtod(m, int *) = tp->t_flags & TF_SIGNATURE; 648 break; 649 #endif 650 default: 651 error = ENOPROTOOPT; 652 break; 653 } 654 break; 655 } 656 splx(s); 657 return (error); 658 } 659 660 /* 661 * Attach TCP protocol to socket, allocating 662 * internet protocol control block, tcp control block, 663 * bufer space, and entering LISTEN state if to accept connections. 664 */ 665 int 666 tcp_attach(so) 667 struct socket *so; 668 { 669 struct tcpcb *tp; 670 struct inpcb *inp; 671 int error; 672 673 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 674 error = soreserve(so, tcp_sendspace, tcp_recvspace); 675 if (error) 676 return (error); 677 } 678 error = in_pcballoc(so, &tcbtable); 679 if (error) 680 return (error); 681 inp = sotoinpcb(so); 682 tp = tcp_newtcpcb(inp); 683 if (tp == NULL) { 684 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 685 686 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 687 in_pcbdetach(inp); 688 so->so_state |= nofd; 689 return (ENOBUFS); 690 } 691 tp->t_state = TCPS_CLOSED; 692 #ifdef INET6 693 /* we disallow IPv4 mapped address completely. */ 694 if (inp->inp_flags & INP_IPV6) 695 tp->pf = PF_INET6; 696 else 697 tp->pf = PF_INET; 698 #else 699 tp->pf = PF_INET; 700 #endif 701 return (0); 702 } 703 704 /* 705 * Initiate (or continue) disconnect. 706 * If embryonic state, just send reset (once). 707 * If in ``let data drain'' option and linger null, just drop. 708 * Otherwise (hard), mark socket disconnecting and drop 709 * current input data; switch states based on user close, and 710 * send segment to peer (with FIN). 711 */ 712 struct tcpcb * 713 tcp_disconnect(tp) 714 struct tcpcb *tp; 715 { 716 struct socket *so = tp->t_inpcb->inp_socket; 717 718 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 719 tp = tcp_close(tp); 720 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 721 tp = tcp_drop(tp, 0); 722 else { 723 soisdisconnecting(so); 724 sbflush(&so->so_rcv); 725 tp = tcp_usrclosed(tp); 726 if (tp) 727 (void) tcp_output(tp); 728 } 729 return (tp); 730 } 731 732 /* 733 * User issued close, and wish to trail through shutdown states: 734 * if never received SYN, just forget it. If got a SYN from peer, 735 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 736 * If already got a FIN from peer, then almost done; go to LAST_ACK 737 * state. In all other cases, have already sent FIN to peer (e.g. 738 * after PRU_SHUTDOWN), and just have to play tedious game waiting 739 * for peer to send FIN or not respond to keep-alives, etc. 740 * We can let the user exit from the close as soon as the FIN is acked. 741 */ 742 struct tcpcb * 743 tcp_usrclosed(tp) 744 struct tcpcb *tp; 745 { 746 747 switch (tp->t_state) { 748 749 case TCPS_CLOSED: 750 case TCPS_LISTEN: 751 case TCPS_SYN_SENT: 752 tp->t_state = TCPS_CLOSED; 753 tp = tcp_close(tp); 754 break; 755 756 case TCPS_SYN_RECEIVED: 757 case TCPS_ESTABLISHED: 758 tp->t_state = TCPS_FIN_WAIT_1; 759 break; 760 761 case TCPS_CLOSE_WAIT: 762 tp->t_state = TCPS_LAST_ACK; 763 break; 764 } 765 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 766 soisdisconnected(tp->t_inpcb->inp_socket); 767 /* 768 * If we are in FIN_WAIT_2, we arrived here because the 769 * application did a shutdown of the send side. Like the 770 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after 771 * a full close, we start a timer to make sure sockets are 772 * not left in FIN_WAIT_2 forever. 773 */ 774 if (tp->t_state == TCPS_FIN_WAIT_2) 775 TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle); 776 } 777 return (tp); 778 } 779 780 /* 781 * Look up a socket for ident or tcpdrop, ... 782 */ 783 int 784 tcp_ident(oldp, oldlenp, newp, newlen, dodrop) 785 void *oldp; 786 size_t *oldlenp; 787 void *newp; 788 size_t newlen; 789 int dodrop; 790 { 791 int error = 0, s; 792 struct tcp_ident_mapping tir; 793 struct inpcb *inp; 794 struct tcpcb *tp = NULL; 795 struct sockaddr_in *fin, *lin; 796 #ifdef INET6 797 struct sockaddr_in6 *fin6, *lin6; 798 struct in6_addr f6, l6; 799 #endif 800 if (dodrop) { 801 if (oldp != NULL || *oldlenp != 0) 802 return (EINVAL); 803 if (newp == NULL) 804 return (EPERM); 805 if (newlen < sizeof(tir)) 806 return (ENOMEM); 807 if ((error = copyin(newp, &tir, sizeof (tir))) != 0 ) 808 return (error); 809 } else { 810 if (oldp == NULL) 811 return (EINVAL); 812 if (*oldlenp < sizeof(tir)) 813 return (ENOMEM); 814 if (newp != NULL || newlen != 0) 815 return (EINVAL); 816 if ((error = copyin(oldp, &tir, sizeof (tir))) != 0 ) 817 return (error); 818 } 819 switch (tir.faddr.ss_family) { 820 #ifdef INET6 821 case AF_INET6: 822 fin6 = (struct sockaddr_in6 *)&tir.faddr; 823 error = in6_embedscope(&f6, fin6, NULL, NULL); 824 if (error) 825 return EINVAL; /*?*/ 826 lin6 = (struct sockaddr_in6 *)&tir.laddr; 827 error = in6_embedscope(&l6, lin6, NULL, NULL); 828 if (error) 829 return EINVAL; /*?*/ 830 break; 831 #endif 832 case AF_INET: 833 fin = (struct sockaddr_in *)&tir.faddr; 834 lin = (struct sockaddr_in *)&tir.laddr; 835 break; 836 default: 837 return (EINVAL); 838 } 839 840 s = splsoftnet(); 841 switch (tir.faddr.ss_family) { 842 #ifdef INET6 843 case AF_INET6: 844 inp = in6_pcbhashlookup(&tcbtable, &f6, 845 fin6->sin6_port, &l6, lin6->sin6_port); 846 break; 847 #endif 848 case AF_INET: 849 inp = in_pcbhashlookup(&tcbtable, fin->sin_addr, 850 fin->sin_port, lin->sin_addr, lin->sin_port); 851 break; 852 } 853 854 if (dodrop) { 855 if (inp && (tp = intotcpcb(inp)) && 856 ((inp->inp_socket->so_options & SO_ACCEPTCONN) == 0)) 857 tp = tcp_drop(tp, ECONNABORTED); 858 else 859 error = ESRCH; 860 splx(s); 861 return (error); 862 } 863 864 if (inp == NULL) { 865 ++tcpstat.tcps_pcbhashmiss; 866 switch (tir.faddr.ss_family) { 867 #ifdef INET6 868 case AF_INET6: 869 inp = in6_pcblookup_listen(&tcbtable, 870 &l6, lin6->sin6_port, 0); 871 break; 872 #endif 873 case AF_INET: 874 inp = in_pcblookup_listen(&tcbtable, 875 lin->sin_addr, lin->sin_port, 0); 876 break; 877 } 878 } 879 880 if (inp != NULL && (inp->inp_socket->so_state & SS_CONNECTOUT)) { 881 tir.ruid = inp->inp_socket->so_ruid; 882 tir.euid = inp->inp_socket->so_euid; 883 } else { 884 tir.ruid = -1; 885 tir.euid = -1; 886 } 887 splx(s); 888 889 *oldlenp = sizeof (tir); 890 error = copyout((void *)&tir, oldp, sizeof (tir)); 891 return (error); 892 } 893 894 /* 895 * Sysctl for tcp variables. 896 */ 897 int 898 tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen) 899 int *name; 900 u_int namelen; 901 void *oldp; 902 size_t *oldlenp; 903 void *newp; 904 size_t newlen; 905 { 906 int error, nval; 907 908 /* All sysctl names at this level are terminal. */ 909 if (namelen != 1) 910 return (ENOTDIR); 911 912 switch (name[0]) { 913 #ifdef TCP_SACK 914 case TCPCTL_SACK: 915 return (sysctl_int(oldp, oldlenp, newp, newlen, 916 &tcp_do_sack)); 917 #endif 918 case TCPCTL_SLOWHZ: 919 return (sysctl_rdint(oldp, oldlenp, newp, PR_SLOWHZ)); 920 921 case TCPCTL_BADDYNAMIC: 922 return (sysctl_struct(oldp, oldlenp, newp, newlen, 923 baddynamicports.tcp, sizeof(baddynamicports.tcp))); 924 925 case TCPCTL_IDENT: 926 return (tcp_ident(oldp, oldlenp, newp, newlen, 0)); 927 928 case TCPCTL_DROP: 929 return (tcp_ident(oldp, oldlenp, newp, newlen, 1)); 930 931 #ifdef TCP_ECN 932 case TCPCTL_ECN: 933 return (sysctl_int(oldp, oldlenp, newp, newlen, 934 &tcp_do_ecn)); 935 #endif 936 case TCPCTL_REASS_LIMIT: 937 nval = tcp_reass_limit; 938 error = sysctl_int(oldp, oldlenp, newp, newlen, &nval); 939 if (error) 940 return (error); 941 if (nval != tcp_reass_limit) { 942 error = pool_sethardlimit(&tcpqe_pool, nval, NULL, 0); 943 if (error) 944 return (error); 945 tcp_reass_limit = nval; 946 } 947 return (0); 948 #ifdef TCP_SACK 949 case TCPCTL_SACKHOLE_LIMIT: 950 nval = tcp_sackhole_limit; 951 error = sysctl_int(oldp, oldlenp, newp, newlen, &nval); 952 if (error) 953 return (error); 954 if (nval != tcp_sackhole_limit) { 955 error = pool_sethardlimit(&sackhl_pool, nval, NULL, 0); 956 if (error) 957 return (error); 958 tcp_sackhole_limit = nval; 959 } 960 return (0); 961 #endif 962 default: 963 if (name[0] < TCPCTL_MAXID) 964 return (sysctl_int_arr(tcpctl_vars, name, namelen, 965 oldp, oldlenp, newp, newlen)); 966 return (ENOPROTOOPT); 967 } 968 /* NOTREACHED */ 969 } 970