/*	$OpenBSD: tcp_usrreq.c,v 1.79 2004/01/31 21:09:15 henning Exp $	*/
/*	$NetBSD: tcp_usrreq.c,v 1.20 1996/02/13 23:44:16 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1988, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
 *
 * NRL grants permission for redistribution and use in source and binary
 * forms, with or without modification, of the software and documentation
 * created at NRL provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgements:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 *	This product includes software developed at the Information
 *	Technology Division, US Naval Research Laboratory.
 * 4. Neither the name of the NRL nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64 * 65 * The views and conclusions contained in the software and documentation 66 * are those of the authors and should not be interpreted as representing 67 * official policies, either expressed or implied, of the US Naval 68 * Research Laboratory (NRL). 69 */ 70 71 #include <sys/param.h> 72 #include <sys/systm.h> 73 #include <sys/mbuf.h> 74 #include <sys/socket.h> 75 #include <sys/socketvar.h> 76 #include <sys/protosw.h> 77 #include <sys/stat.h> 78 #include <sys/sysctl.h> 79 #include <sys/domain.h> 80 #include <sys/kernel.h> 81 82 #include <net/if.h> 83 #include <net/route.h> 84 85 #include <netinet/in.h> 86 #include <netinet/in_systm.h> 87 #include <netinet/in_var.h> 88 #include <netinet/ip.h> 89 #include <netinet/in_pcb.h> 90 #include <netinet/ip_var.h> 91 #include <netinet/tcp.h> 92 #include <netinet/tcp_fsm.h> 93 #include <netinet/tcp_seq.h> 94 #include <netinet/tcp_timer.h> 95 #include <netinet/tcp_var.h> 96 #include <netinet/tcpip.h> 97 #include <netinet/tcp_debug.h> 98 99 /* 100 * TCP protocol interface to socket abstraction. 101 */ 102 extern char *tcpstates[]; 103 extern int tcptv_keep_init; 104 105 extern int tcp_rst_ppslim; 106 107 /* from in_pcb.c */ 108 extern struct baddynamicports baddynamicports; 109 110 struct inpcbtable tcbtable; 111 112 int tcp_ident(void *, size_t *, void *, size_t); 113 114 #ifdef INET6 115 int 116 tcp6_usrreq(so, req, m, nam, control, p) 117 struct socket *so; 118 int req; 119 struct mbuf *m, *nam, *control; 120 struct proc *p; 121 { 122 123 return tcp_usrreq(so, req, m, nam, control); 124 } 125 #endif 126 127 /* 128 * Process a TCP user request for TCP tb. If this is a send request 129 * then m is the mbuf chain of send data. If this is a timer expiration 130 * (called from the software clock routine), then timertype tells which timer. 
131 */ 132 /*ARGSUSED*/ 133 int 134 tcp_usrreq(so, req, m, nam, control) 135 struct socket *so; 136 int req; 137 struct mbuf *m, *nam, *control; 138 { 139 struct sockaddr_in *sin; 140 struct inpcb *inp; 141 struct tcpcb *tp = NULL; 142 int s; 143 int error = 0; 144 int ostate; 145 146 if (req == PRU_CONTROL) { 147 #ifdef INET6 148 if (sotopf(so) == PF_INET6) 149 return in6_control(so, (u_long)m, (caddr_t)nam, 150 (struct ifnet *)control, 0); 151 else 152 #endif /* INET6 */ 153 return (in_control(so, (u_long)m, (caddr_t)nam, 154 (struct ifnet *)control)); 155 } 156 if (control && control->m_len) { 157 m_freem(control); 158 if (m) 159 m_freem(m); 160 return (EINVAL); 161 } 162 163 s = splsoftnet(); 164 inp = sotoinpcb(so); 165 /* 166 * When a TCP is attached to a socket, then there will be 167 * a (struct inpcb) pointed at by the socket, and this 168 * structure will point at a subsidary (struct tcpcb). 169 */ 170 if (inp == 0 && req != PRU_ATTACH) { 171 splx(s); 172 /* 173 * The following corrects an mbuf leak under rare 174 * circumstances 175 */ 176 if (m && (req == PRU_SEND || req == PRU_SENDOOB)) 177 m_freem(m); 178 return (EINVAL); /* XXX */ 179 } 180 if (inp) { 181 tp = intotcpcb(inp); 182 /* WHAT IF TP IS 0? */ 183 #ifdef KPROF 184 tcp_acounts[tp->t_state][req]++; 185 #endif 186 ostate = tp->t_state; 187 } else 188 ostate = 0; 189 switch (req) { 190 191 /* 192 * TCP attaches to socket via PRU_ATTACH, reserving space, 193 * and an internet control block. 194 */ 195 case PRU_ATTACH: 196 if (inp) { 197 error = EISCONN; 198 break; 199 } 200 error = tcp_attach(so); 201 if (error) 202 break; 203 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 204 so->so_linger = TCP_LINGERTIME; 205 tp = sototcpcb(so); 206 break; 207 208 /* 209 * PRU_DETACH detaches the TCP protocol from the socket. 
210 * If the protocol state is non-embryonic, then can't 211 * do this directly: have to initiate a PRU_DISCONNECT, 212 * which may finish later; embryonic TCB's can just 213 * be discarded here. 214 */ 215 case PRU_DETACH: 216 tp = tcp_disconnect(tp); 217 break; 218 219 /* 220 * Give the socket an address. 221 */ 222 case PRU_BIND: 223 #ifdef INET6 224 if (inp->inp_flags & INP_IPV6) 225 error = in6_pcbbind(inp, nam); 226 else 227 #endif 228 error = in_pcbbind(inp, nam); 229 if (error) 230 break; 231 break; 232 233 /* 234 * Prepare to accept connections. 235 */ 236 case PRU_LISTEN: 237 if (inp->inp_lport == 0) { 238 #ifdef INET6 239 if (inp->inp_flags & INP_IPV6) 240 error = in6_pcbbind(inp, NULL); 241 else 242 #endif 243 error = in_pcbbind(inp, NULL); 244 } 245 /* If the in_pcbbind() above is called, the tp->pf 246 should still be whatever it was before. */ 247 if (error == 0) 248 tp->t_state = TCPS_LISTEN; 249 break; 250 251 /* 252 * Initiate connection to peer. 253 * Create a template for use in transmissions on this connection. 254 * Enter SYN_SENT state, and mark socket as connecting. 255 * Start keep-alive timer, and seed output sequence space. 256 * Send initial segment on connection. 
257 */ 258 case PRU_CONNECT: 259 sin = mtod(nam, struct sockaddr_in *); 260 261 #ifdef INET6 262 if (sin->sin_family == AF_INET6) { 263 struct in6_addr *in6_addr = &mtod(nam, 264 struct sockaddr_in6 *)->sin6_addr; 265 266 if (IN6_IS_ADDR_UNSPECIFIED(in6_addr) || 267 IN6_IS_ADDR_MULTICAST(in6_addr) || 268 (IN6_IS_ADDR_V4MAPPED(in6_addr) && 269 ((in6_addr->s6_addr32[3] == INADDR_ANY) || 270 IN_MULTICAST(in6_addr->s6_addr32[3]) || 271 in_broadcast(sin->sin_addr, NULL)))) { 272 error = EINVAL; 273 break; 274 } 275 276 if (inp->inp_lport == 0) { 277 error = in6_pcbbind(inp, NULL); 278 if (error) 279 break; 280 } 281 error = in6_pcbconnect(inp, nam); 282 } else if (sin->sin_family == AF_INET) 283 #endif /* INET6 */ 284 { 285 if ((sin->sin_addr.s_addr == INADDR_ANY) || 286 IN_MULTICAST(sin->sin_addr.s_addr) || 287 in_broadcast(sin->sin_addr, NULL)) { 288 error = EINVAL; 289 break; 290 } 291 292 /* Trying to connect to some broadcast address */ 293 if (in_broadcast(sin->sin_addr, NULL)) { 294 error = EINVAL; 295 break; 296 } 297 298 if (inp->inp_lport == 0) { 299 error = in_pcbbind(inp, NULL); 300 if (error) 301 break; 302 } 303 error = in_pcbconnect(inp, nam); 304 } 305 306 if (error) 307 break; 308 309 tp->t_template = tcp_template(tp); 310 if (tp->t_template == 0) { 311 in_pcbdisconnect(inp); 312 error = ENOBUFS; 313 break; 314 } 315 316 so->so_state |= SS_CONNECTOUT; 317 /* Compute window scaling to request. 
*/ 318 tcp_rscale(tp, so->so_rcv.sb_hiwat); 319 320 soisconnecting(so); 321 tcpstat.tcps_connattempt++; 322 tp->t_state = TCPS_SYN_SENT; 323 TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init); 324 #ifdef TCP_COMPAT_42 325 tp->iss = tcp_iss; 326 tcp_iss += TCP_ISSINCR/2; 327 #else /* TCP_COMPAT_42 */ 328 tp->iss = tcp_rndiss_next(); 329 #endif /* !TCP_COMPAT_42 */ 330 tcp_sendseqinit(tp); 331 #if defined(TCP_SACK) 332 tp->snd_last = tp->snd_una; 333 #endif 334 #if defined(TCP_SACK) && defined(TCP_FACK) 335 tp->snd_fack = tp->snd_una; 336 tp->retran_data = 0; 337 tp->snd_awnd = 0; 338 #endif 339 error = tcp_output(tp); 340 break; 341 342 /* 343 * Create a TCP connection between two sockets. 344 */ 345 case PRU_CONNECT2: 346 error = EOPNOTSUPP; 347 break; 348 349 /* 350 * Initiate disconnect from peer. 351 * If connection never passed embryonic stage, just drop; 352 * else if don't need to let data drain, then can just drop anyways, 353 * else have to begin TCP shutdown process: mark socket disconnecting, 354 * drain unread data, state switch to reflect user close, and 355 * send segment (e.g. FIN) to peer. Socket will be really disconnected 356 * when peer sends FIN and acks ours. 357 * 358 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 359 */ 360 case PRU_DISCONNECT: 361 tp = tcp_disconnect(tp); 362 break; 363 364 /* 365 * Accept a connection. Essentially all the work is 366 * done at higher levels; just return the address 367 * of the peer, storing through addr. 368 */ 369 case PRU_ACCEPT: 370 #ifdef INET6 371 if (inp->inp_flags & INP_IPV6) 372 in6_setpeeraddr(inp, nam); 373 else 374 #endif 375 in_setpeeraddr(inp, nam); 376 break; 377 378 /* 379 * Mark the connection as being incapable of further output. 380 */ 381 case PRU_SHUTDOWN: 382 if (so->so_state & SS_CANTSENDMORE) 383 break; 384 socantsendmore(so); 385 tp = tcp_usrclosed(tp); 386 if (tp) 387 error = tcp_output(tp); 388 break; 389 390 /* 391 * After a receive, possibly send window update to peer. 
392 */ 393 case PRU_RCVD: 394 /* 395 * soreceive() calls this function when a user receives 396 * ancillary data on a listening socket. We don't call 397 * tcp_output in such a case, since there is no header 398 * template for a listening socket and hence the kernel 399 * will panic. 400 */ 401 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0) 402 (void) tcp_output(tp); 403 break; 404 405 /* 406 * Do a send by putting data in output queue and updating urgent 407 * marker if URG set. Possibly send more data. 408 */ 409 case PRU_SEND: 410 sbappendstream(&so->so_snd, m); 411 error = tcp_output(tp); 412 break; 413 414 /* 415 * Abort the TCP. 416 */ 417 case PRU_ABORT: 418 tp = tcp_drop(tp, ECONNABORTED); 419 break; 420 421 case PRU_SENSE: 422 ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; 423 splx(s); 424 return (0); 425 426 case PRU_RCVOOB: 427 if ((so->so_oobmark == 0 && 428 (so->so_state & SS_RCVATMARK) == 0) || 429 so->so_options & SO_OOBINLINE || 430 tp->t_oobflags & TCPOOB_HADDATA) { 431 error = EINVAL; 432 break; 433 } 434 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 435 error = EWOULDBLOCK; 436 break; 437 } 438 m->m_len = 1; 439 *mtod(m, caddr_t) = tp->t_iobc; 440 if (((long)nam & MSG_PEEK) == 0) 441 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 442 break; 443 444 case PRU_SENDOOB: 445 if (sbspace(&so->so_snd) < -512) { 446 m_freem(m); 447 error = ENOBUFS; 448 break; 449 } 450 /* 451 * According to RFC961 (Assigned Protocols), 452 * the urgent pointer points to the last octet 453 * of urgent data. We continue, however, 454 * to consider it to indicate the first octet 455 * of data past the urgent section. 456 * Otherwise, snd_up should be one lower. 
457 */ 458 sbappendstream(&so->so_snd, m); 459 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 460 tp->t_force = 1; 461 error = tcp_output(tp); 462 tp->t_force = 0; 463 break; 464 465 case PRU_SOCKADDR: 466 #ifdef INET6 467 if (inp->inp_flags & INP_IPV6) 468 in6_setsockaddr(inp, nam); 469 else 470 #endif 471 in_setsockaddr(inp, nam); 472 break; 473 474 case PRU_PEERADDR: 475 #ifdef INET6 476 if (inp->inp_flags & INP_IPV6) 477 in6_setpeeraddr(inp, nam); 478 else 479 #endif 480 in_setpeeraddr(inp, nam); 481 break; 482 483 default: 484 panic("tcp_usrreq"); 485 } 486 if (tp && (so->so_options & SO_DEBUG)) 487 tcp_trace(TA_USER, ostate, tp, (caddr_t)0, req, 0); 488 splx(s); 489 return (error); 490 } 491 492 int 493 tcp_ctloutput(op, so, level, optname, mp) 494 int op; 495 struct socket *so; 496 int level, optname; 497 struct mbuf **mp; 498 { 499 int error = 0, s; 500 struct inpcb *inp; 501 struct tcpcb *tp; 502 struct mbuf *m; 503 int i; 504 505 s = splsoftnet(); 506 inp = sotoinpcb(so); 507 if (inp == NULL) { 508 splx(s); 509 if (op == PRCO_SETOPT && *mp) 510 (void) m_free(*mp); 511 return (ECONNRESET); 512 } 513 #ifdef INET6 514 tp = intotcpcb(inp); 515 #endif /* INET6 */ 516 if (level != IPPROTO_TCP) { 517 switch (so->so_proto->pr_domain->dom_family) { 518 #ifdef INET6 519 case PF_INET6: 520 error = ip6_ctloutput(op, so, level, optname, mp); 521 break; 522 #endif /* INET6 */ 523 case PF_INET: 524 error = ip_ctloutput(op, so, level, optname, mp); 525 break; 526 default: 527 error = EAFNOSUPPORT; /*?*/ 528 break; 529 } 530 splx(s); 531 return (error); 532 } 533 #ifndef INET6 534 tp = intotcpcb(inp); 535 #endif /* !INET6 */ 536 537 switch (op) { 538 539 case PRCO_SETOPT: 540 m = *mp; 541 switch (optname) { 542 543 case TCP_NODELAY: 544 if (m == NULL || m->m_len < sizeof (int)) 545 error = EINVAL; 546 else if (*mtod(m, int *)) 547 tp->t_flags |= TF_NODELAY; 548 else 549 tp->t_flags &= ~TF_NODELAY; 550 break; 551 552 case TCP_MAXSEG: 553 if (m == NULL || m->m_len < sizeof 
(int)) { 554 error = EINVAL; 555 break; 556 } 557 558 i = *mtod(m, int *); 559 if (i > 0 && i <= tp->t_maxseg) 560 tp->t_maxseg = i; 561 else 562 error = EINVAL; 563 break; 564 565 #ifdef TCP_SACK 566 case TCP_SACK_ENABLE: 567 if (m == NULL || m->m_len < sizeof (int)) { 568 error = EINVAL; 569 break; 570 } 571 572 if (TCPS_HAVEESTABLISHED(tp->t_state)) { 573 error = EPERM; 574 break; 575 } 576 577 if (tp->t_flags & TF_SIGNATURE) { 578 error = EPERM; 579 break; 580 } 581 582 if (*mtod(m, int *)) 583 tp->sack_enable = 1; 584 else 585 tp->sack_enable = 0; 586 break; 587 #endif 588 #ifdef TCP_SIGNATURE 589 case TCP_MD5SIG: 590 if (m == NULL || m->m_len < sizeof (int)) { 591 error = EINVAL; 592 break; 593 } 594 595 if (TCPS_HAVEESTABLISHED(tp->t_state)) { 596 error = EPERM; 597 break; 598 } 599 600 if (*mtod(m, int *)) { 601 tp->t_flags |= TF_SIGNATURE; 602 #ifdef TCP_SACK 603 tp->sack_enable = 0; 604 #endif /* TCP_SACK */ 605 } else 606 tp->t_flags &= ~TF_SIGNATURE; 607 break; 608 #endif /* TCP_SIGNATURE */ 609 default: 610 error = ENOPROTOOPT; 611 break; 612 } 613 if (m) 614 (void) m_free(m); 615 break; 616 617 case PRCO_GETOPT: 618 *mp = m = m_get(M_WAIT, MT_SOOPTS); 619 m->m_len = sizeof(int); 620 621 switch (optname) { 622 case TCP_NODELAY: 623 *mtod(m, int *) = tp->t_flags & TF_NODELAY; 624 break; 625 case TCP_MAXSEG: 626 *mtod(m, int *) = tp->t_maxseg; 627 break; 628 #ifdef TCP_SACK 629 case TCP_SACK_ENABLE: 630 *mtod(m, int *) = tp->sack_enable; 631 break; 632 #endif 633 #ifdef TCP_SIGNATURE 634 case TCP_MD5SIG: 635 *mtod(m, int *) = tp->t_flags & TF_SIGNATURE; 636 break; 637 #endif 638 default: 639 error = ENOPROTOOPT; 640 break; 641 } 642 break; 643 } 644 splx(s); 645 return (error); 646 } 647 648 #ifndef TCP_SENDSPACE 649 #define TCP_SENDSPACE 1024*16 650 #endif 651 u_int tcp_sendspace = TCP_SENDSPACE; 652 #ifndef TCP_RECVSPACE 653 #define TCP_RECVSPACE 1024*16 654 #endif 655 u_int tcp_recvspace = TCP_RECVSPACE; 656 657 /* 658 * Attach TCP protocol to socket, 
allocating 659 * internet protocol control block, tcp control block, 660 * bufer space, and entering LISTEN state if to accept connections. 661 */ 662 int 663 tcp_attach(so) 664 struct socket *so; 665 { 666 struct tcpcb *tp; 667 struct inpcb *inp; 668 int error; 669 670 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 671 error = soreserve(so, tcp_sendspace, tcp_recvspace); 672 if (error) 673 return (error); 674 } 675 error = in_pcballoc(so, &tcbtable); 676 if (error) 677 return (error); 678 inp = sotoinpcb(so); 679 tp = tcp_newtcpcb(inp); 680 if (tp == NULL) { 681 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 682 683 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 684 in_pcbdetach(inp); 685 so->so_state |= nofd; 686 return (ENOBUFS); 687 } 688 tp->t_state = TCPS_CLOSED; 689 #ifdef INET6 690 /* we disallow IPv4 mapped address completely. */ 691 if (inp->inp_flags & INP_IPV6) 692 tp->pf = PF_INET6; 693 else 694 tp->pf = PF_INET; 695 #else 696 tp->pf = PF_INET; 697 #endif 698 return (0); 699 } 700 701 /* 702 * Initiate (or continue) disconnect. 703 * If embryonic state, just send reset (once). 704 * If in ``let data drain'' option and linger null, just drop. 705 * Otherwise (hard), mark socket disconnecting and drop 706 * current input data; switch states based on user close, and 707 * send segment to peer (with FIN). 708 */ 709 struct tcpcb * 710 tcp_disconnect(tp) 711 struct tcpcb *tp; 712 { 713 struct socket *so = tp->t_inpcb->inp_socket; 714 715 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 716 tp = tcp_close(tp); 717 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 718 tp = tcp_drop(tp, 0); 719 else { 720 soisdisconnecting(so); 721 sbflush(&so->so_rcv); 722 tp = tcp_usrclosed(tp); 723 if (tp) 724 (void) tcp_output(tp); 725 } 726 return (tp); 727 } 728 729 /* 730 * User issued close, and wish to trail through shutdown states: 731 * if never received SYN, just forget it. 
If got a SYN from peer, 732 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 733 * If already got a FIN from peer, then almost done; go to LAST_ACK 734 * state. In all other cases, have already sent FIN to peer (e.g. 735 * after PRU_SHUTDOWN), and just have to play tedious game waiting 736 * for peer to send FIN or not respond to keep-alives, etc. 737 * We can let the user exit from the close as soon as the FIN is acked. 738 */ 739 struct tcpcb * 740 tcp_usrclosed(tp) 741 struct tcpcb *tp; 742 { 743 744 switch (tp->t_state) { 745 746 case TCPS_CLOSED: 747 case TCPS_LISTEN: 748 case TCPS_SYN_SENT: 749 tp->t_state = TCPS_CLOSED; 750 tp = tcp_close(tp); 751 break; 752 753 case TCPS_SYN_RECEIVED: 754 case TCPS_ESTABLISHED: 755 tp->t_state = TCPS_FIN_WAIT_1; 756 break; 757 758 case TCPS_CLOSE_WAIT: 759 tp->t_state = TCPS_LAST_ACK; 760 break; 761 } 762 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 763 soisdisconnected(tp->t_inpcb->inp_socket); 764 /* 765 * If we are in FIN_WAIT_2, we arrived here because the 766 * application did a shutdown of the send side. Like the 767 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after 768 * a full close, we start a timer to make sure sockets are 769 * not left in FIN_WAIT_2 forever. 770 */ 771 if (tp->t_state == TCPS_FIN_WAIT_2) 772 TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle); 773 } 774 return (tp); 775 } 776 777 /* 778 * Look up a socket for ident.. 
779 */ 780 int 781 tcp_ident(oldp, oldlenp, newp, newlen) 782 void *oldp; 783 size_t *oldlenp; 784 void *newp; 785 size_t newlen; 786 { 787 int error = 0, s; 788 struct tcp_ident_mapping tir; 789 struct inpcb *inp; 790 struct sockaddr_in *fin, *lin; 791 #ifdef INET6 792 struct sockaddr_in6 *fin6, *lin6; 793 struct in6_addr f6, l6; 794 #endif 795 796 if (oldp == NULL || newp != NULL || newlen != 0) 797 return (EINVAL); 798 if (*oldlenp < sizeof(tir)) 799 return (ENOMEM); 800 if ((error = copyin(oldp, &tir, sizeof (tir))) != 0 ) 801 return (error); 802 switch (tir.faddr.ss_family) { 803 #ifdef INET6 804 case AF_INET6: 805 fin6 = (struct sockaddr_in6 *)&tir.faddr; 806 error = in6_embedscope(&f6, fin6, NULL, NULL); 807 if (error) 808 return EINVAL; /*?*/ 809 lin6 = (struct sockaddr_in6 *)&tir.laddr; 810 error = in6_embedscope(&l6, lin6, NULL, NULL); 811 if (error) 812 return EINVAL; /*?*/ 813 break; 814 #endif 815 case AF_INET: 816 fin = (struct sockaddr_in *)&tir.faddr; 817 lin = (struct sockaddr_in *)&tir.laddr; 818 break; 819 default: 820 return (EINVAL); 821 } 822 823 s = splsoftnet(); 824 switch (tir.faddr.ss_family) { 825 case AF_INET6: 826 #ifdef INET6 827 inp = in6_pcbhashlookup(&tcbtable, &f6, 828 fin6->sin6_port, &l6, lin6->sin6_port); 829 break; 830 #endif 831 case AF_INET: 832 inp = in_pcbhashlookup(&tcbtable, fin->sin_addr, 833 fin->sin_port, lin->sin_addr, lin->sin_port); 834 break; 835 } 836 837 if (inp == NULL) { 838 ++tcpstat.tcps_pcbhashmiss; 839 switch (tir.faddr.ss_family) { 840 #ifdef INET6 841 case AF_INET6: 842 inp = in6_pcblookup_listen(&tcbtable, 843 &l6, lin6->sin6_port, 0); 844 break; 845 #endif 846 case AF_INET: 847 inp = in_pcblookup_listen(&tcbtable, 848 lin->sin_addr, lin->sin_port, 0); 849 break; 850 } 851 } 852 853 if (inp != NULL && (inp->inp_socket->so_state & SS_CONNECTOUT)) { 854 tir.ruid = inp->inp_socket->so_ruid; 855 tir.euid = inp->inp_socket->so_euid; 856 } else { 857 tir.ruid = -1; 858 tir.euid = -1; 859 } 860 splx(s); 861 862 
*oldlenp = sizeof (tir); 863 error = copyout((void *)&tir, oldp, sizeof (tir)); 864 return (error); 865 } 866 867 /* 868 * Sysctl for tcp variables. 869 */ 870 int 871 tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen) 872 int *name; 873 u_int namelen; 874 void *oldp; 875 size_t *oldlenp; 876 void *newp; 877 size_t newlen; 878 { 879 880 /* All sysctl names at this level are terminal. */ 881 if (namelen != 1) 882 return (ENOTDIR); 883 884 switch (name[0]) { 885 case TCPCTL_RFC1323: 886 return (sysctl_int(oldp, oldlenp, newp, newlen, 887 &tcp_do_rfc1323)); 888 #ifdef TCP_SACK 889 case TCPCTL_SACK: 890 return (sysctl_int(oldp, oldlenp, newp, newlen, 891 &tcp_do_sack)); 892 #endif 893 case TCPCTL_MSSDFLT: 894 return (sysctl_int(oldp, oldlenp, newp, newlen, 895 &tcp_mssdflt)); 896 case TCPCTL_KEEPINITTIME: 897 return (sysctl_int(oldp, oldlenp, newp, newlen, 898 &tcptv_keep_init)); 899 900 case TCPCTL_KEEPIDLE: 901 return (sysctl_int(oldp, oldlenp, newp, newlen, 902 &tcp_keepidle)); 903 904 case TCPCTL_KEEPINTVL: 905 return (sysctl_int(oldp, oldlenp, newp, newlen, 906 &tcp_keepintvl)); 907 908 case TCPCTL_SLOWHZ: 909 return (sysctl_rdint(oldp, oldlenp, newp, PR_SLOWHZ)); 910 911 case TCPCTL_BADDYNAMIC: 912 return (sysctl_struct(oldp, oldlenp, newp, newlen, 913 baddynamicports.tcp, sizeof(baddynamicports.tcp))); 914 915 case TCPCTL_RECVSPACE: 916 return (sysctl_int(oldp, oldlenp, newp, newlen,&tcp_recvspace)); 917 918 case TCPCTL_SENDSPACE: 919 return (sysctl_int(oldp, oldlenp, newp, newlen,&tcp_sendspace)); 920 case TCPCTL_IDENT: 921 return (tcp_ident(oldp, oldlenp, newp, newlen)); 922 case TCPCTL_RSTPPSLIMIT: 923 return (sysctl_int(oldp, oldlenp, newp, newlen, 924 &tcp_rst_ppslim)); 925 case TCPCTL_ACK_ON_PUSH: 926 return (sysctl_int(oldp, oldlenp, newp, newlen, 927 &tcp_ack_on_push)); 928 #ifdef TCP_ECN 929 case TCPCTL_ECN: 930 return (sysctl_int(oldp, oldlenp, newp, newlen, 931 &tcp_do_ecn)); 932 #endif 933 case TCPCTL_SYN_CACHE_LIMIT: 934 return (sysctl_int(oldp, 
oldlenp, newp, newlen, 935 &tcp_syn_cache_limit)); 936 case TCPCTL_SYN_BUCKET_LIMIT: 937 return (sysctl_int(oldp, oldlenp, newp, newlen, 938 &tcp_syn_bucket_limit)); 939 case TCPCTL_RFC3390: 940 return (sysctl_int(oldp, oldlenp, newp, newlen, 941 &tcp_do_rfc3390)); 942 default: 943 return (ENOPROTOOPT); 944 } 945 /* NOTREACHED */ 946 } 947