1 /* $OpenBSD: tcp_usrreq.c,v 1.85 2004/04/27 17:51:33 otto Exp $ */ 2 /* $NetBSD: tcp_usrreq.c,v 1.20 1996/02/13 23:44:16 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
31 * 32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 33 * 34 * NRL grants permission for redistribution and use in source and binary 35 * forms, with or without modification, of the software and documentation 36 * created at NRL provided that the following conditions are met: 37 * 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgements: 45 * This product includes software developed by the University of 46 * California, Berkeley and its contributors. 47 * This product includes software developed at the Information 48 * Technology Division, US Naval Research Laboratory. 49 * 4. Neither the name of the NRL nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
64 * 65 * The views and conclusions contained in the software and documentation 66 * are those of the authors and should not be interpreted as representing 67 * official policies, either expressed or implied, of the US Naval 68 * Research Laboratory (NRL). 69 */ 70 71 #include <sys/param.h> 72 #include <sys/systm.h> 73 #include <sys/mbuf.h> 74 #include <sys/socket.h> 75 #include <sys/socketvar.h> 76 #include <sys/protosw.h> 77 #include <sys/stat.h> 78 #include <sys/sysctl.h> 79 #include <sys/domain.h> 80 #include <sys/kernel.h> 81 82 #include <net/if.h> 83 #include <net/route.h> 84 85 #include <netinet/in.h> 86 #include <netinet/in_systm.h> 87 #include <netinet/in_var.h> 88 #include <netinet/ip.h> 89 #include <netinet/in_pcb.h> 90 #include <netinet/ip_var.h> 91 #include <netinet/tcp.h> 92 #include <netinet/tcp_fsm.h> 93 #include <netinet/tcp_seq.h> 94 #include <netinet/tcp_timer.h> 95 #include <netinet/tcp_var.h> 96 #include <netinet/tcpip.h> 97 #include <netinet/tcp_debug.h> 98 99 /* 100 * TCP protocol interface to socket abstraction. 101 */ 102 extern char *tcpstates[]; 103 extern int tcptv_keep_init; 104 105 extern int tcp_rst_ppslim; 106 107 /* from in_pcb.c */ 108 extern struct baddynamicports baddynamicports; 109 110 #ifndef TCP_SENDSPACE 111 #define TCP_SENDSPACE 1024*16 112 #endif 113 u_int tcp_sendspace = TCP_SENDSPACE; 114 #ifndef TCP_RECVSPACE 115 #define TCP_RECVSPACE 1024*16 116 #endif 117 u_int tcp_recvspace = TCP_RECVSPACE; 118 119 int *tcpctl_vars[TCPCTL_MAXID] = TCPCTL_VARS; 120 121 struct inpcbtable tcbtable; 122 123 int tcp_ident(void *, size_t *, void *, size_t, int); 124 125 #ifdef INET6 126 int 127 tcp6_usrreq(so, req, m, nam, control, p) 128 struct socket *so; 129 int req; 130 struct mbuf *m, *nam, *control; 131 struct proc *p; 132 { 133 134 return tcp_usrreq(so, req, m, nam, control); 135 } 136 #endif 137 138 /* 139 * Process a TCP user request for TCP tb. If this is a send request 140 * then m is the mbuf chain of send data. 
If this is a timer expiration 141 * (called from the software clock routine), then timertype tells which timer. 142 */ 143 /*ARGSUSED*/ 144 int 145 tcp_usrreq(so, req, m, nam, control) 146 struct socket *so; 147 int req; 148 struct mbuf *m, *nam, *control; 149 { 150 struct sockaddr_in *sin; 151 struct inpcb *inp; 152 struct tcpcb *tp = NULL; 153 int s; 154 int error = 0; 155 int ostate; 156 157 if (req == PRU_CONTROL) { 158 #ifdef INET6 159 if (sotopf(so) == PF_INET6) 160 return in6_control(so, (u_long)m, (caddr_t)nam, 161 (struct ifnet *)control, 0); 162 else 163 #endif /* INET6 */ 164 return (in_control(so, (u_long)m, (caddr_t)nam, 165 (struct ifnet *)control)); 166 } 167 if (control && control->m_len) { 168 m_freem(control); 169 if (m) 170 m_freem(m); 171 return (EINVAL); 172 } 173 174 s = splsoftnet(); 175 inp = sotoinpcb(so); 176 /* 177 * When a TCP is attached to a socket, then there will be 178 * a (struct inpcb) pointed at by the socket, and this 179 * structure will point at a subsidiary (struct tcpcb). 180 */ 181 if (inp == 0 && req != PRU_ATTACH) { 182 splx(s); 183 /* 184 * The following corrects an mbuf leak under rare 185 * circumstances 186 */ 187 if (m && (req == PRU_SEND || req == PRU_SENDOOB)) 188 m_freem(m); 189 return (EINVAL); /* XXX */ 190 } 191 if (inp) { 192 tp = intotcpcb(inp); 193 /* WHAT IF TP IS 0? */ 194 #ifdef KPROF 195 tcp_acounts[tp->t_state][req]++; 196 #endif 197 ostate = tp->t_state; 198 } else 199 ostate = 0; 200 switch (req) { 201 202 /* 203 * TCP attaches to socket via PRU_ATTACH, reserving space, 204 * and an internet control block. 205 */ 206 case PRU_ATTACH: 207 if (inp) { 208 error = EISCONN; 209 break; 210 } 211 error = tcp_attach(so); 212 if (error) 213 break; 214 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 215 so->so_linger = TCP_LINGERTIME; 216 tp = sototcpcb(so); 217 break; 218 219 /* 220 * PRU_DETACH detaches the TCP protocol from the socket. 
221 * If the protocol state is non-embryonic, then can't 222 * do this directly: have to initiate a PRU_DISCONNECT, 223 * which may finish later; embryonic TCB's can just 224 * be discarded here. 225 */ 226 case PRU_DETACH: 227 tp = tcp_disconnect(tp); 228 break; 229 230 /* 231 * Give the socket an address. 232 */ 233 case PRU_BIND: 234 #ifdef INET6 235 if (inp->inp_flags & INP_IPV6) 236 error = in6_pcbbind(inp, nam); 237 else 238 #endif 239 error = in_pcbbind(inp, nam); 240 if (error) 241 break; 242 break; 243 244 /* 245 * Prepare to accept connections. 246 */ 247 case PRU_LISTEN: 248 if (inp->inp_lport == 0) { 249 #ifdef INET6 250 if (inp->inp_flags & INP_IPV6) 251 error = in6_pcbbind(inp, NULL); 252 else 253 #endif 254 error = in_pcbbind(inp, NULL); 255 } 256 /* If the in_pcbbind() above is called, the tp->pf 257 should still be whatever it was before. */ 258 if (error == 0) 259 tp->t_state = TCPS_LISTEN; 260 break; 261 262 /* 263 * Initiate connection to peer. 264 * Create a template for use in transmissions on this connection. 265 * Enter SYN_SENT state, and mark socket as connecting. 266 * Start keep-alive timer, and seed output sequence space. 267 * Send initial segment on connection. 
268 */ 269 case PRU_CONNECT: 270 sin = mtod(nam, struct sockaddr_in *); 271 272 #ifdef INET6 273 if (sin->sin_family == AF_INET6) { 274 struct in6_addr *in6_addr = &mtod(nam, 275 struct sockaddr_in6 *)->sin6_addr; 276 277 if (IN6_IS_ADDR_UNSPECIFIED(in6_addr) || 278 IN6_IS_ADDR_MULTICAST(in6_addr) || 279 (IN6_IS_ADDR_V4MAPPED(in6_addr) && 280 ((in6_addr->s6_addr32[3] == INADDR_ANY) || 281 IN_MULTICAST(in6_addr->s6_addr32[3]) || 282 in_broadcast(sin->sin_addr, NULL)))) { 283 error = EINVAL; 284 break; 285 } 286 287 if (inp->inp_lport == 0) { 288 error = in6_pcbbind(inp, NULL); 289 if (error) 290 break; 291 } 292 error = in6_pcbconnect(inp, nam); 293 } else if (sin->sin_family == AF_INET) 294 #endif /* INET6 */ 295 { 296 if ((sin->sin_addr.s_addr == INADDR_ANY) || 297 IN_MULTICAST(sin->sin_addr.s_addr) || 298 in_broadcast(sin->sin_addr, NULL)) { 299 error = EINVAL; 300 break; 301 } 302 303 if (inp->inp_lport == 0) { 304 error = in_pcbbind(inp, NULL); 305 if (error) 306 break; 307 } 308 error = in_pcbconnect(inp, nam); 309 } 310 311 if (error) 312 break; 313 314 tp->t_template = tcp_template(tp); 315 if (tp->t_template == 0) { 316 in_pcbdisconnect(inp); 317 error = ENOBUFS; 318 break; 319 } 320 321 so->so_state |= SS_CONNECTOUT; 322 /* Compute window scaling to request. */ 323 tcp_rscale(tp, so->so_rcv.sb_hiwat); 324 325 soisconnecting(so); 326 tcpstat.tcps_connattempt++; 327 tp->t_state = TCPS_SYN_SENT; 328 TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init); 329 #ifdef TCP_COMPAT_42 330 tp->iss = tcp_iss; 331 tcp_iss += TCP_ISSINCR/2; 332 #else /* TCP_COMPAT_42 */ 333 tp->iss = tcp_rndiss_next(); 334 #endif /* !TCP_COMPAT_42 */ 335 tcp_sendseqinit(tp); 336 #if defined(TCP_SACK) 337 tp->snd_last = tp->snd_una; 338 #endif 339 #if defined(TCP_SACK) && defined(TCP_FACK) 340 tp->snd_fack = tp->snd_una; 341 tp->retran_data = 0; 342 tp->snd_awnd = 0; 343 #endif 344 error = tcp_output(tp); 345 break; 346 347 /* 348 * Create a TCP connection between two sockets. 
349 */ 350 case PRU_CONNECT2: 351 error = EOPNOTSUPP; 352 break; 353 354 /* 355 * Initiate disconnect from peer. 356 * If connection never passed embryonic stage, just drop; 357 * else if don't need to let data drain, then can just drop anyways, 358 * else have to begin TCP shutdown process: mark socket disconnecting, 359 * drain unread data, state switch to reflect user close, and 360 * send segment (e.g. FIN) to peer. Socket will be really disconnected 361 * when peer sends FIN and acks ours. 362 * 363 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 364 */ 365 case PRU_DISCONNECT: 366 tp = tcp_disconnect(tp); 367 break; 368 369 /* 370 * Accept a connection. Essentially all the work is 371 * done at higher levels; just return the address 372 * of the peer, storing through addr. 373 */ 374 case PRU_ACCEPT: 375 #ifdef INET6 376 if (inp->inp_flags & INP_IPV6) 377 in6_setpeeraddr(inp, nam); 378 else 379 #endif 380 in_setpeeraddr(inp, nam); 381 break; 382 383 /* 384 * Mark the connection as being incapable of further output. 385 */ 386 case PRU_SHUTDOWN: 387 if (so->so_state & SS_CANTSENDMORE) 388 break; 389 socantsendmore(so); 390 tp = tcp_usrclosed(tp); 391 if (tp) 392 error = tcp_output(tp); 393 break; 394 395 /* 396 * After a receive, possibly send window update to peer. 397 */ 398 case PRU_RCVD: 399 /* 400 * soreceive() calls this function when a user receives 401 * ancillary data on a listening socket. We don't call 402 * tcp_output in such a case, since there is no header 403 * template for a listening socket and hence the kernel 404 * will panic. 405 */ 406 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0) 407 (void) tcp_output(tp); 408 break; 409 410 /* 411 * Do a send by putting data in output queue and updating urgent 412 * marker if URG set. Possibly send more data. 413 */ 414 case PRU_SEND: 415 sbappendstream(&so->so_snd, m); 416 error = tcp_output(tp); 417 break; 418 419 /* 420 * Abort the TCP. 
421 */ 422 case PRU_ABORT: 423 tp = tcp_drop(tp, ECONNABORTED); 424 break; 425 426 case PRU_SENSE: 427 ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; 428 splx(s); 429 return (0); 430 431 case PRU_RCVOOB: 432 if ((so->so_oobmark == 0 && 433 (so->so_state & SS_RCVATMARK) == 0) || 434 so->so_options & SO_OOBINLINE || 435 tp->t_oobflags & TCPOOB_HADDATA) { 436 error = EINVAL; 437 break; 438 } 439 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 440 error = EWOULDBLOCK; 441 break; 442 } 443 m->m_len = 1; 444 *mtod(m, caddr_t) = tp->t_iobc; 445 if (((long)nam & MSG_PEEK) == 0) 446 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 447 break; 448 449 case PRU_SENDOOB: 450 if (sbspace(&so->so_snd) < -512) { 451 m_freem(m); 452 error = ENOBUFS; 453 break; 454 } 455 /* 456 * According to RFC961 (Assigned Protocols), 457 * the urgent pointer points to the last octet 458 * of urgent data. We continue, however, 459 * to consider it to indicate the first octet 460 * of data past the urgent section. 461 * Otherwise, snd_up should be one lower. 
462 */ 463 sbappendstream(&so->so_snd, m); 464 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 465 tp->t_force = 1; 466 error = tcp_output(tp); 467 tp->t_force = 0; 468 break; 469 470 case PRU_SOCKADDR: 471 #ifdef INET6 472 if (inp->inp_flags & INP_IPV6) 473 in6_setsockaddr(inp, nam); 474 else 475 #endif 476 in_setsockaddr(inp, nam); 477 break; 478 479 case PRU_PEERADDR: 480 #ifdef INET6 481 if (inp->inp_flags & INP_IPV6) 482 in6_setpeeraddr(inp, nam); 483 else 484 #endif 485 in_setpeeraddr(inp, nam); 486 break; 487 488 default: 489 panic("tcp_usrreq"); 490 } 491 if (tp && (so->so_options & SO_DEBUG)) 492 tcp_trace(TA_USER, ostate, tp, (caddr_t)0, req, 0); 493 splx(s); 494 return (error); 495 } 496 497 int 498 tcp_ctloutput(op, so, level, optname, mp) 499 int op; 500 struct socket *so; 501 int level, optname; 502 struct mbuf **mp; 503 { 504 int error = 0, s; 505 struct inpcb *inp; 506 struct tcpcb *tp; 507 struct mbuf *m; 508 int i; 509 510 s = splsoftnet(); 511 inp = sotoinpcb(so); 512 if (inp == NULL) { 513 splx(s); 514 if (op == PRCO_SETOPT && *mp) 515 (void) m_free(*mp); 516 return (ECONNRESET); 517 } 518 #ifdef INET6 519 tp = intotcpcb(inp); 520 #endif /* INET6 */ 521 if (level != IPPROTO_TCP) { 522 switch (so->so_proto->pr_domain->dom_family) { 523 #ifdef INET6 524 case PF_INET6: 525 error = ip6_ctloutput(op, so, level, optname, mp); 526 break; 527 #endif /* INET6 */ 528 case PF_INET: 529 error = ip_ctloutput(op, so, level, optname, mp); 530 break; 531 default: 532 error = EAFNOSUPPORT; /*?*/ 533 break; 534 } 535 splx(s); 536 return (error); 537 } 538 #ifndef INET6 539 tp = intotcpcb(inp); 540 #endif /* !INET6 */ 541 542 switch (op) { 543 544 case PRCO_SETOPT: 545 m = *mp; 546 switch (optname) { 547 548 case TCP_NODELAY: 549 if (m == NULL || m->m_len < sizeof (int)) 550 error = EINVAL; 551 else if (*mtod(m, int *)) 552 tp->t_flags |= TF_NODELAY; 553 else 554 tp->t_flags &= ~TF_NODELAY; 555 break; 556 557 case TCP_MAXSEG: 558 if (m == NULL || m->m_len < sizeof 
(int)) { 559 error = EINVAL; 560 break; 561 } 562 563 i = *mtod(m, int *); 564 if (i > 0 && i <= tp->t_maxseg) 565 tp->t_maxseg = i; 566 else 567 error = EINVAL; 568 break; 569 570 #ifdef TCP_SACK 571 case TCP_SACK_ENABLE: 572 if (m == NULL || m->m_len < sizeof (int)) { 573 error = EINVAL; 574 break; 575 } 576 577 if (TCPS_HAVEESTABLISHED(tp->t_state)) { 578 error = EPERM; 579 break; 580 } 581 582 if (tp->t_flags & TF_SIGNATURE) { 583 error = EPERM; 584 break; 585 } 586 587 if (*mtod(m, int *)) 588 tp->sack_enable = 1; 589 else 590 tp->sack_enable = 0; 591 break; 592 #endif 593 #ifdef TCP_SIGNATURE 594 case TCP_MD5SIG: 595 if (m == NULL || m->m_len < sizeof (int)) { 596 error = EINVAL; 597 break; 598 } 599 600 if (TCPS_HAVEESTABLISHED(tp->t_state)) { 601 error = EPERM; 602 break; 603 } 604 605 if (*mtod(m, int *)) { 606 tp->t_flags |= TF_SIGNATURE; 607 #ifdef TCP_SACK 608 tp->sack_enable = 0; 609 #endif /* TCP_SACK */ 610 } else 611 tp->t_flags &= ~TF_SIGNATURE; 612 break; 613 #endif /* TCP_SIGNATURE */ 614 default: 615 error = ENOPROTOOPT; 616 break; 617 } 618 if (m) 619 (void) m_free(m); 620 break; 621 622 case PRCO_GETOPT: 623 *mp = m = m_get(M_WAIT, MT_SOOPTS); 624 m->m_len = sizeof(int); 625 626 switch (optname) { 627 case TCP_NODELAY: 628 *mtod(m, int *) = tp->t_flags & TF_NODELAY; 629 break; 630 case TCP_MAXSEG: 631 *mtod(m, int *) = tp->t_maxseg; 632 break; 633 #ifdef TCP_SACK 634 case TCP_SACK_ENABLE: 635 *mtod(m, int *) = tp->sack_enable; 636 break; 637 #endif 638 #ifdef TCP_SIGNATURE 639 case TCP_MD5SIG: 640 *mtod(m, int *) = tp->t_flags & TF_SIGNATURE; 641 break; 642 #endif 643 default: 644 error = ENOPROTOOPT; 645 break; 646 } 647 break; 648 } 649 splx(s); 650 return (error); 651 } 652 653 /* 654 * Attach TCP protocol to socket, allocating 655 * internet protocol control block, tcp control block, 656 * bufer space, and entering LISTEN state if to accept connections. 
657 */ 658 int 659 tcp_attach(so) 660 struct socket *so; 661 { 662 struct tcpcb *tp; 663 struct inpcb *inp; 664 int error; 665 666 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 667 error = soreserve(so, tcp_sendspace, tcp_recvspace); 668 if (error) 669 return (error); 670 } 671 error = in_pcballoc(so, &tcbtable); 672 if (error) 673 return (error); 674 inp = sotoinpcb(so); 675 tp = tcp_newtcpcb(inp); 676 if (tp == NULL) { 677 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 678 679 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 680 in_pcbdetach(inp); 681 so->so_state |= nofd; 682 return (ENOBUFS); 683 } 684 tp->t_state = TCPS_CLOSED; 685 #ifdef INET6 686 /* we disallow IPv4 mapped address completely. */ 687 if (inp->inp_flags & INP_IPV6) 688 tp->pf = PF_INET6; 689 else 690 tp->pf = PF_INET; 691 #else 692 tp->pf = PF_INET; 693 #endif 694 return (0); 695 } 696 697 /* 698 * Initiate (or continue) disconnect. 699 * If embryonic state, just send reset (once). 700 * If in ``let data drain'' option and linger null, just drop. 701 * Otherwise (hard), mark socket disconnecting and drop 702 * current input data; switch states based on user close, and 703 * send segment to peer (with FIN). 704 */ 705 struct tcpcb * 706 tcp_disconnect(tp) 707 struct tcpcb *tp; 708 { 709 struct socket *so = tp->t_inpcb->inp_socket; 710 711 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 712 tp = tcp_close(tp); 713 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 714 tp = tcp_drop(tp, 0); 715 else { 716 soisdisconnecting(so); 717 sbflush(&so->so_rcv); 718 tp = tcp_usrclosed(tp); 719 if (tp) 720 (void) tcp_output(tp); 721 } 722 return (tp); 723 } 724 725 /* 726 * User issued close, and wish to trail through shutdown states: 727 * if never received SYN, just forget it. If got a SYN from peer, 728 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 729 * If already got a FIN from peer, then almost done; go to LAST_ACK 730 * state. 
In all other cases, have already sent FIN to peer (e.g.
 * after PRU_SHUTDOWN), and just have to play tedious game waiting
 * for peer to send FIN or not respond to keep-alives, etc.
 * We can let the user exit from the close as soon as the FIN is acked.
 */
/*
 * tcp_usrclosed: advance tp's state after a user-level close.
 *
 * Returns the tcpcb pointer to keep using; for the CLOSED/LISTEN/SYN_SENT
 * states that is whatever tcp_close() returned.
 */
struct tcpcb *
tcp_usrclosed(struct tcpcb *tp)
{

	if (tp->t_state == TCPS_CLOSED || tp->t_state == TCPS_LISTEN ||
	    tp->t_state == TCPS_SYN_SENT) {
		tp->t_state = TCPS_CLOSED;
		tp = tcp_close(tp);
	} else if (tp->t_state == TCPS_SYN_RECEIVED ||
	    tp->t_state == TCPS_ESTABLISHED) {
		tp->t_state = TCPS_FIN_WAIT_1;
	} else if (tp->t_state == TCPS_CLOSE_WAIT) {
		tp->t_state = TCPS_LAST_ACK;
	}

	/* Nothing more to do unless a tcpcb remains at FIN_WAIT_2 or later. */
	if (tp == NULL || tp->t_state < TCPS_FIN_WAIT_2)
		return (tp);

	soisdisconnected(tp->t_inpcb->inp_socket);
	/*
	 * If we are in FIN_WAIT_2, we arrived here because the
	 * application did a shutdown of the send side.  Like the
	 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after
	 * a full close, we start a timer to make sure sockets are
	 * not left in FIN_WAIT_2 forever.
	 */
	if (tp->t_state == TCPS_FIN_WAIT_2) {
		TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle);
	}
	return (tp);
}

/*
 * Look up a socket for ident or tcpdrop, ...
775 */ 776 int 777 tcp_ident(oldp, oldlenp, newp, newlen, dodrop) 778 void *oldp; 779 size_t *oldlenp; 780 void *newp; 781 size_t newlen; 782 int dodrop; 783 { 784 int error = 0, s; 785 struct tcp_ident_mapping tir; 786 struct inpcb *inp; 787 struct tcpcb *tp = NULL; 788 struct sockaddr_in *fin, *lin; 789 #ifdef INET6 790 struct sockaddr_in6 *fin6, *lin6; 791 struct in6_addr f6, l6; 792 #endif 793 if (dodrop) { 794 if (oldp != NULL || *oldlenp != 0) 795 return (EINVAL); 796 if (newp == NULL) 797 return (EPERM); 798 if (newlen < sizeof(tir)) 799 return (ENOMEM); 800 if ((error = copyin(newp, &tir, sizeof (tir))) != 0 ) 801 return (error); 802 } else { 803 if (oldp == NULL) 804 return (EINVAL); 805 if (*oldlenp < sizeof(tir)) 806 return (ENOMEM); 807 if (newp != NULL || newlen != 0) 808 return (EINVAL); 809 if ((error = copyin(oldp, &tir, sizeof (tir))) != 0 ) 810 return (error); 811 } 812 switch (tir.faddr.ss_family) { 813 #ifdef INET6 814 case AF_INET6: 815 fin6 = (struct sockaddr_in6 *)&tir.faddr; 816 error = in6_embedscope(&f6, fin6, NULL, NULL); 817 if (error) 818 return EINVAL; /*?*/ 819 lin6 = (struct sockaddr_in6 *)&tir.laddr; 820 error = in6_embedscope(&l6, lin6, NULL, NULL); 821 if (error) 822 return EINVAL; /*?*/ 823 break; 824 #endif 825 case AF_INET: 826 fin = (struct sockaddr_in *)&tir.faddr; 827 lin = (struct sockaddr_in *)&tir.laddr; 828 break; 829 default: 830 return (EINVAL); 831 } 832 833 s = splsoftnet(); 834 switch (tir.faddr.ss_family) { 835 case AF_INET6: 836 #ifdef INET6 837 inp = in6_pcbhashlookup(&tcbtable, &f6, 838 fin6->sin6_port, &l6, lin6->sin6_port); 839 break; 840 #endif 841 case AF_INET: 842 inp = in_pcbhashlookup(&tcbtable, fin->sin_addr, 843 fin->sin_port, lin->sin_addr, lin->sin_port); 844 break; 845 } 846 847 if (dodrop) { 848 if (inp && (tp = intotcpcb(inp)) && 849 ((inp->inp_socket->so_options & SO_ACCEPTCONN) == 0)) 850 tp = tcp_drop(tp, ECONNABORTED); 851 else 852 error = ESRCH; 853 splx(s); 854 return (error); 855 } 856 857 
if (inp == NULL) { 858 ++tcpstat.tcps_pcbhashmiss; 859 switch (tir.faddr.ss_family) { 860 #ifdef INET6 861 case AF_INET6: 862 inp = in6_pcblookup_listen(&tcbtable, 863 &l6, lin6->sin6_port, 0); 864 break; 865 #endif 866 case AF_INET: 867 inp = in_pcblookup_listen(&tcbtable, 868 lin->sin_addr, lin->sin_port, 0); 869 break; 870 } 871 } 872 873 if (inp != NULL && (inp->inp_socket->so_state & SS_CONNECTOUT)) { 874 tir.ruid = inp->inp_socket->so_ruid; 875 tir.euid = inp->inp_socket->so_euid; 876 } else { 877 tir.ruid = -1; 878 tir.euid = -1; 879 } 880 splx(s); 881 882 *oldlenp = sizeof (tir); 883 error = copyout((void *)&tir, oldp, sizeof (tir)); 884 return (error); 885 } 886 887 /* 888 * Sysctl for tcp variables. 889 */ 890 int 891 tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen) 892 int *name; 893 u_int namelen; 894 void *oldp; 895 size_t *oldlenp; 896 void *newp; 897 size_t newlen; 898 { 899 int error, nval; 900 901 /* All sysctl names at this level are terminal. */ 902 if (namelen != 1) 903 return (ENOTDIR); 904 905 switch (name[0]) { 906 #ifdef TCP_SACK 907 case TCPCTL_SACK: 908 return (sysctl_int(oldp, oldlenp, newp, newlen, 909 &tcp_do_sack)); 910 #endif 911 case TCPCTL_SLOWHZ: 912 return (sysctl_rdint(oldp, oldlenp, newp, PR_SLOWHZ)); 913 914 case TCPCTL_BADDYNAMIC: 915 return (sysctl_struct(oldp, oldlenp, newp, newlen, 916 baddynamicports.tcp, sizeof(baddynamicports.tcp))); 917 918 case TCPCTL_IDENT: 919 return (tcp_ident(oldp, oldlenp, newp, newlen, 0)); 920 921 case TCPCTL_DROP: 922 return (tcp_ident(oldp, oldlenp, newp, newlen, 1)); 923 924 #ifdef TCP_ECN 925 case TCPCTL_ECN: 926 return (sysctl_int(oldp, oldlenp, newp, newlen, 927 &tcp_do_ecn)); 928 #endif 929 case TCPCTL_REASS_LIMIT: 930 nval = tcp_reass_limit; 931 error = sysctl_int(oldp, oldlenp, newp, newlen, &nval); 932 if (error) 933 return (error); 934 if (nval != tcp_reass_limit) { 935 error = pool_sethardlimit(&tcpqe_pool, nval, NULL, 0); 936 if (error) 937 return (error); 938 tcp_reass_limit 
= nval; 939 } 940 return (0); 941 default: 942 if (name[0] < TCPCTL_MAXID) 943 return (sysctl_int_arr(tcpctl_vars, name, namelen, 944 oldp, oldlenp, newp, newlen)); 945 return (ENOPROTOOPT); 946 } 947 /* NOTREACHED */ 948 } 949