1 /* $OpenBSD: tcp_usrreq.c,v 1.54 2001/06/26 06:55:32 aaron Exp $ */ 2 /* $NetBSD: tcp_usrreq.c,v 1.20 1996/02/13 23:44:16 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 37 * 38 * NRL grants permission for redistribution and use in source and binary 39 * forms, with or without modification, of the software and documentation 40 * created at NRL provided that the following conditions are met: 41 * 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. All advertising materials mentioning features or use of this software 48 * must display the following acknowledgements: 49 * This product includes software developed by the University of 50 * California, Berkeley and its contributors. 51 * This product includes software developed at the Information 52 * Technology Division, US Naval Research Laboratory. 53 * 4. Neither the name of the NRL nor the names of its contributors 54 * may be used to endorse or promote products derived from this software 55 * without specific prior written permission. 56 * 57 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 58 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 59 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 60 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 61 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 62 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 63 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 64 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 65 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 66 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 67 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 68 * 69 * The views and conclusions contained in the software and documentation 70 * are those of the authors and should not be interpreted as representing 71 * official policies, either expressed or implied, of the US Naval 72 * Research Laboratory (NRL). 73 */ 74 75 #include <sys/param.h> 76 #include <sys/systm.h> 77 #include <sys/mbuf.h> 78 #include <sys/socket.h> 79 #include <sys/socketvar.h> 80 #include <sys/protosw.h> 81 #include <sys/stat.h> 82 #include <sys/sysctl.h> 83 #include <sys/domain.h> 84 85 #include <net/if.h> 86 #include <net/route.h> 87 88 #include <netinet/in.h> 89 #include <netinet/in_systm.h> 90 #include <netinet/in_var.h> 91 #include <netinet/ip.h> 92 #include <netinet/in_pcb.h> 93 #include <netinet/ip_var.h> 94 #include <netinet/tcp.h> 95 #include <netinet/tcp_fsm.h> 96 #include <netinet/tcp_seq.h> 97 #include <netinet/tcp_timer.h> 98 #include <netinet/tcp_var.h> 99 #include <netinet/tcpip.h> 100 #include <netinet/tcp_debug.h> 101 102 /* 103 * TCP protocol interface to socket abstraction. 104 */ 105 extern char *tcpstates[]; 106 extern int tcptv_keep_init; 107 108 extern int tcp_rst_ppslim; 109 110 /* from in_pcb.c */ 111 extern struct baddynamicports baddynamicports; 112 113 int tcp_ident __P((void *, size_t *, void *, size_t)); 114 115 #ifdef INET6 116 int 117 tcp6_usrreq(so, req, m, nam, control, p) 118 struct socket *so; 119 int req; 120 struct mbuf *m, *nam, *control; 121 struct proc *p; 122 { 123 124 return tcp_usrreq(so, req, m, nam, control); 125 } 126 #endif 127 128 /* 129 * Process a TCP user request for TCP tb. If this is a send request 130 * then m is the mbuf chain of send data. If this is a timer expiration 131 * (called from the software clock routine), then timertype tells which timer. 132 */ 133 /*ARGSUSED*/ 134 int 135 tcp_usrreq(so, req, m, nam, control) 136 struct socket *so; 137 int req; 138 struct mbuf *m, *nam, *control; 139 { 140 struct sockaddr_in *sin; 141 register struct inpcb *inp; 142 register struct tcpcb *tp = NULL; 143 int s; 144 int error = 0; 145 int ostate; 146 147 if (req == PRU_CONTROL) { 148 #ifdef INET6 149 if (sotopf(so) == PF_INET6) 150 return in6_control(so, (u_long)m, (caddr_t)nam, 151 (struct ifnet *)control, 0); 152 else 153 #endif /* INET6 */ 154 return (in_control(so, (u_long)m, (caddr_t)nam, 155 (struct ifnet *)control)); 156 } 157 if (control && control->m_len) { 158 m_freem(control); 159 if (m) 160 m_freem(m); 161 return (EINVAL); 162 } 163 164 s = splsoftnet(); 165 inp = sotoinpcb(so); 166 /* 167 * When a TCP is attached to a socket, then there will be 168 * a (struct inpcb) pointed at by the socket, and this 169 * structure will point at a subsidary (struct tcpcb). 170 */ 171 if (inp == 0 && req != PRU_ATTACH) { 172 splx(s); 173 /* 174 * The following corrects an mbuf leak under rare 175 * circumstances 176 */ 177 if (m && (req == PRU_SEND || req == PRU_SENDOOB)) 178 m_freem(m); 179 return (EINVAL); /* XXX */ 180 } 181 if (inp) { 182 tp = intotcpcb(inp); 183 /* WHAT IF TP IS 0? */ 184 #ifdef KPROF 185 tcp_acounts[tp->t_state][req]++; 186 #endif 187 ostate = tp->t_state; 188 } else 189 ostate = 0; 190 switch (req) { 191 192 /* 193 * TCP attaches to socket via PRU_ATTACH, reserving space, 194 * and an internet control block. 195 */ 196 case PRU_ATTACH: 197 if (inp) { 198 error = EISCONN; 199 break; 200 } 201 error = tcp_attach(so); 202 if (error) 203 break; 204 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 205 so->so_linger = TCP_LINGERTIME; 206 tp = sototcpcb(so); 207 break; 208 209 /* 210 * PRU_DETACH detaches the TCP protocol from the socket. 211 * If the protocol state is non-embryonic, then can't 212 * do this directly: have to initiate a PRU_DISCONNECT, 213 * which may finish later; embryonic TCB's can just 214 * be discarded here. 215 */ 216 case PRU_DETACH: 217 tp = tcp_disconnect(tp); 218 break; 219 220 /* 221 * Give the socket an address. 222 */ 223 case PRU_BIND: 224 #ifdef INET6 225 if (inp->inp_flags & INP_IPV6) 226 error = in6_pcbbind(inp, nam); 227 else 228 #endif 229 error = in_pcbbind(inp, nam); 230 if (error) 231 break; 232 break; 233 234 /* 235 * Prepare to accept connections. 236 */ 237 case PRU_LISTEN: 238 if (inp->inp_lport == 0) { 239 #ifdef INET6 240 if (inp->inp_flags & INP_IPV6) 241 error = in6_pcbbind(inp, NULL); 242 else 243 #endif 244 error = in_pcbbind(inp, NULL); 245 } 246 /* If the in_pcbbind() above is called, the tp->pf 247 should still be whatever it was before. */ 248 if (error == 0) 249 tp->t_state = TCPS_LISTEN; 250 break; 251 252 /* 253 * Initiate connection to peer. 254 * Create a template for use in transmissions on this connection. 255 * Enter SYN_SENT state, and mark socket as connecting. 256 * Start keep-alive timer, and seed output sequence space. 257 * Send initial segment on connection. 258 */ 259 case PRU_CONNECT: 260 sin = mtod(nam, struct sockaddr_in *); 261 262 #ifdef INET6 263 if (sin->sin_family == AF_INET6) { 264 struct in6_addr *in6_addr = &mtod(nam, 265 struct sockaddr_in6 *)->sin6_addr; 266 267 if (IN6_IS_ADDR_UNSPECIFIED(in6_addr) || 268 IN6_IS_ADDR_MULTICAST(in6_addr) || 269 (IN6_IS_ADDR_V4MAPPED(in6_addr) && 270 ((in6_addr->s6_addr32[3] == INADDR_ANY) || 271 IN_MULTICAST(in6_addr->s6_addr32[3]) || 272 in_broadcast(sin->sin_addr, NULL)))) { 273 error = EINVAL; 274 break; 275 } 276 277 if (inp->inp_lport == 0) { 278 error = in6_pcbbind(inp, NULL); 279 if (error) 280 break; 281 } 282 error = in6_pcbconnect(inp, nam); 283 } else if (sin->sin_family == AF_INET) 284 #endif /* INET6 */ 285 { 286 if ((sin->sin_addr.s_addr == INADDR_ANY) || 287 IN_MULTICAST(sin->sin_addr.s_addr) || 288 in_broadcast(sin->sin_addr, NULL)) { 289 error = EINVAL; 290 break; 291 } 292 293 /* Trying to connect to some broadcast address */ 294 if (in_broadcast(sin->sin_addr, NULL)) { 295 error = EINVAL; 296 break; 297 } 298 299 if (inp->inp_lport == 0) { 300 error = in_pcbbind(inp, NULL); 301 if (error) 302 break; 303 } 304 error = in_pcbconnect(inp, nam); 305 } 306 307 if (error) 308 break; 309 310 tp->t_template = tcp_template(tp); 311 if (tp->t_template == 0) { 312 in_pcbdisconnect(inp); 313 error = ENOBUFS; 314 break; 315 } 316 317 so->so_state |= SS_CONNECTOUT; 318 /* Compute window scaling to request. */ 319 tcp_rscale(tp, so->so_rcv.sb_hiwat); 320 321 soisconnecting(so); 322 tcpstat.tcps_connattempt++; 323 tp->t_state = TCPS_SYN_SENT; 324 tp->t_timer[TCPT_KEEP] = tcptv_keep_init; 325 #ifdef TCP_COMPAT_42 326 tp->iss = tcp_iss; 327 tcp_iss += TCP_ISSINCR/2; 328 #else /* TCP_COMPAT_42 */ 329 tp->iss = tcp_rndiss_next(); 330 #endif /* !TCP_COMPAT_42 */ 331 tcp_sendseqinit(tp); 332 #if defined(TCP_SACK) 333 tp->snd_last = tp->snd_una; 334 #endif 335 #if defined(TCP_SACK) && defined(TCP_FACK) 336 tp->snd_fack = tp->snd_una; 337 tp->retran_data = 0; 338 tp->snd_awnd = 0; 339 #endif 340 error = tcp_output(tp); 341 break; 342 343 /* 344 * Create a TCP connection between two sockets. 345 */ 346 case PRU_CONNECT2: 347 error = EOPNOTSUPP; 348 break; 349 350 /* 351 * Initiate disconnect from peer. 352 * If connection never passed embryonic stage, just drop; 353 * else if don't need to let data drain, then can just drop anyways, 354 * else have to begin TCP shutdown process: mark socket disconnecting, 355 * drain unread data, state switch to reflect user close, and 356 * send segment (e.g. FIN) to peer. Socket will be really disconnected 357 * when peer sends FIN and acks ours. 358 * 359 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 360 */ 361 case PRU_DISCONNECT: 362 tp = tcp_disconnect(tp); 363 break; 364 365 /* 366 * Accept a connection. Essentially all the work is 367 * done at higher levels; just return the address 368 * of the peer, storing through addr. 369 */ 370 case PRU_ACCEPT: 371 #ifdef INET6 372 if (inp->inp_flags & INP_IPV6) 373 in6_setpeeraddr(inp, nam); 374 else 375 #endif 376 in_setpeeraddr(inp, nam); 377 break; 378 379 /* 380 * Mark the connection as being incapable of further output. 381 */ 382 case PRU_SHUTDOWN: 383 if (so->so_state & SS_CANTSENDMORE) 384 break; 385 socantsendmore(so); 386 tp = tcp_usrclosed(tp); 387 if (tp) 388 error = tcp_output(tp); 389 break; 390 391 /* 392 * After a receive, possibly send window update to peer. 393 */ 394 case PRU_RCVD: 395 /* 396 * soreceive() calls this function when a user receives 397 * ancillary data on a listening socket. We don't call 398 * tcp_output in such a case, since there is no header 399 * template for a listening socket and hence the kernel 400 * will panic. 401 */ 402 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0) 403 (void) tcp_output(tp); 404 break; 405 406 /* 407 * Do a send by putting data in output queue and updating urgent 408 * marker if URG set. Possibly send more data. 409 */ 410 case PRU_SEND: 411 sbappend(&so->so_snd, m); 412 error = tcp_output(tp); 413 break; 414 415 /* 416 * Abort the TCP. 417 */ 418 case PRU_ABORT: 419 tp = tcp_drop(tp, ECONNABORTED); 420 break; 421 422 case PRU_SENSE: 423 ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; 424 (void) splx(s); 425 return (0); 426 427 case PRU_RCVOOB: 428 if ((so->so_oobmark == 0 && 429 (so->so_state & SS_RCVATMARK) == 0) || 430 so->so_options & SO_OOBINLINE || 431 tp->t_oobflags & TCPOOB_HADDATA) { 432 error = EINVAL; 433 break; 434 } 435 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 436 error = EWOULDBLOCK; 437 break; 438 } 439 m->m_len = 1; 440 *mtod(m, caddr_t) = tp->t_iobc; 441 if (((long)nam & MSG_PEEK) == 0) 442 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 443 break; 444 445 case PRU_SENDOOB: 446 if (sbspace(&so->so_snd) < -512) { 447 m_freem(m); 448 error = ENOBUFS; 449 break; 450 } 451 /* 452 * According to RFC961 (Assigned Protocols), 453 * the urgent pointer points to the last octet 454 * of urgent data. We continue, however, 455 * to consider it to indicate the first octet 456 * of data past the urgent section. 457 * Otherwise, snd_up should be one lower. 458 */ 459 sbappend(&so->so_snd, m); 460 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 461 tp->t_force = 1; 462 error = tcp_output(tp); 463 tp->t_force = 0; 464 break; 465 466 case PRU_SOCKADDR: 467 #ifdef INET6 468 if (inp->inp_flags & INP_IPV6) 469 in6_setsockaddr(inp, nam); 470 else 471 #endif 472 in_setsockaddr(inp, nam); 473 break; 474 475 case PRU_PEERADDR: 476 #ifdef INET6 477 if (inp->inp_flags & INP_IPV6) 478 in6_setpeeraddr(inp, nam); 479 else 480 #endif 481 in_setpeeraddr(inp, nam); 482 break; 483 484 /* 485 * TCP slow timer went off; going through this 486 * routine for tracing's sake. 487 */ 488 case PRU_SLOWTIMO: 489 tp = tcp_timers(tp, (long)nam); 490 req |= (long)nam << 8; /* for debug's sake */ 491 break; 492 493 default: 494 panic("tcp_usrreq"); 495 } 496 if (tp && (so->so_options & SO_DEBUG)) 497 tcp_trace(TA_USER, ostate, tp, (caddr_t)0, req, 0); 498 splx(s); 499 return (error); 500 } 501 502 int 503 tcp_ctloutput(op, so, level, optname, mp) 504 int op; 505 struct socket *so; 506 int level, optname; 507 struct mbuf **mp; 508 { 509 int error = 0, s; 510 struct inpcb *inp; 511 register struct tcpcb *tp; 512 register struct mbuf *m; 513 register int i; 514 515 s = splsoftnet(); 516 inp = sotoinpcb(so); 517 if (inp == NULL) { 518 splx(s); 519 if (op == PRCO_SETOPT && *mp) 520 (void) m_free(*mp); 521 return (ECONNRESET); 522 } 523 #ifdef INET6 524 tp = intotcpcb(inp); 525 #endif /* INET6 */ 526 if (level != IPPROTO_TCP) { 527 switch (so->so_proto->pr_domain->dom_family) { 528 #ifdef INET6 529 case PF_INET6: 530 error = ip6_ctloutput(op, so, level, optname, mp); 531 break; 532 #endif /* INET6 */ 533 case PF_INET: 534 error = ip_ctloutput(op, so, level, optname, mp); 535 break; 536 default: 537 error = EAFNOSUPPORT; /*?*/ 538 break; 539 } 540 splx(s); 541 return (error); 542 } 543 #ifndef INET6 544 tp = intotcpcb(inp); 545 #endif /* !INET6 */ 546 547 switch (op) { 548 549 case PRCO_SETOPT: 550 m = *mp; 551 switch (optname) { 552 553 case TCP_NODELAY: 554 if (m == NULL || m->m_len < sizeof (int)) 555 error = EINVAL; 556 else if (*mtod(m, int *)) 557 tp->t_flags |= TF_NODELAY; 558 else 559 tp->t_flags &= ~TF_NODELAY; 560 break; 561 562 case TCP_MAXSEG: 563 if (m == NULL || m->m_len < sizeof (int)) { 564 error = EINVAL; 565 break; 566 } 567 568 i = *mtod(m, int *); 569 if (i > 0 && i <= tp->t_maxseg) 570 tp->t_maxseg = i; 571 else 572 error = EINVAL; 573 break; 574 575 #ifdef TCP_SACK 576 case TCP_SACK_DISABLE: 577 if (m == NULL || m->m_len < sizeof (int)) { 578 error = EINVAL; 579 break; 580 } 581 582 if (TCPS_HAVEESTABLISHED(tp->t_state)) { 583 error = EPERM; 584 break; 585 } 586 587 if (tp->t_flags & TF_SIGNATURE) { 588 error = EPERM; 589 break; 590 } 591 592 if (*mtod(m, int *)) 593 tp->sack_disable = 1; 594 else 595 tp->sack_disable = 0; 596 break; 597 #endif 598 #ifdef TCP_SIGNATURE 599 case TCP_SIGNATURE_ENABLE: 600 if (m == NULL || m->m_len < sizeof (int)) { 601 error = EINVAL; 602 break; 603 } 604 605 if (TCPS_HAVEESTABLISHED(tp->t_state)) { 606 error = EPERM; 607 break; 608 } 609 610 if (*mtod(m, int *)) { 611 tp->t_flags |= TF_SIGNATURE; 612 #ifdef TCP_SACK 613 tp->sack_disable = 1; 614 #endif /* TCP_SACK */ 615 } else 616 tp->t_flags &= ~TF_SIGNATURE; 617 break; 618 #endif /* TCP_SIGNATURE */ 619 default: 620 error = ENOPROTOOPT; 621 break; 622 } 623 if (m) 624 (void) m_free(m); 625 break; 626 627 case PRCO_GETOPT: 628 *mp = m = m_get(M_WAIT, MT_SOOPTS); 629 m->m_len = sizeof(int); 630 631 switch (optname) { 632 case TCP_NODELAY: 633 *mtod(m, int *) = tp->t_flags & TF_NODELAY; 634 break; 635 case TCP_MAXSEG: 636 *mtod(m, int *) = tp->t_maxseg; 637 break; 638 #ifdef TCP_SACK 639 case TCP_SACK_DISABLE: 640 *mtod(m, int *) = tp->sack_disable; 641 break; 642 #endif 643 default: 644 error = ENOPROTOOPT; 645 break; 646 } 647 break; 648 } 649 splx(s); 650 return (error); 651 } 652 653 #ifndef TCP_SENDSPACE 654 #define TCP_SENDSPACE 1024*16; 655 #endif 656 u_int tcp_sendspace = TCP_SENDSPACE; 657 #ifndef TCP_RECVSPACE 658 #define TCP_RECVSPACE 1024*16; 659 #endif 660 u_int tcp_recvspace = TCP_RECVSPACE; 661 662 /* 663 * Attach TCP protocol to socket, allocating 664 * internet protocol control block, tcp control block, 665 * bufer space, and entering LISTEN state if to accept connections. 666 */ 667 int 668 tcp_attach(so) 669 struct socket *so; 670 { 671 register struct tcpcb *tp; 672 struct inpcb *inp; 673 int error; 674 675 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 676 error = soreserve(so, tcp_sendspace, tcp_recvspace); 677 if (error) 678 return (error); 679 } 680 error = in_pcballoc(so, &tcbtable); 681 if (error) 682 return (error); 683 inp = sotoinpcb(so); 684 tp = tcp_newtcpcb(inp); 685 if (tp == NULL) { 686 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 687 688 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 689 in_pcbdetach(inp); 690 so->so_state |= nofd; 691 return (ENOBUFS); 692 } 693 tp->t_state = TCPS_CLOSED; 694 #ifdef INET6 695 /* we disallow IPv4 mapped address completely. */ 696 if (inp->inp_flags & INP_IPV6) 697 tp->pf = PF_INET6; 698 else 699 tp->pf = PF_INET; 700 #else 701 tp->pf = PF_INET; 702 #endif 703 return (0); 704 } 705 706 /* 707 * Initiate (or continue) disconnect. 708 * If embryonic state, just send reset (once). 709 * If in ``let data drain'' option and linger null, just drop. 710 * Otherwise (hard), mark socket disconnecting and drop 711 * current input data; switch states based on user close, and 712 * send segment to peer (with FIN). 713 */ 714 struct tcpcb * 715 tcp_disconnect(tp) 716 register struct tcpcb *tp; 717 { 718 struct socket *so = tp->t_inpcb->inp_socket; 719 720 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 721 tp = tcp_close(tp); 722 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 723 tp = tcp_drop(tp, 0); 724 else { 725 soisdisconnecting(so); 726 sbflush(&so->so_rcv); 727 tp = tcp_usrclosed(tp); 728 if (tp) 729 (void) tcp_output(tp); 730 } 731 return (tp); 732 } 733 734 /* 735 * User issued close, and wish to trail through shutdown states: 736 * if never received SYN, just forget it. If got a SYN from peer, 737 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 738 * If already got a FIN from peer, then almost done; go to LAST_ACK 739 * state. In all other cases, have already sent FIN to peer (e.g. 740 * after PRU_SHUTDOWN), and just have to play tedious game waiting 741 * for peer to send FIN or not respond to keep-alives, etc. 742 * We can let the user exit from the close as soon as the FIN is acked. 743 */ 744 struct tcpcb * 745 tcp_usrclosed(tp) 746 register struct tcpcb *tp; 747 { 748 749 switch (tp->t_state) { 750 751 case TCPS_CLOSED: 752 case TCPS_LISTEN: 753 case TCPS_SYN_SENT: 754 tp->t_state = TCPS_CLOSED; 755 tp = tcp_close(tp); 756 break; 757 758 case TCPS_SYN_RECEIVED: 759 case TCPS_ESTABLISHED: 760 tp->t_state = TCPS_FIN_WAIT_1; 761 break; 762 763 case TCPS_CLOSE_WAIT: 764 tp->t_state = TCPS_LAST_ACK; 765 break; 766 } 767 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 768 soisdisconnected(tp->t_inpcb->inp_socket); 769 /* 770 * If we are in FIN_WAIT_2, we arrived here because the 771 * application did a shutdown of the send side. Like the 772 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after 773 * a full close, we start a timer to make sure sockets are 774 * not left in FIN_WAIT_2 forever. 775 */ 776 if (tp->t_state == TCPS_FIN_WAIT_2) 777 tp->t_timer[TCPT_2MSL] = tcp_maxidle; 778 } 779 return (tp); 780 } 781 782 /* 783 * Look up a socket for ident.. 784 */ 785 int 786 tcp_ident(oldp, oldlenp, newp, newlen) 787 void *oldp; 788 size_t *oldlenp; 789 void *newp; 790 size_t newlen; 791 { 792 int error = 0, s; 793 int is_ipv6 = 0; 794 struct tcp_ident_mapping tir; 795 struct inpcb *inp; 796 struct sockaddr_in *fin, *lin; 797 #ifdef INET6 798 struct sockaddr_in6 *fin6, *lin6; 799 struct in6_addr f6, l6; 800 #endif 801 802 if (oldp == NULL || newp != NULL || newlen != 0) 803 return (EINVAL); 804 if (*oldlenp < sizeof(tir)) 805 return (ENOMEM); 806 if ((error = copyin(oldp, &tir, sizeof (tir))) != 0 ) 807 return (error); 808 switch (tir.faddr.ss_family) { 809 #ifdef INET6 810 case AF_INET6: 811 is_ipv6 = 1; 812 fin6 = (struct sockaddr_in6 *)&tir.faddr; 813 error = in6_embedscope(&f6, fin6, NULL, NULL); 814 if (error) 815 return EINVAL; /*?*/ 816 lin6 = (struct sockaddr_in6 *)&tir.laddr; 817 error = in6_embedscope(&l6, lin6, NULL, NULL); 818 if (error) 819 return EINVAL; /*?*/ 820 break; 821 #endif 822 case AF_INET: 823 fin = (struct sockaddr_in *)&tir.faddr; 824 lin = (struct sockaddr_in *)&tir.laddr; 825 break; 826 default: 827 return(EINVAL); 828 } 829 830 s = splsoftnet(); 831 if (is_ipv6) { 832 #ifdef INET6 833 inp = in6_pcbhashlookup(&tcbtable, &f6, 834 fin6->sin6_port, &l6, lin6->sin6_port); 835 #else 836 panic("tcp_ident: cannot happen"); 837 #endif 838 } 839 else 840 inp = in_pcbhashlookup(&tcbtable, fin->sin_addr, 841 fin->sin_port, lin->sin_addr, lin->sin_port); 842 843 if (inp == NULL) { 844 ++tcpstat.tcps_pcbhashmiss; 845 if (is_ipv6) { 846 #ifdef INET6 847 inp = in_pcblookup(&tcbtable, &f6, 848 fin6->sin6_port, &l6, lin6->sin6_port, 849 INPLOOKUP_WILDCARD | INPLOOKUP_IPV6); 850 #else 851 panic("tcp_ident: cannot happen"); 852 #endif 853 } 854 else 855 inp = in_pcblookup(&tcbtable, &fin->sin_addr, 856 fin->sin_port, &lin->sin_addr, lin->sin_port, 857 INPLOOKUP_WILDCARD); 858 } 859 860 if (inp != NULL && (inp->inp_socket->so_state & SS_CONNECTOUT)) { 861 tir.ruid = inp->inp_socket->so_ruid; 862 tir.euid = inp->inp_socket->so_euid; 863 } else { 864 tir.ruid = -1; 865 tir.euid = -1; 866 } 867 splx(s); 868 869 *oldlenp = sizeof (tir); 870 error = copyout((void *)&tir, oldp, sizeof (tir)); 871 return (error); 872 } 873 874 /* 875 * Sysctl for tcp variables. 876 */ 877 int 878 tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen) 879 int *name; 880 u_int namelen; 881 void *oldp; 882 size_t *oldlenp; 883 void *newp; 884 size_t newlen; 885 { 886 887 /* All sysctl names at this level are terminal. */ 888 if (namelen != 1) 889 return (ENOTDIR); 890 891 switch (name[0]) { 892 case TCPCTL_RFC1323: 893 return (sysctl_int(oldp, oldlenp, newp, newlen, 894 &tcp_do_rfc1323)); 895 #ifdef TCP_SACK 896 case TCPCTL_SACK: 897 return (sysctl_int(oldp, oldlenp, newp, newlen, 898 &tcp_do_sack)); 899 #endif 900 case TCPCTL_MSSDFLT: 901 return (sysctl_int(oldp, oldlenp, newp, newlen, 902 &tcp_mssdflt)); 903 case TCPCTL_KEEPINITTIME: 904 return (sysctl_int(oldp, oldlenp, newp, newlen, 905 &tcptv_keep_init)); 906 907 case TCPCTL_KEEPIDLE: 908 return (sysctl_int(oldp, oldlenp, newp, newlen, 909 &tcp_keepidle)); 910 911 case TCPCTL_KEEPINTVL: 912 return (sysctl_int(oldp, oldlenp, newp, newlen, 913 &tcp_keepintvl)); 914 915 case TCPCTL_SLOWHZ: 916 return (sysctl_rdint(oldp, oldlenp, newp, PR_SLOWHZ)); 917 918 case TCPCTL_BADDYNAMIC: 919 return (sysctl_struct(oldp, oldlenp, newp, newlen, 920 baddynamicports.tcp, sizeof(baddynamicports.tcp))); 921 922 case TCPCTL_RECVSPACE: 923 return (sysctl_int(oldp, oldlenp, newp, newlen,&tcp_recvspace)); 924 925 case TCPCTL_SENDSPACE: 926 return (sysctl_int(oldp, oldlenp, newp, newlen,&tcp_sendspace)); 927 case TCPCTL_IDENT: 928 return (tcp_ident(oldp, oldlenp, newp, newlen)); 929 case TCPCTL_RSTPPSLIMIT: 930 return (sysctl_int(oldp, oldlenp, newp, newlen, 931 &tcp_rst_ppslim)); 932 default: 933 return (ENOPROTOOPT); 934 } 935 /* NOTREACHED */ 936 } 937