1 /* $NetBSD: tcp_usrreq.c,v 1.52 2000/06/28 03:01:17 mrg Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to The NetBSD Foundation 37 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation 38 * Facility, NASA Ames Research Center. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. All advertising materials mentioning features or use of this software 49 * must display the following acknowledgement: 50 * This product includes software developed by the NetBSD 51 * Foundation, Inc. and its contributors. 52 * 4. Neither the name of The NetBSD Foundation nor the names of its 53 * contributors may be used to endorse or promote products derived 54 * from this software without specific prior written permission. 55 * 56 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 57 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 58 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 59 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 60 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 61 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 62 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 63 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 64 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 65 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 66 * POSSIBILITY OF SUCH DAMAGE. 67 */ 68 69 /* 70 * Copyright (c) 1982, 1986, 1988, 1993, 1995 71 * The Regents of the University of California. All rights reserved. 72 * 73 * Redistribution and use in source and binary forms, with or without 74 * modification, are permitted provided that the following conditions 75 * are met: 76 * 1. Redistributions of source code must retain the above copyright 77 * notice, this list of conditions and the following disclaimer. 78 * 2. Redistributions in binary form must reproduce the above copyright 79 * notice, this list of conditions and the following disclaimer in the 80 * documentation and/or other materials provided with the distribution. 81 * 3. All advertising materials mentioning features or use of this software 82 * must display the following acknowledgement: 83 * This product includes software developed by the University of 84 * California, Berkeley and its contributors. 85 * 4. Neither the name of the University nor the names of its contributors 86 * may be used to endorse or promote products derived from this software 87 * without specific prior written permission. 88 * 89 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 90 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 91 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 92 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 93 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 94 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 95 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 96 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 97 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 98 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 99 * SUCH DAMAGE. 100 * 101 * @(#)tcp_usrreq.c 8.5 (Berkeley) 6/21/95 102 */ 103 104 #include "opt_inet.h" 105 #include "opt_ipsec.h" 106 107 #include <sys/param.h> 108 #include <sys/systm.h> 109 #include <sys/kernel.h> 110 #include <sys/malloc.h> 111 #include <sys/mbuf.h> 112 #include <sys/socket.h> 113 #include <sys/socketvar.h> 114 #include <sys/protosw.h> 115 #include <sys/errno.h> 116 #include <sys/stat.h> 117 #include <sys/proc.h> 118 #include <sys/ucred.h> 119 #include <sys/domain.h> 120 121 #include <uvm/uvm_extern.h> 122 #include <sys/sysctl.h> 123 124 #include <net/if.h> 125 #include <net/route.h> 126 127 #include <netinet/in.h> 128 #include <netinet/in_systm.h> 129 #include <netinet/in_var.h> 130 #include <netinet/ip.h> 131 #include <netinet/in_pcb.h> 132 #include <netinet/ip_var.h> 133 134 #ifdef INET6 135 #ifndef INET 136 #include <netinet/in.h> 137 #endif 138 #include <netinet/ip6.h> 139 #include <netinet6/in6_pcb.h> 140 #include <netinet6/ip6_var.h> 141 #endif 142 143 #include <netinet/tcp.h> 144 #include <netinet/tcp_fsm.h> 145 #include <netinet/tcp_seq.h> 146 #include <netinet/tcp_timer.h> 147 #include <netinet/tcp_var.h> 148 #include <netinet/tcpip.h> 149 #include <netinet/tcp_debug.h> 150 151 #include "opt_tcp_recvspace.h" 152 #include "opt_tcp_sendspace.h" 153 154 #ifdef IPSEC 155 #include <netinet6/ipsec.h> 156 #endif /*IPSEC*/ 157 158 /* 159 * TCP protocol interface to socket abstraction. 160 */ 161 extern char *tcpstates[]; 162 163 /* 164 * Process a TCP user request for TCP tb. If this is a send request 165 * then m is the mbuf chain of send data. If this is a timer expiration 166 * (called from the software clock routine), then timertype tells which timer. 167 */ 168 /*ARGSUSED*/ 169 int 170 tcp_usrreq(so, req, m, nam, control, p) 171 struct socket *so; 172 int req; 173 struct mbuf *m, *nam, *control; 174 struct proc *p; 175 { 176 struct inpcb *inp; 177 #ifdef INET6 178 struct in6pcb *in6p; 179 #endif 180 struct tcpcb *tp = NULL; 181 int s; 182 int error = 0; 183 int ostate; 184 int family; /* family of the socket */ 185 186 family = so->so_proto->pr_domain->dom_family; 187 188 if (req == PRU_CONTROL) { 189 switch (family) { 190 case PF_INET: 191 return (in_control(so, (long)m, (caddr_t)nam, 192 (struct ifnet *)control, p)); 193 #ifdef INET6 194 case PF_INET6: 195 return (in6_control(so, (long)m, (caddr_t)nam, 196 (struct ifnet *)control, p)); 197 #endif 198 default: 199 return EAFNOSUPPORT; 200 } 201 } 202 203 if (req == PRU_PURGEIF) { 204 in_purgeif((struct ifnet *)control); 205 in_pcbpurgeif(&tcbtable, (struct ifnet *)control); 206 #ifdef INET6 207 in6_purgeif((struct ifnet *)control); 208 in6_pcbpurgeif(&tcb6, (struct ifnet *)control); 209 #endif 210 return (0); 211 } 212 213 s = splsoftnet(); 214 switch (family) { 215 case PF_INET: 216 inp = sotoinpcb(so); 217 #ifdef INET6 218 in6p = NULL; 219 #endif 220 break; 221 #ifdef INET6 222 case PF_INET6: 223 inp = NULL; 224 in6p = sotoin6pcb(so); 225 break; 226 #endif 227 default: 228 splx(s); 229 return EAFNOSUPPORT; 230 } 231 232 #ifdef DIAGNOSTIC 233 if (req != PRU_SEND && req != PRU_SENDOOB && control) 234 panic("tcp_usrreq: unexpected control mbuf"); 235 #endif 236 /* 237 * When a TCP is attached to a socket, then there will be 238 * a (struct inpcb) pointed at by the socket, and this 239 * structure will point at a subsidary (struct tcpcb). 240 */ 241 #ifndef INET6 242 if (inp == 0 && req != PRU_ATTACH) 243 #else 244 if ((inp == 0 && in6p == 0) && req != PRU_ATTACH) 245 #endif 246 { 247 error = EINVAL; 248 goto release; 249 } 250 if (inp) { 251 tp = intotcpcb(inp); 252 /* WHAT IF TP IS 0? */ 253 #ifdef KPROF 254 tcp_acounts[tp->t_state][req]++; 255 #endif 256 ostate = tp->t_state; 257 } 258 #ifdef INET6 259 else if (in6p) { 260 tp = in6totcpcb(in6p); 261 /* WHAT IF TP IS 0? */ 262 #ifdef KPROF 263 tcp_acounts[tp->t_state][req]++; 264 #endif 265 ostate = tp->t_state; 266 } 267 #endif 268 else 269 ostate = 0; 270 271 switch (req) { 272 273 /* 274 * TCP attaches to socket via PRU_ATTACH, reserving space, 275 * and an internet control block. 276 */ 277 case PRU_ATTACH: 278 #ifndef INET6 279 if (inp != 0) 280 #else 281 if (inp != 0 || in6p != 0) 282 #endif 283 { 284 error = EISCONN; 285 break; 286 } 287 error = tcp_attach(so); 288 if (error) 289 break; 290 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 291 so->so_linger = TCP_LINGERTIME; 292 tp = sototcpcb(so); 293 break; 294 295 /* 296 * PRU_DETACH detaches the TCP protocol from the socket. 297 */ 298 case PRU_DETACH: 299 tp = tcp_disconnect(tp); 300 break; 301 302 /* 303 * Give the socket an address. 304 */ 305 case PRU_BIND: 306 switch (family) { 307 case PF_INET: 308 error = in_pcbbind(inp, nam, p); 309 break; 310 #ifdef INET6 311 case PF_INET6: 312 error = in6_pcbbind(in6p, nam, p); 313 /* mapped addr case */ 314 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr)) 315 tp->t_family = AF_INET; 316 break; 317 #endif 318 } 319 break; 320 321 /* 322 * Prepare to accept connections. 323 */ 324 case PRU_LISTEN: 325 if (inp && inp->inp_lport == 0) { 326 error = in_pcbbind(inp, (struct mbuf *)0, 327 (struct proc *)0); 328 if (error) 329 break; 330 } 331 #ifdef INET6 332 else if (in6p && in6p->in6p_lport == 0) { 333 error = in6_pcbbind(in6p, (struct mbuf *)0, 334 (struct proc *)0); 335 if (error) 336 break; 337 } 338 #endif 339 tp->t_state = TCPS_LISTEN; 340 break; 341 342 /* 343 * Initiate connection to peer. 344 * Create a template for use in transmissions on this connection. 345 * Enter SYN_SENT state, and mark socket as connecting. 346 * Start keep-alive timer, and seed output sequence space. 347 * Send initial segment on connection. 348 */ 349 case PRU_CONNECT: 350 if (inp) { 351 if (inp->inp_lport == 0) { 352 error = in_pcbbind(inp, (struct mbuf *)0, 353 (struct proc *)0); 354 if (error) 355 break; 356 } 357 error = in_pcbconnect(inp, nam); 358 } 359 #ifdef INET6 360 else if (in6p) { 361 if (in6p->in6p_lport == 0) { 362 error = in6_pcbbind(in6p, (struct mbuf *)0, 363 (struct proc *)0); 364 if (error) 365 break; 366 } 367 error = in6_pcbconnect(in6p, nam); 368 /* mapped addr case */ 369 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr)) 370 tp->t_family = AF_INET; 371 } 372 #endif 373 if (error) 374 break; 375 tp->t_template = tcp_template(tp); 376 if (tp->t_template == 0) { 377 if (inp) 378 in_pcbdisconnect(inp); 379 #ifdef INET6 380 else if (in6p) 381 in6_pcbdisconnect(in6p); 382 #endif 383 error = ENOBUFS; 384 break; 385 } 386 /* Compute window scaling to request. */ 387 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 388 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 389 tp->request_r_scale++; 390 soisconnecting(so); 391 tcpstat.tcps_connattempt++; 392 tp->t_state = TCPS_SYN_SENT; 393 TCP_TIMER_ARM(tp, TCPT_KEEP, TCPTV_KEEP_INIT); 394 tp->iss = tcp_new_iss(tp, sizeof(struct tcpcb), 0); 395 tcp_sendseqinit(tp); 396 error = tcp_output(tp); 397 break; 398 399 /* 400 * Create a TCP connection between two sockets. 401 */ 402 case PRU_CONNECT2: 403 error = EOPNOTSUPP; 404 break; 405 406 /* 407 * Initiate disconnect from peer. 408 * If connection never passed embryonic stage, just drop; 409 * else if don't need to let data drain, then can just drop anyways, 410 * else have to begin TCP shutdown process: mark socket disconnecting, 411 * drain unread data, state switch to reflect user close, and 412 * send segment (e.g. FIN) to peer. Socket will be really disconnected 413 * when peer sends FIN and acks ours. 414 * 415 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 416 */ 417 case PRU_DISCONNECT: 418 tp = tcp_disconnect(tp); 419 break; 420 421 /* 422 * Accept a connection. Essentially all the work is 423 * done at higher levels; just return the address 424 * of the peer, storing through addr. 425 */ 426 case PRU_ACCEPT: 427 if (inp) 428 in_setpeeraddr(inp, nam); 429 #ifdef INET6 430 else if (in6p) 431 in6_setpeeraddr(in6p, nam); 432 #endif 433 break; 434 435 /* 436 * Mark the connection as being incapable of further output. 437 */ 438 case PRU_SHUTDOWN: 439 socantsendmore(so); 440 tp = tcp_usrclosed(tp); 441 if (tp) 442 error = tcp_output(tp); 443 break; 444 445 /* 446 * After a receive, possibly send window update to peer. 447 */ 448 case PRU_RCVD: 449 (void) tcp_output(tp); 450 break; 451 452 /* 453 * Do a send by putting data in output queue and updating urgent 454 * marker if URG set. Possibly send more data. 455 */ 456 case PRU_SEND: 457 if (control && control->m_len) { 458 m_freem(control); 459 m_freem(m); 460 error = EINVAL; 461 break; 462 } 463 sbappend(&so->so_snd, m); 464 error = tcp_output(tp); 465 break; 466 467 /* 468 * Abort the TCP. 469 */ 470 case PRU_ABORT: 471 tp = tcp_drop(tp, ECONNABORTED); 472 break; 473 474 case PRU_SENSE: 475 /* 476 * stat: don't bother with a blocksize. 477 */ 478 splx(s); 479 return (0); 480 481 case PRU_RCVOOB: 482 if (control && control->m_len) { 483 m_freem(control); 484 m_freem(m); 485 error = EINVAL; 486 break; 487 } 488 if ((so->so_oobmark == 0 && 489 (so->so_state & SS_RCVATMARK) == 0) || 490 so->so_options & SO_OOBINLINE || 491 tp->t_oobflags & TCPOOB_HADDATA) { 492 error = EINVAL; 493 break; 494 } 495 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 496 error = EWOULDBLOCK; 497 break; 498 } 499 m->m_len = 1; 500 *mtod(m, caddr_t) = tp->t_iobc; 501 if (((long)nam & MSG_PEEK) == 0) 502 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 503 break; 504 505 case PRU_SENDOOB: 506 if (sbspace(&so->so_snd) < -512) { 507 m_freem(m); 508 error = ENOBUFS; 509 break; 510 } 511 /* 512 * According to RFC961 (Assigned Protocols), 513 * the urgent pointer points to the last octet 514 * of urgent data. We continue, however, 515 * to consider it to indicate the first octet 516 * of data past the urgent section. 517 * Otherwise, snd_up should be one lower. 518 */ 519 sbappend(&so->so_snd, m); 520 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 521 tp->t_force = 1; 522 error = tcp_output(tp); 523 tp->t_force = 0; 524 break; 525 526 case PRU_SOCKADDR: 527 if (inp) 528 in_setsockaddr(inp, nam); 529 #ifdef INET6 530 else if (in6p) 531 in6_setsockaddr(in6p, nam); 532 #endif 533 break; 534 535 case PRU_PEERADDR: 536 if (inp) 537 in_setpeeraddr(inp, nam); 538 #ifdef INET6 539 else if (in6p) 540 in6_setpeeraddr(in6p, nam); 541 #endif 542 break; 543 544 /* 545 * TCP slow timer went off; going through this 546 * routine for tracing's sake. 547 */ 548 case PRU_SLOWTIMO: 549 tp = tcp_timers(tp, (long)nam); 550 req |= (long)nam << 8; /* for debug's sake */ 551 break; 552 553 default: 554 panic("tcp_usrreq"); 555 } 556 if (tp && (so->so_options & SO_DEBUG)) 557 tcp_trace(TA_USER, ostate, tp, NULL, req); 558 559 release: 560 splx(s); 561 return (error); 562 } 563 564 int 565 tcp_ctloutput(op, so, level, optname, mp) 566 int op; 567 struct socket *so; 568 int level, optname; 569 struct mbuf **mp; 570 { 571 int error = 0, s; 572 struct inpcb *inp; 573 #ifdef INET6 574 struct in6pcb *in6p; 575 #endif 576 struct tcpcb *tp; 577 struct mbuf *m; 578 int i; 579 int family; /* family of the socket */ 580 581 family = so->so_proto->pr_domain->dom_family; 582 583 s = splsoftnet(); 584 switch (family) { 585 case PF_INET: 586 inp = sotoinpcb(so); 587 #ifdef INET6 588 in6p = NULL; 589 #endif 590 break; 591 #ifdef INET6 592 case PF_INET6: 593 inp = NULL; 594 in6p = sotoin6pcb(so); 595 break; 596 #endif 597 default: 598 splx(s); 599 return EAFNOSUPPORT; 600 } 601 #ifndef INET6 602 if (inp == NULL) 603 #else 604 if (inp == NULL && in6p == NULL) 605 #endif 606 { 607 splx(s); 608 if (op == PRCO_SETOPT && *mp) 609 (void) m_free(*mp); 610 return (ECONNRESET); 611 } 612 if (level != IPPROTO_TCP) { 613 switch (family) { 614 case PF_INET: 615 error = ip_ctloutput(op, so, level, optname, mp); 616 break; 617 #ifdef INET6 618 case PF_INET6: 619 error = ip6_ctloutput(op, so, level, optname, mp); 620 break; 621 #endif 622 } 623 splx(s); 624 return (error); 625 } 626 if (inp) 627 tp = intotcpcb(inp); 628 #ifdef INET6 629 else if (in6p) 630 tp = in6totcpcb(in6p); 631 #endif 632 else 633 tp = NULL; 634 635 switch (op) { 636 637 case PRCO_SETOPT: 638 m = *mp; 639 switch (optname) { 640 641 case TCP_NODELAY: 642 if (m == NULL || m->m_len < sizeof (int)) 643 error = EINVAL; 644 else if (*mtod(m, int *)) 645 tp->t_flags |= TF_NODELAY; 646 else 647 tp->t_flags &= ~TF_NODELAY; 648 break; 649 650 case TCP_MAXSEG: 651 if (m && (i = *mtod(m, int *)) > 0 && 652 i <= tp->t_peermss) 653 tp->t_peermss = i; /* limit on send size */ 654 else 655 error = EINVAL; 656 break; 657 658 default: 659 error = ENOPROTOOPT; 660 break; 661 } 662 if (m) 663 (void) m_free(m); 664 break; 665 666 case PRCO_GETOPT: 667 *mp = m = m_get(M_WAIT, MT_SOOPTS); 668 m->m_len = sizeof(int); 669 670 switch (optname) { 671 case TCP_NODELAY: 672 *mtod(m, int *) = tp->t_flags & TF_NODELAY; 673 break; 674 case TCP_MAXSEG: 675 *mtod(m, int *) = tp->t_peermss; 676 break; 677 default: 678 error = ENOPROTOOPT; 679 break; 680 } 681 break; 682 } 683 splx(s); 684 return (error); 685 } 686 687 #ifndef TCP_SENDSPACE 688 #define TCP_SENDSPACE 1024*16; 689 #endif 690 int tcp_sendspace = TCP_SENDSPACE; 691 #ifndef TCP_RECVSPACE 692 #define TCP_RECVSPACE 1024*16; 693 #endif 694 int tcp_recvspace = TCP_RECVSPACE; 695 696 /* 697 * Attach TCP protocol to socket, allocating 698 * internet protocol control block, tcp control block, 699 * bufer space, and entering LISTEN state if to accept connections. 700 */ 701 int 702 tcp_attach(so) 703 struct socket *so; 704 { 705 struct tcpcb *tp; 706 struct inpcb *inp; 707 #ifdef INET6 708 struct in6pcb *in6p; 709 #endif 710 int error; 711 int family; /* family of the socket */ 712 713 family = so->so_proto->pr_domain->dom_family; 714 715 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 716 error = soreserve(so, tcp_sendspace, tcp_recvspace); 717 if (error) 718 return (error); 719 } 720 switch (family) { 721 case PF_INET: 722 error = in_pcballoc(so, &tcbtable); 723 if (error) 724 return (error); 725 inp = sotoinpcb(so); 726 #ifdef INET6 727 in6p = NULL; 728 #endif 729 break; 730 #ifdef INET6 731 case PF_INET6: 732 error = in6_pcballoc(so, &tcb6); 733 if (error) 734 return (error); 735 inp = NULL; 736 in6p = sotoin6pcb(so); 737 break; 738 #endif 739 default: 740 return EAFNOSUPPORT; 741 } 742 #ifdef IPSEC 743 if (inp) { 744 error = ipsec_init_policy(so, &inp->inp_sp); 745 if (error != 0) { 746 in_pcbdetach(inp); 747 return (error); 748 } 749 } 750 #ifdef INET6 751 else if (in6p) { 752 error = ipsec_init_policy(so, &in6p->in6p_sp); 753 if (error != 0) { 754 in6_pcbdetach(in6p); 755 return (error); 756 } 757 } 758 #endif 759 #endif /*IPSEC*/ 760 if (inp) 761 tp = tcp_newtcpcb(family, (void *)inp); 762 #ifdef INET6 763 else if (in6p) 764 tp = tcp_newtcpcb(family, (void *)in6p); 765 #endif 766 else 767 tp = NULL; 768 769 if (tp == 0) { 770 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 771 772 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 773 if (inp) 774 in_pcbdetach(inp); 775 #ifdef INET6 776 else if (in6p) 777 in6_pcbdetach(in6p); 778 #endif 779 so->so_state |= nofd; 780 return (ENOBUFS); 781 } 782 tp->t_state = TCPS_CLOSED; 783 return (0); 784 } 785 786 /* 787 * Initiate (or continue) disconnect. 788 * If embryonic state, just send reset (once). 789 * If in ``let data drain'' option and linger null, just drop. 790 * Otherwise (hard), mark socket disconnecting and drop 791 * current input data; switch states based on user close, and 792 * send segment to peer (with FIN). 793 */ 794 struct tcpcb * 795 tcp_disconnect(tp) 796 struct tcpcb *tp; 797 { 798 struct socket *so; 799 800 if (tp->t_inpcb) 801 so = tp->t_inpcb->inp_socket; 802 #ifdef INET6 803 else if (tp->t_in6pcb) 804 so = tp->t_in6pcb->in6p_socket; 805 #endif 806 else 807 so = NULL; 808 809 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 810 tp = tcp_close(tp); 811 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 812 tp = tcp_drop(tp, 0); 813 else { 814 soisdisconnecting(so); 815 sbflush(&so->so_rcv); 816 tp = tcp_usrclosed(tp); 817 if (tp) 818 (void) tcp_output(tp); 819 } 820 return (tp); 821 } 822 823 /* 824 * User issued close, and wish to trail through shutdown states: 825 * if never received SYN, just forget it. If got a SYN from peer, 826 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 827 * If already got a FIN from peer, then almost done; go to LAST_ACK 828 * state. In all other cases, have already sent FIN to peer (e.g. 829 * after PRU_SHUTDOWN), and just have to play tedious game waiting 830 * for peer to send FIN or not respond to keep-alives, etc. 831 * We can let the user exit from the close as soon as the FIN is acked. 832 */ 833 struct tcpcb * 834 tcp_usrclosed(tp) 835 struct tcpcb *tp; 836 { 837 838 switch (tp->t_state) { 839 840 case TCPS_CLOSED: 841 case TCPS_LISTEN: 842 case TCPS_SYN_SENT: 843 tp->t_state = TCPS_CLOSED; 844 tp = tcp_close(tp); 845 break; 846 847 case TCPS_SYN_RECEIVED: 848 case TCPS_ESTABLISHED: 849 tp->t_state = TCPS_FIN_WAIT_1; 850 break; 851 852 case TCPS_CLOSE_WAIT: 853 tp->t_state = TCPS_LAST_ACK; 854 break; 855 } 856 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 857 struct socket *so; 858 if (tp->t_inpcb) 859 so = tp->t_inpcb->inp_socket; 860 #ifdef INET6 861 else if (tp->t_in6pcb) 862 so = tp->t_in6pcb->in6p_socket; 863 #endif 864 else 865 so = NULL; 866 soisdisconnected(so); 867 /* 868 * If we are in FIN_WAIT_2, we arrived here because the 869 * application did a shutdown of the send side. Like the 870 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after 871 * a full close, we start a timer to make sure sockets are 872 * not left in FIN_WAIT_2 forever. 873 */ 874 if ((tp->t_state == TCPS_FIN_WAIT_2) && (tcp_maxidle > 0)) 875 TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle); 876 } 877 return (tp); 878 } 879 880 static struct { 881 unsigned int valid : 1; 882 unsigned int rdonly : 1; 883 int *var; 884 int val; 885 } tcp_ctlvars[] = TCPCTL_VARIABLES; 886 887 /* 888 * Sysctl for tcp variables. 889 */ 890 int 891 tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen) 892 int *name; 893 u_int namelen; 894 void *oldp; 895 size_t *oldlenp; 896 void *newp; 897 size_t newlen; 898 { 899 900 /* All sysctl names at this level are terminal. */ 901 if (namelen != 1) 902 return (ENOTDIR); 903 904 /* 905 * The sysctl specifies usec-between-RST, so we must 906 * convert from/to a timeval. 907 */ 908 if (name[0] == TCPCTL_RSTRATELIMIT) { 909 int rate_usec, error, s; 910 911 rate_usec = (tcp_rst_ratelim.tv_sec * 1000000) + 912 tcp_rst_ratelim.tv_usec; 913 error = sysctl_int(oldp, oldlenp, newp, newlen, &rate_usec); 914 if (error) 915 return (error); 916 if (rate_usec < 0) 917 return (EINVAL); 918 s = splsoftnet(); 919 tcp_rst_ratelim.tv_sec = rate_usec / 1000000; 920 tcp_rst_ratelim.tv_usec = rate_usec % 1000000; 921 splx(s); 922 923 return (0); 924 } 925 926 if (name[0] < sizeof(tcp_ctlvars)/sizeof(tcp_ctlvars[0]) 927 && tcp_ctlvars[name[0]].valid) { 928 if (tcp_ctlvars[name[0]].rdonly) 929 return (sysctl_rdint(oldp, oldlenp, newp, 930 tcp_ctlvars[name[0]].val)); 931 else 932 return (sysctl_int(oldp, oldlenp, newp, newlen, 933 tcp_ctlvars[name[0]].var)); 934 } 935 936 return (ENOPROTOOPT); 937 } 938