1 /* $NetBSD: tcp_usrreq.c,v 1.42 1999/07/09 22:57:23 thorpej Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to The NetBSD Foundation 37 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation 38 * Facility, NASA Ames Research Center. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. All advertising materials mentioning features or use of this software 49 * must display the following acknowledgement: 50 * This product includes software developed by the NetBSD 51 * Foundation, Inc. and its contributors. 52 * 4. Neither the name of The NetBSD Foundation nor the names of its 53 * contributors may be used to endorse or promote products derived 54 * from this software without specific prior written permission. 55 * 56 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 57 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 58 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 59 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 60 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 61 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 62 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 63 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 64 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 65 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 66 * POSSIBILITY OF SUCH DAMAGE. 67 */ 68 69 /* 70 * Copyright (c) 1982, 1986, 1988, 1993, 1995 71 * The Regents of the University of California. All rights reserved. 72 * 73 * Redistribution and use in source and binary forms, with or without 74 * modification, are permitted provided that the following conditions 75 * are met: 76 * 1. Redistributions of source code must retain the above copyright 77 * notice, this list of conditions and the following disclaimer. 78 * 2. Redistributions in binary form must reproduce the above copyright 79 * notice, this list of conditions and the following disclaimer in the 80 * documentation and/or other materials provided with the distribution. 81 * 3. All advertising materials mentioning features or use of this software 82 * must display the following acknowledgement: 83 * This product includes software developed by the University of 84 * California, Berkeley and its contributors. 85 * 4. Neither the name of the University nor the names of its contributors 86 * may be used to endorse or promote products derived from this software 87 * without specific prior written permission. 88 * 89 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 90 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 91 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 92 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 93 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 94 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 95 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 96 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 97 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 98 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 99 * SUCH DAMAGE. 100 * 101 * @(#)tcp_usrreq.c 8.5 (Berkeley) 6/21/95 102 */ 103 104 #include "opt_inet.h" 105 #include "opt_ipsec.h" 106 107 #include <sys/param.h> 108 #include <sys/systm.h> 109 #include <sys/kernel.h> 110 #include <sys/malloc.h> 111 #include <sys/mbuf.h> 112 #include <sys/socket.h> 113 #include <sys/socketvar.h> 114 #include <sys/protosw.h> 115 #include <sys/errno.h> 116 #include <sys/stat.h> 117 #include <sys/proc.h> 118 #include <sys/ucred.h> 119 #include <sys/domain.h> 120 121 #include <vm/vm.h> 122 #include <sys/sysctl.h> 123 124 #include <net/if.h> 125 #include <net/route.h> 126 127 #include <netinet/in.h> 128 #include <netinet/in_systm.h> 129 #include <netinet/in_var.h> 130 #include <netinet/ip.h> 131 #include <netinet/in_pcb.h> 132 #include <netinet/ip_var.h> 133 134 #ifdef INET6 135 #ifndef INET 136 #include <netinet/in.h> 137 #endif 138 #include <netinet/ip6.h> 139 #include <netinet6/in6_pcb.h> 140 #include <netinet6/ip6_var.h> 141 #endif 142 143 #include <netinet/tcp.h> 144 #include <netinet/tcp_fsm.h> 145 #include <netinet/tcp_seq.h> 146 #include <netinet/tcp_timer.h> 147 #include <netinet/tcp_var.h> 148 #include <netinet/tcpip.h> 149 #include <netinet/tcp_debug.h> 150 151 #include "opt_tcp_recvspace.h" 152 #include "opt_tcp_sendspace.h" 153 154 #ifdef IPSEC 155 #include <netinet6/ipsec.h> 156 #endif /*IPSEC*/ 157 158 /* 159 * TCP protocol interface to socket abstraction. 160 */ 161 extern char *tcpstates[]; 162 163 /* 164 * Process a TCP user request for TCP tb. If this is a send request 165 * then m is the mbuf chain of send data. If this is a timer expiration 166 * (called from the software clock routine), then timertype tells which timer. 167 */ 168 /*ARGSUSED*/ 169 int 170 tcp_usrreq(so, req, m, nam, control, p) 171 struct socket *so; 172 int req; 173 struct mbuf *m, *nam, *control; 174 struct proc *p; 175 { 176 register struct inpcb *inp; 177 #ifdef INET6 178 register struct in6pcb *in6p; 179 #endif 180 register struct tcpcb *tp = NULL; 181 int s; 182 int error = 0; 183 int ostate; 184 int family; /* family of the socket */ 185 186 family = so->so_proto->pr_domain->dom_family; 187 188 if (req == PRU_CONTROL) { 189 switch (family) { 190 case PF_INET: 191 return (in_control(so, (long)m, (caddr_t)nam, 192 (struct ifnet *)control, p)); 193 #ifdef INET6 194 case PF_INET6: 195 return (in6_control(so, (long)m, (caddr_t)nam, 196 (struct ifnet *)control, p)); 197 #endif 198 default: 199 return EAFNOSUPPORT; 200 } 201 } 202 203 s = splsoftnet(); 204 switch (family) { 205 case PF_INET: 206 inp = sotoinpcb(so); 207 #ifdef INET6 208 in6p = NULL; 209 #endif 210 break; 211 #ifdef INET6 212 case PF_INET6: 213 inp = NULL; 214 in6p = sotoin6pcb(so); 215 break; 216 #endif 217 default: 218 splx(s); 219 return EAFNOSUPPORT; 220 } 221 222 #ifdef DIAGNOSTIC 223 if (req != PRU_SEND && req != PRU_SENDOOB && control) 224 panic("tcp_usrreq: unexpected control mbuf"); 225 #endif 226 /* 227 * When a TCP is attached to a socket, then there will be 228 * a (struct inpcb) pointed at by the socket, and this 229 * structure will point at a subsidary (struct tcpcb). 230 */ 231 #ifndef INET6 232 if (inp == 0 && req != PRU_ATTACH) 233 #else 234 if ((inp == 0 && in6p == 0) && req != PRU_ATTACH) 235 #endif 236 { 237 error = EINVAL; 238 goto release; 239 } 240 if (inp) { 241 tp = intotcpcb(inp); 242 /* WHAT IF TP IS 0? */ 243 #ifdef KPROF 244 tcp_acounts[tp->t_state][req]++; 245 #endif 246 ostate = tp->t_state; 247 } 248 #ifdef INET6 249 else if (in6p) { 250 tp = in6totcpcb(in6p); 251 /* WHAT IF TP IS 0? */ 252 #ifdef KPROF 253 tcp_acounts[tp->t_state][req]++; 254 #endif 255 ostate = tp->t_state; 256 } 257 #endif 258 else 259 ostate = 0; 260 261 switch (req) { 262 263 /* 264 * TCP attaches to socket via PRU_ATTACH, reserving space, 265 * and an internet control block. 266 */ 267 case PRU_ATTACH: 268 #ifndef INET6 269 if (inp != 0) 270 #else 271 if (inp != 0 || in6p != 0) 272 #endif 273 { 274 error = EISCONN; 275 break; 276 } 277 error = tcp_attach(so); 278 if (error) 279 break; 280 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 281 so->so_linger = TCP_LINGERTIME; 282 tp = sototcpcb(so); 283 break; 284 285 /* 286 * PRU_DETACH detaches the TCP protocol from the socket. 287 */ 288 case PRU_DETACH: 289 tp = tcp_disconnect(tp); 290 break; 291 292 /* 293 * Give the socket an address. 294 */ 295 case PRU_BIND: 296 switch (family) { 297 case PF_INET: 298 error = in_pcbbind(inp, nam, p); 299 break; 300 #ifdef INET6 301 case PF_INET6: 302 error = in6_pcbbind(in6p, nam /*, p*/ ); 303 /* mapped addr case */ 304 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr)) 305 tp->t_family = AF_INET; 306 break; 307 #endif 308 } 309 break; 310 311 /* 312 * Prepare to accept connections. 313 */ 314 case PRU_LISTEN: 315 if (inp && inp->inp_lport == 0) { 316 error = in_pcbbind(inp, (struct mbuf *)0, 317 (struct proc *)0); 318 if (error) 319 break; 320 } 321 #ifdef INET6 322 else if (in6p && in6p->in6p_lport == 0) { 323 error = in6_pcbbind(in6p, (struct mbuf *)0 /*, 324 (struct proc *)0 */ ); 325 if (error) 326 break; 327 } 328 #endif 329 tp->t_state = TCPS_LISTEN; 330 break; 331 332 /* 333 * Initiate connection to peer. 334 * Create a template for use in transmissions on this connection. 335 * Enter SYN_SENT state, and mark socket as connecting. 336 * Start keep-alive timer, and seed output sequence space. 337 * Send initial segment on connection. 338 */ 339 case PRU_CONNECT: 340 if (inp) { 341 if (inp->inp_lport == 0) { 342 error = in_pcbbind(inp, (struct mbuf *)0, 343 (struct proc *)0); 344 if (error) 345 break; 346 } 347 error = in_pcbconnect(inp, nam); 348 } 349 #ifdef INET6 350 else if (in6p) { 351 if (in6p->in6p_lport == 0) { 352 error = in6_pcbbind(in6p, (struct mbuf *)0 /*, 353 (struct proc *)0 */ ); 354 if (error) 355 break; 356 } 357 error = in6_pcbconnect(in6p, nam); 358 /* mapped addr case */ 359 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr)) 360 tp->t_family = AF_INET; 361 } 362 #endif 363 if (error) 364 break; 365 tp->t_template = tcp_template(tp); 366 if (tp->t_template == 0) { 367 if (inp) 368 in_pcbdisconnect(inp); 369 #ifdef INET6 370 else if (in6p) 371 in6_pcbdisconnect(in6p); 372 #endif 373 error = ENOBUFS; 374 break; 375 } 376 /* Compute window scaling to request. */ 377 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 378 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 379 tp->request_r_scale++; 380 soisconnecting(so); 381 tcpstat.tcps_connattempt++; 382 tp->t_state = TCPS_SYN_SENT; 383 TCP_TIMER_ARM(tp, TCPT_KEEP, TCPTV_KEEP_INIT); 384 tp->iss = tcp_new_iss(tp, sizeof(struct tcpcb), 0); 385 tcp_sendseqinit(tp); 386 error = tcp_output(tp); 387 break; 388 389 /* 390 * Create a TCP connection between two sockets. 391 */ 392 case PRU_CONNECT2: 393 error = EOPNOTSUPP; 394 break; 395 396 /* 397 * Initiate disconnect from peer. 398 * If connection never passed embryonic stage, just drop; 399 * else if don't need to let data drain, then can just drop anyways, 400 * else have to begin TCP shutdown process: mark socket disconnecting, 401 * drain unread data, state switch to reflect user close, and 402 * send segment (e.g. FIN) to peer. Socket will be really disconnected 403 * when peer sends FIN and acks ours. 404 * 405 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 406 */ 407 case PRU_DISCONNECT: 408 tp = tcp_disconnect(tp); 409 break; 410 411 /* 412 * Accept a connection. Essentially all the work is 413 * done at higher levels; just return the address 414 * of the peer, storing through addr. 415 */ 416 case PRU_ACCEPT: 417 if (inp) 418 in_setpeeraddr(inp, nam); 419 #ifdef INET6 420 else if (in6p) 421 in6_setpeeraddr(in6p, nam); 422 #endif 423 break; 424 425 /* 426 * Mark the connection as being incapable of further output. 427 */ 428 case PRU_SHUTDOWN: 429 socantsendmore(so); 430 tp = tcp_usrclosed(tp); 431 if (tp) 432 error = tcp_output(tp); 433 break; 434 435 /* 436 * After a receive, possibly send window update to peer. 437 */ 438 case PRU_RCVD: 439 (void) tcp_output(tp); 440 break; 441 442 /* 443 * Do a send by putting data in output queue and updating urgent 444 * marker if URG set. Possibly send more data. 445 */ 446 case PRU_SEND: 447 if (control && control->m_len) { 448 m_freem(control); 449 m_freem(m); 450 error = EINVAL; 451 break; 452 } 453 sbappend(&so->so_snd, m); 454 error = tcp_output(tp); 455 break; 456 457 /* 458 * Abort the TCP. 459 */ 460 case PRU_ABORT: 461 tp = tcp_drop(tp, ECONNABORTED); 462 break; 463 464 case PRU_SENSE: 465 /* 466 * stat: don't bother with a blocksize. 467 */ 468 splx(s); 469 return (0); 470 471 case PRU_RCVOOB: 472 if (control && control->m_len) { 473 m_freem(control); 474 m_freem(m); 475 error = EINVAL; 476 break; 477 } 478 if ((so->so_oobmark == 0 && 479 (so->so_state & SS_RCVATMARK) == 0) || 480 so->so_options & SO_OOBINLINE || 481 tp->t_oobflags & TCPOOB_HADDATA) { 482 error = EINVAL; 483 break; 484 } 485 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 486 error = EWOULDBLOCK; 487 break; 488 } 489 m->m_len = 1; 490 *mtod(m, caddr_t) = tp->t_iobc; 491 if (((long)nam & MSG_PEEK) == 0) 492 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 493 break; 494 495 case PRU_SENDOOB: 496 if (sbspace(&so->so_snd) < -512) { 497 m_freem(m); 498 error = ENOBUFS; 499 break; 500 } 501 /* 502 * According to RFC961 (Assigned Protocols), 503 * the urgent pointer points to the last octet 504 * of urgent data. We continue, however, 505 * to consider it to indicate the first octet 506 * of data past the urgent section. 507 * Otherwise, snd_up should be one lower. 508 */ 509 sbappend(&so->so_snd, m); 510 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 511 tp->t_force = 1; 512 error = tcp_output(tp); 513 tp->t_force = 0; 514 break; 515 516 case PRU_SOCKADDR: 517 if (inp) 518 in_setsockaddr(inp, nam); 519 #ifdef INET6 520 else if (in6p) 521 in6_setsockaddr(in6p, nam); 522 #endif 523 break; 524 525 case PRU_PEERADDR: 526 if (inp) 527 in_setpeeraddr(inp, nam); 528 #ifdef INET6 529 else if (in6p) 530 in6_setpeeraddr(in6p, nam); 531 #endif 532 break; 533 534 /* 535 * TCP slow timer went off; going through this 536 * routine for tracing's sake. 537 */ 538 case PRU_SLOWTIMO: 539 tp = tcp_timers(tp, (long)nam); 540 req |= (long)nam << 8; /* for debug's sake */ 541 break; 542 543 default: 544 panic("tcp_usrreq"); 545 } 546 if (tp && (so->so_options & SO_DEBUG)) 547 tcp_trace(TA_USER, ostate, tp, NULL, req); 548 549 release: 550 splx(s); 551 return (error); 552 } 553 554 int 555 tcp_ctloutput(op, so, level, optname, mp) 556 int op; 557 struct socket *so; 558 int level, optname; 559 struct mbuf **mp; 560 { 561 int error = 0, s; 562 struct inpcb *inp; 563 #ifdef INET6 564 register struct in6pcb *in6p; 565 #endif 566 register struct tcpcb *tp; 567 register struct mbuf *m; 568 register int i; 569 int family; /* family of the socket */ 570 571 family = so->so_proto->pr_domain->dom_family; 572 573 s = splsoftnet(); 574 switch (family) { 575 case PF_INET: 576 inp = sotoinpcb(so); 577 #ifdef INET6 578 in6p = NULL; 579 #endif 580 break; 581 #ifdef INET6 582 case PF_INET6: 583 inp = NULL; 584 in6p = sotoin6pcb(so); 585 break; 586 #endif 587 default: 588 splx(s); 589 return EAFNOSUPPORT; 590 } 591 #ifndef INET6 592 if (inp == NULL) 593 #else 594 if (inp == NULL && in6p == NULL) 595 #endif 596 { 597 splx(s); 598 if (op == PRCO_SETOPT && *mp) 599 (void) m_free(*mp); 600 return (ECONNRESET); 601 } 602 if (level != IPPROTO_TCP) { 603 switch (family) { 604 case PF_INET: 605 error = ip_ctloutput(op, so, level, optname, mp); 606 break; 607 #ifdef INET6 608 case PF_INET6: 609 error = ip6_ctloutput(op, so, level, optname, mp); 610 break; 611 #endif 612 } 613 splx(s); 614 return (error); 615 } 616 if (inp) 617 tp = intotcpcb(inp); 618 #ifdef INET6 619 else if (in6p) 620 tp = in6totcpcb(in6p); 621 #endif 622 else 623 tp = NULL; 624 625 switch (op) { 626 627 case PRCO_SETOPT: 628 m = *mp; 629 switch (optname) { 630 631 case TCP_NODELAY: 632 if (m == NULL || m->m_len < sizeof (int)) 633 error = EINVAL; 634 else if (*mtod(m, int *)) 635 tp->t_flags |= TF_NODELAY; 636 else 637 tp->t_flags &= ~TF_NODELAY; 638 break; 639 640 case TCP_MAXSEG: 641 if (m && (i = *mtod(m, int *)) > 0 && 642 i <= tp->t_peermss) 643 tp->t_peermss = i; /* limit on send size */ 644 else 645 error = EINVAL; 646 break; 647 648 default: 649 error = ENOPROTOOPT; 650 break; 651 } 652 if (m) 653 (void) m_free(m); 654 break; 655 656 case PRCO_GETOPT: 657 *mp = m = m_get(M_WAIT, MT_SOOPTS); 658 m->m_len = sizeof(int); 659 660 switch (optname) { 661 case TCP_NODELAY: 662 *mtod(m, int *) = tp->t_flags & TF_NODELAY; 663 break; 664 case TCP_MAXSEG: 665 *mtod(m, int *) = tp->t_peermss; 666 break; 667 default: 668 error = ENOPROTOOPT; 669 break; 670 } 671 break; 672 } 673 splx(s); 674 return (error); 675 } 676 677 #ifndef TCP_SENDSPACE 678 #define TCP_SENDSPACE 1024*16; 679 #endif 680 int tcp_sendspace = TCP_SENDSPACE; 681 #ifndef TCP_RECVSPACE 682 #define TCP_RECVSPACE 1024*16; 683 #endif 684 int tcp_recvspace = TCP_RECVSPACE; 685 686 /* 687 * Attach TCP protocol to socket, allocating 688 * internet protocol control block, tcp control block, 689 * bufer space, and entering LISTEN state if to accept connections. 690 */ 691 int 692 tcp_attach(so) 693 struct socket *so; 694 { 695 register struct tcpcb *tp; 696 struct inpcb *inp; 697 #ifdef INET6 698 struct in6pcb *in6p; 699 #endif 700 int error; 701 int family; /* family of the socket */ 702 703 family = so->so_proto->pr_domain->dom_family; 704 705 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 706 error = soreserve(so, tcp_sendspace, tcp_recvspace); 707 if (error) 708 return (error); 709 } 710 switch (family) { 711 case PF_INET: 712 error = in_pcballoc(so, &tcbtable); 713 if (error) 714 return (error); 715 inp = sotoinpcb(so); 716 #ifdef INET6 717 in6p = NULL; 718 #endif 719 break; 720 #ifdef INET6 721 case PF_INET6: 722 error = in6_pcballoc(so, &tcb6); 723 if (error) 724 return (error); 725 inp = NULL; 726 in6p = sotoin6pcb(so); 727 break; 728 #endif 729 default: 730 return EAFNOSUPPORT; 731 } 732 #ifdef IPSEC 733 if (inp && (error = ipsec_init_policy(&inp->inp_sp)) != 0) { 734 in_pcbdetach(inp); 735 return (error); 736 } 737 #ifdef INET6 738 else if (in6p && (error = ipsec_init_policy(&in6p->in6p_sp)) != 0) { 739 in6_pcbdetach(in6p); 740 return (error); 741 } 742 #endif 743 #endif /*IPSEC*/ 744 if (inp) 745 tp = tcp_newtcpcb(family, (void *)inp); 746 #ifdef INET6 747 else if (in6p) 748 tp = tcp_newtcpcb(family, (void *)in6p); 749 #endif 750 else 751 tp = NULL; 752 753 if (tp == 0) { 754 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 755 756 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 757 if (inp) 758 in_pcbdetach(inp); 759 #ifdef INET6 760 else if (in6p) 761 in6_pcbdetach(in6p); 762 #endif 763 so->so_state |= nofd; 764 return (ENOBUFS); 765 } 766 tp->t_state = TCPS_CLOSED; 767 return (0); 768 } 769 770 /* 771 * Initiate (or continue) disconnect. 772 * If embryonic state, just send reset (once). 773 * If in ``let data drain'' option and linger null, just drop. 774 * Otherwise (hard), mark socket disconnecting and drop 775 * current input data; switch states based on user close, and 776 * send segment to peer (with FIN). 777 */ 778 struct tcpcb * 779 tcp_disconnect(tp) 780 register struct tcpcb *tp; 781 { 782 struct socket *so; 783 784 if (tp->t_inpcb) 785 so = tp->t_inpcb->inp_socket; 786 #ifdef INET6 787 else if (tp->t_in6pcb) 788 so = tp->t_in6pcb->in6p_socket; 789 #endif 790 else 791 so = NULL; 792 793 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 794 tp = tcp_close(tp); 795 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 796 tp = tcp_drop(tp, 0); 797 else { 798 soisdisconnecting(so); 799 sbflush(&so->so_rcv); 800 tp = tcp_usrclosed(tp); 801 if (tp) 802 (void) tcp_output(tp); 803 } 804 return (tp); 805 } 806 807 /* 808 * User issued close, and wish to trail through shutdown states: 809 * if never received SYN, just forget it. If got a SYN from peer, 810 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 811 * If already got a FIN from peer, then almost done; go to LAST_ACK 812 * state. In all other cases, have already sent FIN to peer (e.g. 813 * after PRU_SHUTDOWN), and just have to play tedious game waiting 814 * for peer to send FIN or not respond to keep-alives, etc. 815 * We can let the user exit from the close as soon as the FIN is acked. 816 */ 817 struct tcpcb * 818 tcp_usrclosed(tp) 819 register struct tcpcb *tp; 820 { 821 822 switch (tp->t_state) { 823 824 case TCPS_CLOSED: 825 case TCPS_LISTEN: 826 case TCPS_SYN_SENT: 827 tp->t_state = TCPS_CLOSED; 828 tp = tcp_close(tp); 829 break; 830 831 case TCPS_SYN_RECEIVED: 832 case TCPS_ESTABLISHED: 833 tp->t_state = TCPS_FIN_WAIT_1; 834 break; 835 836 case TCPS_CLOSE_WAIT: 837 tp->t_state = TCPS_LAST_ACK; 838 break; 839 } 840 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 841 struct socket *so; 842 if (tp->t_inpcb) 843 so = tp->t_inpcb->inp_socket; 844 #ifdef INET6 845 else if (tp->t_in6pcb) 846 so = tp->t_in6pcb->in6p_socket; 847 #endif 848 else 849 so = NULL; 850 soisdisconnected(so); 851 /* 852 * If we are in FIN_WAIT_2, we arrived here because the 853 * application did a shutdown of the send side. Like the 854 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after 855 * a full close, we start a timer to make sure sockets are 856 * not left in FIN_WAIT_2 forever. 857 */ 858 if ((tp->t_state == TCPS_FIN_WAIT_2) && (tcp_maxidle > 0)) 859 TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle); 860 } 861 return (tp); 862 } 863 864 static struct { 865 unsigned int valid : 1; 866 unsigned int rdonly : 1; 867 int *var; 868 int val; 869 } tcp_ctlvars[] = TCPCTL_VARIABLES; 870 871 /* 872 * Sysctl for tcp variables. 873 */ 874 int 875 tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen) 876 int *name; 877 u_int namelen; 878 void *oldp; 879 size_t *oldlenp; 880 void *newp; 881 size_t newlen; 882 { 883 /* All sysctl names at this level are terminal. */ 884 if (namelen != 1) 885 return (ENOTDIR); 886 887 if (name[0] < sizeof(tcp_ctlvars)/sizeof(tcp_ctlvars[0]) 888 && tcp_ctlvars[name[0]].valid) { 889 if (tcp_ctlvars[name[0]].rdonly) 890 return (sysctl_rdint(oldp, oldlenp, newp, 891 tcp_ctlvars[name[0]].val)); 892 else 893 return (sysctl_int(oldp, oldlenp, newp, newlen, 894 tcp_ctlvars[name[0]].var)); 895 } 896 897 return (ENOPROTOOPT); 898 } 899