1 /* $NetBSD: tcp_usrreq.c,v 1.43 1999/12/13 15:17:21 itojun Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to The NetBSD Foundation 37 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation 38 * Facility, NASA Ames Research Center. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. All advertising materials mentioning features or use of this software 49 * must display the following acknowledgement: 50 * This product includes software developed by the NetBSD 51 * Foundation, Inc. and its contributors. 52 * 4. Neither the name of The NetBSD Foundation nor the names of its 53 * contributors may be used to endorse or promote products derived 54 * from this software without specific prior written permission. 55 * 56 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 57 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 58 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 59 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 60 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 61 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 62 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 63 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 64 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 65 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 66 * POSSIBILITY OF SUCH DAMAGE. 67 */ 68 69 /* 70 * Copyright (c) 1982, 1986, 1988, 1993, 1995 71 * The Regents of the University of California. All rights reserved. 72 * 73 * Redistribution and use in source and binary forms, with or without 74 * modification, are permitted provided that the following conditions 75 * are met: 76 * 1. Redistributions of source code must retain the above copyright 77 * notice, this list of conditions and the following disclaimer. 78 * 2. Redistributions in binary form must reproduce the above copyright 79 * notice, this list of conditions and the following disclaimer in the 80 * documentation and/or other materials provided with the distribution. 81 * 3. All advertising materials mentioning features or use of this software 82 * must display the following acknowledgement: 83 * This product includes software developed by the University of 84 * California, Berkeley and its contributors. 85 * 4. Neither the name of the University nor the names of its contributors 86 * may be used to endorse or promote products derived from this software 87 * without specific prior written permission. 88 * 89 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 90 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 91 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 92 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 93 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 94 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 95 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 96 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 97 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 98 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 99 * SUCH DAMAGE. 100 * 101 * @(#)tcp_usrreq.c 8.5 (Berkeley) 6/21/95 102 */ 103 104 #include "opt_inet.h" 105 #include "opt_ipsec.h" 106 107 #include <sys/param.h> 108 #include <sys/systm.h> 109 #include <sys/kernel.h> 110 #include <sys/malloc.h> 111 #include <sys/mbuf.h> 112 #include <sys/socket.h> 113 #include <sys/socketvar.h> 114 #include <sys/protosw.h> 115 #include <sys/errno.h> 116 #include <sys/stat.h> 117 #include <sys/proc.h> 118 #include <sys/ucred.h> 119 #include <sys/domain.h> 120 121 #include <vm/vm.h> 122 #include <sys/sysctl.h> 123 124 #include <net/if.h> 125 #include <net/route.h> 126 127 #include <netinet/in.h> 128 #include <netinet/in_systm.h> 129 #include <netinet/in_var.h> 130 #include <netinet/ip.h> 131 #include <netinet/in_pcb.h> 132 #include <netinet/ip_var.h> 133 134 #ifdef INET6 135 #ifndef INET 136 #include <netinet/in.h> 137 #endif 138 #include <netinet/ip6.h> 139 #include <netinet6/in6_pcb.h> 140 #include <netinet6/ip6_var.h> 141 #endif 142 143 #include <netinet/tcp.h> 144 #include <netinet/tcp_fsm.h> 145 #include <netinet/tcp_seq.h> 146 #include <netinet/tcp_timer.h> 147 #include <netinet/tcp_var.h> 148 #include <netinet/tcpip.h> 149 #include <netinet/tcp_debug.h> 150 151 #include "opt_tcp_recvspace.h" 152 #include "opt_tcp_sendspace.h" 153 154 #ifdef IPSEC 155 #include <netinet6/ipsec.h> 156 #endif /*IPSEC*/ 157 158 /* 159 * TCP protocol interface to socket abstraction. 160 */ 161 extern char *tcpstates[]; 162 163 /* 164 * Process a TCP user request for TCP tb. If this is a send request 165 * then m is the mbuf chain of send data. If this is a timer expiration 166 * (called from the software clock routine), then timertype tells which timer. 167 */ 168 /*ARGSUSED*/ 169 int 170 tcp_usrreq(so, req, m, nam, control, p) 171 struct socket *so; 172 int req; 173 struct mbuf *m, *nam, *control; 174 struct proc *p; 175 { 176 register struct inpcb *inp; 177 #ifdef INET6 178 register struct in6pcb *in6p; 179 #endif 180 register struct tcpcb *tp = NULL; 181 int s; 182 int error = 0; 183 int ostate; 184 int family; /* family of the socket */ 185 186 family = so->so_proto->pr_domain->dom_family; 187 188 if (req == PRU_CONTROL) { 189 switch (family) { 190 case PF_INET: 191 return (in_control(so, (long)m, (caddr_t)nam, 192 (struct ifnet *)control, p)); 193 #ifdef INET6 194 case PF_INET6: 195 return (in6_control(so, (long)m, (caddr_t)nam, 196 (struct ifnet *)control, p)); 197 #endif 198 default: 199 return EAFNOSUPPORT; 200 } 201 } 202 203 s = splsoftnet(); 204 switch (family) { 205 case PF_INET: 206 inp = sotoinpcb(so); 207 #ifdef INET6 208 in6p = NULL; 209 #endif 210 break; 211 #ifdef INET6 212 case PF_INET6: 213 inp = NULL; 214 in6p = sotoin6pcb(so); 215 break; 216 #endif 217 default: 218 splx(s); 219 return EAFNOSUPPORT; 220 } 221 222 #ifdef DIAGNOSTIC 223 if (req != PRU_SEND && req != PRU_SENDOOB && control) 224 panic("tcp_usrreq: unexpected control mbuf"); 225 #endif 226 /* 227 * When a TCP is attached to a socket, then there will be 228 * a (struct inpcb) pointed at by the socket, and this 229 * structure will point at a subsidary (struct tcpcb). 230 */ 231 #ifndef INET6 232 if (inp == 0 && req != PRU_ATTACH) 233 #else 234 if ((inp == 0 && in6p == 0) && req != PRU_ATTACH) 235 #endif 236 { 237 error = EINVAL; 238 goto release; 239 } 240 if (inp) { 241 tp = intotcpcb(inp); 242 /* WHAT IF TP IS 0? */ 243 #ifdef KPROF 244 tcp_acounts[tp->t_state][req]++; 245 #endif 246 ostate = tp->t_state; 247 } 248 #ifdef INET6 249 else if (in6p) { 250 tp = in6totcpcb(in6p); 251 /* WHAT IF TP IS 0? */ 252 #ifdef KPROF 253 tcp_acounts[tp->t_state][req]++; 254 #endif 255 ostate = tp->t_state; 256 } 257 #endif 258 else 259 ostate = 0; 260 261 switch (req) { 262 263 /* 264 * TCP attaches to socket via PRU_ATTACH, reserving space, 265 * and an internet control block. 266 */ 267 case PRU_ATTACH: 268 #ifndef INET6 269 if (inp != 0) 270 #else 271 if (inp != 0 || in6p != 0) 272 #endif 273 { 274 error = EISCONN; 275 break; 276 } 277 error = tcp_attach(so); 278 if (error) 279 break; 280 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 281 so->so_linger = TCP_LINGERTIME; 282 tp = sototcpcb(so); 283 break; 284 285 /* 286 * PRU_DETACH detaches the TCP protocol from the socket. 287 */ 288 case PRU_DETACH: 289 tp = tcp_disconnect(tp); 290 break; 291 292 /* 293 * Give the socket an address. 294 */ 295 case PRU_BIND: 296 switch (family) { 297 case PF_INET: 298 error = in_pcbbind(inp, nam, p); 299 break; 300 #ifdef INET6 301 case PF_INET6: 302 error = in6_pcbbind(in6p, nam /*, p*/ ); 303 /* mapped addr case */ 304 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr)) 305 tp->t_family = AF_INET; 306 break; 307 #endif 308 } 309 break; 310 311 /* 312 * Prepare to accept connections. 313 */ 314 case PRU_LISTEN: 315 if (inp && inp->inp_lport == 0) { 316 error = in_pcbbind(inp, (struct mbuf *)0, 317 (struct proc *)0); 318 if (error) 319 break; 320 } 321 #ifdef INET6 322 else if (in6p && in6p->in6p_lport == 0) { 323 error = in6_pcbbind(in6p, (struct mbuf *)0 /*, 324 (struct proc *)0 */ ); 325 if (error) 326 break; 327 } 328 #endif 329 tp->t_state = TCPS_LISTEN; 330 break; 331 332 /* 333 * Initiate connection to peer. 334 * Create a template for use in transmissions on this connection. 335 * Enter SYN_SENT state, and mark socket as connecting. 336 * Start keep-alive timer, and seed output sequence space. 337 * Send initial segment on connection. 338 */ 339 case PRU_CONNECT: 340 if (inp) { 341 if (inp->inp_lport == 0) { 342 error = in_pcbbind(inp, (struct mbuf *)0, 343 (struct proc *)0); 344 if (error) 345 break; 346 } 347 error = in_pcbconnect(inp, nam); 348 } 349 #ifdef INET6 350 else if (in6p) { 351 if (in6p->in6p_lport == 0) { 352 error = in6_pcbbind(in6p, (struct mbuf *)0 /*, 353 (struct proc *)0 */ ); 354 if (error) 355 break; 356 } 357 error = in6_pcbconnect(in6p, nam); 358 /* mapped addr case */ 359 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr)) 360 tp->t_family = AF_INET; 361 } 362 #endif 363 if (error) 364 break; 365 tp->t_template = tcp_template(tp); 366 if (tp->t_template == 0) { 367 if (inp) 368 in_pcbdisconnect(inp); 369 #ifdef INET6 370 else if (in6p) 371 in6_pcbdisconnect(in6p); 372 #endif 373 error = ENOBUFS; 374 break; 375 } 376 /* Compute window scaling to request. */ 377 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 378 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 379 tp->request_r_scale++; 380 soisconnecting(so); 381 tcpstat.tcps_connattempt++; 382 tp->t_state = TCPS_SYN_SENT; 383 TCP_TIMER_ARM(tp, TCPT_KEEP, TCPTV_KEEP_INIT); 384 tp->iss = tcp_new_iss(tp, sizeof(struct tcpcb), 0); 385 tcp_sendseqinit(tp); 386 error = tcp_output(tp); 387 break; 388 389 /* 390 * Create a TCP connection between two sockets. 391 */ 392 case PRU_CONNECT2: 393 error = EOPNOTSUPP; 394 break; 395 396 /* 397 * Initiate disconnect from peer. 398 * If connection never passed embryonic stage, just drop; 399 * else if don't need to let data drain, then can just drop anyways, 400 * else have to begin TCP shutdown process: mark socket disconnecting, 401 * drain unread data, state switch to reflect user close, and 402 * send segment (e.g. FIN) to peer. Socket will be really disconnected 403 * when peer sends FIN and acks ours. 404 * 405 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 406 */ 407 case PRU_DISCONNECT: 408 tp = tcp_disconnect(tp); 409 break; 410 411 /* 412 * Accept a connection. Essentially all the work is 413 * done at higher levels; just return the address 414 * of the peer, storing through addr. 415 */ 416 case PRU_ACCEPT: 417 if (inp) 418 in_setpeeraddr(inp, nam); 419 #ifdef INET6 420 else if (in6p) 421 in6_setpeeraddr(in6p, nam); 422 #endif 423 break; 424 425 /* 426 * Mark the connection as being incapable of further output. 427 */ 428 case PRU_SHUTDOWN: 429 socantsendmore(so); 430 tp = tcp_usrclosed(tp); 431 if (tp) 432 error = tcp_output(tp); 433 break; 434 435 /* 436 * After a receive, possibly send window update to peer. 437 */ 438 case PRU_RCVD: 439 (void) tcp_output(tp); 440 break; 441 442 /* 443 * Do a send by putting data in output queue and updating urgent 444 * marker if URG set. Possibly send more data. 445 */ 446 case PRU_SEND: 447 if (control && control->m_len) { 448 m_freem(control); 449 m_freem(m); 450 error = EINVAL; 451 break; 452 } 453 sbappend(&so->so_snd, m); 454 error = tcp_output(tp); 455 break; 456 457 /* 458 * Abort the TCP. 459 */ 460 case PRU_ABORT: 461 tp = tcp_drop(tp, ECONNABORTED); 462 break; 463 464 case PRU_SENSE: 465 /* 466 * stat: don't bother with a blocksize. 467 */ 468 splx(s); 469 return (0); 470 471 case PRU_RCVOOB: 472 if (control && control->m_len) { 473 m_freem(control); 474 m_freem(m); 475 error = EINVAL; 476 break; 477 } 478 if ((so->so_oobmark == 0 && 479 (so->so_state & SS_RCVATMARK) == 0) || 480 so->so_options & SO_OOBINLINE || 481 tp->t_oobflags & TCPOOB_HADDATA) { 482 error = EINVAL; 483 break; 484 } 485 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 486 error = EWOULDBLOCK; 487 break; 488 } 489 m->m_len = 1; 490 *mtod(m, caddr_t) = tp->t_iobc; 491 if (((long)nam & MSG_PEEK) == 0) 492 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 493 break; 494 495 case PRU_SENDOOB: 496 if (sbspace(&so->so_snd) < -512) { 497 m_freem(m); 498 error = ENOBUFS; 499 break; 500 } 501 /* 502 * According to RFC961 (Assigned Protocols), 503 * the urgent pointer points to the last octet 504 * of urgent data. We continue, however, 505 * to consider it to indicate the first octet 506 * of data past the urgent section. 507 * Otherwise, snd_up should be one lower. 508 */ 509 sbappend(&so->so_snd, m); 510 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 511 tp->t_force = 1; 512 error = tcp_output(tp); 513 tp->t_force = 0; 514 break; 515 516 case PRU_SOCKADDR: 517 if (inp) 518 in_setsockaddr(inp, nam); 519 #ifdef INET6 520 else if (in6p) 521 in6_setsockaddr(in6p, nam); 522 #endif 523 break; 524 525 case PRU_PEERADDR: 526 if (inp) 527 in_setpeeraddr(inp, nam); 528 #ifdef INET6 529 else if (in6p) 530 in6_setpeeraddr(in6p, nam); 531 #endif 532 break; 533 534 /* 535 * TCP slow timer went off; going through this 536 * routine for tracing's sake. 537 */ 538 case PRU_SLOWTIMO: 539 tp = tcp_timers(tp, (long)nam); 540 req |= (long)nam << 8; /* for debug's sake */ 541 break; 542 543 default: 544 panic("tcp_usrreq"); 545 } 546 if (tp && (so->so_options & SO_DEBUG)) 547 tcp_trace(TA_USER, ostate, tp, NULL, req); 548 549 release: 550 splx(s); 551 return (error); 552 } 553 554 int 555 tcp_ctloutput(op, so, level, optname, mp) 556 int op; 557 struct socket *so; 558 int level, optname; 559 struct mbuf **mp; 560 { 561 int error = 0, s; 562 struct inpcb *inp; 563 #ifdef INET6 564 register struct in6pcb *in6p; 565 #endif 566 register struct tcpcb *tp; 567 register struct mbuf *m; 568 register int i; 569 int family; /* family of the socket */ 570 571 family = so->so_proto->pr_domain->dom_family; 572 573 s = splsoftnet(); 574 switch (family) { 575 case PF_INET: 576 inp = sotoinpcb(so); 577 #ifdef INET6 578 in6p = NULL; 579 #endif 580 break; 581 #ifdef INET6 582 case PF_INET6: 583 inp = NULL; 584 in6p = sotoin6pcb(so); 585 break; 586 #endif 587 default: 588 splx(s); 589 return EAFNOSUPPORT; 590 } 591 #ifndef INET6 592 if (inp == NULL) 593 #else 594 if (inp == NULL && in6p == NULL) 595 #endif 596 { 597 splx(s); 598 if (op == PRCO_SETOPT && *mp) 599 (void) m_free(*mp); 600 return (ECONNRESET); 601 } 602 if (level != IPPROTO_TCP) { 603 switch (family) { 604 case PF_INET: 605 error = ip_ctloutput(op, so, level, optname, mp); 606 break; 607 #ifdef INET6 608 case PF_INET6: 609 error = ip6_ctloutput(op, so, level, optname, mp); 610 break; 611 #endif 612 } 613 splx(s); 614 return (error); 615 } 616 if (inp) 617 tp = intotcpcb(inp); 618 #ifdef INET6 619 else if (in6p) 620 tp = in6totcpcb(in6p); 621 #endif 622 else 623 tp = NULL; 624 625 switch (op) { 626 627 case PRCO_SETOPT: 628 m = *mp; 629 switch (optname) { 630 631 case TCP_NODELAY: 632 if (m == NULL || m->m_len < sizeof (int)) 633 error = EINVAL; 634 else if (*mtod(m, int *)) 635 tp->t_flags |= TF_NODELAY; 636 else 637 tp->t_flags &= ~TF_NODELAY; 638 break; 639 640 case TCP_MAXSEG: 641 if (m && (i = *mtod(m, int *)) > 0 && 642 i <= tp->t_peermss) 643 tp->t_peermss = i; /* limit on send size */ 644 else 645 error = EINVAL; 646 break; 647 648 default: 649 error = ENOPROTOOPT; 650 break; 651 } 652 if (m) 653 (void) m_free(m); 654 break; 655 656 case PRCO_GETOPT: 657 *mp = m = m_get(M_WAIT, MT_SOOPTS); 658 m->m_len = sizeof(int); 659 660 switch (optname) { 661 case TCP_NODELAY: 662 *mtod(m, int *) = tp->t_flags & TF_NODELAY; 663 break; 664 case TCP_MAXSEG: 665 *mtod(m, int *) = tp->t_peermss; 666 break; 667 default: 668 error = ENOPROTOOPT; 669 break; 670 } 671 break; 672 } 673 splx(s); 674 return (error); 675 } 676 677 #ifndef TCP_SENDSPACE 678 #define TCP_SENDSPACE 1024*16; 679 #endif 680 int tcp_sendspace = TCP_SENDSPACE; 681 #ifndef TCP_RECVSPACE 682 #define TCP_RECVSPACE 1024*16; 683 #endif 684 int tcp_recvspace = TCP_RECVSPACE; 685 686 /* 687 * Attach TCP protocol to socket, allocating 688 * internet protocol control block, tcp control block, 689 * bufer space, and entering LISTEN state if to accept connections. 690 */ 691 int 692 tcp_attach(so) 693 struct socket *so; 694 { 695 register struct tcpcb *tp; 696 struct inpcb *inp; 697 #ifdef INET6 698 struct in6pcb *in6p; 699 #endif 700 int error; 701 int family; /* family of the socket */ 702 703 family = so->so_proto->pr_domain->dom_family; 704 705 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 706 error = soreserve(so, tcp_sendspace, tcp_recvspace); 707 if (error) 708 return (error); 709 } 710 switch (family) { 711 case PF_INET: 712 error = in_pcballoc(so, &tcbtable); 713 if (error) 714 return (error); 715 inp = sotoinpcb(so); 716 #ifdef INET6 717 in6p = NULL; 718 #endif 719 break; 720 #ifdef INET6 721 case PF_INET6: 722 error = in6_pcballoc(so, &tcb6); 723 if (error) 724 return (error); 725 inp = NULL; 726 in6p = sotoin6pcb(so); 727 break; 728 #endif 729 default: 730 return EAFNOSUPPORT; 731 } 732 #ifdef IPSEC 733 if (inp) { 734 error = ipsec_init_policy(&inp->inp_sp); 735 if (error != 0) { 736 in_pcbdetach(inp); 737 return (error); 738 } 739 } 740 #ifdef INET6 741 else if (in6p) { 742 error = ipsec_init_policy(&in6p->in6p_sp); 743 if (error != 0) { 744 in6_pcbdetach(in6p); 745 return (error); 746 } 747 } 748 #endif 749 #endif /*IPSEC*/ 750 if (inp) 751 tp = tcp_newtcpcb(family, (void *)inp); 752 #ifdef INET6 753 else if (in6p) 754 tp = tcp_newtcpcb(family, (void *)in6p); 755 #endif 756 else 757 tp = NULL; 758 759 if (tp == 0) { 760 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 761 762 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 763 if (inp) 764 in_pcbdetach(inp); 765 #ifdef INET6 766 else if (in6p) 767 in6_pcbdetach(in6p); 768 #endif 769 so->so_state |= nofd; 770 return (ENOBUFS); 771 } 772 tp->t_state = TCPS_CLOSED; 773 return (0); 774 } 775 776 /* 777 * Initiate (or continue) disconnect. 778 * If embryonic state, just send reset (once). 779 * If in ``let data drain'' option and linger null, just drop. 780 * Otherwise (hard), mark socket disconnecting and drop 781 * current input data; switch states based on user close, and 782 * send segment to peer (with FIN). 783 */ 784 struct tcpcb * 785 tcp_disconnect(tp) 786 register struct tcpcb *tp; 787 { 788 struct socket *so; 789 790 if (tp->t_inpcb) 791 so = tp->t_inpcb->inp_socket; 792 #ifdef INET6 793 else if (tp->t_in6pcb) 794 so = tp->t_in6pcb->in6p_socket; 795 #endif 796 else 797 so = NULL; 798 799 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 800 tp = tcp_close(tp); 801 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 802 tp = tcp_drop(tp, 0); 803 else { 804 soisdisconnecting(so); 805 sbflush(&so->so_rcv); 806 tp = tcp_usrclosed(tp); 807 if (tp) 808 (void) tcp_output(tp); 809 } 810 return (tp); 811 } 812 813 /* 814 * User issued close, and wish to trail through shutdown states: 815 * if never received SYN, just forget it. If got a SYN from peer, 816 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 817 * If already got a FIN from peer, then almost done; go to LAST_ACK 818 * state. In all other cases, have already sent FIN to peer (e.g. 819 * after PRU_SHUTDOWN), and just have to play tedious game waiting 820 * for peer to send FIN or not respond to keep-alives, etc. 821 * We can let the user exit from the close as soon as the FIN is acked. 822 */ 823 struct tcpcb * 824 tcp_usrclosed(tp) 825 register struct tcpcb *tp; 826 { 827 828 switch (tp->t_state) { 829 830 case TCPS_CLOSED: 831 case TCPS_LISTEN: 832 case TCPS_SYN_SENT: 833 tp->t_state = TCPS_CLOSED; 834 tp = tcp_close(tp); 835 break; 836 837 case TCPS_SYN_RECEIVED: 838 case TCPS_ESTABLISHED: 839 tp->t_state = TCPS_FIN_WAIT_1; 840 break; 841 842 case TCPS_CLOSE_WAIT: 843 tp->t_state = TCPS_LAST_ACK; 844 break; 845 } 846 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 847 struct socket *so; 848 if (tp->t_inpcb) 849 so = tp->t_inpcb->inp_socket; 850 #ifdef INET6 851 else if (tp->t_in6pcb) 852 so = tp->t_in6pcb->in6p_socket; 853 #endif 854 else 855 so = NULL; 856 soisdisconnected(so); 857 /* 858 * If we are in FIN_WAIT_2, we arrived here because the 859 * application did a shutdown of the send side. Like the 860 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after 861 * a full close, we start a timer to make sure sockets are 862 * not left in FIN_WAIT_2 forever. 863 */ 864 if ((tp->t_state == TCPS_FIN_WAIT_2) && (tcp_maxidle > 0)) 865 TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle); 866 } 867 return (tp); 868 } 869 870 static struct { 871 unsigned int valid : 1; 872 unsigned int rdonly : 1; 873 int *var; 874 int val; 875 } tcp_ctlvars[] = TCPCTL_VARIABLES; 876 877 /* 878 * Sysctl for tcp variables. 879 */ 880 int 881 tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen) 882 int *name; 883 u_int namelen; 884 void *oldp; 885 size_t *oldlenp; 886 void *newp; 887 size_t newlen; 888 { 889 /* All sysctl names at this level are terminal. */ 890 if (namelen != 1) 891 return (ENOTDIR); 892 893 if (name[0] < sizeof(tcp_ctlvars)/sizeof(tcp_ctlvars[0]) 894 && tcp_ctlvars[name[0]].valid) { 895 if (tcp_ctlvars[name[0]].rdonly) 896 return (sysctl_rdint(oldp, oldlenp, newp, 897 tcp_ctlvars[name[0]].val)); 898 else 899 return (sysctl_int(oldp, oldlenp, newp, newlen, 900 tcp_ctlvars[name[0]].var)); 901 } 902 903 return (ENOPROTOOPT); 904 } 905