1 /* $NetBSD: tcp_usrreq.c,v 1.61 2001/03/20 20:07:52 thorpej Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to The NetBSD Foundation 37 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation 38 * Facility, NASA Ames Research Center. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. All advertising materials mentioning features or use of this software 49 * must display the following acknowledgement: 50 * This product includes software developed by the NetBSD 51 * Foundation, Inc. and its contributors. 52 * 4. Neither the name of The NetBSD Foundation nor the names of its 53 * contributors may be used to endorse or promote products derived 54 * from this software without specific prior written permission. 55 * 56 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 57 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 58 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 59 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 60 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 61 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 62 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 63 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 64 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 65 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 66 * POSSIBILITY OF SUCH DAMAGE. 67 */ 68 69 /* 70 * Copyright (c) 1982, 1986, 1988, 1993, 1995 71 * The Regents of the University of California. All rights reserved. 72 * 73 * Redistribution and use in source and binary forms, with or without 74 * modification, are permitted provided that the following conditions 75 * are met: 76 * 1. Redistributions of source code must retain the above copyright 77 * notice, this list of conditions and the following disclaimer. 78 * 2. Redistributions in binary form must reproduce the above copyright 79 * notice, this list of conditions and the following disclaimer in the 80 * documentation and/or other materials provided with the distribution. 81 * 3. All advertising materials mentioning features or use of this software 82 * must display the following acknowledgement: 83 * This product includes software developed by the University of 84 * California, Berkeley and its contributors. 85 * 4. Neither the name of the University nor the names of its contributors 86 * may be used to endorse or promote products derived from this software 87 * without specific prior written permission. 88 * 89 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 90 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 91 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 92 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 93 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 94 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 95 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 96 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 97 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 98 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 99 * SUCH DAMAGE. 100 * 101 * @(#)tcp_usrreq.c 8.5 (Berkeley) 6/21/95 102 */ 103 104 #include "opt_inet.h" 105 #include "opt_ipsec.h" 106 107 #include <sys/param.h> 108 #include <sys/systm.h> 109 #include <sys/kernel.h> 110 #include <sys/malloc.h> 111 #include <sys/mbuf.h> 112 #include <sys/socket.h> 113 #include <sys/socketvar.h> 114 #include <sys/protosw.h> 115 #include <sys/errno.h> 116 #include <sys/stat.h> 117 #include <sys/proc.h> 118 #include <sys/ucred.h> 119 #include <sys/domain.h> 120 121 #include <uvm/uvm_extern.h> 122 #include <sys/sysctl.h> 123 124 #include <net/if.h> 125 #include <net/route.h> 126 127 #include <netinet/in.h> 128 #include <netinet/in_systm.h> 129 #include <netinet/in_var.h> 130 #include <netinet/ip.h> 131 #include <netinet/in_pcb.h> 132 #include <netinet/ip_var.h> 133 134 #ifdef INET6 135 #ifndef INET 136 #include <netinet/in.h> 137 #endif 138 #include <netinet/ip6.h> 139 #include <netinet6/in6_pcb.h> 140 #include <netinet6/ip6_var.h> 141 #endif 142 143 #include <netinet/tcp.h> 144 #include <netinet/tcp_fsm.h> 145 #include <netinet/tcp_seq.h> 146 #include <netinet/tcp_timer.h> 147 #include <netinet/tcp_var.h> 148 #include <netinet/tcpip.h> 149 #include <netinet/tcp_debug.h> 150 151 #include "opt_tcp_recvspace.h" 152 #include "opt_tcp_sendspace.h" 153 154 #ifdef IPSEC 155 #include <netinet6/ipsec.h> 156 #endif /*IPSEC*/ 157 158 /* 159 * TCP protocol interface to socket abstraction. 160 */ 161 extern char *tcpstates[]; 162 163 /* 164 * Process a TCP user request for TCP tb. If this is a send request 165 * then m is the mbuf chain of send data. If this is a timer expiration 166 * (called from the software clock routine), then timertype tells which timer. 167 */ 168 /*ARGSUSED*/ 169 int 170 tcp_usrreq(so, req, m, nam, control, p) 171 struct socket *so; 172 int req; 173 struct mbuf *m, *nam, *control; 174 struct proc *p; 175 { 176 struct inpcb *inp; 177 #ifdef INET6 178 struct in6pcb *in6p; 179 #endif 180 struct tcpcb *tp = NULL; 181 int s; 182 int error = 0; 183 int ostate; 184 int family; /* family of the socket */ 185 186 family = so->so_proto->pr_domain->dom_family; 187 188 if (req == PRU_CONTROL) { 189 switch (family) { 190 #ifdef INET 191 case PF_INET: 192 return (in_control(so, (long)m, (caddr_t)nam, 193 (struct ifnet *)control, p)); 194 #endif 195 #ifdef INET6 196 case PF_INET6: 197 return (in6_control(so, (long)m, (caddr_t)nam, 198 (struct ifnet *)control, p)); 199 #endif 200 default: 201 return EAFNOSUPPORT; 202 } 203 } 204 205 if (req == PRU_PURGEIF) { 206 switch (family) { 207 #ifdef INET 208 case PF_INET: 209 in_purgeif((struct ifnet *)control); 210 in_pcbpurgeif(&tcbtable, (struct ifnet *)control); 211 break; 212 #endif 213 #ifdef INET6 214 case PF_INET6: 215 in6_purgeif((struct ifnet *)control); 216 in6_pcbpurgeif(&tcb6, (struct ifnet *)control); 217 break; 218 #endif 219 default: 220 return (EAFNOSUPPORT); 221 } 222 return (0); 223 } 224 225 s = splsoftnet(); 226 switch (family) { 227 #ifdef INET 228 case PF_INET: 229 inp = sotoinpcb(so); 230 #ifdef INET6 231 in6p = NULL; 232 #endif 233 break; 234 #endif 235 #ifdef INET6 236 case PF_INET6: 237 inp = NULL; 238 in6p = sotoin6pcb(so); 239 break; 240 #endif 241 default: 242 splx(s); 243 return EAFNOSUPPORT; 244 } 245 246 #ifdef DIAGNOSTIC 247 #ifdef INET6 248 if (inp && in6p) 249 panic("tcp_usrreq: both inp and in6p set to non-NULL"); 250 #endif 251 if (req != PRU_SEND && req != PRU_SENDOOB && control) 252 panic("tcp_usrreq: unexpected control mbuf"); 253 #endif 254 /* 255 * When a TCP is attached to a socket, then there will be 256 * a (struct inpcb) pointed at by the socket, and this 257 * structure will point at a subsidary (struct tcpcb). 258 */ 259 #ifndef INET6 260 if (inp == 0 && req != PRU_ATTACH) 261 #else 262 if ((inp == 0 && in6p == 0) && req != PRU_ATTACH) 263 #endif 264 { 265 error = EINVAL; 266 goto release; 267 } 268 #ifdef INET 269 if (inp) { 270 tp = intotcpcb(inp); 271 /* WHAT IF TP IS 0? */ 272 #ifdef KPROF 273 tcp_acounts[tp->t_state][req]++; 274 #endif 275 ostate = tp->t_state; 276 } 277 #endif 278 #ifdef INET6 279 if (in6p) { 280 tp = in6totcpcb(in6p); 281 /* WHAT IF TP IS 0? */ 282 #ifdef KPROF 283 tcp_acounts[tp->t_state][req]++; 284 #endif 285 ostate = tp->t_state; 286 } 287 #endif 288 else 289 ostate = 0; 290 291 switch (req) { 292 293 /* 294 * TCP attaches to socket via PRU_ATTACH, reserving space, 295 * and an internet control block. 296 */ 297 case PRU_ATTACH: 298 #ifndef INET6 299 if (inp != 0) 300 #else 301 if (inp != 0 || in6p != 0) 302 #endif 303 { 304 error = EISCONN; 305 break; 306 } 307 error = tcp_attach(so); 308 if (error) 309 break; 310 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 311 so->so_linger = TCP_LINGERTIME; 312 tp = sototcpcb(so); 313 break; 314 315 /* 316 * PRU_DETACH detaches the TCP protocol from the socket. 317 */ 318 case PRU_DETACH: 319 tp = tcp_disconnect(tp); 320 break; 321 322 /* 323 * Give the socket an address. 324 */ 325 case PRU_BIND: 326 switch (family) { 327 #ifdef INET 328 case PF_INET: 329 error = in_pcbbind(inp, nam, p); 330 break; 331 #endif 332 #ifdef INET6 333 case PF_INET6: 334 error = in6_pcbbind(in6p, nam, p); 335 if (!error) { 336 /* mapped addr case */ 337 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr)) 338 tp->t_family = AF_INET; 339 else 340 tp->t_family = AF_INET6; 341 } 342 break; 343 #endif 344 } 345 break; 346 347 /* 348 * Prepare to accept connections. 349 */ 350 case PRU_LISTEN: 351 #ifdef INET 352 if (inp && inp->inp_lport == 0) { 353 error = in_pcbbind(inp, (struct mbuf *)0, 354 (struct proc *)0); 355 if (error) 356 break; 357 } 358 #endif 359 #ifdef INET6 360 if (in6p && in6p->in6p_lport == 0) { 361 error = in6_pcbbind(in6p, (struct mbuf *)0, 362 (struct proc *)0); 363 if (error) 364 break; 365 } 366 #endif 367 tp->t_state = TCPS_LISTEN; 368 break; 369 370 /* 371 * Initiate connection to peer. 372 * Create a template for use in transmissions on this connection. 373 * Enter SYN_SENT state, and mark socket as connecting. 374 * Start keep-alive timer, and seed output sequence space. 375 * Send initial segment on connection. 376 */ 377 case PRU_CONNECT: 378 #ifdef INET 379 if (inp) { 380 if (inp->inp_lport == 0) { 381 error = in_pcbbind(inp, (struct mbuf *)0, 382 (struct proc *)0); 383 if (error) 384 break; 385 } 386 error = in_pcbconnect(inp, nam); 387 } 388 #endif 389 #ifdef INET6 390 if (in6p) { 391 if (in6p->in6p_lport == 0) { 392 error = in6_pcbbind(in6p, (struct mbuf *)0, 393 (struct proc *)0); 394 if (error) 395 break; 396 } 397 error = in6_pcbconnect(in6p, nam); 398 if (!error) { 399 /* mapped addr case */ 400 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr)) 401 tp->t_family = AF_INET; 402 else 403 tp->t_family = AF_INET6; 404 } 405 } 406 #endif 407 if (error) 408 break; 409 tp->t_template = tcp_template(tp); 410 if (tp->t_template == 0) { 411 #ifdef INET 412 if (inp) 413 in_pcbdisconnect(inp); 414 #endif 415 #ifdef INET6 416 if (in6p) 417 in6_pcbdisconnect(in6p); 418 #endif 419 error = ENOBUFS; 420 break; 421 } 422 /* Compute window scaling to request. */ 423 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 424 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 425 tp->request_r_scale++; 426 soisconnecting(so); 427 tcpstat.tcps_connattempt++; 428 tp->t_state = TCPS_SYN_SENT; 429 TCP_TIMER_ARM(tp, TCPT_KEEP, TCPTV_KEEP_INIT); 430 tp->iss = tcp_new_iss(tp, 0); 431 tcp_sendseqinit(tp); 432 error = tcp_output(tp); 433 break; 434 435 /* 436 * Create a TCP connection between two sockets. 437 */ 438 case PRU_CONNECT2: 439 error = EOPNOTSUPP; 440 break; 441 442 /* 443 * Initiate disconnect from peer. 444 * If connection never passed embryonic stage, just drop; 445 * else if don't need to let data drain, then can just drop anyways, 446 * else have to begin TCP shutdown process: mark socket disconnecting, 447 * drain unread data, state switch to reflect user close, and 448 * send segment (e.g. FIN) to peer. Socket will be really disconnected 449 * when peer sends FIN and acks ours. 450 * 451 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 452 */ 453 case PRU_DISCONNECT: 454 tp = tcp_disconnect(tp); 455 break; 456 457 /* 458 * Accept a connection. Essentially all the work is 459 * done at higher levels; just return the address 460 * of the peer, storing through addr. 461 */ 462 case PRU_ACCEPT: 463 #ifdef INET 464 if (inp) 465 in_setpeeraddr(inp, nam); 466 #endif 467 #ifdef INET6 468 if (in6p) 469 in6_setpeeraddr(in6p, nam); 470 #endif 471 break; 472 473 /* 474 * Mark the connection as being incapable of further output. 475 */ 476 case PRU_SHUTDOWN: 477 socantsendmore(so); 478 tp = tcp_usrclosed(tp); 479 if (tp) 480 error = tcp_output(tp); 481 break; 482 483 /* 484 * After a receive, possibly send window update to peer. 485 */ 486 case PRU_RCVD: 487 /* 488 * soreceive() calls this function when a user receives 489 * ancillary data on a listening socket. We don't call 490 * tcp_output in such a case, since there is no header 491 * template for a listening socket and hence the kernel 492 * will panic. 493 */ 494 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0) 495 (void) tcp_output(tp); 496 break; 497 498 /* 499 * Do a send by putting data in output queue and updating urgent 500 * marker if URG set. Possibly send more data. 501 */ 502 case PRU_SEND: 503 if (control && control->m_len) { 504 m_freem(control); 505 m_freem(m); 506 error = EINVAL; 507 break; 508 } 509 sbappend(&so->so_snd, m); 510 error = tcp_output(tp); 511 break; 512 513 /* 514 * Abort the TCP. 515 */ 516 case PRU_ABORT: 517 tp = tcp_drop(tp, ECONNABORTED); 518 break; 519 520 case PRU_SENSE: 521 /* 522 * stat: don't bother with a blocksize. 523 */ 524 splx(s); 525 return (0); 526 527 case PRU_RCVOOB: 528 if (control && control->m_len) { 529 m_freem(control); 530 m_freem(m); 531 error = EINVAL; 532 break; 533 } 534 if ((so->so_oobmark == 0 && 535 (so->so_state & SS_RCVATMARK) == 0) || 536 so->so_options & SO_OOBINLINE || 537 tp->t_oobflags & TCPOOB_HADDATA) { 538 error = EINVAL; 539 break; 540 } 541 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 542 error = EWOULDBLOCK; 543 break; 544 } 545 m->m_len = 1; 546 *mtod(m, caddr_t) = tp->t_iobc; 547 if (((long)nam & MSG_PEEK) == 0) 548 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 549 break; 550 551 case PRU_SENDOOB: 552 if (sbspace(&so->so_snd) < -512) { 553 m_freem(m); 554 error = ENOBUFS; 555 break; 556 } 557 /* 558 * According to RFC961 (Assigned Protocols), 559 * the urgent pointer points to the last octet 560 * of urgent data. We continue, however, 561 * to consider it to indicate the first octet 562 * of data past the urgent section. 563 * Otherwise, snd_up should be one lower. 564 */ 565 sbappend(&so->so_snd, m); 566 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 567 tp->t_force = 1; 568 error = tcp_output(tp); 569 tp->t_force = 0; 570 break; 571 572 case PRU_SOCKADDR: 573 #ifdef INET 574 if (inp) 575 in_setsockaddr(inp, nam); 576 #endif 577 #ifdef INET6 578 if (in6p) 579 in6_setsockaddr(in6p, nam); 580 #endif 581 break; 582 583 case PRU_PEERADDR: 584 #ifdef INET 585 if (inp) 586 in_setpeeraddr(inp, nam); 587 #endif 588 #ifdef INET6 589 if (in6p) 590 in6_setpeeraddr(in6p, nam); 591 #endif 592 break; 593 594 /* 595 * TCP slow timer went off; going through this 596 * routine for tracing's sake. 597 */ 598 case PRU_SLOWTIMO: 599 tp = tcp_timers(tp, (long)nam); 600 req |= (long)nam << 8; /* for debug's sake */ 601 break; 602 603 default: 604 panic("tcp_usrreq"); 605 } 606 if (tp && (so->so_options & SO_DEBUG)) 607 tcp_trace(TA_USER, ostate, tp, NULL, req); 608 609 release: 610 splx(s); 611 return (error); 612 } 613 614 int 615 tcp_ctloutput(op, so, level, optname, mp) 616 int op; 617 struct socket *so; 618 int level, optname; 619 struct mbuf **mp; 620 { 621 int error = 0, s; 622 struct inpcb *inp; 623 #ifdef INET6 624 struct in6pcb *in6p; 625 #endif 626 struct tcpcb *tp; 627 struct mbuf *m; 628 int i; 629 int family; /* family of the socket */ 630 631 family = so->so_proto->pr_domain->dom_family; 632 633 s = splsoftnet(); 634 switch (family) { 635 #ifdef INET 636 case PF_INET: 637 inp = sotoinpcb(so); 638 #ifdef INET6 639 in6p = NULL; 640 #endif 641 break; 642 #endif 643 #ifdef INET6 644 case PF_INET6: 645 inp = NULL; 646 in6p = sotoin6pcb(so); 647 break; 648 #endif 649 default: 650 splx(s); 651 return EAFNOSUPPORT; 652 } 653 #ifndef INET6 654 if (inp == NULL) 655 #else 656 if (inp == NULL && in6p == NULL) 657 #endif 658 { 659 splx(s); 660 if (op == PRCO_SETOPT && *mp) 661 (void) m_free(*mp); 662 return (ECONNRESET); 663 } 664 if (level != IPPROTO_TCP) { 665 switch (family) { 666 #ifdef INET 667 case PF_INET: 668 error = ip_ctloutput(op, so, level, optname, mp); 669 break; 670 #endif 671 #ifdef INET6 672 case PF_INET6: 673 error = ip6_ctloutput(op, so, level, optname, mp); 674 break; 675 #endif 676 } 677 splx(s); 678 return (error); 679 } 680 if (inp) 681 tp = intotcpcb(inp); 682 #ifdef INET6 683 else if (in6p) 684 tp = in6totcpcb(in6p); 685 #endif 686 else 687 tp = NULL; 688 689 switch (op) { 690 691 case PRCO_SETOPT: 692 m = *mp; 693 switch (optname) { 694 695 case TCP_NODELAY: 696 if (m == NULL || m->m_len < sizeof (int)) 697 error = EINVAL; 698 else if (*mtod(m, int *)) 699 tp->t_flags |= TF_NODELAY; 700 else 701 tp->t_flags &= ~TF_NODELAY; 702 break; 703 704 case TCP_MAXSEG: 705 if (m && (i = *mtod(m, int *)) > 0 && 706 i <= tp->t_peermss) 707 tp->t_peermss = i; /* limit on send size */ 708 else 709 error = EINVAL; 710 break; 711 712 default: 713 error = ENOPROTOOPT; 714 break; 715 } 716 if (m) 717 (void) m_free(m); 718 break; 719 720 case PRCO_GETOPT: 721 *mp = m = m_get(M_WAIT, MT_SOOPTS); 722 m->m_len = sizeof(int); 723 724 switch (optname) { 725 case TCP_NODELAY: 726 *mtod(m, int *) = tp->t_flags & TF_NODELAY; 727 break; 728 case TCP_MAXSEG: 729 *mtod(m, int *) = tp->t_peermss; 730 break; 731 default: 732 error = ENOPROTOOPT; 733 break; 734 } 735 break; 736 } 737 splx(s); 738 return (error); 739 } 740 741 #ifndef TCP_SENDSPACE 742 #define TCP_SENDSPACE 1024*16; 743 #endif 744 int tcp_sendspace = TCP_SENDSPACE; 745 #ifndef TCP_RECVSPACE 746 #define TCP_RECVSPACE 1024*16; 747 #endif 748 int tcp_recvspace = TCP_RECVSPACE; 749 750 /* 751 * Attach TCP protocol to socket, allocating 752 * internet protocol control block, tcp control block, 753 * bufer space, and entering LISTEN state if to accept connections. 754 */ 755 int 756 tcp_attach(so) 757 struct socket *so; 758 { 759 struct tcpcb *tp; 760 struct inpcb *inp; 761 #ifdef INET6 762 struct in6pcb *in6p; 763 #endif 764 int error; 765 int family; /* family of the socket */ 766 767 family = so->so_proto->pr_domain->dom_family; 768 769 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 770 error = soreserve(so, tcp_sendspace, tcp_recvspace); 771 if (error) 772 return (error); 773 } 774 switch (family) { 775 #ifdef INET 776 case PF_INET: 777 error = in_pcballoc(so, &tcbtable); 778 if (error) 779 return (error); 780 inp = sotoinpcb(so); 781 #ifdef INET6 782 in6p = NULL; 783 #endif 784 break; 785 #endif 786 #ifdef INET6 787 case PF_INET6: 788 error = in6_pcballoc(so, &tcb6); 789 if (error) 790 return (error); 791 inp = NULL; 792 in6p = sotoin6pcb(so); 793 break; 794 #endif 795 default: 796 return EAFNOSUPPORT; 797 } 798 #ifdef IPSEC 799 #ifdef INET 800 if (inp) { 801 error = ipsec_init_policy(so, &inp->inp_sp); 802 if (error != 0) { 803 in_pcbdetach(inp); 804 return (error); 805 } 806 } 807 #endif 808 #ifdef INET6 809 if (in6p) { 810 error = ipsec_init_policy(so, &in6p->in6p_sp); 811 if (error != 0) { 812 in6_pcbdetach(in6p); 813 return (error); 814 } 815 } 816 #endif 817 #endif /*IPSEC*/ 818 if (inp) 819 tp = tcp_newtcpcb(family, (void *)inp); 820 #ifdef INET6 821 else if (in6p) 822 tp = tcp_newtcpcb(family, (void *)in6p); 823 #endif 824 else 825 tp = NULL; 826 827 if (tp == 0) { 828 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 829 830 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 831 #ifdef INET 832 if (inp) 833 in_pcbdetach(inp); 834 #endif 835 #ifdef INET6 836 if (in6p) 837 in6_pcbdetach(in6p); 838 #endif 839 so->so_state |= nofd; 840 return (ENOBUFS); 841 } 842 tp->t_state = TCPS_CLOSED; 843 return (0); 844 } 845 846 /* 847 * Initiate (or continue) disconnect. 848 * If embryonic state, just send reset (once). 849 * If in ``let data drain'' option and linger null, just drop. 850 * Otherwise (hard), mark socket disconnecting and drop 851 * current input data; switch states based on user close, and 852 * send segment to peer (with FIN). 853 */ 854 struct tcpcb * 855 tcp_disconnect(tp) 856 struct tcpcb *tp; 857 { 858 struct socket *so; 859 860 if (tp->t_inpcb) 861 so = tp->t_inpcb->inp_socket; 862 #ifdef INET6 863 else if (tp->t_in6pcb) 864 so = tp->t_in6pcb->in6p_socket; 865 #endif 866 else 867 so = NULL; 868 869 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 870 tp = tcp_close(tp); 871 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 872 tp = tcp_drop(tp, 0); 873 else { 874 soisdisconnecting(so); 875 sbflush(&so->so_rcv); 876 tp = tcp_usrclosed(tp); 877 if (tp) 878 (void) tcp_output(tp); 879 } 880 return (tp); 881 } 882 883 /* 884 * User issued close, and wish to trail through shutdown states: 885 * if never received SYN, just forget it. If got a SYN from peer, 886 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 887 * If already got a FIN from peer, then almost done; go to LAST_ACK 888 * state. In all other cases, have already sent FIN to peer (e.g. 889 * after PRU_SHUTDOWN), and just have to play tedious game waiting 890 * for peer to send FIN or not respond to keep-alives, etc. 891 * We can let the user exit from the close as soon as the FIN is acked. 892 */ 893 struct tcpcb * 894 tcp_usrclosed(tp) 895 struct tcpcb *tp; 896 { 897 898 switch (tp->t_state) { 899 900 case TCPS_CLOSED: 901 case TCPS_LISTEN: 902 case TCPS_SYN_SENT: 903 tp->t_state = TCPS_CLOSED; 904 tp = tcp_close(tp); 905 break; 906 907 case TCPS_SYN_RECEIVED: 908 case TCPS_ESTABLISHED: 909 tp->t_state = TCPS_FIN_WAIT_1; 910 break; 911 912 case TCPS_CLOSE_WAIT: 913 tp->t_state = TCPS_LAST_ACK; 914 break; 915 } 916 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 917 struct socket *so; 918 if (tp->t_inpcb) 919 so = tp->t_inpcb->inp_socket; 920 #ifdef INET6 921 else if (tp->t_in6pcb) 922 so = tp->t_in6pcb->in6p_socket; 923 #endif 924 else 925 so = NULL; 926 soisdisconnected(so); 927 /* 928 * If we are in FIN_WAIT_2, we arrived here because the 929 * application did a shutdown of the send side. Like the 930 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after 931 * a full close, we start a timer to make sure sockets are 932 * not left in FIN_WAIT_2 forever. 933 */ 934 if ((tp->t_state == TCPS_FIN_WAIT_2) && (tcp_maxidle > 0)) 935 TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle); 936 } 937 return (tp); 938 } 939 940 static const struct { 941 unsigned int valid : 1; 942 unsigned int rdonly : 1; 943 int *var; 944 int val; 945 } tcp_ctlvars[] = TCPCTL_VARIABLES; 946 947 /* 948 * Sysctl for tcp variables. 949 */ 950 int 951 tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen) 952 int *name; 953 u_int namelen; 954 void *oldp; 955 size_t *oldlenp; 956 void *newp; 957 size_t newlen; 958 { 959 960 /* All sysctl names at this level are terminal. */ 961 if (namelen != 1) 962 return (ENOTDIR); 963 964 if (name[0] < sizeof(tcp_ctlvars)/sizeof(tcp_ctlvars[0]) 965 && tcp_ctlvars[name[0]].valid) { 966 if (tcp_ctlvars[name[0]].rdonly) 967 return (sysctl_rdint(oldp, oldlenp, newp, 968 tcp_ctlvars[name[0]].val)); 969 else 970 return (sysctl_int(oldp, oldlenp, newp, newlen, 971 tcp_ctlvars[name[0]].var)); 972 } 973 974 return (ENOPROTOOPT); 975 } 976