1 /* $NetBSD: tcp_usrreq.c,v 1.64 2001/07/25 23:28:02 itojun Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to The NetBSD Foundation 37 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation 38 * Facility, NASA Ames Research Center. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. All advertising materials mentioning features or use of this software 49 * must display the following acknowledgement: 50 * This product includes software developed by the NetBSD 51 * Foundation, Inc. and its contributors. 52 * 4. Neither the name of The NetBSD Foundation nor the names of its 53 * contributors may be used to endorse or promote products derived 54 * from this software without specific prior written permission. 55 * 56 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 57 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 58 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 59 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 60 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 61 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 62 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 63 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 64 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 65 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 66 * POSSIBILITY OF SUCH DAMAGE. 67 */ 68 69 /* 70 * Copyright (c) 1982, 1986, 1988, 1993, 1995 71 * The Regents of the University of California. All rights reserved. 72 * 73 * Redistribution and use in source and binary forms, with or without 74 * modification, are permitted provided that the following conditions 75 * are met: 76 * 1. Redistributions of source code must retain the above copyright 77 * notice, this list of conditions and the following disclaimer. 78 * 2. Redistributions in binary form must reproduce the above copyright 79 * notice, this list of conditions and the following disclaimer in the 80 * documentation and/or other materials provided with the distribution. 81 * 3. All advertising materials mentioning features or use of this software 82 * must display the following acknowledgement: 83 * This product includes software developed by the University of 84 * California, Berkeley and its contributors. 85 * 4. Neither the name of the University nor the names of its contributors 86 * may be used to endorse or promote products derived from this software 87 * without specific prior written permission. 88 * 89 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 90 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 91 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 92 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 93 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 94 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 95 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 96 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 97 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 98 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 99 * SUCH DAMAGE. 100 * 101 * @(#)tcp_usrreq.c 8.5 (Berkeley) 6/21/95 102 */ 103 104 #include "opt_inet.h" 105 #include "opt_ipsec.h" 106 #include "opt_tcp_debug.h" 107 108 #include <sys/param.h> 109 #include <sys/systm.h> 110 #include <sys/kernel.h> 111 #include <sys/malloc.h> 112 #include <sys/mbuf.h> 113 #include <sys/socket.h> 114 #include <sys/socketvar.h> 115 #include <sys/protosw.h> 116 #include <sys/errno.h> 117 #include <sys/stat.h> 118 #include <sys/proc.h> 119 #include <sys/ucred.h> 120 #include <sys/domain.h> 121 122 #include <uvm/uvm_extern.h> 123 #include <sys/sysctl.h> 124 125 #include <net/if.h> 126 #include <net/route.h> 127 128 #include <netinet/in.h> 129 #include <netinet/in_systm.h> 130 #include <netinet/in_var.h> 131 #include <netinet/ip.h> 132 #include <netinet/in_pcb.h> 133 #include <netinet/ip_var.h> 134 135 #ifdef INET6 136 #ifndef INET 137 #include <netinet/in.h> 138 #endif 139 #include <netinet/ip6.h> 140 #include <netinet6/in6_pcb.h> 141 #include <netinet6/ip6_var.h> 142 #endif 143 144 #include <netinet/tcp.h> 145 #include <netinet/tcp_fsm.h> 146 #include <netinet/tcp_seq.h> 147 #include <netinet/tcp_timer.h> 148 #include <netinet/tcp_var.h> 149 #include <netinet/tcpip.h> 150 #include <netinet/tcp_debug.h> 151 152 #include "opt_tcp_recvspace.h" 153 #include "opt_tcp_sendspace.h" 154 155 #ifdef IPSEC 156 #include <netinet6/ipsec.h> 157 #endif /*IPSEC*/ 158 159 /* 160 * TCP protocol interface to socket abstraction. 161 */ 162 extern char *tcpstates[]; 163 164 /* 165 * Process a TCP user request for TCP tb. If this is a send request 166 * then m is the mbuf chain of send data. If this is a timer expiration 167 * (called from the software clock routine), then timertype tells which timer. 168 */ 169 /*ARGSUSED*/ 170 int 171 tcp_usrreq(so, req, m, nam, control, p) 172 struct socket *so; 173 int req; 174 struct mbuf *m, *nam, *control; 175 struct proc *p; 176 { 177 struct inpcb *inp; 178 #ifdef INET6 179 struct in6pcb *in6p; 180 #endif 181 struct tcpcb *tp = NULL; 182 int s; 183 int error = 0; 184 int ostate; 185 int family; /* family of the socket */ 186 187 family = so->so_proto->pr_domain->dom_family; 188 189 if (req == PRU_CONTROL) { 190 switch (family) { 191 #ifdef INET 192 case PF_INET: 193 return (in_control(so, (long)m, (caddr_t)nam, 194 (struct ifnet *)control, p)); 195 #endif 196 #ifdef INET6 197 case PF_INET6: 198 return (in6_control(so, (long)m, (caddr_t)nam, 199 (struct ifnet *)control, p)); 200 #endif 201 default: 202 return EAFNOSUPPORT; 203 } 204 } 205 206 if (req == PRU_PURGEIF) { 207 switch (family) { 208 #ifdef INET 209 case PF_INET: 210 in_pcbpurgeif0(&tcbtable, (struct ifnet *)control); 211 in_purgeif((struct ifnet *)control); 212 in_pcbpurgeif(&tcbtable, (struct ifnet *)control); 213 break; 214 #endif 215 #ifdef INET6 216 case PF_INET6: 217 in6_pcbpurgeif0(&tcb6, (struct ifnet *)control); 218 in6_purgeif((struct ifnet *)control); 219 in6_pcbpurgeif(&tcb6, (struct ifnet *)control); 220 break; 221 #endif 222 default: 223 return (EAFNOSUPPORT); 224 } 225 return (0); 226 } 227 228 s = splsoftnet(); 229 switch (family) { 230 #ifdef INET 231 case PF_INET: 232 inp = sotoinpcb(so); 233 #ifdef INET6 234 in6p = NULL; 235 #endif 236 break; 237 #endif 238 #ifdef INET6 239 case PF_INET6: 240 inp = NULL; 241 in6p = sotoin6pcb(so); 242 break; 243 #endif 244 default: 245 splx(s); 246 return EAFNOSUPPORT; 247 } 248 249 #ifdef DIAGNOSTIC 250 #ifdef INET6 251 if (inp && in6p) 252 panic("tcp_usrreq: both inp and in6p set to non-NULL"); 253 #endif 254 if (req != PRU_SEND && req != PRU_SENDOOB && control) 255 panic("tcp_usrreq: unexpected control mbuf"); 256 #endif 257 /* 258 * When a TCP is attached to a socket, then there will be 259 * a (struct inpcb) pointed at by the socket, and this 260 * structure will point at a subsidary (struct tcpcb). 261 */ 262 #ifndef INET6 263 if (inp == 0 && req != PRU_ATTACH) 264 #else 265 if ((inp == 0 && in6p == 0) && req != PRU_ATTACH) 266 #endif 267 { 268 error = EINVAL; 269 goto release; 270 } 271 #ifdef INET 272 if (inp) { 273 tp = intotcpcb(inp); 274 /* WHAT IF TP IS 0? */ 275 #ifdef KPROF 276 tcp_acounts[tp->t_state][req]++; 277 #endif 278 ostate = tp->t_state; 279 } 280 #endif 281 #ifdef INET6 282 if (in6p) { 283 tp = in6totcpcb(in6p); 284 /* WHAT IF TP IS 0? */ 285 #ifdef KPROF 286 tcp_acounts[tp->t_state][req]++; 287 #endif 288 ostate = tp->t_state; 289 } 290 #endif 291 else 292 ostate = 0; 293 294 switch (req) { 295 296 /* 297 * TCP attaches to socket via PRU_ATTACH, reserving space, 298 * and an internet control block. 299 */ 300 case PRU_ATTACH: 301 #ifndef INET6 302 if (inp != 0) 303 #else 304 if (inp != 0 || in6p != 0) 305 #endif 306 { 307 error = EISCONN; 308 break; 309 } 310 error = tcp_attach(so); 311 if (error) 312 break; 313 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 314 so->so_linger = TCP_LINGERTIME; 315 tp = sototcpcb(so); 316 break; 317 318 /* 319 * PRU_DETACH detaches the TCP protocol from the socket. 320 */ 321 case PRU_DETACH: 322 tp = tcp_disconnect(tp); 323 break; 324 325 /* 326 * Give the socket an address. 327 */ 328 case PRU_BIND: 329 switch (family) { 330 #ifdef INET 331 case PF_INET: 332 error = in_pcbbind(inp, nam, p); 333 break; 334 #endif 335 #ifdef INET6 336 case PF_INET6: 337 error = in6_pcbbind(in6p, nam, p); 338 if (!error) { 339 /* mapped addr case */ 340 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr)) 341 tp->t_family = AF_INET; 342 else 343 tp->t_family = AF_INET6; 344 } 345 break; 346 #endif 347 } 348 break; 349 350 /* 351 * Prepare to accept connections. 352 */ 353 case PRU_LISTEN: 354 #ifdef INET 355 if (inp && inp->inp_lport == 0) { 356 error = in_pcbbind(inp, (struct mbuf *)0, 357 (struct proc *)0); 358 if (error) 359 break; 360 } 361 #endif 362 #ifdef INET6 363 if (in6p && in6p->in6p_lport == 0) { 364 error = in6_pcbbind(in6p, (struct mbuf *)0, 365 (struct proc *)0); 366 if (error) 367 break; 368 } 369 #endif 370 tp->t_state = TCPS_LISTEN; 371 break; 372 373 /* 374 * Initiate connection to peer. 375 * Create a template for use in transmissions on this connection. 376 * Enter SYN_SENT state, and mark socket as connecting. 377 * Start keep-alive timer, and seed output sequence space. 378 * Send initial segment on connection. 379 */ 380 case PRU_CONNECT: 381 #ifdef INET 382 if (inp) { 383 if (inp->inp_lport == 0) { 384 error = in_pcbbind(inp, (struct mbuf *)0, 385 (struct proc *)0); 386 if (error) 387 break; 388 } 389 error = in_pcbconnect(inp, nam); 390 } 391 #endif 392 #ifdef INET6 393 if (in6p) { 394 if (in6p->in6p_lport == 0) { 395 error = in6_pcbbind(in6p, (struct mbuf *)0, 396 (struct proc *)0); 397 if (error) 398 break; 399 } 400 error = in6_pcbconnect(in6p, nam); 401 if (!error) { 402 /* mapped addr case */ 403 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr)) 404 tp->t_family = AF_INET; 405 else 406 tp->t_family = AF_INET6; 407 } 408 } 409 #endif 410 if (error) 411 break; 412 tp->t_template = tcp_template(tp); 413 if (tp->t_template == 0) { 414 #ifdef INET 415 if (inp) 416 in_pcbdisconnect(inp); 417 #endif 418 #ifdef INET6 419 if (in6p) 420 in6_pcbdisconnect(in6p); 421 #endif 422 error = ENOBUFS; 423 break; 424 } 425 /* Compute window scaling to request. */ 426 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 427 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 428 tp->request_r_scale++; 429 soisconnecting(so); 430 tcpstat.tcps_connattempt++; 431 tp->t_state = TCPS_SYN_SENT; 432 TCP_TIMER_ARM(tp, TCPT_KEEP, TCPTV_KEEP_INIT); 433 tp->iss = tcp_new_iss(tp, 0); 434 tcp_sendseqinit(tp); 435 error = tcp_output(tp); 436 break; 437 438 /* 439 * Create a TCP connection between two sockets. 440 */ 441 case PRU_CONNECT2: 442 error = EOPNOTSUPP; 443 break; 444 445 /* 446 * Initiate disconnect from peer. 447 * If connection never passed embryonic stage, just drop; 448 * else if don't need to let data drain, then can just drop anyways, 449 * else have to begin TCP shutdown process: mark socket disconnecting, 450 * drain unread data, state switch to reflect user close, and 451 * send segment (e.g. FIN) to peer. Socket will be really disconnected 452 * when peer sends FIN and acks ours. 453 * 454 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 455 */ 456 case PRU_DISCONNECT: 457 tp = tcp_disconnect(tp); 458 break; 459 460 /* 461 * Accept a connection. Essentially all the work is 462 * done at higher levels; just return the address 463 * of the peer, storing through addr. 464 */ 465 case PRU_ACCEPT: 466 #ifdef INET 467 if (inp) 468 in_setpeeraddr(inp, nam); 469 #endif 470 #ifdef INET6 471 if (in6p) 472 in6_setpeeraddr(in6p, nam); 473 #endif 474 break; 475 476 /* 477 * Mark the connection as being incapable of further output. 478 */ 479 case PRU_SHUTDOWN: 480 socantsendmore(so); 481 tp = tcp_usrclosed(tp); 482 if (tp) 483 error = tcp_output(tp); 484 break; 485 486 /* 487 * After a receive, possibly send window update to peer. 488 */ 489 case PRU_RCVD: 490 /* 491 * soreceive() calls this function when a user receives 492 * ancillary data on a listening socket. We don't call 493 * tcp_output in such a case, since there is no header 494 * template for a listening socket and hence the kernel 495 * will panic. 496 */ 497 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0) 498 (void) tcp_output(tp); 499 break; 500 501 /* 502 * Do a send by putting data in output queue and updating urgent 503 * marker if URG set. Possibly send more data. 504 */ 505 case PRU_SEND: 506 if (control && control->m_len) { 507 m_freem(control); 508 m_freem(m); 509 error = EINVAL; 510 break; 511 } 512 sbappend(&so->so_snd, m); 513 error = tcp_output(tp); 514 break; 515 516 /* 517 * Abort the TCP. 518 */ 519 case PRU_ABORT: 520 tp = tcp_drop(tp, ECONNABORTED); 521 break; 522 523 case PRU_SENSE: 524 /* 525 * stat: don't bother with a blocksize. 526 */ 527 splx(s); 528 return (0); 529 530 case PRU_RCVOOB: 531 if (control && control->m_len) { 532 m_freem(control); 533 m_freem(m); 534 error = EINVAL; 535 break; 536 } 537 if ((so->so_oobmark == 0 && 538 (so->so_state & SS_RCVATMARK) == 0) || 539 so->so_options & SO_OOBINLINE || 540 tp->t_oobflags & TCPOOB_HADDATA) { 541 error = EINVAL; 542 break; 543 } 544 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 545 error = EWOULDBLOCK; 546 break; 547 } 548 m->m_len = 1; 549 *mtod(m, caddr_t) = tp->t_iobc; 550 if (((long)nam & MSG_PEEK) == 0) 551 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 552 break; 553 554 case PRU_SENDOOB: 555 if (sbspace(&so->so_snd) < -512) { 556 m_freem(m); 557 error = ENOBUFS; 558 break; 559 } 560 /* 561 * According to RFC961 (Assigned Protocols), 562 * the urgent pointer points to the last octet 563 * of urgent data. We continue, however, 564 * to consider it to indicate the first octet 565 * of data past the urgent section. 566 * Otherwise, snd_up should be one lower. 567 */ 568 sbappend(&so->so_snd, m); 569 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 570 tp->t_force = 1; 571 error = tcp_output(tp); 572 tp->t_force = 0; 573 break; 574 575 case PRU_SOCKADDR: 576 #ifdef INET 577 if (inp) 578 in_setsockaddr(inp, nam); 579 #endif 580 #ifdef INET6 581 if (in6p) 582 in6_setsockaddr(in6p, nam); 583 #endif 584 break; 585 586 case PRU_PEERADDR: 587 #ifdef INET 588 if (inp) 589 in_setpeeraddr(inp, nam); 590 #endif 591 #ifdef INET6 592 if (in6p) 593 in6_setpeeraddr(in6p, nam); 594 #endif 595 break; 596 597 /* 598 * TCP slow timer went off; going through this 599 * routine for tracing's sake. 600 */ 601 case PRU_SLOWTIMO: 602 tp = tcp_timers(tp, (long)nam); 603 req |= (long)nam << 8; /* for debug's sake */ 604 break; 605 606 default: 607 panic("tcp_usrreq"); 608 } 609 #ifdef TCP_DEBUG 610 if (tp && (so->so_options & SO_DEBUG)) 611 tcp_trace(TA_USER, ostate, tp, NULL, req); 612 #endif 613 614 release: 615 splx(s); 616 return (error); 617 } 618 619 int 620 tcp_ctloutput(op, so, level, optname, mp) 621 int op; 622 struct socket *so; 623 int level, optname; 624 struct mbuf **mp; 625 { 626 int error = 0, s; 627 struct inpcb *inp; 628 #ifdef INET6 629 struct in6pcb *in6p; 630 #endif 631 struct tcpcb *tp; 632 struct mbuf *m; 633 int i; 634 int family; /* family of the socket */ 635 636 family = so->so_proto->pr_domain->dom_family; 637 638 s = splsoftnet(); 639 switch (family) { 640 #ifdef INET 641 case PF_INET: 642 inp = sotoinpcb(so); 643 #ifdef INET6 644 in6p = NULL; 645 #endif 646 break; 647 #endif 648 #ifdef INET6 649 case PF_INET6: 650 inp = NULL; 651 in6p = sotoin6pcb(so); 652 break; 653 #endif 654 default: 655 splx(s); 656 return EAFNOSUPPORT; 657 } 658 #ifndef INET6 659 if (inp == NULL) 660 #else 661 if (inp == NULL && in6p == NULL) 662 #endif 663 { 664 splx(s); 665 if (op == PRCO_SETOPT && *mp) 666 (void) m_free(*mp); 667 return (ECONNRESET); 668 } 669 if (level != IPPROTO_TCP) { 670 switch (family) { 671 #ifdef INET 672 case PF_INET: 673 error = ip_ctloutput(op, so, level, optname, mp); 674 break; 675 #endif 676 #ifdef INET6 677 case PF_INET6: 678 error = ip6_ctloutput(op, so, level, optname, mp); 679 break; 680 #endif 681 } 682 splx(s); 683 return (error); 684 } 685 if (inp) 686 tp = intotcpcb(inp); 687 #ifdef INET6 688 else if (in6p) 689 tp = in6totcpcb(in6p); 690 #endif 691 else 692 tp = NULL; 693 694 switch (op) { 695 696 case PRCO_SETOPT: 697 m = *mp; 698 switch (optname) { 699 700 case TCP_NODELAY: 701 if (m == NULL || m->m_len < sizeof (int)) 702 error = EINVAL; 703 else if (*mtod(m, int *)) 704 tp->t_flags |= TF_NODELAY; 705 else 706 tp->t_flags &= ~TF_NODELAY; 707 break; 708 709 case TCP_MAXSEG: 710 if (m && (i = *mtod(m, int *)) > 0 && 711 i <= tp->t_peermss) 712 tp->t_peermss = i; /* limit on send size */ 713 else 714 error = EINVAL; 715 break; 716 717 default: 718 error = ENOPROTOOPT; 719 break; 720 } 721 if (m) 722 (void) m_free(m); 723 break; 724 725 case PRCO_GETOPT: 726 *mp = m = m_get(M_WAIT, MT_SOOPTS); 727 m->m_len = sizeof(int); 728 729 switch (optname) { 730 case TCP_NODELAY: 731 *mtod(m, int *) = tp->t_flags & TF_NODELAY; 732 break; 733 case TCP_MAXSEG: 734 *mtod(m, int *) = tp->t_peermss; 735 break; 736 default: 737 error = ENOPROTOOPT; 738 break; 739 } 740 break; 741 } 742 splx(s); 743 return (error); 744 } 745 746 #ifndef TCP_SENDSPACE 747 #define TCP_SENDSPACE 1024*16; 748 #endif 749 int tcp_sendspace = TCP_SENDSPACE; 750 #ifndef TCP_RECVSPACE 751 #define TCP_RECVSPACE 1024*16; 752 #endif 753 int tcp_recvspace = TCP_RECVSPACE; 754 755 /* 756 * Attach TCP protocol to socket, allocating 757 * internet protocol control block, tcp control block, 758 * bufer space, and entering LISTEN state if to accept connections. 759 */ 760 int 761 tcp_attach(so) 762 struct socket *so; 763 { 764 struct tcpcb *tp; 765 struct inpcb *inp; 766 #ifdef INET6 767 struct in6pcb *in6p; 768 #endif 769 int error; 770 int family; /* family of the socket */ 771 772 family = so->so_proto->pr_domain->dom_family; 773 774 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 775 error = soreserve(so, tcp_sendspace, tcp_recvspace); 776 if (error) 777 return (error); 778 } 779 switch (family) { 780 #ifdef INET 781 case PF_INET: 782 error = in_pcballoc(so, &tcbtable); 783 if (error) 784 return (error); 785 inp = sotoinpcb(so); 786 #ifdef INET6 787 in6p = NULL; 788 #endif 789 break; 790 #endif 791 #ifdef INET6 792 case PF_INET6: 793 error = in6_pcballoc(so, &tcb6); 794 if (error) 795 return (error); 796 inp = NULL; 797 in6p = sotoin6pcb(so); 798 break; 799 #endif 800 default: 801 return EAFNOSUPPORT; 802 } 803 if (inp) 804 tp = tcp_newtcpcb(family, (void *)inp); 805 #ifdef INET6 806 else if (in6p) 807 tp = tcp_newtcpcb(family, (void *)in6p); 808 #endif 809 else 810 tp = NULL; 811 812 if (tp == 0) { 813 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 814 815 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 816 #ifdef INET 817 if (inp) 818 in_pcbdetach(inp); 819 #endif 820 #ifdef INET6 821 if (in6p) 822 in6_pcbdetach(in6p); 823 #endif 824 so->so_state |= nofd; 825 return (ENOBUFS); 826 } 827 tp->t_state = TCPS_CLOSED; 828 return (0); 829 } 830 831 /* 832 * Initiate (or continue) disconnect. 833 * If embryonic state, just send reset (once). 834 * If in ``let data drain'' option and linger null, just drop. 835 * Otherwise (hard), mark socket disconnecting and drop 836 * current input data; switch states based on user close, and 837 * send segment to peer (with FIN). 838 */ 839 struct tcpcb * 840 tcp_disconnect(tp) 841 struct tcpcb *tp; 842 { 843 struct socket *so; 844 845 if (tp->t_inpcb) 846 so = tp->t_inpcb->inp_socket; 847 #ifdef INET6 848 else if (tp->t_in6pcb) 849 so = tp->t_in6pcb->in6p_socket; 850 #endif 851 else 852 so = NULL; 853 854 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 855 tp = tcp_close(tp); 856 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 857 tp = tcp_drop(tp, 0); 858 else { 859 soisdisconnecting(so); 860 sbflush(&so->so_rcv); 861 tp = tcp_usrclosed(tp); 862 if (tp) 863 (void) tcp_output(tp); 864 } 865 return (tp); 866 } 867 868 /* 869 * User issued close, and wish to trail through shutdown states: 870 * if never received SYN, just forget it. If got a SYN from peer, 871 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 872 * If already got a FIN from peer, then almost done; go to LAST_ACK 873 * state. In all other cases, have already sent FIN to peer (e.g. 874 * after PRU_SHUTDOWN), and just have to play tedious game waiting 875 * for peer to send FIN or not respond to keep-alives, etc. 876 * We can let the user exit from the close as soon as the FIN is acked. 877 */ 878 struct tcpcb * 879 tcp_usrclosed(tp) 880 struct tcpcb *tp; 881 { 882 883 switch (tp->t_state) { 884 885 case TCPS_CLOSED: 886 case TCPS_LISTEN: 887 case TCPS_SYN_SENT: 888 tp->t_state = TCPS_CLOSED; 889 tp = tcp_close(tp); 890 break; 891 892 case TCPS_SYN_RECEIVED: 893 case TCPS_ESTABLISHED: 894 tp->t_state = TCPS_FIN_WAIT_1; 895 break; 896 897 case TCPS_CLOSE_WAIT: 898 tp->t_state = TCPS_LAST_ACK; 899 break; 900 } 901 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 902 struct socket *so; 903 if (tp->t_inpcb) 904 so = tp->t_inpcb->inp_socket; 905 #ifdef INET6 906 else if (tp->t_in6pcb) 907 so = tp->t_in6pcb->in6p_socket; 908 #endif 909 else 910 so = NULL; 911 soisdisconnected(so); 912 /* 913 * If we are in FIN_WAIT_2, we arrived here because the 914 * application did a shutdown of the send side. Like the 915 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after 916 * a full close, we start a timer to make sure sockets are 917 * not left in FIN_WAIT_2 forever. 918 */ 919 if ((tp->t_state == TCPS_FIN_WAIT_2) && (tcp_maxidle > 0)) 920 TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle); 921 } 922 return (tp); 923 } 924 925 static const struct { 926 unsigned int valid : 1; 927 unsigned int rdonly : 1; 928 int *var; 929 int val; 930 } tcp_ctlvars[] = TCPCTL_VARIABLES; 931 932 /* 933 * Sysctl for tcp variables. 934 */ 935 int 936 tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen) 937 int *name; 938 u_int namelen; 939 void *oldp; 940 size_t *oldlenp; 941 void *newp; 942 size_t newlen; 943 { 944 945 /* All sysctl names at this level are terminal. */ 946 if (namelen != 1) 947 return (ENOTDIR); 948 949 if (name[0] < sizeof(tcp_ctlvars)/sizeof(tcp_ctlvars[0]) 950 && tcp_ctlvars[name[0]].valid) { 951 if (tcp_ctlvars[name[0]].rdonly) 952 return (sysctl_rdint(oldp, oldlenp, newp, 953 tcp_ctlvars[name[0]].val)); 954 else 955 return (sysctl_int(oldp, oldlenp, newp, newlen, 956 tcp_ctlvars[name[0]].var)); 957 } 958 959 return (ENOPROTOOPT); 960 } 961