1 /* $NetBSD: tcp_usrreq.c,v 1.84 2003/09/29 21:39:35 tls Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to The NetBSD Foundation 37 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation 38 * Facility, NASA Ames Research Center. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. All advertising materials mentioning features or use of this software 49 * must display the following acknowledgement: 50 * This product includes software developed by the NetBSD 51 * Foundation, Inc. and its contributors. 52 * 4. Neither the name of The NetBSD Foundation nor the names of its 53 * contributors may be used to endorse or promote products derived 54 * from this software without specific prior written permission. 55 * 56 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 57 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 58 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 59 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 60 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 61 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 62 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 63 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 64 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 65 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 66 * POSSIBILITY OF SUCH DAMAGE. 67 */ 68 69 /* 70 * Copyright (c) 1982, 1986, 1988, 1993, 1995 71 * The Regents of the University of California. All rights reserved. 72 * 73 * Redistribution and use in source and binary forms, with or without 74 * modification, are permitted provided that the following conditions 75 * are met: 76 * 1. Redistributions of source code must retain the above copyright 77 * notice, this list of conditions and the following disclaimer. 78 * 2. Redistributions in binary form must reproduce the above copyright 79 * notice, this list of conditions and the following disclaimer in the 80 * documentation and/or other materials provided with the distribution. 81 * 3. Neither the name of the University nor the names of its contributors 82 * may be used to endorse or promote products derived from this software 83 * without specific prior written permission. 84 * 85 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 86 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 87 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 88 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 89 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 90 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 91 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 92 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 93 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 94 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 95 * SUCH DAMAGE. 96 * 97 * @(#)tcp_usrreq.c 8.5 (Berkeley) 6/21/95 98 */ 99 100 #include <sys/cdefs.h> 101 __KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.84 2003/09/29 21:39:35 tls Exp $"); 102 103 #include "opt_inet.h" 104 #include "opt_ipsec.h" 105 #include "opt_tcp_debug.h" 106 #include "opt_mbuftrace.h" 107 108 #include <sys/param.h> 109 #include <sys/systm.h> 110 #include <sys/kernel.h> 111 #include <sys/malloc.h> 112 #include <sys/mbuf.h> 113 #include <sys/socket.h> 114 #include <sys/socketvar.h> 115 #include <sys/protosw.h> 116 #include <sys/errno.h> 117 #include <sys/stat.h> 118 #include <sys/proc.h> 119 #include <sys/domain.h> 120 #include <sys/sysctl.h> 121 122 #include <net/if.h> 123 #include <net/route.h> 124 125 #include <netinet/in.h> 126 #include <netinet/in_systm.h> 127 #include <netinet/in_var.h> 128 #include <netinet/ip.h> 129 #include <netinet/in_pcb.h> 130 #include <netinet/ip_var.h> 131 132 #ifdef INET6 133 #ifndef INET 134 #include <netinet/in.h> 135 #endif 136 #include <netinet/ip6.h> 137 #include <netinet6/in6_pcb.h> 138 #include <netinet6/ip6_var.h> 139 #endif 140 141 #include <netinet/tcp.h> 142 #include <netinet/tcp_fsm.h> 143 #include <netinet/tcp_seq.h> 144 #include <netinet/tcp_timer.h> 145 #include <netinet/tcp_var.h> 146 #include <netinet/tcpip.h> 147 #include <netinet/tcp_debug.h> 148 149 #include "opt_tcp_space.h" 150 151 #ifdef IPSEC 152 #include <netinet6/ipsec.h> 153 #endif /*IPSEC*/ 154 155 /* 156 * TCP protocol interface to socket abstraction. 157 */ 158 extern char *tcpstates[]; 159 160 static int tcp_sysctl_ident(int *, u_int, void *, size_t *, void *, size_t); 161 162 /* 163 * Process a TCP user request for TCP tb. If this is a send request 164 * then m is the mbuf chain of send data. If this is a timer expiration 165 * (called from the software clock routine), then timertype tells which timer. 166 */ 167 /*ARGSUSED*/ 168 int 169 tcp_usrreq(so, req, m, nam, control, p) 170 struct socket *so; 171 int req; 172 struct mbuf *m, *nam, *control; 173 struct proc *p; 174 { 175 struct inpcb *inp; 176 #ifdef INET6 177 struct in6pcb *in6p; 178 #endif 179 struct tcpcb *tp = NULL; 180 int s; 181 int error = 0; 182 #ifdef TCP_DEBUG 183 int ostate = 0; 184 #endif 185 int family; /* family of the socket */ 186 187 family = so->so_proto->pr_domain->dom_family; 188 189 if (req == PRU_CONTROL) { 190 switch (family) { 191 #ifdef INET 192 case PF_INET: 193 return (in_control(so, (long)m, (caddr_t)nam, 194 (struct ifnet *)control, p)); 195 #endif 196 #ifdef INET6 197 case PF_INET6: 198 return (in6_control(so, (long)m, (caddr_t)nam, 199 (struct ifnet *)control, p)); 200 #endif 201 default: 202 return EAFNOSUPPORT; 203 } 204 } 205 206 if (req == PRU_PURGEIF) { 207 switch (family) { 208 #ifdef INET 209 case PF_INET: 210 in_pcbpurgeif0(&tcbtable, (struct ifnet *)control); 211 in_purgeif((struct ifnet *)control); 212 in_pcbpurgeif(&tcbtable, (struct ifnet *)control); 213 break; 214 #endif 215 #ifdef INET6 216 case PF_INET6: 217 in6_pcbpurgeif0(&tcbtable, (struct ifnet *)control); 218 in6_purgeif((struct ifnet *)control); 219 in6_pcbpurgeif(&tcbtable, (struct ifnet *)control); 220 break; 221 #endif 222 default: 223 return (EAFNOSUPPORT); 224 } 225 return (0); 226 } 227 228 s = splsoftnet(); 229 switch (family) { 230 #ifdef INET 231 case PF_INET: 232 inp = sotoinpcb(so); 233 #ifdef INET6 234 in6p = NULL; 235 #endif 236 break; 237 #endif 238 #ifdef INET6 239 case PF_INET6: 240 inp = NULL; 241 in6p = sotoin6pcb(so); 242 break; 243 #endif 244 default: 245 splx(s); 246 return EAFNOSUPPORT; 247 } 248 249 #ifdef DIAGNOSTIC 250 #ifdef INET6 251 if (inp && in6p) 252 panic("tcp_usrreq: both inp and in6p set to non-NULL"); 253 #endif 254 if (req != PRU_SEND && req != PRU_SENDOOB && control) 255 panic("tcp_usrreq: unexpected control mbuf"); 256 #endif 257 /* 258 * When a TCP is attached to a socket, then there will be 259 * a (struct inpcb) pointed at by the socket, and this 260 * structure will point at a subsidary (struct tcpcb). 261 */ 262 #ifndef INET6 263 if (inp == 0 && req != PRU_ATTACH) 264 #else 265 if ((inp == 0 && in6p == 0) && req != PRU_ATTACH) 266 #endif 267 { 268 error = EINVAL; 269 goto release; 270 } 271 #ifdef INET 272 if (inp) { 273 tp = intotcpcb(inp); 274 /* WHAT IF TP IS 0? */ 275 #ifdef KPROF 276 tcp_acounts[tp->t_state][req]++; 277 #endif 278 #ifdef TCP_DEBUG 279 ostate = tp->t_state; 280 #endif 281 } 282 #endif 283 #ifdef INET6 284 if (in6p) { 285 tp = in6totcpcb(in6p); 286 /* WHAT IF TP IS 0? */ 287 #ifdef KPROF 288 tcp_acounts[tp->t_state][req]++; 289 #endif 290 #ifdef TCP_DEBUG 291 ostate = tp->t_state; 292 #endif 293 } 294 #endif 295 296 switch (req) { 297 298 /* 299 * TCP attaches to socket via PRU_ATTACH, reserving space, 300 * and an internet control block. 301 */ 302 case PRU_ATTACH: 303 #ifndef INET6 304 if (inp != 0) 305 #else 306 if (inp != 0 || in6p != 0) 307 #endif 308 { 309 error = EISCONN; 310 break; 311 } 312 error = tcp_attach(so); 313 if (error) 314 break; 315 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 316 so->so_linger = TCP_LINGERTIME; 317 tp = sototcpcb(so); 318 break; 319 320 /* 321 * PRU_DETACH detaches the TCP protocol from the socket. 322 */ 323 case PRU_DETACH: 324 tp = tcp_disconnect(tp); 325 break; 326 327 /* 328 * Give the socket an address. 329 */ 330 case PRU_BIND: 331 switch (family) { 332 #ifdef INET 333 case PF_INET: 334 error = in_pcbbind(inp, nam, p); 335 break; 336 #endif 337 #ifdef INET6 338 case PF_INET6: 339 error = in6_pcbbind(in6p, nam, p); 340 if (!error) { 341 /* mapped addr case */ 342 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr)) 343 tp->t_family = AF_INET; 344 else 345 tp->t_family = AF_INET6; 346 } 347 break; 348 #endif 349 } 350 break; 351 352 /* 353 * Prepare to accept connections. 354 */ 355 case PRU_LISTEN: 356 #ifdef INET 357 if (inp && inp->inp_lport == 0) { 358 error = in_pcbbind(inp, (struct mbuf *)0, 359 (struct proc *)0); 360 if (error) 361 break; 362 } 363 #endif 364 #ifdef INET6 365 if (in6p && in6p->in6p_lport == 0) { 366 error = in6_pcbbind(in6p, (struct mbuf *)0, 367 (struct proc *)0); 368 if (error) 369 break; 370 } 371 #endif 372 tp->t_state = TCPS_LISTEN; 373 break; 374 375 /* 376 * Initiate connection to peer. 377 * Create a template for use in transmissions on this connection. 378 * Enter SYN_SENT state, and mark socket as connecting. 379 * Start keep-alive timer, and seed output sequence space. 380 * Send initial segment on connection. 381 */ 382 case PRU_CONNECT: 383 #ifdef INET 384 if (inp) { 385 if (inp->inp_lport == 0) { 386 error = in_pcbbind(inp, (struct mbuf *)0, 387 (struct proc *)0); 388 if (error) 389 break; 390 } 391 error = in_pcbconnect(inp, nam); 392 } 393 #endif 394 #ifdef INET6 395 if (in6p) { 396 if (in6p->in6p_lport == 0) { 397 error = in6_pcbbind(in6p, (struct mbuf *)0, 398 (struct proc *)0); 399 if (error) 400 break; 401 } 402 error = in6_pcbconnect(in6p, nam); 403 if (!error) { 404 /* mapped addr case */ 405 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr)) 406 tp->t_family = AF_INET; 407 else 408 tp->t_family = AF_INET6; 409 } 410 } 411 #endif 412 if (error) 413 break; 414 tp->t_template = tcp_template(tp); 415 if (tp->t_template == 0) { 416 #ifdef INET 417 if (inp) 418 in_pcbdisconnect(inp); 419 #endif 420 #ifdef INET6 421 if (in6p) 422 in6_pcbdisconnect(in6p); 423 #endif 424 error = ENOBUFS; 425 break; 426 } 427 /* Compute window scaling to request. */ 428 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 429 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 430 tp->request_r_scale++; 431 soisconnecting(so); 432 tcpstat.tcps_connattempt++; 433 tp->t_state = TCPS_SYN_SENT; 434 TCP_TIMER_ARM(tp, TCPT_KEEP, TCPTV_KEEP_INIT); 435 tp->iss = tcp_new_iss(tp, 0); 436 tcp_sendseqinit(tp); 437 error = tcp_output(tp); 438 break; 439 440 /* 441 * Create a TCP connection between two sockets. 442 */ 443 case PRU_CONNECT2: 444 error = EOPNOTSUPP; 445 break; 446 447 /* 448 * Initiate disconnect from peer. 449 * If connection never passed embryonic stage, just drop; 450 * else if don't need to let data drain, then can just drop anyways, 451 * else have to begin TCP shutdown process: mark socket disconnecting, 452 * drain unread data, state switch to reflect user close, and 453 * send segment (e.g. FIN) to peer. Socket will be really disconnected 454 * when peer sends FIN and acks ours. 455 * 456 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 457 */ 458 case PRU_DISCONNECT: 459 tp = tcp_disconnect(tp); 460 break; 461 462 /* 463 * Accept a connection. Essentially all the work is 464 * done at higher levels; just return the address 465 * of the peer, storing through addr. 466 */ 467 case PRU_ACCEPT: 468 #ifdef INET 469 if (inp) 470 in_setpeeraddr(inp, nam); 471 #endif 472 #ifdef INET6 473 if (in6p) 474 in6_setpeeraddr(in6p, nam); 475 #endif 476 break; 477 478 /* 479 * Mark the connection as being incapable of further output. 480 */ 481 case PRU_SHUTDOWN: 482 socantsendmore(so); 483 tp = tcp_usrclosed(tp); 484 if (tp) 485 error = tcp_output(tp); 486 break; 487 488 /* 489 * After a receive, possibly send window update to peer. 490 */ 491 case PRU_RCVD: 492 /* 493 * soreceive() calls this function when a user receives 494 * ancillary data on a listening socket. We don't call 495 * tcp_output in such a case, since there is no header 496 * template for a listening socket and hence the kernel 497 * will panic. 498 */ 499 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0) 500 (void) tcp_output(tp); 501 break; 502 503 /* 504 * Do a send by putting data in output queue and updating urgent 505 * marker if URG set. Possibly send more data. 506 */ 507 case PRU_SEND: 508 if (control && control->m_len) { 509 m_freem(control); 510 m_freem(m); 511 error = EINVAL; 512 break; 513 } 514 sbappendstream(&so->so_snd, m); 515 error = tcp_output(tp); 516 break; 517 518 /* 519 * Abort the TCP. 520 */ 521 case PRU_ABORT: 522 tp = tcp_drop(tp, ECONNABORTED); 523 break; 524 525 case PRU_SENSE: 526 /* 527 * stat: don't bother with a blocksize. 528 */ 529 splx(s); 530 return (0); 531 532 case PRU_RCVOOB: 533 if (control && control->m_len) { 534 m_freem(control); 535 m_freem(m); 536 error = EINVAL; 537 break; 538 } 539 if ((so->so_oobmark == 0 && 540 (so->so_state & SS_RCVATMARK) == 0) || 541 so->so_options & SO_OOBINLINE || 542 tp->t_oobflags & TCPOOB_HADDATA) { 543 error = EINVAL; 544 break; 545 } 546 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 547 error = EWOULDBLOCK; 548 break; 549 } 550 m->m_len = 1; 551 *mtod(m, caddr_t) = tp->t_iobc; 552 if (((long)nam & MSG_PEEK) == 0) 553 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 554 break; 555 556 case PRU_SENDOOB: 557 if (sbspace(&so->so_snd) < -512) { 558 m_freem(m); 559 error = ENOBUFS; 560 break; 561 } 562 /* 563 * According to RFC961 (Assigned Protocols), 564 * the urgent pointer points to the last octet 565 * of urgent data. We continue, however, 566 * to consider it to indicate the first octet 567 * of data past the urgent section. 568 * Otherwise, snd_up should be one lower. 569 */ 570 sbappendstream(&so->so_snd, m); 571 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 572 tp->t_force = 1; 573 error = tcp_output(tp); 574 tp->t_force = 0; 575 break; 576 577 case PRU_SOCKADDR: 578 #ifdef INET 579 if (inp) 580 in_setsockaddr(inp, nam); 581 #endif 582 #ifdef INET6 583 if (in6p) 584 in6_setsockaddr(in6p, nam); 585 #endif 586 break; 587 588 case PRU_PEERADDR: 589 #ifdef INET 590 if (inp) 591 in_setpeeraddr(inp, nam); 592 #endif 593 #ifdef INET6 594 if (in6p) 595 in6_setpeeraddr(in6p, nam); 596 #endif 597 break; 598 599 default: 600 panic("tcp_usrreq"); 601 } 602 #ifdef TCP_DEBUG 603 if (tp && (so->so_options & SO_DEBUG)) 604 tcp_trace(TA_USER, ostate, tp, NULL, req); 605 #endif 606 607 release: 608 splx(s); 609 return (error); 610 } 611 612 int 613 tcp_ctloutput(op, so, level, optname, mp) 614 int op; 615 struct socket *so; 616 int level, optname; 617 struct mbuf **mp; 618 { 619 int error = 0, s; 620 struct inpcb *inp; 621 #ifdef INET6 622 struct in6pcb *in6p; 623 #endif 624 struct tcpcb *tp; 625 struct mbuf *m; 626 int i; 627 int family; /* family of the socket */ 628 629 family = so->so_proto->pr_domain->dom_family; 630 631 s = splsoftnet(); 632 switch (family) { 633 #ifdef INET 634 case PF_INET: 635 inp = sotoinpcb(so); 636 #ifdef INET6 637 in6p = NULL; 638 #endif 639 break; 640 #endif 641 #ifdef INET6 642 case PF_INET6: 643 inp = NULL; 644 in6p = sotoin6pcb(so); 645 break; 646 #endif 647 default: 648 splx(s); 649 return EAFNOSUPPORT; 650 } 651 #ifndef INET6 652 if (inp == NULL) 653 #else 654 if (inp == NULL && in6p == NULL) 655 #endif 656 { 657 splx(s); 658 if (op == PRCO_SETOPT && *mp) 659 (void) m_free(*mp); 660 return (ECONNRESET); 661 } 662 if (level != IPPROTO_TCP) { 663 switch (family) { 664 #ifdef INET 665 case PF_INET: 666 error = ip_ctloutput(op, so, level, optname, mp); 667 break; 668 #endif 669 #ifdef INET6 670 case PF_INET6: 671 error = ip6_ctloutput(op, so, level, optname, mp); 672 break; 673 #endif 674 } 675 splx(s); 676 return (error); 677 } 678 if (inp) 679 tp = intotcpcb(inp); 680 #ifdef INET6 681 else if (in6p) 682 tp = in6totcpcb(in6p); 683 #endif 684 else 685 tp = NULL; 686 687 switch (op) { 688 689 case PRCO_SETOPT: 690 m = *mp; 691 switch (optname) { 692 693 case TCP_NODELAY: 694 if (m == NULL || m->m_len < sizeof (int)) 695 error = EINVAL; 696 else if (*mtod(m, int *)) 697 tp->t_flags |= TF_NODELAY; 698 else 699 tp->t_flags &= ~TF_NODELAY; 700 break; 701 702 case TCP_MAXSEG: 703 if (m && (i = *mtod(m, int *)) > 0 && 704 i <= tp->t_peermss) 705 tp->t_peermss = i; /* limit on send size */ 706 else 707 error = EINVAL; 708 break; 709 710 default: 711 error = ENOPROTOOPT; 712 break; 713 } 714 if (m) 715 (void) m_free(m); 716 break; 717 718 case PRCO_GETOPT: 719 *mp = m = m_get(M_WAIT, MT_SOOPTS); 720 m->m_len = sizeof(int); 721 MCLAIM(m, so->so_mowner); 722 723 switch (optname) { 724 case TCP_NODELAY: 725 *mtod(m, int *) = tp->t_flags & TF_NODELAY; 726 break; 727 case TCP_MAXSEG: 728 *mtod(m, int *) = tp->t_peermss; 729 break; 730 default: 731 error = ENOPROTOOPT; 732 break; 733 } 734 break; 735 } 736 splx(s); 737 return (error); 738 } 739 740 #ifndef TCP_SENDSPACE 741 #define TCP_SENDSPACE 1024*32 742 #endif 743 int tcp_sendspace = TCP_SENDSPACE; 744 #ifndef TCP_RECVSPACE 745 #define TCP_RECVSPACE 1024*32 746 #endif 747 int tcp_recvspace = TCP_RECVSPACE; 748 749 /* 750 * Attach TCP protocol to socket, allocating 751 * internet protocol control block, tcp control block, 752 * bufer space, and entering LISTEN state if to accept connections. 753 */ 754 int 755 tcp_attach(so) 756 struct socket *so; 757 { 758 struct tcpcb *tp; 759 struct inpcb *inp; 760 #ifdef INET6 761 struct in6pcb *in6p; 762 #endif 763 int error; 764 int family; /* family of the socket */ 765 766 family = so->so_proto->pr_domain->dom_family; 767 768 #ifdef MBUFTRACE 769 so->so_mowner = &tcp_mowner; 770 so->so_rcv.sb_mowner = &tcp_rx_mowner; 771 so->so_snd.sb_mowner = &tcp_tx_mowner; 772 #endif 773 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 774 error = soreserve(so, tcp_sendspace, tcp_recvspace); 775 if (error) 776 return (error); 777 } 778 switch (family) { 779 #ifdef INET 780 case PF_INET: 781 error = in_pcballoc(so, &tcbtable); 782 if (error) 783 return (error); 784 inp = sotoinpcb(so); 785 #ifdef INET6 786 in6p = NULL; 787 #endif 788 break; 789 #endif 790 #ifdef INET6 791 case PF_INET6: 792 error = in6_pcballoc(so, &tcbtable); 793 if (error) 794 return (error); 795 inp = NULL; 796 in6p = sotoin6pcb(so); 797 break; 798 #endif 799 default: 800 return EAFNOSUPPORT; 801 } 802 if (inp) 803 tp = tcp_newtcpcb(family, (void *)inp); 804 #ifdef INET6 805 else if (in6p) 806 tp = tcp_newtcpcb(family, (void *)in6p); 807 #endif 808 else 809 tp = NULL; 810 811 if (tp == 0) { 812 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 813 814 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 815 #ifdef INET 816 if (inp) 817 in_pcbdetach(inp); 818 #endif 819 #ifdef INET6 820 if (in6p) 821 in6_pcbdetach(in6p); 822 #endif 823 so->so_state |= nofd; 824 return (ENOBUFS); 825 } 826 tp->t_state = TCPS_CLOSED; 827 return (0); 828 } 829 830 /* 831 * Initiate (or continue) disconnect. 832 * If embryonic state, just send reset (once). 833 * If in ``let data drain'' option and linger null, just drop. 834 * Otherwise (hard), mark socket disconnecting and drop 835 * current input data; switch states based on user close, and 836 * send segment to peer (with FIN). 837 */ 838 struct tcpcb * 839 tcp_disconnect(tp) 840 struct tcpcb *tp; 841 { 842 struct socket *so; 843 844 if (tp->t_inpcb) 845 so = tp->t_inpcb->inp_socket; 846 #ifdef INET6 847 else if (tp->t_in6pcb) 848 so = tp->t_in6pcb->in6p_socket; 849 #endif 850 else 851 so = NULL; 852 853 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 854 tp = tcp_close(tp); 855 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 856 tp = tcp_drop(tp, 0); 857 else { 858 soisdisconnecting(so); 859 sbflush(&so->so_rcv); 860 tp = tcp_usrclosed(tp); 861 if (tp) 862 (void) tcp_output(tp); 863 } 864 return (tp); 865 } 866 867 /* 868 * User issued close, and wish to trail through shutdown states: 869 * if never received SYN, just forget it. If got a SYN from peer, 870 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 871 * If already got a FIN from peer, then almost done; go to LAST_ACK 872 * state. In all other cases, have already sent FIN to peer (e.g. 873 * after PRU_SHUTDOWN), and just have to play tedious game waiting 874 * for peer to send FIN or not respond to keep-alives, etc. 875 * We can let the user exit from the close as soon as the FIN is acked. 876 */ 877 struct tcpcb * 878 tcp_usrclosed(tp) 879 struct tcpcb *tp; 880 { 881 882 switch (tp->t_state) { 883 884 case TCPS_CLOSED: 885 case TCPS_LISTEN: 886 case TCPS_SYN_SENT: 887 tp->t_state = TCPS_CLOSED; 888 tp = tcp_close(tp); 889 break; 890 891 case TCPS_SYN_RECEIVED: 892 case TCPS_ESTABLISHED: 893 tp->t_state = TCPS_FIN_WAIT_1; 894 break; 895 896 case TCPS_CLOSE_WAIT: 897 tp->t_state = TCPS_LAST_ACK; 898 break; 899 } 900 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 901 struct socket *so; 902 if (tp->t_inpcb) 903 so = tp->t_inpcb->inp_socket; 904 #ifdef INET6 905 else if (tp->t_in6pcb) 906 so = tp->t_in6pcb->in6p_socket; 907 #endif 908 else 909 so = NULL; 910 soisdisconnected(so); 911 /* 912 * If we are in FIN_WAIT_2, we arrived here because the 913 * application did a shutdown of the send side. Like the 914 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after 915 * a full close, we start a timer to make sure sockets are 916 * not left in FIN_WAIT_2 forever. 917 */ 918 if ((tp->t_state == TCPS_FIN_WAIT_2) && (tcp_maxidle > 0)) 919 TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle); 920 } 921 return (tp); 922 } 923 924 static const struct { 925 unsigned int valid : 1; 926 unsigned int rdonly : 1; 927 int *var; 928 int val; 929 } tcp_ctlvars[] = TCPCTL_VARIABLES; 930 931 /* 932 * Sysctl for tcp variables. 933 */ 934 int 935 tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen) 936 int *name; 937 u_int namelen; 938 void *oldp; 939 size_t *oldlenp; 940 void *newp; 941 size_t newlen; 942 { 943 int error, saved_value = 0; 944 945 if (name[0] == TCPCTL_IDENT) 946 return tcp_sysctl_ident(&name[1], namelen - 1, oldp, oldlenp, 947 newp, newlen); 948 949 /* All remaining sysctl names at this level are terminal. */ 950 if (namelen != 1) 951 return (ENOTDIR); 952 953 if (name[0] < sizeof(tcp_ctlvars)/sizeof(tcp_ctlvars[0]) 954 && tcp_ctlvars[name[0]].valid) { 955 if (tcp_ctlvars[name[0]].rdonly) { 956 return (sysctl_rdint(oldp, oldlenp, newp, 957 tcp_ctlvars[name[0]].val)); 958 } else { 959 switch (name[0]) { 960 case TCPCTL_MSSDFLT: 961 saved_value = tcp_mssdflt; 962 break; 963 } 964 error = sysctl_int(oldp, oldlenp, newp, newlen, 965 tcp_ctlvars[name[0]].var); 966 if (error) 967 return (error); 968 switch (name[0]) { 969 case TCPCTL_MSSDFLT: 970 if (tcp_mssdflt < 32) { 971 tcp_mssdflt = saved_value; 972 return (EINVAL); 973 } 974 break; 975 } 976 return (0); 977 } 978 } 979 980 return (ENOPROTOOPT); 981 } 982 983 984 static int 985 tcp_sysctl_ident(int *name, u_int namelen, void *oldp, size_t *oldlenp, 986 void *newp, size_t newlen) 987 { 988 struct inpcb *inb; 989 struct in_addr laddr, raddr; 990 u_int lport, rport; 991 uid_t uid; 992 int error; 993 994 if (*oldlenp != sizeof(uid_t)) 995 return ENOMEM; 996 if (!oldp || *oldlenp != sizeof(uid_t)) 997 return ENOMEM; 998 if (namelen != 4) 999 return EINVAL; 1000 1001 raddr.s_addr = (uint32_t)name[0]; 1002 rport = (u_int)name[1]; 1003 laddr.s_addr = (uint32_t)name[2]; 1004 lport = (u_int)name[3]; 1005 1006 inb = in_pcblookup_connect(&tcbtable, raddr, rport, laddr, lport); 1007 if (inb) { 1008 struct socket *sockp = inb->inp_socket; 1009 if (sockp) 1010 uid = sockp->so_uid; 1011 else 1012 return ESRCH; 1013 } else 1014 return ESRCH; 1015 1016 if ((error = copyout(&uid, oldp, sizeof(uid))) != 0) 1017 return error; 1018 1019 return 0; 1020 } 1021