1 /* $NetBSD: tcp_usrreq.c,v 1.81 2003/06/29 22:32:00 fvdl Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to The NetBSD Foundation 37 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation 38 * Facility, NASA Ames Research Center. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. All advertising materials mentioning features or use of this software 49 * must display the following acknowledgement: 50 * This product includes software developed by the NetBSD 51 * Foundation, Inc. and its contributors. 52 * 4. Neither the name of The NetBSD Foundation nor the names of its 53 * contributors may be used to endorse or promote products derived 54 * from this software without specific prior written permission. 55 * 56 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 57 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 58 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 59 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 60 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 61 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 62 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 63 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 64 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 65 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 66 * POSSIBILITY OF SUCH DAMAGE. 67 */ 68 69 /* 70 * Copyright (c) 1982, 1986, 1988, 1993, 1995 71 * The Regents of the University of California. All rights reserved. 72 * 73 * Redistribution and use in source and binary forms, with or without 74 * modification, are permitted provided that the following conditions 75 * are met: 76 * 1. Redistributions of source code must retain the above copyright 77 * notice, this list of conditions and the following disclaimer. 78 * 2. Redistributions in binary form must reproduce the above copyright 79 * notice, this list of conditions and the following disclaimer in the 80 * documentation and/or other materials provided with the distribution. 81 * 3. All advertising materials mentioning features or use of this software 82 * must display the following acknowledgement: 83 * This product includes software developed by the University of 84 * California, Berkeley and its contributors. 85 * 4. Neither the name of the University nor the names of its contributors 86 * may be used to endorse or promote products derived from this software 87 * without specific prior written permission. 88 * 89 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 90 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 91 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 92 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 93 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 94 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 95 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 96 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 97 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 98 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 99 * SUCH DAMAGE. 100 * 101 * @(#)tcp_usrreq.c 8.5 (Berkeley) 6/21/95 102 */ 103 104 #include <sys/cdefs.h> 105 __KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.81 2003/06/29 22:32:00 fvdl Exp $"); 106 107 #include "opt_inet.h" 108 #include "opt_ipsec.h" 109 #include "opt_tcp_debug.h" 110 #include "opt_mbuftrace.h" 111 112 #include <sys/param.h> 113 #include <sys/systm.h> 114 #include <sys/kernel.h> 115 #include <sys/malloc.h> 116 #include <sys/mbuf.h> 117 #include <sys/socket.h> 118 #include <sys/socketvar.h> 119 #include <sys/protosw.h> 120 #include <sys/errno.h> 121 #include <sys/stat.h> 122 #include <sys/proc.h> 123 #include <sys/domain.h> 124 #include <sys/sysctl.h> 125 126 #include <net/if.h> 127 #include <net/route.h> 128 129 #include <netinet/in.h> 130 #include <netinet/in_systm.h> 131 #include <netinet/in_var.h> 132 #include <netinet/ip.h> 133 #include <netinet/in_pcb.h> 134 #include <netinet/ip_var.h> 135 136 #ifdef INET6 137 #ifndef INET 138 #include <netinet/in.h> 139 #endif 140 #include <netinet/ip6.h> 141 #include <netinet6/in6_pcb.h> 142 #include <netinet6/ip6_var.h> 143 #endif 144 145 #include <netinet/tcp.h> 146 #include <netinet/tcp_fsm.h> 147 #include <netinet/tcp_seq.h> 148 #include <netinet/tcp_timer.h> 149 #include <netinet/tcp_var.h> 150 #include <netinet/tcpip.h> 151 #include <netinet/tcp_debug.h> 152 153 #include "opt_tcp_space.h" 154 155 #ifdef IPSEC 156 #include <netinet6/ipsec.h> 157 #endif /*IPSEC*/ 158 159 /* 160 * TCP protocol interface to socket abstraction. 161 */ 162 extern char *tcpstates[]; 163 164 static int tcp_sysctl_ident(int *, u_int, void *, size_t *, void *, size_t); 165 166 /* 167 * Process a TCP user request for TCP tb. If this is a send request 168 * then m is the mbuf chain of send data. If this is a timer expiration 169 * (called from the software clock routine), then timertype tells which timer. 170 */ 171 /*ARGSUSED*/ 172 int 173 tcp_usrreq(so, req, m, nam, control, p) 174 struct socket *so; 175 int req; 176 struct mbuf *m, *nam, *control; 177 struct proc *p; 178 { 179 struct inpcb *inp; 180 #ifdef INET6 181 struct in6pcb *in6p; 182 #endif 183 struct tcpcb *tp = NULL; 184 int s; 185 int error = 0; 186 #ifdef TCP_DEBUG 187 int ostate = 0; 188 #endif 189 int family; /* family of the socket */ 190 191 family = so->so_proto->pr_domain->dom_family; 192 193 if (req == PRU_CONTROL) { 194 switch (family) { 195 #ifdef INET 196 case PF_INET: 197 return (in_control(so, (long)m, (caddr_t)nam, 198 (struct ifnet *)control, p)); 199 #endif 200 #ifdef INET6 201 case PF_INET6: 202 return (in6_control(so, (long)m, (caddr_t)nam, 203 (struct ifnet *)control, p)); 204 #endif 205 default: 206 return EAFNOSUPPORT; 207 } 208 } 209 210 if (req == PRU_PURGEIF) { 211 switch (family) { 212 #ifdef INET 213 case PF_INET: 214 in_pcbpurgeif0(&tcbtable, (struct ifnet *)control); 215 in_purgeif((struct ifnet *)control); 216 in_pcbpurgeif(&tcbtable, (struct ifnet *)control); 217 break; 218 #endif 219 #ifdef INET6 220 case PF_INET6: 221 in6_pcbpurgeif0(&tcb6, (struct ifnet *)control); 222 in6_purgeif((struct ifnet *)control); 223 in6_pcbpurgeif(&tcb6, (struct ifnet *)control); 224 break; 225 #endif 226 default: 227 return (EAFNOSUPPORT); 228 } 229 return (0); 230 } 231 232 s = splsoftnet(); 233 switch (family) { 234 #ifdef INET 235 case PF_INET: 236 inp = sotoinpcb(so); 237 #ifdef INET6 238 in6p = NULL; 239 #endif 240 break; 241 #endif 242 #ifdef INET6 243 case PF_INET6: 244 inp = NULL; 245 in6p = sotoin6pcb(so); 246 break; 247 #endif 248 default: 249 splx(s); 250 return EAFNOSUPPORT; 251 } 252 253 #ifdef DIAGNOSTIC 254 #ifdef INET6 255 if (inp && in6p) 256 panic("tcp_usrreq: both inp and in6p set to non-NULL"); 257 #endif 258 if (req != PRU_SEND && req != PRU_SENDOOB && control) 259 panic("tcp_usrreq: unexpected control mbuf"); 260 #endif 261 /* 262 * When a TCP is attached to a socket, then there will be 263 * a (struct inpcb) pointed at by the socket, and this 264 * structure will point at a subsidary (struct tcpcb). 265 */ 266 #ifndef INET6 267 if (inp == 0 && req != PRU_ATTACH) 268 #else 269 if ((inp == 0 && in6p == 0) && req != PRU_ATTACH) 270 #endif 271 { 272 error = EINVAL; 273 goto release; 274 } 275 #ifdef INET 276 if (inp) { 277 tp = intotcpcb(inp); 278 /* WHAT IF TP IS 0? */ 279 #ifdef KPROF 280 tcp_acounts[tp->t_state][req]++; 281 #endif 282 #ifdef TCP_DEBUG 283 ostate = tp->t_state; 284 #endif 285 } 286 #endif 287 #ifdef INET6 288 if (in6p) { 289 tp = in6totcpcb(in6p); 290 /* WHAT IF TP IS 0? */ 291 #ifdef KPROF 292 tcp_acounts[tp->t_state][req]++; 293 #endif 294 #ifdef TCP_DEBUG 295 ostate = tp->t_state; 296 #endif 297 } 298 #endif 299 300 switch (req) { 301 302 /* 303 * TCP attaches to socket via PRU_ATTACH, reserving space, 304 * and an internet control block. 305 */ 306 case PRU_ATTACH: 307 #ifndef INET6 308 if (inp != 0) 309 #else 310 if (inp != 0 || in6p != 0) 311 #endif 312 { 313 error = EISCONN; 314 break; 315 } 316 error = tcp_attach(so); 317 if (error) 318 break; 319 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 320 so->so_linger = TCP_LINGERTIME; 321 tp = sototcpcb(so); 322 break; 323 324 /* 325 * PRU_DETACH detaches the TCP protocol from the socket. 326 */ 327 case PRU_DETACH: 328 tp = tcp_disconnect(tp); 329 break; 330 331 /* 332 * Give the socket an address. 333 */ 334 case PRU_BIND: 335 switch (family) { 336 #ifdef INET 337 case PF_INET: 338 error = in_pcbbind(inp, nam, p); 339 break; 340 #endif 341 #ifdef INET6 342 case PF_INET6: 343 error = in6_pcbbind(in6p, nam, p); 344 if (!error) { 345 /* mapped addr case */ 346 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr)) 347 tp->t_family = AF_INET; 348 else 349 tp->t_family = AF_INET6; 350 } 351 break; 352 #endif 353 } 354 break; 355 356 /* 357 * Prepare to accept connections. 358 */ 359 case PRU_LISTEN: 360 #ifdef INET 361 if (inp && inp->inp_lport == 0) { 362 error = in_pcbbind(inp, (struct mbuf *)0, 363 (struct proc *)0); 364 if (error) 365 break; 366 } 367 #endif 368 #ifdef INET6 369 if (in6p && in6p->in6p_lport == 0) { 370 error = in6_pcbbind(in6p, (struct mbuf *)0, 371 (struct proc *)0); 372 if (error) 373 break; 374 } 375 #endif 376 tp->t_state = TCPS_LISTEN; 377 break; 378 379 /* 380 * Initiate connection to peer. 381 * Create a template for use in transmissions on this connection. 382 * Enter SYN_SENT state, and mark socket as connecting. 383 * Start keep-alive timer, and seed output sequence space. 384 * Send initial segment on connection. 385 */ 386 case PRU_CONNECT: 387 #ifdef INET 388 if (inp) { 389 if (inp->inp_lport == 0) { 390 error = in_pcbbind(inp, (struct mbuf *)0, 391 (struct proc *)0); 392 if (error) 393 break; 394 } 395 error = in_pcbconnect(inp, nam); 396 } 397 #endif 398 #ifdef INET6 399 if (in6p) { 400 if (in6p->in6p_lport == 0) { 401 error = in6_pcbbind(in6p, (struct mbuf *)0, 402 (struct proc *)0); 403 if (error) 404 break; 405 } 406 error = in6_pcbconnect(in6p, nam); 407 if (!error) { 408 /* mapped addr case */ 409 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr)) 410 tp->t_family = AF_INET; 411 else 412 tp->t_family = AF_INET6; 413 } 414 } 415 #endif 416 if (error) 417 break; 418 tp->t_template = tcp_template(tp); 419 if (tp->t_template == 0) { 420 #ifdef INET 421 if (inp) 422 in_pcbdisconnect(inp); 423 #endif 424 #ifdef INET6 425 if (in6p) 426 in6_pcbdisconnect(in6p); 427 #endif 428 error = ENOBUFS; 429 break; 430 } 431 /* Compute window scaling to request. */ 432 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 433 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 434 tp->request_r_scale++; 435 soisconnecting(so); 436 tcpstat.tcps_connattempt++; 437 tp->t_state = TCPS_SYN_SENT; 438 TCP_TIMER_ARM(tp, TCPT_KEEP, TCPTV_KEEP_INIT); 439 tp->iss = tcp_new_iss(tp, 0); 440 tcp_sendseqinit(tp); 441 error = tcp_output(tp); 442 break; 443 444 /* 445 * Create a TCP connection between two sockets. 446 */ 447 case PRU_CONNECT2: 448 error = EOPNOTSUPP; 449 break; 450 451 /* 452 * Initiate disconnect from peer. 453 * If connection never passed embryonic stage, just drop; 454 * else if don't need to let data drain, then can just drop anyways, 455 * else have to begin TCP shutdown process: mark socket disconnecting, 456 * drain unread data, state switch to reflect user close, and 457 * send segment (e.g. FIN) to peer. Socket will be really disconnected 458 * when peer sends FIN and acks ours. 459 * 460 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 461 */ 462 case PRU_DISCONNECT: 463 tp = tcp_disconnect(tp); 464 break; 465 466 /* 467 * Accept a connection. Essentially all the work is 468 * done at higher levels; just return the address 469 * of the peer, storing through addr. 470 */ 471 case PRU_ACCEPT: 472 #ifdef INET 473 if (inp) 474 in_setpeeraddr(inp, nam); 475 #endif 476 #ifdef INET6 477 if (in6p) 478 in6_setpeeraddr(in6p, nam); 479 #endif 480 break; 481 482 /* 483 * Mark the connection as being incapable of further output. 484 */ 485 case PRU_SHUTDOWN: 486 socantsendmore(so); 487 tp = tcp_usrclosed(tp); 488 if (tp) 489 error = tcp_output(tp); 490 break; 491 492 /* 493 * After a receive, possibly send window update to peer. 494 */ 495 case PRU_RCVD: 496 /* 497 * soreceive() calls this function when a user receives 498 * ancillary data on a listening socket. We don't call 499 * tcp_output in such a case, since there is no header 500 * template for a listening socket and hence the kernel 501 * will panic. 502 */ 503 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0) 504 (void) tcp_output(tp); 505 break; 506 507 /* 508 * Do a send by putting data in output queue and updating urgent 509 * marker if URG set. Possibly send more data. 510 */ 511 case PRU_SEND: 512 if (control && control->m_len) { 513 m_freem(control); 514 m_freem(m); 515 error = EINVAL; 516 break; 517 } 518 sbappendstream(&so->so_snd, m); 519 error = tcp_output(tp); 520 break; 521 522 /* 523 * Abort the TCP. 524 */ 525 case PRU_ABORT: 526 tp = tcp_drop(tp, ECONNABORTED); 527 break; 528 529 case PRU_SENSE: 530 /* 531 * stat: don't bother with a blocksize. 532 */ 533 splx(s); 534 return (0); 535 536 case PRU_RCVOOB: 537 if (control && control->m_len) { 538 m_freem(control); 539 m_freem(m); 540 error = EINVAL; 541 break; 542 } 543 if ((so->so_oobmark == 0 && 544 (so->so_state & SS_RCVATMARK) == 0) || 545 so->so_options & SO_OOBINLINE || 546 tp->t_oobflags & TCPOOB_HADDATA) { 547 error = EINVAL; 548 break; 549 } 550 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 551 error = EWOULDBLOCK; 552 break; 553 } 554 m->m_len = 1; 555 *mtod(m, caddr_t) = tp->t_iobc; 556 if (((long)nam & MSG_PEEK) == 0) 557 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 558 break; 559 560 case PRU_SENDOOB: 561 if (sbspace(&so->so_snd) < -512) { 562 m_freem(m); 563 error = ENOBUFS; 564 break; 565 } 566 /* 567 * According to RFC961 (Assigned Protocols), 568 * the urgent pointer points to the last octet 569 * of urgent data. We continue, however, 570 * to consider it to indicate the first octet 571 * of data past the urgent section. 572 * Otherwise, snd_up should be one lower. 573 */ 574 sbappendstream(&so->so_snd, m); 575 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 576 tp->t_force = 1; 577 error = tcp_output(tp); 578 tp->t_force = 0; 579 break; 580 581 case PRU_SOCKADDR: 582 #ifdef INET 583 if (inp) 584 in_setsockaddr(inp, nam); 585 #endif 586 #ifdef INET6 587 if (in6p) 588 in6_setsockaddr(in6p, nam); 589 #endif 590 break; 591 592 case PRU_PEERADDR: 593 #ifdef INET 594 if (inp) 595 in_setpeeraddr(inp, nam); 596 #endif 597 #ifdef INET6 598 if (in6p) 599 in6_setpeeraddr(in6p, nam); 600 #endif 601 break; 602 603 default: 604 panic("tcp_usrreq"); 605 } 606 #ifdef TCP_DEBUG 607 if (tp && (so->so_options & SO_DEBUG)) 608 tcp_trace(TA_USER, ostate, tp, NULL, req); 609 #endif 610 611 release: 612 splx(s); 613 return (error); 614 } 615 616 int 617 tcp_ctloutput(op, so, level, optname, mp) 618 int op; 619 struct socket *so; 620 int level, optname; 621 struct mbuf **mp; 622 { 623 int error = 0, s; 624 struct inpcb *inp; 625 #ifdef INET6 626 struct in6pcb *in6p; 627 #endif 628 struct tcpcb *tp; 629 struct mbuf *m; 630 int i; 631 int family; /* family of the socket */ 632 633 family = so->so_proto->pr_domain->dom_family; 634 635 s = splsoftnet(); 636 switch (family) { 637 #ifdef INET 638 case PF_INET: 639 inp = sotoinpcb(so); 640 #ifdef INET6 641 in6p = NULL; 642 #endif 643 break; 644 #endif 645 #ifdef INET6 646 case PF_INET6: 647 inp = NULL; 648 in6p = sotoin6pcb(so); 649 break; 650 #endif 651 default: 652 splx(s); 653 return EAFNOSUPPORT; 654 } 655 #ifndef INET6 656 if (inp == NULL) 657 #else 658 if (inp == NULL && in6p == NULL) 659 #endif 660 { 661 splx(s); 662 if (op == PRCO_SETOPT && *mp) 663 (void) m_free(*mp); 664 return (ECONNRESET); 665 } 666 if (level != IPPROTO_TCP) { 667 switch (family) { 668 #ifdef INET 669 case PF_INET: 670 error = ip_ctloutput(op, so, level, optname, mp); 671 break; 672 #endif 673 #ifdef INET6 674 case PF_INET6: 675 error = ip6_ctloutput(op, so, level, optname, mp); 676 break; 677 #endif 678 } 679 splx(s); 680 return (error); 681 } 682 if (inp) 683 tp = intotcpcb(inp); 684 #ifdef INET6 685 else if (in6p) 686 tp = in6totcpcb(in6p); 687 #endif 688 else 689 tp = NULL; 690 691 switch (op) { 692 693 case PRCO_SETOPT: 694 m = *mp; 695 switch (optname) { 696 697 case TCP_NODELAY: 698 if (m == NULL || m->m_len < sizeof (int)) 699 error = EINVAL; 700 else if (*mtod(m, int *)) 701 tp->t_flags |= TF_NODELAY; 702 else 703 tp->t_flags &= ~TF_NODELAY; 704 break; 705 706 case TCP_MAXSEG: 707 if (m && (i = *mtod(m, int *)) > 0 && 708 i <= tp->t_peermss) 709 tp->t_peermss = i; /* limit on send size */ 710 else 711 error = EINVAL; 712 break; 713 714 default: 715 error = ENOPROTOOPT; 716 break; 717 } 718 if (m) 719 (void) m_free(m); 720 break; 721 722 case PRCO_GETOPT: 723 *mp = m = m_get(M_WAIT, MT_SOOPTS); 724 m->m_len = sizeof(int); 725 MCLAIM(m, so->so_mowner); 726 727 switch (optname) { 728 case TCP_NODELAY: 729 *mtod(m, int *) = tp->t_flags & TF_NODELAY; 730 break; 731 case TCP_MAXSEG: 732 *mtod(m, int *) = tp->t_peermss; 733 break; 734 default: 735 error = ENOPROTOOPT; 736 break; 737 } 738 break; 739 } 740 splx(s); 741 return (error); 742 } 743 744 #ifndef TCP_SENDSPACE 745 #define TCP_SENDSPACE 1024*16 746 #endif 747 int tcp_sendspace = TCP_SENDSPACE; 748 #ifndef TCP_RECVSPACE 749 #define TCP_RECVSPACE 1024*16 750 #endif 751 int tcp_recvspace = TCP_RECVSPACE; 752 753 /* 754 * Attach TCP protocol to socket, allocating 755 * internet protocol control block, tcp control block, 756 * bufer space, and entering LISTEN state if to accept connections. 757 */ 758 int 759 tcp_attach(so) 760 struct socket *so; 761 { 762 struct tcpcb *tp; 763 struct inpcb *inp; 764 #ifdef INET6 765 struct in6pcb *in6p; 766 #endif 767 int error; 768 int family; /* family of the socket */ 769 770 family = so->so_proto->pr_domain->dom_family; 771 772 #ifdef MBUFTRACE 773 so->so_mowner = &tcp_mowner; 774 so->so_rcv.sb_mowner = &tcp_rx_mowner; 775 so->so_snd.sb_mowner = &tcp_tx_mowner; 776 #endif 777 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 778 error = soreserve(so, tcp_sendspace, tcp_recvspace); 779 if (error) 780 return (error); 781 } 782 switch (family) { 783 #ifdef INET 784 case PF_INET: 785 error = in_pcballoc(so, &tcbtable); 786 if (error) 787 return (error); 788 inp = sotoinpcb(so); 789 #ifdef INET6 790 in6p = NULL; 791 #endif 792 break; 793 #endif 794 #ifdef INET6 795 case PF_INET6: 796 error = in6_pcballoc(so, &tcb6); 797 if (error) 798 return (error); 799 inp = NULL; 800 in6p = sotoin6pcb(so); 801 break; 802 #endif 803 default: 804 return EAFNOSUPPORT; 805 } 806 if (inp) 807 tp = tcp_newtcpcb(family, (void *)inp); 808 #ifdef INET6 809 else if (in6p) 810 tp = tcp_newtcpcb(family, (void *)in6p); 811 #endif 812 else 813 tp = NULL; 814 815 if (tp == 0) { 816 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 817 818 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 819 #ifdef INET 820 if (inp) 821 in_pcbdetach(inp); 822 #endif 823 #ifdef INET6 824 if (in6p) 825 in6_pcbdetach(in6p); 826 #endif 827 so->so_state |= nofd; 828 return (ENOBUFS); 829 } 830 tp->t_state = TCPS_CLOSED; 831 return (0); 832 } 833 834 /* 835 * Initiate (or continue) disconnect. 836 * If embryonic state, just send reset (once). 837 * If in ``let data drain'' option and linger null, just drop. 838 * Otherwise (hard), mark socket disconnecting and drop 839 * current input data; switch states based on user close, and 840 * send segment to peer (with FIN). 841 */ 842 struct tcpcb * 843 tcp_disconnect(tp) 844 struct tcpcb *tp; 845 { 846 struct socket *so; 847 848 if (tp->t_inpcb) 849 so = tp->t_inpcb->inp_socket; 850 #ifdef INET6 851 else if (tp->t_in6pcb) 852 so = tp->t_in6pcb->in6p_socket; 853 #endif 854 else 855 so = NULL; 856 857 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 858 tp = tcp_close(tp); 859 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 860 tp = tcp_drop(tp, 0); 861 else { 862 soisdisconnecting(so); 863 sbflush(&so->so_rcv); 864 tp = tcp_usrclosed(tp); 865 if (tp) 866 (void) tcp_output(tp); 867 } 868 return (tp); 869 } 870 871 /* 872 * User issued close, and wish to trail through shutdown states: 873 * if never received SYN, just forget it. If got a SYN from peer, 874 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 875 * If already got a FIN from peer, then almost done; go to LAST_ACK 876 * state. In all other cases, have already sent FIN to peer (e.g. 877 * after PRU_SHUTDOWN), and just have to play tedious game waiting 878 * for peer to send FIN or not respond to keep-alives, etc. 879 * We can let the user exit from the close as soon as the FIN is acked. 880 */ 881 struct tcpcb * 882 tcp_usrclosed(tp) 883 struct tcpcb *tp; 884 { 885 886 switch (tp->t_state) { 887 888 case TCPS_CLOSED: 889 case TCPS_LISTEN: 890 case TCPS_SYN_SENT: 891 tp->t_state = TCPS_CLOSED; 892 tp = tcp_close(tp); 893 break; 894 895 case TCPS_SYN_RECEIVED: 896 case TCPS_ESTABLISHED: 897 tp->t_state = TCPS_FIN_WAIT_1; 898 break; 899 900 case TCPS_CLOSE_WAIT: 901 tp->t_state = TCPS_LAST_ACK; 902 break; 903 } 904 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 905 struct socket *so; 906 if (tp->t_inpcb) 907 so = tp->t_inpcb->inp_socket; 908 #ifdef INET6 909 else if (tp->t_in6pcb) 910 so = tp->t_in6pcb->in6p_socket; 911 #endif 912 else 913 so = NULL; 914 soisdisconnected(so); 915 /* 916 * If we are in FIN_WAIT_2, we arrived here because the 917 * application did a shutdown of the send side. Like the 918 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after 919 * a full close, we start a timer to make sure sockets are 920 * not left in FIN_WAIT_2 forever. 921 */ 922 if ((tp->t_state == TCPS_FIN_WAIT_2) && (tcp_maxidle > 0)) 923 TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle); 924 } 925 return (tp); 926 } 927 928 static const struct { 929 unsigned int valid : 1; 930 unsigned int rdonly : 1; 931 int *var; 932 int val; 933 } tcp_ctlvars[] = TCPCTL_VARIABLES; 934 935 /* 936 * Sysctl for tcp variables. 937 */ 938 int 939 tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen) 940 int *name; 941 u_int namelen; 942 void *oldp; 943 size_t *oldlenp; 944 void *newp; 945 size_t newlen; 946 { 947 int error, saved_value = 0; 948 949 if (name[0] == TCPCTL_IDENT) 950 return tcp_sysctl_ident(&name[1], namelen - 1, oldp, oldlenp, 951 newp, newlen); 952 953 /* All remaining sysctl names at this level are terminal. */ 954 if (namelen != 1) 955 return (ENOTDIR); 956 957 if (name[0] < sizeof(tcp_ctlvars)/sizeof(tcp_ctlvars[0]) 958 && tcp_ctlvars[name[0]].valid) { 959 if (tcp_ctlvars[name[0]].rdonly) { 960 return (sysctl_rdint(oldp, oldlenp, newp, 961 tcp_ctlvars[name[0]].val)); 962 } else { 963 switch (name[0]) { 964 case TCPCTL_MSSDFLT: 965 saved_value = tcp_mssdflt; 966 break; 967 } 968 error = sysctl_int(oldp, oldlenp, newp, newlen, 969 tcp_ctlvars[name[0]].var); 970 if (error) 971 return (error); 972 switch (name[0]) { 973 case TCPCTL_MSSDFLT: 974 if (tcp_mssdflt < 32) { 975 tcp_mssdflt = saved_value; 976 return (EINVAL); 977 } 978 break; 979 } 980 return (0); 981 } 982 } 983 984 return (ENOPROTOOPT); 985 } 986 987 988 static int 989 tcp_sysctl_ident(int *name, u_int namelen, void *oldp, size_t *oldlenp, 990 void *newp, size_t newlen) 991 { 992 struct inpcb *inb; 993 struct in_addr laddr, raddr; 994 u_int lport, rport; 995 uid_t uid; 996 int error; 997 998 if (*oldlenp != sizeof(uid_t)) 999 return ENOMEM; 1000 if (!oldp || *oldlenp != sizeof(uid_t)) 1001 return ENOMEM; 1002 if (namelen != 4) 1003 return EINVAL; 1004 1005 raddr.s_addr = (uint32_t)name[0]; 1006 rport = (u_int)name[1]; 1007 laddr.s_addr = (uint32_t)name[2]; 1008 lport = (u_int)name[3]; 1009 1010 inb = in_pcblookup_connect(&tcbtable, raddr, rport, laddr, lport); 1011 if (inb) { 1012 struct socket *sockp = inb->inp_socket; 1013 if (sockp) 1014 uid = sockp->so_uid; 1015 else 1016 return ESRCH; 1017 } else 1018 return ESRCH; 1019 1020 if ((error = copyout(&uid, oldp, sizeof(uid))) != 0) 1021 return error; 1022 1023 return 0; 1024 } 1025