1 /* $NetBSD: tcp_usrreq.c,v 1.76 2003/04/19 20:58:36 christos Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to The NetBSD Foundation 37 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation 38 * Facility, NASA Ames Research Center. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. All advertising materials mentioning features or use of this software 49 * must display the following acknowledgement: 50 * This product includes software developed by the NetBSD 51 * Foundation, Inc. and its contributors. 52 * 4. Neither the name of The NetBSD Foundation nor the names of its 53 * contributors may be used to endorse or promote products derived 54 * from this software without specific prior written permission. 55 * 56 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 57 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 58 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 59 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 60 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 61 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 62 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 63 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 64 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 65 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 66 * POSSIBILITY OF SUCH DAMAGE. 67 */ 68 69 /* 70 * Copyright (c) 1982, 1986, 1988, 1993, 1995 71 * The Regents of the University of California. All rights reserved. 72 * 73 * Redistribution and use in source and binary forms, with or without 74 * modification, are permitted provided that the following conditions 75 * are met: 76 * 1. Redistributions of source code must retain the above copyright 77 * notice, this list of conditions and the following disclaimer. 78 * 2. Redistributions in binary form must reproduce the above copyright 79 * notice, this list of conditions and the following disclaimer in the 80 * documentation and/or other materials provided with the distribution. 81 * 3. All advertising materials mentioning features or use of this software 82 * must display the following acknowledgement: 83 * This product includes software developed by the University of 84 * California, Berkeley and its contributors. 85 * 4. Neither the name of the University nor the names of its contributors 86 * may be used to endorse or promote products derived from this software 87 * without specific prior written permission. 88 * 89 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 90 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 91 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 92 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 93 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 94 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 95 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 96 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 97 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 98 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 99 * SUCH DAMAGE. 100 * 101 * @(#)tcp_usrreq.c 8.5 (Berkeley) 6/21/95 102 */ 103 104 #include <sys/cdefs.h> 105 __KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.76 2003/04/19 20:58:36 christos Exp $"); 106 107 #include "opt_inet.h" 108 #include "opt_ipsec.h" 109 #include "opt_tcp_debug.h" 110 111 #include <sys/param.h> 112 #include <sys/systm.h> 113 #include <sys/kernel.h> 114 #include <sys/malloc.h> 115 #include <sys/mbuf.h> 116 #include <sys/socket.h> 117 #include <sys/socketvar.h> 118 #include <sys/protosw.h> 119 #include <sys/errno.h> 120 #include <sys/stat.h> 121 #include <sys/proc.h> 122 #include <sys/domain.h> 123 #include <sys/sysctl.h> 124 125 #include <net/if.h> 126 #include <net/route.h> 127 128 #include <netinet/in.h> 129 #include <netinet/in_systm.h> 130 #include <netinet/in_var.h> 131 #include <netinet/ip.h> 132 #include <netinet/in_pcb.h> 133 #include <netinet/ip_var.h> 134 135 #ifdef INET6 136 #ifndef INET 137 #include <netinet/in.h> 138 #endif 139 #include <netinet/ip6.h> 140 #include <netinet6/in6_pcb.h> 141 #include <netinet6/ip6_var.h> 142 #endif 143 144 #include <netinet/tcp.h> 145 #include <netinet/tcp_fsm.h> 146 #include <netinet/tcp_seq.h> 147 #include <netinet/tcp_timer.h> 148 #include <netinet/tcp_var.h> 149 #include <netinet/tcpip.h> 150 #include <netinet/tcp_debug.h> 151 152 #include "opt_tcp_space.h" 153 154 #ifdef IPSEC 155 #include <netinet6/ipsec.h> 156 #endif /*IPSEC*/ 157 158 /* 159 * TCP protocol interface to socket abstraction. 160 */ 161 extern char *tcpstates[]; 162 163 static int tcp_sysctl_ident(void *, size_t *, void *, size_t); 164 165 /* 166 * Process a TCP user request for TCP tb. If this is a send request 167 * then m is the mbuf chain of send data. If this is a timer expiration 168 * (called from the software clock routine), then timertype tells which timer. 169 */ 170 /*ARGSUSED*/ 171 int 172 tcp_usrreq(so, req, m, nam, control, p) 173 struct socket *so; 174 int req; 175 struct mbuf *m, *nam, *control; 176 struct proc *p; 177 { 178 struct inpcb *inp; 179 #ifdef INET6 180 struct in6pcb *in6p; 181 #endif 182 struct tcpcb *tp = NULL; 183 int s; 184 int error = 0; 185 #ifdef TCP_DEBUG 186 int ostate = 0; 187 #endif 188 int family; /* family of the socket */ 189 190 family = so->so_proto->pr_domain->dom_family; 191 192 if (req == PRU_CONTROL) { 193 switch (family) { 194 #ifdef INET 195 case PF_INET: 196 return (in_control(so, (long)m, (caddr_t)nam, 197 (struct ifnet *)control, p)); 198 #endif 199 #ifdef INET6 200 case PF_INET6: 201 return (in6_control(so, (long)m, (caddr_t)nam, 202 (struct ifnet *)control, p)); 203 #endif 204 default: 205 return EAFNOSUPPORT; 206 } 207 } 208 209 if (req == PRU_PURGEIF) { 210 switch (family) { 211 #ifdef INET 212 case PF_INET: 213 in_pcbpurgeif0(&tcbtable, (struct ifnet *)control); 214 in_purgeif((struct ifnet *)control); 215 in_pcbpurgeif(&tcbtable, (struct ifnet *)control); 216 break; 217 #endif 218 #ifdef INET6 219 case PF_INET6: 220 in6_pcbpurgeif0(&tcb6, (struct ifnet *)control); 221 in6_purgeif((struct ifnet *)control); 222 in6_pcbpurgeif(&tcb6, (struct ifnet *)control); 223 break; 224 #endif 225 default: 226 return (EAFNOSUPPORT); 227 } 228 return (0); 229 } 230 231 s = splsoftnet(); 232 switch (family) { 233 #ifdef INET 234 case PF_INET: 235 inp = sotoinpcb(so); 236 #ifdef INET6 237 in6p = NULL; 238 #endif 239 break; 240 #endif 241 #ifdef INET6 242 case PF_INET6: 243 inp = NULL; 244 in6p = sotoin6pcb(so); 245 break; 246 #endif 247 default: 248 splx(s); 249 return EAFNOSUPPORT; 250 } 251 252 #ifdef DIAGNOSTIC 253 #ifdef INET6 254 if (inp && in6p) 255 panic("tcp_usrreq: both inp and in6p set to non-NULL"); 256 #endif 257 if (req != PRU_SEND && req != PRU_SENDOOB && control) 258 panic("tcp_usrreq: unexpected control mbuf"); 259 #endif 260 /* 261 * When a TCP is attached to a socket, then there will be 262 * a (struct inpcb) pointed at by the socket, and this 263 * structure will point at a subsidary (struct tcpcb). 264 */ 265 #ifndef INET6 266 if (inp == 0 && req != PRU_ATTACH) 267 #else 268 if ((inp == 0 && in6p == 0) && req != PRU_ATTACH) 269 #endif 270 { 271 error = EINVAL; 272 goto release; 273 } 274 #ifdef INET 275 if (inp) { 276 tp = intotcpcb(inp); 277 /* WHAT IF TP IS 0? */ 278 #ifdef KPROF 279 tcp_acounts[tp->t_state][req]++; 280 #endif 281 #ifdef TCP_DEBUG 282 ostate = tp->t_state; 283 #endif 284 } 285 #endif 286 #ifdef INET6 287 if (in6p) { 288 tp = in6totcpcb(in6p); 289 /* WHAT IF TP IS 0? */ 290 #ifdef KPROF 291 tcp_acounts[tp->t_state][req]++; 292 #endif 293 #ifdef TCP_DEBUG 294 ostate = tp->t_state; 295 #endif 296 } 297 #endif 298 299 switch (req) { 300 301 /* 302 * TCP attaches to socket via PRU_ATTACH, reserving space, 303 * and an internet control block. 304 */ 305 case PRU_ATTACH: 306 #ifndef INET6 307 if (inp != 0) 308 #else 309 if (inp != 0 || in6p != 0) 310 #endif 311 { 312 error = EISCONN; 313 break; 314 } 315 error = tcp_attach(so); 316 if (error) 317 break; 318 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 319 so->so_linger = TCP_LINGERTIME; 320 tp = sototcpcb(so); 321 break; 322 323 /* 324 * PRU_DETACH detaches the TCP protocol from the socket. 325 */ 326 case PRU_DETACH: 327 tp = tcp_disconnect(tp); 328 break; 329 330 /* 331 * Give the socket an address. 332 */ 333 case PRU_BIND: 334 switch (family) { 335 #ifdef INET 336 case PF_INET: 337 error = in_pcbbind(inp, nam, p); 338 break; 339 #endif 340 #ifdef INET6 341 case PF_INET6: 342 error = in6_pcbbind(in6p, nam, p); 343 if (!error) { 344 /* mapped addr case */ 345 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr)) 346 tp->t_family = AF_INET; 347 else 348 tp->t_family = AF_INET6; 349 } 350 break; 351 #endif 352 } 353 break; 354 355 /* 356 * Prepare to accept connections. 357 */ 358 case PRU_LISTEN: 359 #ifdef INET 360 if (inp && inp->inp_lport == 0) { 361 error = in_pcbbind(inp, (struct mbuf *)0, 362 (struct proc *)0); 363 if (error) 364 break; 365 } 366 #endif 367 #ifdef INET6 368 if (in6p && in6p->in6p_lport == 0) { 369 error = in6_pcbbind(in6p, (struct mbuf *)0, 370 (struct proc *)0); 371 if (error) 372 break; 373 } 374 #endif 375 tp->t_state = TCPS_LISTEN; 376 break; 377 378 /* 379 * Initiate connection to peer. 380 * Create a template for use in transmissions on this connection. 381 * Enter SYN_SENT state, and mark socket as connecting. 382 * Start keep-alive timer, and seed output sequence space. 383 * Send initial segment on connection. 384 */ 385 case PRU_CONNECT: 386 #ifdef INET 387 if (inp) { 388 if (inp->inp_lport == 0) { 389 error = in_pcbbind(inp, (struct mbuf *)0, 390 (struct proc *)0); 391 if (error) 392 break; 393 } 394 error = in_pcbconnect(inp, nam); 395 } 396 #endif 397 #ifdef INET6 398 if (in6p) { 399 if (in6p->in6p_lport == 0) { 400 error = in6_pcbbind(in6p, (struct mbuf *)0, 401 (struct proc *)0); 402 if (error) 403 break; 404 } 405 error = in6_pcbconnect(in6p, nam); 406 if (!error) { 407 /* mapped addr case */ 408 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr)) 409 tp->t_family = AF_INET; 410 else 411 tp->t_family = AF_INET6; 412 } 413 } 414 #endif 415 if (error) 416 break; 417 tp->t_template = tcp_template(tp); 418 if (tp->t_template == 0) { 419 #ifdef INET 420 if (inp) 421 in_pcbdisconnect(inp); 422 #endif 423 #ifdef INET6 424 if (in6p) 425 in6_pcbdisconnect(in6p); 426 #endif 427 error = ENOBUFS; 428 break; 429 } 430 /* Compute window scaling to request. */ 431 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 432 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 433 tp->request_r_scale++; 434 soisconnecting(so); 435 tcpstat.tcps_connattempt++; 436 tp->t_state = TCPS_SYN_SENT; 437 TCP_TIMER_ARM(tp, TCPT_KEEP, TCPTV_KEEP_INIT); 438 tp->iss = tcp_new_iss(tp, 0); 439 tcp_sendseqinit(tp); 440 error = tcp_output(tp); 441 break; 442 443 /* 444 * Create a TCP connection between two sockets. 445 */ 446 case PRU_CONNECT2: 447 error = EOPNOTSUPP; 448 break; 449 450 /* 451 * Initiate disconnect from peer. 452 * If connection never passed embryonic stage, just drop; 453 * else if don't need to let data drain, then can just drop anyways, 454 * else have to begin TCP shutdown process: mark socket disconnecting, 455 * drain unread data, state switch to reflect user close, and 456 * send segment (e.g. FIN) to peer. Socket will be really disconnected 457 * when peer sends FIN and acks ours. 458 * 459 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 460 */ 461 case PRU_DISCONNECT: 462 tp = tcp_disconnect(tp); 463 break; 464 465 /* 466 * Accept a connection. Essentially all the work is 467 * done at higher levels; just return the address 468 * of the peer, storing through addr. 469 */ 470 case PRU_ACCEPT: 471 #ifdef INET 472 if (inp) 473 in_setpeeraddr(inp, nam); 474 #endif 475 #ifdef INET6 476 if (in6p) 477 in6_setpeeraddr(in6p, nam); 478 #endif 479 break; 480 481 /* 482 * Mark the connection as being incapable of further output. 483 */ 484 case PRU_SHUTDOWN: 485 socantsendmore(so); 486 tp = tcp_usrclosed(tp); 487 if (tp) 488 error = tcp_output(tp); 489 break; 490 491 /* 492 * After a receive, possibly send window update to peer. 493 */ 494 case PRU_RCVD: 495 /* 496 * soreceive() calls this function when a user receives 497 * ancillary data on a listening socket. We don't call 498 * tcp_output in such a case, since there is no header 499 * template for a listening socket and hence the kernel 500 * will panic. 501 */ 502 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0) 503 (void) tcp_output(tp); 504 break; 505 506 /* 507 * Do a send by putting data in output queue and updating urgent 508 * marker if URG set. Possibly send more data. 509 */ 510 case PRU_SEND: 511 if (control && control->m_len) { 512 m_freem(control); 513 m_freem(m); 514 error = EINVAL; 515 break; 516 } 517 sbappendstream(&so->so_snd, m); 518 error = tcp_output(tp); 519 break; 520 521 /* 522 * Abort the TCP. 523 */ 524 case PRU_ABORT: 525 tp = tcp_drop(tp, ECONNABORTED); 526 break; 527 528 case PRU_SENSE: 529 /* 530 * stat: don't bother with a blocksize. 531 */ 532 splx(s); 533 return (0); 534 535 case PRU_RCVOOB: 536 if (control && control->m_len) { 537 m_freem(control); 538 m_freem(m); 539 error = EINVAL; 540 break; 541 } 542 if ((so->so_oobmark == 0 && 543 (so->so_state & SS_RCVATMARK) == 0) || 544 so->so_options & SO_OOBINLINE || 545 tp->t_oobflags & TCPOOB_HADDATA) { 546 error = EINVAL; 547 break; 548 } 549 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 550 error = EWOULDBLOCK; 551 break; 552 } 553 m->m_len = 1; 554 *mtod(m, caddr_t) = tp->t_iobc; 555 if (((long)nam & MSG_PEEK) == 0) 556 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 557 break; 558 559 case PRU_SENDOOB: 560 if (sbspace(&so->so_snd) < -512) { 561 m_freem(m); 562 error = ENOBUFS; 563 break; 564 } 565 /* 566 * According to RFC961 (Assigned Protocols), 567 * the urgent pointer points to the last octet 568 * of urgent data. We continue, however, 569 * to consider it to indicate the first octet 570 * of data past the urgent section. 571 * Otherwise, snd_up should be one lower. 572 */ 573 sbappendstream(&so->so_snd, m); 574 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 575 tp->t_force = 1; 576 error = tcp_output(tp); 577 tp->t_force = 0; 578 break; 579 580 case PRU_SOCKADDR: 581 #ifdef INET 582 if (inp) 583 in_setsockaddr(inp, nam); 584 #endif 585 #ifdef INET6 586 if (in6p) 587 in6_setsockaddr(in6p, nam); 588 #endif 589 break; 590 591 case PRU_PEERADDR: 592 #ifdef INET 593 if (inp) 594 in_setpeeraddr(inp, nam); 595 #endif 596 #ifdef INET6 597 if (in6p) 598 in6_setpeeraddr(in6p, nam); 599 #endif 600 break; 601 602 default: 603 panic("tcp_usrreq"); 604 } 605 #ifdef TCP_DEBUG 606 if (tp && (so->so_options & SO_DEBUG)) 607 tcp_trace(TA_USER, ostate, tp, NULL, req); 608 #endif 609 610 release: 611 splx(s); 612 return (error); 613 } 614 615 int 616 tcp_ctloutput(op, so, level, optname, mp) 617 int op; 618 struct socket *so; 619 int level, optname; 620 struct mbuf **mp; 621 { 622 int error = 0, s; 623 struct inpcb *inp; 624 #ifdef INET6 625 struct in6pcb *in6p; 626 #endif 627 struct tcpcb *tp; 628 struct mbuf *m; 629 int i; 630 int family; /* family of the socket */ 631 632 family = so->so_proto->pr_domain->dom_family; 633 634 s = splsoftnet(); 635 switch (family) { 636 #ifdef INET 637 case PF_INET: 638 inp = sotoinpcb(so); 639 #ifdef INET6 640 in6p = NULL; 641 #endif 642 break; 643 #endif 644 #ifdef INET6 645 case PF_INET6: 646 inp = NULL; 647 in6p = sotoin6pcb(so); 648 break; 649 #endif 650 default: 651 splx(s); 652 return EAFNOSUPPORT; 653 } 654 #ifndef INET6 655 if (inp == NULL) 656 #else 657 if (inp == NULL && in6p == NULL) 658 #endif 659 { 660 splx(s); 661 if (op == PRCO_SETOPT && *mp) 662 (void) m_free(*mp); 663 return (ECONNRESET); 664 } 665 if (level != IPPROTO_TCP) { 666 switch (family) { 667 #ifdef INET 668 case PF_INET: 669 error = ip_ctloutput(op, so, level, optname, mp); 670 break; 671 #endif 672 #ifdef INET6 673 case PF_INET6: 674 error = ip6_ctloutput(op, so, level, optname, mp); 675 break; 676 #endif 677 } 678 splx(s); 679 return (error); 680 } 681 if (inp) 682 tp = intotcpcb(inp); 683 #ifdef INET6 684 else if (in6p) 685 tp = in6totcpcb(in6p); 686 #endif 687 else 688 tp = NULL; 689 690 switch (op) { 691 692 case PRCO_SETOPT: 693 m = *mp; 694 switch (optname) { 695 696 case TCP_NODELAY: 697 if (m == NULL || m->m_len < sizeof (int)) 698 error = EINVAL; 699 else if (*mtod(m, int *)) 700 tp->t_flags |= TF_NODELAY; 701 else 702 tp->t_flags &= ~TF_NODELAY; 703 break; 704 705 case TCP_MAXSEG: 706 if (m && (i = *mtod(m, int *)) > 0 && 707 i <= tp->t_peermss) 708 tp->t_peermss = i; /* limit on send size */ 709 else 710 error = EINVAL; 711 break; 712 713 default: 714 error = ENOPROTOOPT; 715 break; 716 } 717 if (m) 718 (void) m_free(m); 719 break; 720 721 case PRCO_GETOPT: 722 *mp = m = m_get(M_WAIT, MT_SOOPTS); 723 m->m_len = sizeof(int); 724 MCLAIM(m, so->so_mowner); 725 726 switch (optname) { 727 case TCP_NODELAY: 728 *mtod(m, int *) = tp->t_flags & TF_NODELAY; 729 break; 730 case TCP_MAXSEG: 731 *mtod(m, int *) = tp->t_peermss; 732 break; 733 default: 734 error = ENOPROTOOPT; 735 break; 736 } 737 break; 738 } 739 splx(s); 740 return (error); 741 } 742 743 #ifndef TCP_SENDSPACE 744 #define TCP_SENDSPACE 1024*16 745 #endif 746 int tcp_sendspace = TCP_SENDSPACE; 747 #ifndef TCP_RECVSPACE 748 #define TCP_RECVSPACE 1024*16 749 #endif 750 int tcp_recvspace = TCP_RECVSPACE; 751 752 /* 753 * Attach TCP protocol to socket, allocating 754 * internet protocol control block, tcp control block, 755 * bufer space, and entering LISTEN state if to accept connections. 756 */ 757 int 758 tcp_attach(so) 759 struct socket *so; 760 { 761 struct tcpcb *tp; 762 struct inpcb *inp; 763 #ifdef INET6 764 struct in6pcb *in6p; 765 #endif 766 int error; 767 int family; /* family of the socket */ 768 769 family = so->so_proto->pr_domain->dom_family; 770 771 #ifdef MBUFTRACE 772 so->so_mowner = &tcp_mowner; 773 so->so_rcv.sb_mowner = &tcp_rx_mowner; 774 so->so_snd.sb_mowner = &tcp_tx_mowner; 775 #endif 776 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 777 error = soreserve(so, tcp_sendspace, tcp_recvspace); 778 if (error) 779 return (error); 780 } 781 switch (family) { 782 #ifdef INET 783 case PF_INET: 784 error = in_pcballoc(so, &tcbtable); 785 if (error) 786 return (error); 787 inp = sotoinpcb(so); 788 #ifdef INET6 789 in6p = NULL; 790 #endif 791 break; 792 #endif 793 #ifdef INET6 794 case PF_INET6: 795 error = in6_pcballoc(so, &tcb6); 796 if (error) 797 return (error); 798 inp = NULL; 799 in6p = sotoin6pcb(so); 800 break; 801 #endif 802 default: 803 return EAFNOSUPPORT; 804 } 805 if (inp) 806 tp = tcp_newtcpcb(family, (void *)inp); 807 #ifdef INET6 808 else if (in6p) 809 tp = tcp_newtcpcb(family, (void *)in6p); 810 #endif 811 else 812 tp = NULL; 813 814 if (tp == 0) { 815 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 816 817 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 818 #ifdef INET 819 if (inp) 820 in_pcbdetach(inp); 821 #endif 822 #ifdef INET6 823 if (in6p) 824 in6_pcbdetach(in6p); 825 #endif 826 so->so_state |= nofd; 827 return (ENOBUFS); 828 } 829 tp->t_state = TCPS_CLOSED; 830 return (0); 831 } 832 833 /* 834 * Initiate (or continue) disconnect. 835 * If embryonic state, just send reset (once). 836 * If in ``let data drain'' option and linger null, just drop. 837 * Otherwise (hard), mark socket disconnecting and drop 838 * current input data; switch states based on user close, and 839 * send segment to peer (with FIN). 840 */ 841 struct tcpcb * 842 tcp_disconnect(tp) 843 struct tcpcb *tp; 844 { 845 struct socket *so; 846 847 if (tp->t_inpcb) 848 so = tp->t_inpcb->inp_socket; 849 #ifdef INET6 850 else if (tp->t_in6pcb) 851 so = tp->t_in6pcb->in6p_socket; 852 #endif 853 else 854 so = NULL; 855 856 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 857 tp = tcp_close(tp); 858 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 859 tp = tcp_drop(tp, 0); 860 else { 861 soisdisconnecting(so); 862 sbflush(&so->so_rcv); 863 tp = tcp_usrclosed(tp); 864 if (tp) 865 (void) tcp_output(tp); 866 } 867 return (tp); 868 } 869 870 /* 871 * User issued close, and wish to trail through shutdown states: 872 * if never received SYN, just forget it. If got a SYN from peer, 873 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 874 * If already got a FIN from peer, then almost done; go to LAST_ACK 875 * state. In all other cases, have already sent FIN to peer (e.g. 876 * after PRU_SHUTDOWN), and just have to play tedious game waiting 877 * for peer to send FIN or not respond to keep-alives, etc. 878 * We can let the user exit from the close as soon as the FIN is acked. 879 */ 880 struct tcpcb * 881 tcp_usrclosed(tp) 882 struct tcpcb *tp; 883 { 884 885 switch (tp->t_state) { 886 887 case TCPS_CLOSED: 888 case TCPS_LISTEN: 889 case TCPS_SYN_SENT: 890 tp->t_state = TCPS_CLOSED; 891 tp = tcp_close(tp); 892 break; 893 894 case TCPS_SYN_RECEIVED: 895 case TCPS_ESTABLISHED: 896 tp->t_state = TCPS_FIN_WAIT_1; 897 break; 898 899 case TCPS_CLOSE_WAIT: 900 tp->t_state = TCPS_LAST_ACK; 901 break; 902 } 903 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 904 struct socket *so; 905 if (tp->t_inpcb) 906 so = tp->t_inpcb->inp_socket; 907 #ifdef INET6 908 else if (tp->t_in6pcb) 909 so = tp->t_in6pcb->in6p_socket; 910 #endif 911 else 912 so = NULL; 913 soisdisconnected(so); 914 /* 915 * If we are in FIN_WAIT_2, we arrived here because the 916 * application did a shutdown of the send side. Like the 917 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after 918 * a full close, we start a timer to make sure sockets are 919 * not left in FIN_WAIT_2 forever. 920 */ 921 if ((tp->t_state == TCPS_FIN_WAIT_2) && (tcp_maxidle > 0)) 922 TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle); 923 } 924 return (tp); 925 } 926 927 static const struct { 928 unsigned int valid : 1; 929 unsigned int rdonly : 1; 930 int *var; 931 int val; 932 } tcp_ctlvars[] = TCPCTL_VARIABLES; 933 934 /* 935 * Sysctl for tcp variables. 936 */ 937 int 938 tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen) 939 int *name; 940 u_int namelen; 941 void *oldp; 942 size_t *oldlenp; 943 void *newp; 944 size_t newlen; 945 { 946 int error, saved_value = 0; 947 948 /* All sysctl names at this level are terminal. */ 949 if (namelen != 1) 950 return (ENOTDIR); 951 952 if (name[0] == TCPCTL_IDENT) 953 return tcp_sysctl_ident(oldp, oldlenp, newp, newlen); 954 955 if (name[0] < sizeof(tcp_ctlvars)/sizeof(tcp_ctlvars[0]) 956 && tcp_ctlvars[name[0]].valid) { 957 if (tcp_ctlvars[name[0]].rdonly) { 958 return (sysctl_rdint(oldp, oldlenp, newp, 959 tcp_ctlvars[name[0]].val)); 960 } else { 961 switch (name[0]) { 962 case TCPCTL_MSSDFLT: 963 saved_value = tcp_mssdflt; 964 break; 965 } 966 error = sysctl_int(oldp, oldlenp, newp, newlen, 967 tcp_ctlvars[name[0]].var); 968 if (error) 969 return (error); 970 switch (name[0]) { 971 case TCPCTL_MSSDFLT: 972 if (tcp_mssdflt < 32) { 973 tcp_mssdflt = saved_value; 974 return (EINVAL); 975 } 976 break; 977 } 978 return (0); 979 } 980 } 981 982 return (ENOPROTOOPT); 983 } 984 985 986 static int 987 tcp_sysctl_ident(void *oldp, size_t *oldlenp, void *newp, size_t newlen) 988 { 989 struct sysctl_tcp_ident_args args; 990 struct socket *sockp; 991 struct inpcb *inb; 992 uid_t uid; 993 int error; 994 995 if (newlen != sizeof(args)) 996 return EINVAL; 997 if (!newp) 998 return EFAULT; 999 if (*oldlenp != sizeof(uid_t)) 1000 return ENOMEM; 1001 if (!oldp || *oldlenp != sizeof(uid_t)) 1002 return ENOMEM; 1003 if ((error = copyin(newp, &args, newlen)) != 0) 1004 return error; 1005 1006 inb = in_pcblookup_connect(&tcbtable, args.raddr, args.rport, 1007 args.laddr, args.lport); 1008 if (inb) { 1009 sockp = inb->inp_socket; 1010 if (sockp) 1011 uid = sockp->so_uid; 1012 else 1013 return ESRCH; 1014 } else 1015 return ESRCH; 1016 1017 if ((error = copyout(&uid, oldp, sizeof(uid))) != 0) 1018 return error; 1019 1020 return 0; 1021 } 1022