1 /* $NetBSD: tcp_usrreq.c,v 1.108 2005/08/10 13:06:49 yamt Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1997, 1998, 2005 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to The NetBSD Foundation 37 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation 38 * Facility, NASA Ames Research Center. 39 * This code is derived from software contributed to The NetBSD Foundation 40 * by Charles M. Hannum. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. All advertising materials mentioning features or use of this software 51 * must display the following acknowledgement: 52 * This product includes software developed by the NetBSD 53 * Foundation, Inc. and its contributors. 54 * 4. Neither the name of The NetBSD Foundation nor the names of its 55 * contributors may be used to endorse or promote products derived 56 * from this software without specific prior written permission. 57 * 58 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 59 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 60 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 61 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 62 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 63 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 64 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 65 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 66 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 67 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 68 * POSSIBILITY OF SUCH DAMAGE. 69 */ 70 71 /* 72 * Copyright (c) 1982, 1986, 1988, 1993, 1995 73 * The Regents of the University of California. All rights reserved. 74 * 75 * Redistribution and use in source and binary forms, with or without 76 * modification, are permitted provided that the following conditions 77 * are met: 78 * 1. Redistributions of source code must retain the above copyright 79 * notice, this list of conditions and the following disclaimer. 80 * 2. Redistributions in binary form must reproduce the above copyright 81 * notice, this list of conditions and the following disclaimer in the 82 * documentation and/or other materials provided with the distribution. 83 * 3. Neither the name of the University nor the names of its contributors 84 * may be used to endorse or promote products derived from this software 85 * without specific prior written permission. 86 * 87 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 88 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 89 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 90 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 91 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 92 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 93 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 94 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 95 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 96 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 97 * SUCH DAMAGE. 98 * 99 * @(#)tcp_usrreq.c 8.5 (Berkeley) 6/21/95 100 */ 101 102 #include <sys/cdefs.h> 103 __KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.108 2005/08/10 13:06:49 yamt Exp $"); 104 105 #include "opt_inet.h" 106 #include "opt_ipsec.h" 107 #include "opt_tcp_debug.h" 108 #include "opt_mbuftrace.h" 109 110 #include <sys/param.h> 111 #include <sys/systm.h> 112 #include <sys/kernel.h> 113 #include <sys/malloc.h> 114 #include <sys/mbuf.h> 115 #include <sys/socket.h> 116 #include <sys/socketvar.h> 117 #include <sys/protosw.h> 118 #include <sys/errno.h> 119 #include <sys/stat.h> 120 #include <sys/proc.h> 121 #include <sys/domain.h> 122 #include <sys/sysctl.h> 123 124 #include <net/if.h> 125 #include <net/route.h> 126 127 #include <netinet/in.h> 128 #include <netinet/in_systm.h> 129 #include <netinet/in_var.h> 130 #include <netinet/ip.h> 131 #include <netinet/in_pcb.h> 132 #include <netinet/ip_var.h> 133 #include <netinet/in_offload.h> 134 135 #ifdef INET6 136 #ifndef INET 137 #include <netinet/in.h> 138 #endif 139 #include <netinet/ip6.h> 140 #include <netinet6/in6_pcb.h> 141 #include <netinet6/ip6_var.h> 142 #endif 143 144 #include <netinet/tcp.h> 145 #include <netinet/tcp_fsm.h> 146 #include <netinet/tcp_seq.h> 147 #include <netinet/tcp_timer.h> 148 #include <netinet/tcp_var.h> 149 #include <netinet/tcpip.h> 150 #include <netinet/tcp_debug.h> 151 152 #include "opt_tcp_space.h" 153 154 #ifdef IPSEC 155 #include <netinet6/ipsec.h> 156 #endif /*IPSEC*/ 157 158 /* 159 * TCP protocol interface to socket abstraction. 160 */ 161 162 /* 163 * Process a TCP user request for TCP tb. If this is a send request 164 * then m is the mbuf chain of send data. If this is a timer expiration 165 * (called from the software clock routine), then timertype tells which timer. 166 */ 167 /*ARGSUSED*/ 168 int 169 tcp_usrreq(struct socket *so, int req, 170 struct mbuf *m, struct mbuf *nam, struct mbuf *control, struct proc *p) 171 { 172 struct inpcb *inp; 173 #ifdef INET6 174 struct in6pcb *in6p; 175 #endif 176 struct tcpcb *tp = NULL; 177 int s; 178 int error = 0; 179 #ifdef TCP_DEBUG 180 int ostate = 0; 181 #endif 182 int family; /* family of the socket */ 183 184 family = so->so_proto->pr_domain->dom_family; 185 186 if (req == PRU_CONTROL) { 187 switch (family) { 188 #ifdef INET 189 case PF_INET: 190 return (in_control(so, (long)m, (caddr_t)nam, 191 (struct ifnet *)control, p)); 192 #endif 193 #ifdef INET6 194 case PF_INET6: 195 return (in6_control(so, (long)m, (caddr_t)nam, 196 (struct ifnet *)control, p)); 197 #endif 198 default: 199 return EAFNOSUPPORT; 200 } 201 } 202 203 if (req == PRU_PURGEIF) { 204 switch (family) { 205 #ifdef INET 206 case PF_INET: 207 in_pcbpurgeif0(&tcbtable, (struct ifnet *)control); 208 in_purgeif((struct ifnet *)control); 209 in_pcbpurgeif(&tcbtable, (struct ifnet *)control); 210 break; 211 #endif 212 #ifdef INET6 213 case PF_INET6: 214 in6_pcbpurgeif0(&tcbtable, (struct ifnet *)control); 215 in6_purgeif((struct ifnet *)control); 216 in6_pcbpurgeif(&tcbtable, (struct ifnet *)control); 217 break; 218 #endif 219 default: 220 return (EAFNOSUPPORT); 221 } 222 return (0); 223 } 224 225 s = splsoftnet(); 226 switch (family) { 227 #ifdef INET 228 case PF_INET: 229 inp = sotoinpcb(so); 230 #ifdef INET6 231 in6p = NULL; 232 #endif 233 break; 234 #endif 235 #ifdef INET6 236 case PF_INET6: 237 inp = NULL; 238 in6p = sotoin6pcb(so); 239 break; 240 #endif 241 default: 242 splx(s); 243 return EAFNOSUPPORT; 244 } 245 246 #ifdef DIAGNOSTIC 247 #ifdef INET6 248 if (inp && in6p) 249 panic("tcp_usrreq: both inp and in6p set to non-NULL"); 250 #endif 251 if (req != PRU_SEND && req != PRU_SENDOOB && control) 252 panic("tcp_usrreq: unexpected control mbuf"); 253 #endif 254 /* 255 * When a TCP is attached to a socket, then there will be 256 * a (struct inpcb) pointed at by the socket, and this 257 * structure will point at a subsidary (struct tcpcb). 258 */ 259 #ifndef INET6 260 if (inp == 0 && req != PRU_ATTACH) 261 #else 262 if ((inp == 0 && in6p == 0) && req != PRU_ATTACH) 263 #endif 264 { 265 error = EINVAL; 266 goto release; 267 } 268 #ifdef INET 269 if (inp) { 270 tp = intotcpcb(inp); 271 /* WHAT IF TP IS 0? */ 272 #ifdef KPROF 273 tcp_acounts[tp->t_state][req]++; 274 #endif 275 #ifdef TCP_DEBUG 276 ostate = tp->t_state; 277 #endif 278 } 279 #endif 280 #ifdef INET6 281 if (in6p) { 282 tp = in6totcpcb(in6p); 283 /* WHAT IF TP IS 0? */ 284 #ifdef KPROF 285 tcp_acounts[tp->t_state][req]++; 286 #endif 287 #ifdef TCP_DEBUG 288 ostate = tp->t_state; 289 #endif 290 } 291 #endif 292 293 switch (req) { 294 295 /* 296 * TCP attaches to socket via PRU_ATTACH, reserving space, 297 * and an internet control block. 298 */ 299 case PRU_ATTACH: 300 #ifndef INET6 301 if (inp != 0) 302 #else 303 if (inp != 0 || in6p != 0) 304 #endif 305 { 306 error = EISCONN; 307 break; 308 } 309 error = tcp_attach(so); 310 if (error) 311 break; 312 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 313 so->so_linger = TCP_LINGERTIME; 314 tp = sototcpcb(so); 315 break; 316 317 /* 318 * PRU_DETACH detaches the TCP protocol from the socket. 319 */ 320 case PRU_DETACH: 321 tp = tcp_disconnect(tp); 322 break; 323 324 /* 325 * Give the socket an address. 326 */ 327 case PRU_BIND: 328 switch (family) { 329 #ifdef INET 330 case PF_INET: 331 error = in_pcbbind(inp, nam, p); 332 break; 333 #endif 334 #ifdef INET6 335 case PF_INET6: 336 error = in6_pcbbind(in6p, nam, p); 337 if (!error) { 338 /* mapped addr case */ 339 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr)) 340 tp->t_family = AF_INET; 341 else 342 tp->t_family = AF_INET6; 343 } 344 break; 345 #endif 346 } 347 break; 348 349 /* 350 * Prepare to accept connections. 351 */ 352 case PRU_LISTEN: 353 #ifdef INET 354 if (inp && inp->inp_lport == 0) { 355 error = in_pcbbind(inp, (struct mbuf *)0, 356 (struct proc *)0); 357 if (error) 358 break; 359 } 360 #endif 361 #ifdef INET6 362 if (in6p && in6p->in6p_lport == 0) { 363 error = in6_pcbbind(in6p, (struct mbuf *)0, 364 (struct proc *)0); 365 if (error) 366 break; 367 } 368 #endif 369 tp->t_state = TCPS_LISTEN; 370 break; 371 372 /* 373 * Initiate connection to peer. 374 * Create a template for use in transmissions on this connection. 375 * Enter SYN_SENT state, and mark socket as connecting. 376 * Start keep-alive timer, and seed output sequence space. 377 * Send initial segment on connection. 378 */ 379 case PRU_CONNECT: 380 #ifdef INET 381 if (inp) { 382 if (inp->inp_lport == 0) { 383 error = in_pcbbind(inp, (struct mbuf *)0, 384 (struct proc *)0); 385 if (error) 386 break; 387 } 388 error = in_pcbconnect(inp, nam); 389 } 390 #endif 391 #ifdef INET6 392 if (in6p) { 393 if (in6p->in6p_lport == 0) { 394 error = in6_pcbbind(in6p, (struct mbuf *)0, 395 (struct proc *)0); 396 if (error) 397 break; 398 } 399 error = in6_pcbconnect(in6p, nam); 400 if (!error) { 401 /* mapped addr case */ 402 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr)) 403 tp->t_family = AF_INET; 404 else 405 tp->t_family = AF_INET6; 406 } 407 } 408 #endif 409 if (error) 410 break; 411 tp->t_template = tcp_template(tp); 412 if (tp->t_template == 0) { 413 #ifdef INET 414 if (inp) 415 in_pcbdisconnect(inp); 416 #endif 417 #ifdef INET6 418 if (in6p) 419 in6_pcbdisconnect(in6p); 420 #endif 421 error = ENOBUFS; 422 break; 423 } 424 /* Compute window scaling to request. */ 425 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 426 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 427 tp->request_r_scale++; 428 soisconnecting(so); 429 tcpstat.tcps_connattempt++; 430 tp->t_state = TCPS_SYN_SENT; 431 TCP_TIMER_ARM(tp, TCPT_KEEP, TCPTV_KEEP_INIT); 432 tp->iss = tcp_new_iss(tp, 0); 433 tcp_sendseqinit(tp); 434 error = tcp_output(tp); 435 break; 436 437 /* 438 * Create a TCP connection between two sockets. 439 */ 440 case PRU_CONNECT2: 441 error = EOPNOTSUPP; 442 break; 443 444 /* 445 * Initiate disconnect from peer. 446 * If connection never passed embryonic stage, just drop; 447 * else if don't need to let data drain, then can just drop anyways, 448 * else have to begin TCP shutdown process: mark socket disconnecting, 449 * drain unread data, state switch to reflect user close, and 450 * send segment (e.g. FIN) to peer. Socket will be really disconnected 451 * when peer sends FIN and acks ours. 452 * 453 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 454 */ 455 case PRU_DISCONNECT: 456 tp = tcp_disconnect(tp); 457 break; 458 459 /* 460 * Accept a connection. Essentially all the work is 461 * done at higher levels; just return the address 462 * of the peer, storing through addr. 463 */ 464 case PRU_ACCEPT: 465 #ifdef INET 466 if (inp) 467 in_setpeeraddr(inp, nam); 468 #endif 469 #ifdef INET6 470 if (in6p) 471 in6_setpeeraddr(in6p, nam); 472 #endif 473 break; 474 475 /* 476 * Mark the connection as being incapable of further output. 477 */ 478 case PRU_SHUTDOWN: 479 socantsendmore(so); 480 tp = tcp_usrclosed(tp); 481 if (tp) 482 error = tcp_output(tp); 483 break; 484 485 /* 486 * After a receive, possibly send window update to peer. 487 */ 488 case PRU_RCVD: 489 /* 490 * soreceive() calls this function when a user receives 491 * ancillary data on a listening socket. We don't call 492 * tcp_output in such a case, since there is no header 493 * template for a listening socket and hence the kernel 494 * will panic. 495 */ 496 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0) 497 (void) tcp_output(tp); 498 break; 499 500 /* 501 * Do a send by putting data in output queue and updating urgent 502 * marker if URG set. Possibly send more data. 503 */ 504 case PRU_SEND: 505 if (control && control->m_len) { 506 m_freem(control); 507 m_freem(m); 508 error = EINVAL; 509 break; 510 } 511 sbappendstream(&so->so_snd, m); 512 error = tcp_output(tp); 513 break; 514 515 /* 516 * Abort the TCP. 517 */ 518 case PRU_ABORT: 519 tp = tcp_drop(tp, ECONNABORTED); 520 break; 521 522 case PRU_SENSE: 523 /* 524 * stat: don't bother with a blocksize. 525 */ 526 splx(s); 527 return (0); 528 529 case PRU_RCVOOB: 530 if (control && control->m_len) { 531 m_freem(control); 532 m_freem(m); 533 error = EINVAL; 534 break; 535 } 536 if ((so->so_oobmark == 0 && 537 (so->so_state & SS_RCVATMARK) == 0) || 538 so->so_options & SO_OOBINLINE || 539 tp->t_oobflags & TCPOOB_HADDATA) { 540 error = EINVAL; 541 break; 542 } 543 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 544 error = EWOULDBLOCK; 545 break; 546 } 547 m->m_len = 1; 548 *mtod(m, caddr_t) = tp->t_iobc; 549 if (((long)nam & MSG_PEEK) == 0) 550 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 551 break; 552 553 case PRU_SENDOOB: 554 if (sbspace(&so->so_snd) < -512) { 555 m_freem(m); 556 error = ENOBUFS; 557 break; 558 } 559 /* 560 * According to RFC961 (Assigned Protocols), 561 * the urgent pointer points to the last octet 562 * of urgent data. We continue, however, 563 * to consider it to indicate the first octet 564 * of data past the urgent section. 565 * Otherwise, snd_up should be one lower. 566 */ 567 sbappendstream(&so->so_snd, m); 568 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 569 tp->t_force = 1; 570 error = tcp_output(tp); 571 tp->t_force = 0; 572 break; 573 574 case PRU_SOCKADDR: 575 #ifdef INET 576 if (inp) 577 in_setsockaddr(inp, nam); 578 #endif 579 #ifdef INET6 580 if (in6p) 581 in6_setsockaddr(in6p, nam); 582 #endif 583 break; 584 585 case PRU_PEERADDR: 586 #ifdef INET 587 if (inp) 588 in_setpeeraddr(inp, nam); 589 #endif 590 #ifdef INET6 591 if (in6p) 592 in6_setpeeraddr(in6p, nam); 593 #endif 594 break; 595 596 default: 597 panic("tcp_usrreq"); 598 } 599 #ifdef TCP_DEBUG 600 if (tp && (so->so_options & SO_DEBUG)) 601 tcp_trace(TA_USER, ostate, tp, NULL, req); 602 #endif 603 604 release: 605 splx(s); 606 return (error); 607 } 608 609 int 610 tcp_ctloutput(int op, struct socket *so, int level, int optname, 611 struct mbuf **mp) 612 { 613 int error = 0, s; 614 struct inpcb *inp; 615 #ifdef INET6 616 struct in6pcb *in6p; 617 #endif 618 struct tcpcb *tp; 619 struct mbuf *m; 620 int i; 621 int family; /* family of the socket */ 622 623 family = so->so_proto->pr_domain->dom_family; 624 625 s = splsoftnet(); 626 switch (family) { 627 #ifdef INET 628 case PF_INET: 629 inp = sotoinpcb(so); 630 #ifdef INET6 631 in6p = NULL; 632 #endif 633 break; 634 #endif 635 #ifdef INET6 636 case PF_INET6: 637 inp = NULL; 638 in6p = sotoin6pcb(so); 639 break; 640 #endif 641 default: 642 splx(s); 643 return EAFNOSUPPORT; 644 } 645 #ifndef INET6 646 if (inp == NULL) 647 #else 648 if (inp == NULL && in6p == NULL) 649 #endif 650 { 651 splx(s); 652 if (op == PRCO_SETOPT && *mp) 653 (void) m_free(*mp); 654 return (ECONNRESET); 655 } 656 if (level != IPPROTO_TCP) { 657 switch (family) { 658 #ifdef INET 659 case PF_INET: 660 error = ip_ctloutput(op, so, level, optname, mp); 661 break; 662 #endif 663 #ifdef INET6 664 case PF_INET6: 665 error = ip6_ctloutput(op, so, level, optname, mp); 666 break; 667 #endif 668 } 669 splx(s); 670 return (error); 671 } 672 if (inp) 673 tp = intotcpcb(inp); 674 #ifdef INET6 675 else if (in6p) 676 tp = in6totcpcb(in6p); 677 #endif 678 else 679 tp = NULL; 680 681 switch (op) { 682 683 case PRCO_SETOPT: 684 m = *mp; 685 switch (optname) { 686 687 #ifdef TCP_SIGNATURE 688 case TCP_MD5SIG: 689 if (m == NULL || m->m_len < sizeof (int)) 690 error = EINVAL; 691 if (error) 692 break; 693 if (*mtod(m, int *) > 0) 694 tp->t_flags |= TF_SIGNATURE; 695 else 696 tp->t_flags &= ~TF_SIGNATURE; 697 break; 698 #endif /* TCP_SIGNATURE */ 699 700 case TCP_NODELAY: 701 if (m == NULL || m->m_len < sizeof (int)) 702 error = EINVAL; 703 else if (*mtod(m, int *)) 704 tp->t_flags |= TF_NODELAY; 705 else 706 tp->t_flags &= ~TF_NODELAY; 707 break; 708 709 case TCP_MAXSEG: 710 if (m && (i = *mtod(m, int *)) > 0 && 711 i <= tp->t_peermss) 712 tp->t_peermss = i; /* limit on send size */ 713 else 714 error = EINVAL; 715 break; 716 717 default: 718 error = ENOPROTOOPT; 719 break; 720 } 721 if (m) 722 (void) m_free(m); 723 break; 724 725 case PRCO_GETOPT: 726 *mp = m = m_get(M_WAIT, MT_SOOPTS); 727 m->m_len = sizeof(int); 728 MCLAIM(m, so->so_mowner); 729 730 switch (optname) { 731 #ifdef TCP_SIGNATURE 732 case TCP_MD5SIG: 733 *mtod(m, int *) = (tp->t_flags & TF_SIGNATURE) ? 1 : 0; 734 break; 735 #endif 736 case TCP_NODELAY: 737 *mtod(m, int *) = tp->t_flags & TF_NODELAY; 738 break; 739 case TCP_MAXSEG: 740 *mtod(m, int *) = tp->t_peermss; 741 break; 742 default: 743 error = ENOPROTOOPT; 744 break; 745 } 746 break; 747 } 748 splx(s); 749 return (error); 750 } 751 752 #ifndef TCP_SENDSPACE 753 #define TCP_SENDSPACE 1024*32 754 #endif 755 int tcp_sendspace = TCP_SENDSPACE; 756 #ifndef TCP_RECVSPACE 757 #define TCP_RECVSPACE 1024*32 758 #endif 759 int tcp_recvspace = TCP_RECVSPACE; 760 761 /* 762 * Attach TCP protocol to socket, allocating 763 * internet protocol control block, tcp control block, 764 * bufer space, and entering LISTEN state if to accept connections. 765 */ 766 int 767 tcp_attach(struct socket *so) 768 { 769 struct tcpcb *tp; 770 struct inpcb *inp; 771 #ifdef INET6 772 struct in6pcb *in6p; 773 #endif 774 int error; 775 int family; /* family of the socket */ 776 777 family = so->so_proto->pr_domain->dom_family; 778 779 #ifdef MBUFTRACE 780 so->so_mowner = &tcp_mowner; 781 so->so_rcv.sb_mowner = &tcp_rx_mowner; 782 so->so_snd.sb_mowner = &tcp_tx_mowner; 783 #endif 784 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 785 error = soreserve(so, tcp_sendspace, tcp_recvspace); 786 if (error) 787 return (error); 788 } 789 switch (family) { 790 #ifdef INET 791 case PF_INET: 792 error = in_pcballoc(so, &tcbtable); 793 if (error) 794 return (error); 795 inp = sotoinpcb(so); 796 #ifdef INET6 797 in6p = NULL; 798 #endif 799 break; 800 #endif 801 #ifdef INET6 802 case PF_INET6: 803 error = in6_pcballoc(so, &tcbtable); 804 if (error) 805 return (error); 806 inp = NULL; 807 in6p = sotoin6pcb(so); 808 break; 809 #endif 810 default: 811 return EAFNOSUPPORT; 812 } 813 if (inp) 814 tp = tcp_newtcpcb(family, (void *)inp); 815 #ifdef INET6 816 else if (in6p) 817 tp = tcp_newtcpcb(family, (void *)in6p); 818 #endif 819 else 820 tp = NULL; 821 822 if (tp == 0) { 823 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 824 825 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 826 #ifdef INET 827 if (inp) 828 in_pcbdetach(inp); 829 #endif 830 #ifdef INET6 831 if (in6p) 832 in6_pcbdetach(in6p); 833 #endif 834 so->so_state |= nofd; 835 return (ENOBUFS); 836 } 837 tp->t_state = TCPS_CLOSED; 838 return (0); 839 } 840 841 /* 842 * Initiate (or continue) disconnect. 843 * If embryonic state, just send reset (once). 844 * If in ``let data drain'' option and linger null, just drop. 845 * Otherwise (hard), mark socket disconnecting and drop 846 * current input data; switch states based on user close, and 847 * send segment to peer (with FIN). 848 */ 849 struct tcpcb * 850 tcp_disconnect(struct tcpcb *tp) 851 { 852 struct socket *so; 853 854 if (tp->t_inpcb) 855 so = tp->t_inpcb->inp_socket; 856 #ifdef INET6 857 else if (tp->t_in6pcb) 858 so = tp->t_in6pcb->in6p_socket; 859 #endif 860 else 861 so = NULL; 862 863 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 864 tp = tcp_close(tp); 865 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 866 tp = tcp_drop(tp, 0); 867 else { 868 soisdisconnecting(so); 869 sbflush(&so->so_rcv); 870 tp = tcp_usrclosed(tp); 871 if (tp) 872 (void) tcp_output(tp); 873 } 874 return (tp); 875 } 876 877 /* 878 * User issued close, and wish to trail through shutdown states: 879 * if never received SYN, just forget it. If got a SYN from peer, 880 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 881 * If already got a FIN from peer, then almost done; go to LAST_ACK 882 * state. In all other cases, have already sent FIN to peer (e.g. 883 * after PRU_SHUTDOWN), and just have to play tedious game waiting 884 * for peer to send FIN or not respond to keep-alives, etc. 885 * We can let the user exit from the close as soon as the FIN is acked. 886 */ 887 struct tcpcb * 888 tcp_usrclosed(struct tcpcb *tp) 889 { 890 891 switch (tp->t_state) { 892 893 case TCPS_CLOSED: 894 case TCPS_LISTEN: 895 case TCPS_SYN_SENT: 896 tp->t_state = TCPS_CLOSED; 897 tp = tcp_close(tp); 898 break; 899 900 case TCPS_SYN_RECEIVED: 901 case TCPS_ESTABLISHED: 902 tp->t_state = TCPS_FIN_WAIT_1; 903 break; 904 905 case TCPS_CLOSE_WAIT: 906 tp->t_state = TCPS_LAST_ACK; 907 break; 908 } 909 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 910 struct socket *so; 911 if (tp->t_inpcb) 912 so = tp->t_inpcb->inp_socket; 913 #ifdef INET6 914 else if (tp->t_in6pcb) 915 so = tp->t_in6pcb->in6p_socket; 916 #endif 917 else 918 so = NULL; 919 soisdisconnected(so); 920 /* 921 * If we are in FIN_WAIT_2, we arrived here because the 922 * application did a shutdown of the send side. Like the 923 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after 924 * a full close, we start a timer to make sure sockets are 925 * not left in FIN_WAIT_2 forever. 926 */ 927 if ((tp->t_state == TCPS_FIN_WAIT_2) && (tcp_maxidle > 0)) 928 TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle); 929 } 930 return (tp); 931 } 932 933 /* 934 * sysctl helper routine for net.inet.ip.mssdflt. it can't be less 935 * than 32. 936 */ 937 static int 938 sysctl_net_inet_tcp_mssdflt(SYSCTLFN_ARGS) 939 { 940 int error, mssdflt; 941 struct sysctlnode node; 942 943 mssdflt = tcp_mssdflt; 944 node = *rnode; 945 node.sysctl_data = &mssdflt; 946 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 947 if (error || newp == NULL) 948 return (error); 949 950 if (mssdflt < 32) 951 return (EINVAL); 952 tcp_mssdflt = mssdflt; 953 954 return (0); 955 } 956 957 /* 958 * sysctl helper routine for setting port related values under 959 * net.inet.ip and net.inet6.ip6. does basic range checking and does 960 * additional checks for each type. this code has placed in 961 * tcp_input.c since INET and INET6 both use the same tcp code. 962 * 963 * this helper is not static so that both inet and inet6 can use it. 964 */ 965 int 966 sysctl_net_inet_ip_ports(SYSCTLFN_ARGS) 967 { 968 int error, tmp; 969 int apmin, apmax; 970 #ifndef IPNOPRIVPORTS 971 int lpmin, lpmax; 972 #endif /* IPNOPRIVPORTS */ 973 struct sysctlnode node; 974 975 if (namelen != 0) 976 return (EINVAL); 977 978 switch (name[-3]) { 979 #ifdef INET 980 case PF_INET: 981 apmin = anonportmin; 982 apmax = anonportmax; 983 #ifndef IPNOPRIVPORTS 984 lpmin = lowportmin; 985 lpmax = lowportmax; 986 #endif /* IPNOPRIVPORTS */ 987 break; 988 #endif /* INET */ 989 #ifdef INET6 990 case PF_INET6: 991 apmin = ip6_anonportmin; 992 apmax = ip6_anonportmax; 993 #ifndef IPNOPRIVPORTS 994 lpmin = ip6_lowportmin; 995 lpmax = ip6_lowportmax; 996 #endif /* IPNOPRIVPORTS */ 997 break; 998 #endif /* INET6 */ 999 default: 1000 return (EINVAL); 1001 } 1002 1003 /* 1004 * insert temporary copy into node, perform lookup on 1005 * temporary, then restore pointer 1006 */ 1007 node = *rnode; 1008 tmp = *(int*)rnode->sysctl_data; 1009 node.sysctl_data = &tmp; 1010 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1011 if (error || newp == NULL) 1012 return (error); 1013 1014 /* 1015 * simple port range check 1016 */ 1017 if (tmp < 0 || tmp > 65535) 1018 return (EINVAL); 1019 1020 /* 1021 * per-node range checks 1022 */ 1023 switch (rnode->sysctl_num) { 1024 case IPCTL_ANONPORTMIN: 1025 if (tmp >= apmax) 1026 return (EINVAL); 1027 #ifndef IPNOPRIVPORTS 1028 if (tmp < IPPORT_RESERVED) 1029 return (EINVAL); 1030 #endif /* IPNOPRIVPORTS */ 1031 break; 1032 1033 case IPCTL_ANONPORTMAX: 1034 if (apmin >= tmp) 1035 return (EINVAL); 1036 #ifndef IPNOPRIVPORTS 1037 if (tmp < IPPORT_RESERVED) 1038 return (EINVAL); 1039 #endif /* IPNOPRIVPORTS */ 1040 break; 1041 1042 #ifndef IPNOPRIVPORTS 1043 case IPCTL_LOWPORTMIN: 1044 if (tmp >= lpmax || 1045 tmp > IPPORT_RESERVEDMAX || 1046 tmp < IPPORT_RESERVEDMIN) 1047 return (EINVAL); 1048 break; 1049 1050 case IPCTL_LOWPORTMAX: 1051 if (lpmin >= tmp || 1052 tmp > IPPORT_RESERVEDMAX || 1053 tmp < IPPORT_RESERVEDMIN) 1054 return (EINVAL); 1055 break; 1056 #endif /* IPNOPRIVPORTS */ 1057 1058 default: 1059 return (EINVAL); 1060 } 1061 1062 *(int*)rnode->sysctl_data = tmp; 1063 1064 return (0); 1065 } 1066 1067 /* 1068 * sysctl helper routine for the net.inet.tcp.ident and 1069 * net.inet6.tcp6.ident nodes. contains backwards compat code for the 1070 * old way of looking up the ident information for ipv4 which involves 1071 * stuffing the port/addr pairs into the mib lookup. 1072 */ 1073 static int 1074 sysctl_net_inet_tcp_ident(SYSCTLFN_ARGS) 1075 { 1076 #ifdef INET 1077 struct inpcb *inb; 1078 struct sockaddr_in *si4[2]; 1079 #endif /* INET */ 1080 #ifdef INET6 1081 struct in6pcb *in6b; 1082 struct sockaddr_in6 *si6[2]; 1083 #endif /* INET6 */ 1084 struct sockaddr_storage sa[2]; 1085 struct socket *sockp; 1086 size_t sz; 1087 uid_t uid; 1088 int error, pf; 1089 1090 if (namelen != 4 && namelen != 0) 1091 return (EINVAL); 1092 if (name[-2] != IPPROTO_TCP) 1093 return (EINVAL); 1094 pf = name[-3]; 1095 1096 /* old style lookup, ipv4 only */ 1097 if (namelen == 4) { 1098 #ifdef INET 1099 struct in_addr laddr, raddr; 1100 u_int lport, rport; 1101 1102 if (pf != PF_INET) 1103 return (EPROTONOSUPPORT); 1104 raddr.s_addr = (uint32_t)name[0]; 1105 rport = (u_int)name[1]; 1106 laddr.s_addr = (uint32_t)name[2]; 1107 lport = (u_int)name[3]; 1108 inb = in_pcblookup_connect(&tcbtable, raddr, rport, 1109 laddr, lport); 1110 if (inb == NULL || (sockp = inb->inp_socket) == NULL) 1111 return (ESRCH); 1112 uid = sockp->so_uidinfo->ui_uid; 1113 if (oldp) { 1114 sz = MIN(sizeof(uid), *oldlenp); 1115 error = copyout(&uid, oldp, sz); 1116 if (error) 1117 return (error); 1118 } 1119 *oldlenp = sizeof(uid); 1120 return (0); 1121 #else /* INET */ 1122 return (EINVAL); 1123 #endif /* INET */ 1124 } 1125 1126 if (newp == NULL || newlen != sizeof(sa)) 1127 return (EINVAL); 1128 error = copyin(newp, &sa, newlen); 1129 if (error) 1130 return (error); 1131 1132 /* 1133 * requested families must match 1134 */ 1135 if (pf != sa[0].ss_family || sa[0].ss_family != sa[1].ss_family) 1136 return (EINVAL); 1137 1138 switch (pf) { 1139 #ifdef INET 1140 case PF_INET: 1141 si4[0] = (struct sockaddr_in*)&sa[0]; 1142 si4[1] = (struct sockaddr_in*)&sa[1]; 1143 if (si4[0]->sin_len != sizeof(*si4[0]) || 1144 si4[0]->sin_len != si4[1]->sin_len) 1145 return (EINVAL); 1146 inb = in_pcblookup_connect(&tcbtable, 1147 si4[0]->sin_addr, si4[0]->sin_port, 1148 si4[1]->sin_addr, si4[1]->sin_port); 1149 if (inb == NULL || (sockp = inb->inp_socket) == NULL) 1150 return (ESRCH); 1151 break; 1152 #endif /* INET */ 1153 #ifdef INET6 1154 case PF_INET6: 1155 si6[0] = (struct sockaddr_in6*)&sa[0]; 1156 si6[1] = (struct sockaddr_in6*)&sa[1]; 1157 if (si6[0]->sin6_len != sizeof(*si6[0]) || 1158 si6[0]->sin6_len != si6[1]->sin6_len) 1159 return (EINVAL); 1160 in6b = in6_pcblookup_connect(&tcbtable, 1161 &si6[0]->sin6_addr, si6[0]->sin6_port, 1162 &si6[1]->sin6_addr, si6[1]->sin6_port, 0); 1163 if (in6b == NULL || (sockp = in6b->in6p_socket) == NULL) 1164 return (ESRCH); 1165 break; 1166 #endif /* INET6 */ 1167 default: 1168 return (EPROTONOSUPPORT); 1169 } 1170 1171 uid = sockp->so_uidinfo->ui_uid; 1172 if (oldp) { 1173 sz = MIN(sizeof(uid), *oldlenp); 1174 error = copyout(&uid, oldp, sz); 1175 if (error) 1176 return (error); 1177 } 1178 *oldlenp = sizeof(uid); 1179 1180 return (0); 1181 } 1182 1183 /* 1184 * sysctl helper for the inet and inet6 pcblists. handles tcp/udp and 1185 * inet/inet6, as well as raw pcbs for each. specifically not 1186 * declared static so that raw sockets and udp/udp6 can use it as 1187 * well. 1188 */ 1189 int 1190 sysctl_inpcblist(SYSCTLFN_ARGS) 1191 { 1192 #ifdef INET 1193 struct sockaddr_in *in; 1194 const struct inpcb *inp; 1195 #endif 1196 #ifdef INET6 1197 struct sockaddr_in6 *in6; 1198 const struct in6pcb *in6p; 1199 #endif 1200 /* 1201 * sysctl_data is const, but CIRCLEQ_FOREACH can't use a const 1202 * struct inpcbtable pointer, so we have to discard const. :-/ 1203 */ 1204 struct inpcbtable *pcbtbl = __UNCONST(rnode->sysctl_data); 1205 const struct inpcb_hdr *inph; 1206 struct tcpcb *tp; 1207 struct kinfo_pcb pcb; 1208 char *dp; 1209 u_int op, arg; 1210 size_t len, needed, elem_size, out_size; 1211 int error, elem_count, pf, proto, pf2; 1212 1213 if (namelen != 4) 1214 return (EINVAL); 1215 1216 error = 0; 1217 dp = oldp; 1218 len = (oldp != NULL) ? *oldlenp : 0; 1219 op = name[0]; 1220 arg = name[1]; 1221 elem_size = name[2]; 1222 elem_count = name[3]; 1223 out_size = MIN(sizeof(pcb), elem_size); 1224 needed = 0; 1225 1226 elem_count = INT_MAX; 1227 elem_size = out_size = sizeof(pcb); 1228 1229 if (namelen == 1 && name[0] == CTL_QUERY) 1230 return (sysctl_query(SYSCTLFN_CALL(rnode))); 1231 1232 if (name - oname != 4) 1233 return (EINVAL); 1234 1235 pf = oname[1]; 1236 proto = oname[2]; 1237 pf2 = (oldp == NULL) ? 0 : pf; 1238 1239 CIRCLEQ_FOREACH(inph, &pcbtbl->inpt_queue, inph_queue) { 1240 #ifdef INET 1241 inp = (const struct inpcb *)inph; 1242 #endif 1243 #ifdef INET6 1244 in6p = (const struct in6pcb *)inph; 1245 #endif 1246 1247 if (inph->inph_af != pf) 1248 continue; 1249 1250 memset(&pcb, 0, sizeof(pcb)); 1251 1252 pcb.ki_family = pf; 1253 pcb.ki_type = proto; 1254 1255 switch (pf2) { 1256 case 0: 1257 /* just probing for size */ 1258 break; 1259 #ifdef INET 1260 case PF_INET: 1261 pcb.ki_family = inp->inp_socket->so_proto-> 1262 pr_domain->dom_family; 1263 pcb.ki_type = inp->inp_socket->so_proto-> 1264 pr_type; 1265 pcb.ki_protocol = inp->inp_socket->so_proto-> 1266 pr_protocol; 1267 pcb.ki_pflags = inp->inp_flags; 1268 1269 pcb.ki_sostate = inp->inp_socket->so_state; 1270 pcb.ki_prstate = inp->inp_state; 1271 if (proto == IPPROTO_TCP) { 1272 tp = intotcpcb(inp); 1273 pcb.ki_tstate = tp->t_state; 1274 pcb.ki_tflags = tp->t_flags; 1275 } 1276 1277 pcb.ki_pcbaddr = PTRTOUINT64(inp); 1278 pcb.ki_ppcbaddr = PTRTOUINT64(inp->inp_ppcb); 1279 pcb.ki_sockaddr = PTRTOUINT64(inp->inp_socket); 1280 1281 pcb.ki_rcvq = inp->inp_socket->so_rcv.sb_cc; 1282 pcb.ki_sndq = inp->inp_socket->so_snd.sb_cc; 1283 1284 in = satosin(&pcb.ki_src); 1285 in->sin_len = sizeof(*in); 1286 in->sin_family = pf; 1287 in->sin_port = inp->inp_lport; 1288 in->sin_addr = inp->inp_laddr; 1289 if (pcb.ki_prstate >= INP_CONNECTED) { 1290 in = satosin(&pcb.ki_dst); 1291 in->sin_len = sizeof(*in); 1292 in->sin_family = pf; 1293 in->sin_port = inp->inp_fport; 1294 in->sin_addr = inp->inp_faddr; 1295 } 1296 break; 1297 #endif 1298 #ifdef INET6 1299 case PF_INET6: 1300 pcb.ki_family = in6p->in6p_socket->so_proto-> 1301 pr_domain->dom_family; 1302 pcb.ki_type = in6p->in6p_socket->so_proto->pr_type; 1303 pcb.ki_protocol = in6p->in6p_socket->so_proto-> 1304 pr_protocol; 1305 pcb.ki_pflags = in6p->in6p_flags; 1306 1307 pcb.ki_sostate = in6p->in6p_socket->so_state; 1308 pcb.ki_prstate = in6p->in6p_state; 1309 if (proto == IPPROTO_TCP) { 1310 tp = in6totcpcb(in6p); 1311 pcb.ki_tstate = tp->t_state; 1312 pcb.ki_tflags = tp->t_flags; 1313 } 1314 1315 pcb.ki_pcbaddr = PTRTOUINT64(in6p); 1316 pcb.ki_ppcbaddr = PTRTOUINT64(in6p->in6p_ppcb); 1317 pcb.ki_sockaddr = PTRTOUINT64(in6p->in6p_socket); 1318 1319 pcb.ki_rcvq = in6p->in6p_socket->so_rcv.sb_cc; 1320 pcb.ki_sndq = in6p->in6p_socket->so_snd.sb_cc; 1321 1322 in6 = satosin6(&pcb.ki_src); 1323 in6->sin6_len = sizeof(*in6); 1324 in6->sin6_family = pf; 1325 in6->sin6_port = in6p->in6p_lport; 1326 in6->sin6_flowinfo = in6p->in6p_flowinfo; 1327 in6->sin6_addr = in6p->in6p_laddr; 1328 in6->sin6_scope_id = 0; /* XXX? */ 1329 1330 if (pcb.ki_prstate >= IN6P_CONNECTED) { 1331 in6 = satosin6(&pcb.ki_dst); 1332 in6->sin6_len = sizeof(*in6); 1333 in6->sin6_family = pf; 1334 in6->sin6_port = in6p->in6p_fport; 1335 in6->sin6_flowinfo = in6p->in6p_flowinfo; 1336 in6->sin6_addr = in6p->in6p_faddr; 1337 in6->sin6_scope_id = 0; /* XXX? */ 1338 } 1339 break; 1340 #endif 1341 } 1342 1343 if (len >= elem_size && elem_count > 0) { 1344 error = copyout(&pcb, dp, out_size); 1345 if (error) 1346 return (error); 1347 dp += elem_size; 1348 len -= elem_size; 1349 } 1350 if (elem_count > 0) { 1351 needed += elem_size; 1352 if (elem_count != INT_MAX) 1353 elem_count--; 1354 } 1355 } 1356 1357 *oldlenp = needed; 1358 if (oldp == NULL) 1359 *oldlenp += PCB_SLOP * sizeof(struct kinfo_pcb); 1360 1361 return (error); 1362 } 1363 1364 /* 1365 * this (second stage) setup routine is a replacement for tcp_sysctl() 1366 * (which is currently used for ipv4 and ipv6) 1367 */ 1368 static void 1369 sysctl_net_inet_tcp_setup2(struct sysctllog **clog, int pf, const char *pfname, 1370 const char *tcpname) 1371 { 1372 const struct sysctlnode *sack_node; 1373 1374 sysctl_createv(clog, 0, NULL, NULL, 1375 CTLFLAG_PERMANENT, 1376 CTLTYPE_NODE, "net", NULL, 1377 NULL, 0, NULL, 0, 1378 CTL_NET, CTL_EOL); 1379 sysctl_createv(clog, 0, NULL, NULL, 1380 CTLFLAG_PERMANENT, 1381 CTLTYPE_NODE, pfname, NULL, 1382 NULL, 0, NULL, 0, 1383 CTL_NET, pf, CTL_EOL); 1384 sysctl_createv(clog, 0, NULL, NULL, 1385 CTLFLAG_PERMANENT, 1386 CTLTYPE_NODE, tcpname, 1387 SYSCTL_DESCR("TCP related settings"), 1388 NULL, 0, NULL, 0, 1389 CTL_NET, pf, IPPROTO_TCP, CTL_EOL); 1390 1391 sysctl_createv(clog, 0, NULL, NULL, 1392 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1393 CTLTYPE_INT, "rfc1323", 1394 SYSCTL_DESCR("Enable RFC1323 TCP extensions"), 1395 NULL, 0, &tcp_do_rfc1323, 0, 1396 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RFC1323, CTL_EOL); 1397 sysctl_createv(clog, 0, NULL, NULL, 1398 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1399 CTLTYPE_INT, "sendspace", 1400 SYSCTL_DESCR("Default TCP send buffer size"), 1401 NULL, 0, &tcp_sendspace, 0, 1402 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SENDSPACE, CTL_EOL); 1403 sysctl_createv(clog, 0, NULL, NULL, 1404 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1405 CTLTYPE_INT, "recvspace", 1406 SYSCTL_DESCR("Default TCP receive buffer size"), 1407 NULL, 0, &tcp_recvspace, 0, 1408 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RECVSPACE, CTL_EOL); 1409 sysctl_createv(clog, 0, NULL, NULL, 1410 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1411 CTLTYPE_INT, "mssdflt", 1412 SYSCTL_DESCR("Default maximum segment size"), 1413 sysctl_net_inet_tcp_mssdflt, 0, &tcp_mssdflt, 0, 1414 CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSSDFLT, CTL_EOL); 1415 sysctl_createv(clog, 0, NULL, NULL, 1416 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1417 CTLTYPE_INT, "syn_cache_limit", 1418 SYSCTL_DESCR("Maximum number of entries in the TCP " 1419 "compressed state engine"), 1420 NULL, 0, &tcp_syn_cache_limit, 0, 1421 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_CACHE_LIMIT, 1422 CTL_EOL); 1423 sysctl_createv(clog, 0, NULL, NULL, 1424 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1425 CTLTYPE_INT, "syn_bucket_limit", 1426 SYSCTL_DESCR("Maximum number of entries per hash " 1427 "bucket in the TCP compressed state " 1428 "engine"), 1429 NULL, 0, &tcp_syn_bucket_limit, 0, 1430 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_BUCKET_LIMIT, 1431 CTL_EOL); 1432 #if 0 /* obsoleted */ 1433 sysctl_createv(clog, 0, NULL, NULL, 1434 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1435 CTLTYPE_INT, "syn_cache_interval", 1436 SYSCTL_DESCR("TCP compressed state engine's timer interval"), 1437 NULL, 0, &tcp_syn_cache_interval, 0, 1438 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_CACHE_INTER, 1439 CTL_EOL); 1440 #endif 1441 sysctl_createv(clog, 0, NULL, NULL, 1442 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1443 CTLTYPE_INT, "init_win", 1444 SYSCTL_DESCR("Initial TCP congestion window"), 1445 NULL, 0, &tcp_init_win, 0, 1446 CTL_NET, pf, IPPROTO_TCP, TCPCTL_INIT_WIN, CTL_EOL); 1447 sysctl_createv(clog, 0, NULL, NULL, 1448 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1449 CTLTYPE_INT, "mss_ifmtu", 1450 SYSCTL_DESCR("Use interface MTU for calculating MSS"), 1451 NULL, 0, &tcp_mss_ifmtu, 0, 1452 CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSS_IFMTU, CTL_EOL); 1453 sysctl_createv(clog, 0, NULL, &sack_node, 1454 CTLFLAG_PERMANENT, 1455 CTLTYPE_NODE, "sack", 1456 SYSCTL_DESCR("RFC2018 Selective ACKnowledgement tunables"), 1457 NULL, 0, NULL, 0, 1458 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_EOL); 1459 sysctl_createv(clog, 0, NULL, NULL, 1460 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1461 CTLTYPE_INT, "win_scale", 1462 SYSCTL_DESCR("Use RFC1323 window scale options"), 1463 NULL, 0, &tcp_do_win_scale, 0, 1464 CTL_NET, pf, IPPROTO_TCP, TCPCTL_WSCALE, CTL_EOL); 1465 sysctl_createv(clog, 0, NULL, NULL, 1466 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1467 CTLTYPE_INT, "timestamps", 1468 SYSCTL_DESCR("Use RFC1323 time stamp options"), 1469 NULL, 0, &tcp_do_timestamps, 0, 1470 CTL_NET, pf, IPPROTO_TCP, TCPCTL_TSTAMP, CTL_EOL); 1471 sysctl_createv(clog, 0, NULL, NULL, 1472 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1473 CTLTYPE_INT, "compat_42", 1474 SYSCTL_DESCR("Enable workarounds for 4.2BSD TCP bugs"), 1475 NULL, 0, &tcp_compat_42, 0, 1476 CTL_NET, pf, IPPROTO_TCP, TCPCTL_COMPAT_42, CTL_EOL); 1477 sysctl_createv(clog, 0, NULL, NULL, 1478 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1479 CTLTYPE_INT, "cwm", 1480 SYSCTL_DESCR("Hughes/Touch/Heidemann Congestion Window " 1481 "Monitoring"), 1482 NULL, 0, &tcp_cwm, 0, 1483 CTL_NET, pf, IPPROTO_TCP, TCPCTL_CWM, CTL_EOL); 1484 sysctl_createv(clog, 0, NULL, NULL, 1485 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1486 CTLTYPE_INT, "cwm_burstsize", 1487 SYSCTL_DESCR("Congestion Window Monitoring allowed " 1488 "burst count in packets"), 1489 NULL, 0, &tcp_cwm_burstsize, 0, 1490 CTL_NET, pf, IPPROTO_TCP, TCPCTL_CWM_BURSTSIZE, 1491 CTL_EOL); 1492 sysctl_createv(clog, 0, NULL, NULL, 1493 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1494 CTLTYPE_INT, "ack_on_push", 1495 SYSCTL_DESCR("Immediately return ACK when PSH is " 1496 "received"), 1497 NULL, 0, &tcp_ack_on_push, 0, 1498 CTL_NET, pf, IPPROTO_TCP, TCPCTL_ACK_ON_PUSH, CTL_EOL); 1499 sysctl_createv(clog, 0, NULL, NULL, 1500 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1501 CTLTYPE_INT, "keepidle", 1502 SYSCTL_DESCR("Allowed connection idle ticks before a " 1503 "keepalive probe is sent"), 1504 NULL, 0, &tcp_keepidle, 0, 1505 CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPIDLE, CTL_EOL); 1506 sysctl_createv(clog, 0, NULL, NULL, 1507 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1508 CTLTYPE_INT, "keepintvl", 1509 SYSCTL_DESCR("Ticks before next keepalive probe is sent"), 1510 NULL, 0, &tcp_keepintvl, 0, 1511 CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPINTVL, CTL_EOL); 1512 sysctl_createv(clog, 0, NULL, NULL, 1513 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1514 CTLTYPE_INT, "keepcnt", 1515 SYSCTL_DESCR("Number of keepalive probes to send"), 1516 NULL, 0, &tcp_keepcnt, 0, 1517 CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPCNT, CTL_EOL); 1518 sysctl_createv(clog, 0, NULL, NULL, 1519 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 1520 CTLTYPE_INT, "slowhz", 1521 SYSCTL_DESCR("Keepalive ticks per second"), 1522 NULL, PR_SLOWHZ, NULL, 0, 1523 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SLOWHZ, CTL_EOL); 1524 sysctl_createv(clog, 0, NULL, NULL, 1525 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1526 CTLTYPE_INT, "newreno", 1527 SYSCTL_DESCR("NewReno congestion control algorithm"), 1528 NULL, 0, &tcp_do_newreno, 0, 1529 CTL_NET, pf, IPPROTO_TCP, TCPCTL_NEWRENO, CTL_EOL); 1530 sysctl_createv(clog, 0, NULL, NULL, 1531 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1532 CTLTYPE_INT, "log_refused", 1533 SYSCTL_DESCR("Log refused TCP connections"), 1534 NULL, 0, &tcp_log_refused, 0, 1535 CTL_NET, pf, IPPROTO_TCP, TCPCTL_LOG_REFUSED, CTL_EOL); 1536 #if 0 /* obsoleted */ 1537 sysctl_createv(clog, 0, NULL, NULL, 1538 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1539 CTLTYPE_INT, "rstratelimit", NULL, 1540 NULL, 0, &tcp_rst_ratelim, 0, 1541 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RSTRATELIMIT, CTL_EOL); 1542 #endif 1543 sysctl_createv(clog, 0, NULL, NULL, 1544 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1545 CTLTYPE_INT, "rstppslimit", 1546 SYSCTL_DESCR("Maximum number of RST packets to send " 1547 "per second"), 1548 NULL, 0, &tcp_rst_ppslim, 0, 1549 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RSTPPSLIMIT, CTL_EOL); 1550 sysctl_createv(clog, 0, NULL, NULL, 1551 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1552 CTLTYPE_INT, "delack_ticks", 1553 SYSCTL_DESCR("Number of ticks to delay sending an ACK"), 1554 NULL, 0, &tcp_delack_ticks, 0, 1555 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DELACK_TICKS, CTL_EOL); 1556 sysctl_createv(clog, 0, NULL, NULL, 1557 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1558 CTLTYPE_INT, "init_win_local", 1559 SYSCTL_DESCR("Initial TCP window size (in segments)"), 1560 NULL, 0, &tcp_init_win_local, 0, 1561 CTL_NET, pf, IPPROTO_TCP, TCPCTL_INIT_WIN_LOCAL, 1562 CTL_EOL); 1563 sysctl_createv(clog, 0, NULL, NULL, 1564 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1565 CTLTYPE_STRUCT, "ident", 1566 SYSCTL_DESCR("RFC1413 Identification Protocol lookups"), 1567 sysctl_net_inet_tcp_ident, 0, NULL, sizeof(uid_t), 1568 CTL_NET, pf, IPPROTO_TCP, TCPCTL_IDENT, CTL_EOL); 1569 sysctl_createv(clog, 0, NULL, NULL, 1570 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1571 CTLTYPE_INT, "do_loopback_cksum", 1572 SYSCTL_DESCR("Perform TCP checksum on loopback"), 1573 NULL, 0, &tcp_do_loopback_cksum, 0, 1574 CTL_NET, pf, IPPROTO_TCP, TCPCTL_LOOPBACKCKSUM, 1575 CTL_EOL); 1576 sysctl_createv(clog, 0, NULL, NULL, 1577 CTLFLAG_PERMANENT, 1578 CTLTYPE_STRUCT, "pcblist", 1579 SYSCTL_DESCR("TCP protocol control block list"), 1580 sysctl_inpcblist, 0, &tcbtable, 0, 1581 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, 1582 CTL_EOL); 1583 1584 /* SACK gets it's own little subtree. */ 1585 sysctl_createv(clog, 0, NULL, &sack_node, 1586 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1587 CTLTYPE_INT, "enable", 1588 SYSCTL_DESCR("Enable RFC2018 Selective ACKnowledgement"), 1589 NULL, 0, &tcp_do_sack, 0, 1590 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 1591 sysctl_createv(clog, 0, NULL, &sack_node, 1592 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1593 CTLTYPE_INT, "maxholes", 1594 SYSCTL_DESCR("Maximum number of TCP SACK holes allowed per connection"), 1595 NULL, 0, &tcp_sack_tp_maxholes, 0, 1596 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 1597 sysctl_createv(clog, 0, NULL, &sack_node, 1598 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1599 CTLTYPE_INT, "globalmaxholes", 1600 SYSCTL_DESCR("Global maximum number of TCP SACK holes"), 1601 NULL, 0, &tcp_sack_globalmaxholes, 0, 1602 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 1603 sysctl_createv(clog, 0, NULL, &sack_node, 1604 CTLFLAG_PERMANENT, 1605 CTLTYPE_INT, "globalholes", 1606 SYSCTL_DESCR("Global number of TCP SACK holes"), 1607 NULL, 0, &tcp_sack_globalholes, 0, 1608 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 1609 1610 sysctl_createv(clog, 0, NULL, NULL, 1611 CTLFLAG_PERMANENT, 1612 CTLTYPE_STRUCT, "stats", 1613 SYSCTL_DESCR("TCP statistics"), 1614 NULL, 0, &tcpstat, sizeof(tcpstat), 1615 CTL_NET, pf, IPPROTO_TCP, TCPCTL_STATS, 1616 CTL_EOL); 1617 } 1618 1619 /* 1620 * Sysctl for tcp variables. 1621 */ 1622 #ifdef INET 1623 SYSCTL_SETUP(sysctl_net_inet_tcp_setup, "sysctl net.inet.tcp subtree setup") 1624 { 1625 1626 sysctl_net_inet_tcp_setup2(clog, PF_INET, "inet", "tcp"); 1627 } 1628 #endif /* INET */ 1629 1630 #ifdef INET6 1631 SYSCTL_SETUP(sysctl_net_inet6_tcp6_setup, "sysctl net.inet6.tcp6 subtree setup") 1632 { 1633 1634 sysctl_net_inet_tcp_setup2(clog, PF_INET6, "inet6", "tcp6"); 1635 } 1636 #endif /* INET6 */ 1637