1 /* $NetBSD: tcp_usrreq.c,v 1.122 2006/09/13 10:07:42 elad Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1997, 1998, 2005, 2006 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to The NetBSD Foundation 37 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation 38 * Facility, NASA Ames Research Center. 39 * This code is derived from software contributed to The NetBSD Foundation 40 * by Charles M. Hannum. 41 * This code is derived from software contributed to The NetBSD Foundation 42 * by Rui Paulo. 43 * 44 * Redistribution and use in source and binary forms, with or without 45 * modification, are permitted provided that the following conditions 46 * are met: 47 * 1. Redistributions of source code must retain the above copyright 48 * notice, this list of conditions and the following disclaimer. 49 * 2. Redistributions in binary form must reproduce the above copyright 50 * notice, this list of conditions and the following disclaimer in the 51 * documentation and/or other materials provided with the distribution. 52 * 3. All advertising materials mentioning features or use of this software 53 * must display the following acknowledgement: 54 * This product includes software developed by the NetBSD 55 * Foundation, Inc. and its contributors. 56 * 4. Neither the name of The NetBSD Foundation nor the names of its 57 * contributors may be used to endorse or promote products derived 58 * from this software without specific prior written permission. 59 * 60 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 61 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 62 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 63 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 64 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 65 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 66 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 67 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 68 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 69 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 70 * POSSIBILITY OF SUCH DAMAGE. 71 */ 72 73 /* 74 * Copyright (c) 1982, 1986, 1988, 1993, 1995 75 * The Regents of the University of California. All rights reserved. 76 * 77 * Redistribution and use in source and binary forms, with or without 78 * modification, are permitted provided that the following conditions 79 * are met: 80 * 1. Redistributions of source code must retain the above copyright 81 * notice, this list of conditions and the following disclaimer. 82 * 2. Redistributions in binary form must reproduce the above copyright 83 * notice, this list of conditions and the following disclaimer in the 84 * documentation and/or other materials provided with the distribution. 85 * 3. Neither the name of the University nor the names of its contributors 86 * may be used to endorse or promote products derived from this software 87 * without specific prior written permission. 88 * 89 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 90 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 91 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 92 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 93 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 94 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 95 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 96 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 97 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 98 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 99 * SUCH DAMAGE. 100 * 101 * @(#)tcp_usrreq.c 8.5 (Berkeley) 6/21/95 102 */ 103 104 #include <sys/cdefs.h> 105 __KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.122 2006/09/13 10:07:42 elad Exp $"); 106 107 #include "opt_inet.h" 108 #include "opt_ipsec.h" 109 #include "opt_tcp_debug.h" 110 #include "opt_mbuftrace.h" 111 112 #include <sys/param.h> 113 #include <sys/systm.h> 114 #include <sys/kernel.h> 115 #include <sys/malloc.h> 116 #include <sys/mbuf.h> 117 #include <sys/socket.h> 118 #include <sys/socketvar.h> 119 #include <sys/protosw.h> 120 #include <sys/errno.h> 121 #include <sys/stat.h> 122 #include <sys/proc.h> 123 #include <sys/domain.h> 124 #include <sys/sysctl.h> 125 #include <sys/kauth.h> 126 127 #include <net/if.h> 128 #include <net/route.h> 129 130 #include <netinet/in.h> 131 #include <netinet/in_systm.h> 132 #include <netinet/in_var.h> 133 #include <netinet/ip.h> 134 #include <netinet/in_pcb.h> 135 #include <netinet/ip_var.h> 136 #include <netinet/in_offload.h> 137 138 #ifdef INET6 139 #ifndef INET 140 #include <netinet/in.h> 141 #endif 142 #include <netinet/ip6.h> 143 #include <netinet6/in6_pcb.h> 144 #include <netinet6/ip6_var.h> 145 #endif 146 147 #include <netinet/tcp.h> 148 #include <netinet/tcp_fsm.h> 149 #include <netinet/tcp_seq.h> 150 #include <netinet/tcp_timer.h> 151 #include <netinet/tcp_var.h> 152 #include <netinet/tcpip.h> 153 #include <netinet/tcp_debug.h> 154 155 #include "opt_tcp_space.h" 156 157 #ifdef IPSEC 158 #include <netinet6/ipsec.h> 159 #endif /*IPSEC*/ 160 161 /* 162 * TCP protocol interface to socket abstraction. 163 */ 164 165 /* 166 * Process a TCP user request for TCP tb. If this is a send request 167 * then m is the mbuf chain of send data. If this is a timer expiration 168 * (called from the software clock routine), then timertype tells which timer. 169 */ 170 /*ARGSUSED*/ 171 int 172 tcp_usrreq(struct socket *so, int req, 173 struct mbuf *m, struct mbuf *nam, struct mbuf *control, struct lwp *l) 174 { 175 struct inpcb *inp; 176 #ifdef INET6 177 struct in6pcb *in6p; 178 #endif 179 struct tcpcb *tp = NULL; 180 int s; 181 int error = 0; 182 #ifdef TCP_DEBUG 183 int ostate = 0; 184 #endif 185 int family; /* family of the socket */ 186 187 family = so->so_proto->pr_domain->dom_family; 188 189 if (req == PRU_CONTROL) { 190 switch (family) { 191 #ifdef INET 192 case PF_INET: 193 return (in_control(so, (long)m, (caddr_t)nam, 194 (struct ifnet *)control, l)); 195 #endif 196 #ifdef INET6 197 case PF_INET6: 198 return (in6_control(so, (long)m, (caddr_t)nam, 199 (struct ifnet *)control, l)); 200 #endif 201 default: 202 return EAFNOSUPPORT; 203 } 204 } 205 206 if (req == PRU_PURGEIF) { 207 switch (family) { 208 #ifdef INET 209 case PF_INET: 210 in_pcbpurgeif0(&tcbtable, (struct ifnet *)control); 211 in_purgeif((struct ifnet *)control); 212 in_pcbpurgeif(&tcbtable, (struct ifnet *)control); 213 break; 214 #endif 215 #ifdef INET6 216 case PF_INET6: 217 in6_pcbpurgeif0(&tcbtable, (struct ifnet *)control); 218 in6_purgeif((struct ifnet *)control); 219 in6_pcbpurgeif(&tcbtable, (struct ifnet *)control); 220 break; 221 #endif 222 default: 223 return (EAFNOSUPPORT); 224 } 225 return (0); 226 } 227 228 s = splsoftnet(); 229 switch (family) { 230 #ifdef INET 231 case PF_INET: 232 inp = sotoinpcb(so); 233 #ifdef INET6 234 in6p = NULL; 235 #endif 236 break; 237 #endif 238 #ifdef INET6 239 case PF_INET6: 240 inp = NULL; 241 in6p = sotoin6pcb(so); 242 break; 243 #endif 244 default: 245 splx(s); 246 return EAFNOSUPPORT; 247 } 248 249 #ifdef DIAGNOSTIC 250 #ifdef INET6 251 if (inp && in6p) 252 panic("tcp_usrreq: both inp and in6p set to non-NULL"); 253 #endif 254 if (req != PRU_SEND && req != PRU_SENDOOB && control) 255 panic("tcp_usrreq: unexpected control mbuf"); 256 #endif 257 /* 258 * When a TCP is attached to a socket, then there will be 259 * a (struct inpcb) pointed at by the socket, and this 260 * structure will point at a subsidary (struct tcpcb). 261 */ 262 #ifndef INET6 263 if (inp == 0 && req != PRU_ATTACH) 264 #else 265 if ((inp == 0 && in6p == 0) && req != PRU_ATTACH) 266 #endif 267 { 268 error = EINVAL; 269 goto release; 270 } 271 #ifdef INET 272 if (inp) { 273 tp = intotcpcb(inp); 274 /* WHAT IF TP IS 0? */ 275 #ifdef KPROF 276 tcp_acounts[tp->t_state][req]++; 277 #endif 278 #ifdef TCP_DEBUG 279 ostate = tp->t_state; 280 #endif 281 } 282 #endif 283 #ifdef INET6 284 if (in6p) { 285 tp = in6totcpcb(in6p); 286 /* WHAT IF TP IS 0? */ 287 #ifdef KPROF 288 tcp_acounts[tp->t_state][req]++; 289 #endif 290 #ifdef TCP_DEBUG 291 ostate = tp->t_state; 292 #endif 293 } 294 #endif 295 296 switch (req) { 297 298 /* 299 * TCP attaches to socket via PRU_ATTACH, reserving space, 300 * and an internet control block. 301 */ 302 case PRU_ATTACH: 303 #ifndef INET6 304 if (inp != 0) 305 #else 306 if (inp != 0 || in6p != 0) 307 #endif 308 { 309 error = EISCONN; 310 break; 311 } 312 error = tcp_attach(so); 313 if (error) 314 break; 315 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 316 so->so_linger = TCP_LINGERTIME; 317 tp = sototcpcb(so); 318 break; 319 320 /* 321 * PRU_DETACH detaches the TCP protocol from the socket. 322 */ 323 case PRU_DETACH: 324 tp = tcp_disconnect(tp); 325 break; 326 327 /* 328 * Give the socket an address. 329 */ 330 case PRU_BIND: 331 switch (family) { 332 #ifdef INET 333 case PF_INET: 334 error = in_pcbbind(inp, nam, l); 335 break; 336 #endif 337 #ifdef INET6 338 case PF_INET6: 339 error = in6_pcbbind(in6p, nam, l); 340 if (!error) { 341 /* mapped addr case */ 342 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr)) 343 tp->t_family = AF_INET; 344 else 345 tp->t_family = AF_INET6; 346 } 347 break; 348 #endif 349 } 350 break; 351 352 /* 353 * Prepare to accept connections. 354 */ 355 case PRU_LISTEN: 356 #ifdef INET 357 if (inp && inp->inp_lport == 0) { 358 error = in_pcbbind(inp, (struct mbuf *)0, 359 (struct lwp *)0); 360 if (error) 361 break; 362 } 363 #endif 364 #ifdef INET6 365 if (in6p && in6p->in6p_lport == 0) { 366 error = in6_pcbbind(in6p, (struct mbuf *)0, 367 (struct lwp *)0); 368 if (error) 369 break; 370 } 371 #endif 372 tp->t_state = TCPS_LISTEN; 373 break; 374 375 /* 376 * Initiate connection to peer. 377 * Create a template for use in transmissions on this connection. 378 * Enter SYN_SENT state, and mark socket as connecting. 379 * Start keep-alive timer, and seed output sequence space. 380 * Send initial segment on connection. 381 */ 382 case PRU_CONNECT: 383 #ifdef INET 384 if (inp) { 385 if (inp->inp_lport == 0) { 386 error = in_pcbbind(inp, (struct mbuf *)0, 387 (struct lwp *)0); 388 if (error) 389 break; 390 } 391 error = in_pcbconnect(inp, nam, l); 392 } 393 #endif 394 #ifdef INET6 395 if (in6p) { 396 if (in6p->in6p_lport == 0) { 397 error = in6_pcbbind(in6p, (struct mbuf *)0, 398 (struct lwp *)0); 399 if (error) 400 break; 401 } 402 error = in6_pcbconnect(in6p, nam, l); 403 if (!error) { 404 /* mapped addr case */ 405 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr)) 406 tp->t_family = AF_INET; 407 else 408 tp->t_family = AF_INET6; 409 } 410 } 411 #endif 412 if (error) 413 break; 414 tp->t_template = tcp_template(tp); 415 if (tp->t_template == 0) { 416 #ifdef INET 417 if (inp) 418 in_pcbdisconnect(inp); 419 #endif 420 #ifdef INET6 421 if (in6p) 422 in6_pcbdisconnect(in6p); 423 #endif 424 error = ENOBUFS; 425 break; 426 } 427 /* Compute window scaling to request. */ 428 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 429 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 430 tp->request_r_scale++; 431 soisconnecting(so); 432 tcpstat.tcps_connattempt++; 433 tp->t_state = TCPS_SYN_SENT; 434 TCP_TIMER_ARM(tp, TCPT_KEEP, TCPTV_KEEP_INIT); 435 tp->iss = tcp_new_iss(tp, 0); 436 tcp_sendseqinit(tp); 437 error = tcp_output(tp); 438 break; 439 440 /* 441 * Create a TCP connection between two sockets. 442 */ 443 case PRU_CONNECT2: 444 error = EOPNOTSUPP; 445 break; 446 447 /* 448 * Initiate disconnect from peer. 449 * If connection never passed embryonic stage, just drop; 450 * else if don't need to let data drain, then can just drop anyways, 451 * else have to begin TCP shutdown process: mark socket disconnecting, 452 * drain unread data, state switch to reflect user close, and 453 * send segment (e.g. FIN) to peer. Socket will be really disconnected 454 * when peer sends FIN and acks ours. 455 * 456 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 457 */ 458 case PRU_DISCONNECT: 459 tp = tcp_disconnect(tp); 460 break; 461 462 /* 463 * Accept a connection. Essentially all the work is 464 * done at higher levels; just return the address 465 * of the peer, storing through addr. 466 */ 467 case PRU_ACCEPT: 468 #ifdef INET 469 if (inp) 470 in_setpeeraddr(inp, nam); 471 #endif 472 #ifdef INET6 473 if (in6p) 474 in6_setpeeraddr(in6p, nam); 475 #endif 476 break; 477 478 /* 479 * Mark the connection as being incapable of further output. 480 */ 481 case PRU_SHUTDOWN: 482 socantsendmore(so); 483 tp = tcp_usrclosed(tp); 484 if (tp) 485 error = tcp_output(tp); 486 break; 487 488 /* 489 * After a receive, possibly send window update to peer. 490 */ 491 case PRU_RCVD: 492 /* 493 * soreceive() calls this function when a user receives 494 * ancillary data on a listening socket. We don't call 495 * tcp_output in such a case, since there is no header 496 * template for a listening socket and hence the kernel 497 * will panic. 498 */ 499 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0) 500 (void) tcp_output(tp); 501 break; 502 503 /* 504 * Do a send by putting data in output queue and updating urgent 505 * marker if URG set. Possibly send more data. 506 */ 507 case PRU_SEND: 508 if (control && control->m_len) { 509 m_freem(control); 510 m_freem(m); 511 error = EINVAL; 512 break; 513 } 514 sbappendstream(&so->so_snd, m); 515 error = tcp_output(tp); 516 break; 517 518 /* 519 * Abort the TCP. 520 */ 521 case PRU_ABORT: 522 tp = tcp_drop(tp, ECONNABORTED); 523 break; 524 525 case PRU_SENSE: 526 /* 527 * stat: don't bother with a blocksize. 528 */ 529 splx(s); 530 return (0); 531 532 case PRU_RCVOOB: 533 if (control && control->m_len) { 534 m_freem(control); 535 m_freem(m); 536 error = EINVAL; 537 break; 538 } 539 if ((so->so_oobmark == 0 && 540 (so->so_state & SS_RCVATMARK) == 0) || 541 so->so_options & SO_OOBINLINE || 542 tp->t_oobflags & TCPOOB_HADDATA) { 543 error = EINVAL; 544 break; 545 } 546 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 547 error = EWOULDBLOCK; 548 break; 549 } 550 m->m_len = 1; 551 *mtod(m, caddr_t) = tp->t_iobc; 552 if (((long)nam & MSG_PEEK) == 0) 553 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 554 break; 555 556 case PRU_SENDOOB: 557 if (sbspace(&so->so_snd) < -512) { 558 m_freem(m); 559 error = ENOBUFS; 560 break; 561 } 562 /* 563 * According to RFC961 (Assigned Protocols), 564 * the urgent pointer points to the last octet 565 * of urgent data. We continue, however, 566 * to consider it to indicate the first octet 567 * of data past the urgent section. 568 * Otherwise, snd_up should be one lower. 569 */ 570 sbappendstream(&so->so_snd, m); 571 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 572 tp->t_force = 1; 573 error = tcp_output(tp); 574 tp->t_force = 0; 575 break; 576 577 case PRU_SOCKADDR: 578 #ifdef INET 579 if (inp) 580 in_setsockaddr(inp, nam); 581 #endif 582 #ifdef INET6 583 if (in6p) 584 in6_setsockaddr(in6p, nam); 585 #endif 586 break; 587 588 case PRU_PEERADDR: 589 #ifdef INET 590 if (inp) 591 in_setpeeraddr(inp, nam); 592 #endif 593 #ifdef INET6 594 if (in6p) 595 in6_setpeeraddr(in6p, nam); 596 #endif 597 break; 598 599 default: 600 panic("tcp_usrreq"); 601 } 602 #ifdef TCP_DEBUG 603 if (tp && (so->so_options & SO_DEBUG)) 604 tcp_trace(TA_USER, ostate, tp, NULL, req); 605 #endif 606 607 release: 608 splx(s); 609 return (error); 610 } 611 612 int 613 tcp_ctloutput(int op, struct socket *so, int level, int optname, 614 struct mbuf **mp) 615 { 616 int error = 0, s; 617 struct inpcb *inp; 618 #ifdef INET6 619 struct in6pcb *in6p; 620 #endif 621 struct tcpcb *tp; 622 struct mbuf *m; 623 int i; 624 int family; /* family of the socket */ 625 626 family = so->so_proto->pr_domain->dom_family; 627 628 s = splsoftnet(); 629 switch (family) { 630 #ifdef INET 631 case PF_INET: 632 inp = sotoinpcb(so); 633 #ifdef INET6 634 in6p = NULL; 635 #endif 636 break; 637 #endif 638 #ifdef INET6 639 case PF_INET6: 640 inp = NULL; 641 in6p = sotoin6pcb(so); 642 break; 643 #endif 644 default: 645 splx(s); 646 return EAFNOSUPPORT; 647 } 648 #ifndef INET6 649 if (inp == NULL) 650 #else 651 if (inp == NULL && in6p == NULL) 652 #endif 653 { 654 splx(s); 655 if (op == PRCO_SETOPT && *mp) 656 (void) m_free(*mp); 657 return (ECONNRESET); 658 } 659 if (level != IPPROTO_TCP) { 660 switch (family) { 661 #ifdef INET 662 case PF_INET: 663 error = ip_ctloutput(op, so, level, optname, mp); 664 break; 665 #endif 666 #ifdef INET6 667 case PF_INET6: 668 error = ip6_ctloutput(op, so, level, optname, mp); 669 break; 670 #endif 671 } 672 splx(s); 673 return (error); 674 } 675 if (inp) 676 tp = intotcpcb(inp); 677 #ifdef INET6 678 else if (in6p) 679 tp = in6totcpcb(in6p); 680 #endif 681 else 682 tp = NULL; 683 684 switch (op) { 685 686 case PRCO_SETOPT: 687 m = *mp; 688 switch (optname) { 689 690 #ifdef TCP_SIGNATURE 691 case TCP_MD5SIG: 692 if (m == NULL || m->m_len < sizeof (int)) 693 error = EINVAL; 694 if (error) 695 break; 696 if (*mtod(m, int *) > 0) 697 tp->t_flags |= TF_SIGNATURE; 698 else 699 tp->t_flags &= ~TF_SIGNATURE; 700 break; 701 #endif /* TCP_SIGNATURE */ 702 703 case TCP_NODELAY: 704 if (m == NULL || m->m_len < sizeof (int)) 705 error = EINVAL; 706 else if (*mtod(m, int *)) 707 tp->t_flags |= TF_NODELAY; 708 else 709 tp->t_flags &= ~TF_NODELAY; 710 break; 711 712 case TCP_MAXSEG: 713 if (m && (i = *mtod(m, int *)) > 0 && 714 i <= tp->t_peermss) 715 tp->t_peermss = i; /* limit on send size */ 716 else 717 error = EINVAL; 718 break; 719 720 default: 721 error = ENOPROTOOPT; 722 break; 723 } 724 if (m) 725 (void) m_free(m); 726 break; 727 728 case PRCO_GETOPT: 729 *mp = m = m_get(M_WAIT, MT_SOOPTS); 730 m->m_len = sizeof(int); 731 MCLAIM(m, so->so_mowner); 732 733 switch (optname) { 734 #ifdef TCP_SIGNATURE 735 case TCP_MD5SIG: 736 *mtod(m, int *) = (tp->t_flags & TF_SIGNATURE) ? 1 : 0; 737 break; 738 #endif 739 case TCP_NODELAY: 740 *mtod(m, int *) = tp->t_flags & TF_NODELAY; 741 break; 742 case TCP_MAXSEG: 743 *mtod(m, int *) = tp->t_peermss; 744 break; 745 default: 746 error = ENOPROTOOPT; 747 break; 748 } 749 break; 750 } 751 splx(s); 752 return (error); 753 } 754 755 #ifndef TCP_SENDSPACE 756 #define TCP_SENDSPACE 1024*32 757 #endif 758 int tcp_sendspace = TCP_SENDSPACE; 759 #ifndef TCP_RECVSPACE 760 #define TCP_RECVSPACE 1024*32 761 #endif 762 int tcp_recvspace = TCP_RECVSPACE; 763 764 /* 765 * Attach TCP protocol to socket, allocating 766 * internet protocol control block, tcp control block, 767 * bufer space, and entering LISTEN state if to accept connections. 768 */ 769 int 770 tcp_attach(struct socket *so) 771 { 772 struct tcpcb *tp; 773 struct inpcb *inp; 774 #ifdef INET6 775 struct in6pcb *in6p; 776 #endif 777 int error; 778 int family; /* family of the socket */ 779 780 family = so->so_proto->pr_domain->dom_family; 781 782 #ifdef MBUFTRACE 783 so->so_mowner = &tcp_mowner; 784 so->so_rcv.sb_mowner = &tcp_rx_mowner; 785 so->so_snd.sb_mowner = &tcp_tx_mowner; 786 #endif 787 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 788 error = soreserve(so, tcp_sendspace, tcp_recvspace); 789 if (error) 790 return (error); 791 } 792 switch (family) { 793 #ifdef INET 794 case PF_INET: 795 error = in_pcballoc(so, &tcbtable); 796 if (error) 797 return (error); 798 inp = sotoinpcb(so); 799 #ifdef INET6 800 in6p = NULL; 801 #endif 802 break; 803 #endif 804 #ifdef INET6 805 case PF_INET6: 806 error = in6_pcballoc(so, &tcbtable); 807 if (error) 808 return (error); 809 inp = NULL; 810 in6p = sotoin6pcb(so); 811 break; 812 #endif 813 default: 814 return EAFNOSUPPORT; 815 } 816 if (inp) 817 tp = tcp_newtcpcb(family, (void *)inp); 818 #ifdef INET6 819 else if (in6p) 820 tp = tcp_newtcpcb(family, (void *)in6p); 821 #endif 822 else 823 tp = NULL; 824 825 if (tp == 0) { 826 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 827 828 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 829 #ifdef INET 830 if (inp) 831 in_pcbdetach(inp); 832 #endif 833 #ifdef INET6 834 if (in6p) 835 in6_pcbdetach(in6p); 836 #endif 837 so->so_state |= nofd; 838 return (ENOBUFS); 839 } 840 tp->t_state = TCPS_CLOSED; 841 return (0); 842 } 843 844 /* 845 * Initiate (or continue) disconnect. 846 * If embryonic state, just send reset (once). 847 * If in ``let data drain'' option and linger null, just drop. 848 * Otherwise (hard), mark socket disconnecting and drop 849 * current input data; switch states based on user close, and 850 * send segment to peer (with FIN). 851 */ 852 struct tcpcb * 853 tcp_disconnect(struct tcpcb *tp) 854 { 855 struct socket *so; 856 857 if (tp->t_inpcb) 858 so = tp->t_inpcb->inp_socket; 859 #ifdef INET6 860 else if (tp->t_in6pcb) 861 so = tp->t_in6pcb->in6p_socket; 862 #endif 863 else 864 so = NULL; 865 866 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 867 tp = tcp_close(tp); 868 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 869 tp = tcp_drop(tp, 0); 870 else { 871 soisdisconnecting(so); 872 sbflush(&so->so_rcv); 873 tp = tcp_usrclosed(tp); 874 if (tp) 875 (void) tcp_output(tp); 876 } 877 return (tp); 878 } 879 880 /* 881 * User issued close, and wish to trail through shutdown states: 882 * if never received SYN, just forget it. If got a SYN from peer, 883 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 884 * If already got a FIN from peer, then almost done; go to LAST_ACK 885 * state. In all other cases, have already sent FIN to peer (e.g. 886 * after PRU_SHUTDOWN), and just have to play tedious game waiting 887 * for peer to send FIN or not respond to keep-alives, etc. 888 * We can let the user exit from the close as soon as the FIN is acked. 889 */ 890 struct tcpcb * 891 tcp_usrclosed(struct tcpcb *tp) 892 { 893 894 switch (tp->t_state) { 895 896 case TCPS_CLOSED: 897 case TCPS_LISTEN: 898 case TCPS_SYN_SENT: 899 tp->t_state = TCPS_CLOSED; 900 tp = tcp_close(tp); 901 break; 902 903 case TCPS_SYN_RECEIVED: 904 case TCPS_ESTABLISHED: 905 tp->t_state = TCPS_FIN_WAIT_1; 906 break; 907 908 case TCPS_CLOSE_WAIT: 909 tp->t_state = TCPS_LAST_ACK; 910 break; 911 } 912 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 913 struct socket *so; 914 if (tp->t_inpcb) 915 so = tp->t_inpcb->inp_socket; 916 #ifdef INET6 917 else if (tp->t_in6pcb) 918 so = tp->t_in6pcb->in6p_socket; 919 #endif 920 else 921 so = NULL; 922 if (so) 923 soisdisconnected(so); 924 /* 925 * If we are in FIN_WAIT_2, we arrived here because the 926 * application did a shutdown of the send side. Like the 927 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after 928 * a full close, we start a timer to make sure sockets are 929 * not left in FIN_WAIT_2 forever. 930 */ 931 if ((tp->t_state == TCPS_FIN_WAIT_2) && (tcp_maxidle > 0)) 932 TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle); 933 } 934 return (tp); 935 } 936 937 /* 938 * sysctl helper routine for net.inet.ip.mssdflt. it can't be less 939 * than 32. 940 */ 941 static int 942 sysctl_net_inet_tcp_mssdflt(SYSCTLFN_ARGS) 943 { 944 int error, mssdflt; 945 struct sysctlnode node; 946 947 mssdflt = tcp_mssdflt; 948 node = *rnode; 949 node.sysctl_data = &mssdflt; 950 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 951 if (error || newp == NULL) 952 return (error); 953 954 if (mssdflt < 32) 955 return (EINVAL); 956 tcp_mssdflt = mssdflt; 957 958 return (0); 959 } 960 961 /* 962 * sysctl helper routine for setting port related values under 963 * net.inet.ip and net.inet6.ip6. does basic range checking and does 964 * additional checks for each type. this code has placed in 965 * tcp_input.c since INET and INET6 both use the same tcp code. 966 * 967 * this helper is not static so that both inet and inet6 can use it. 968 */ 969 int 970 sysctl_net_inet_ip_ports(SYSCTLFN_ARGS) 971 { 972 int error, tmp; 973 int apmin, apmax; 974 #ifndef IPNOPRIVPORTS 975 int lpmin, lpmax; 976 #endif /* IPNOPRIVPORTS */ 977 struct sysctlnode node; 978 979 if (namelen != 0) 980 return (EINVAL); 981 982 switch (name[-3]) { 983 #ifdef INET 984 case PF_INET: 985 apmin = anonportmin; 986 apmax = anonportmax; 987 #ifndef IPNOPRIVPORTS 988 lpmin = lowportmin; 989 lpmax = lowportmax; 990 #endif /* IPNOPRIVPORTS */ 991 break; 992 #endif /* INET */ 993 #ifdef INET6 994 case PF_INET6: 995 apmin = ip6_anonportmin; 996 apmax = ip6_anonportmax; 997 #ifndef IPNOPRIVPORTS 998 lpmin = ip6_lowportmin; 999 lpmax = ip6_lowportmax; 1000 #endif /* IPNOPRIVPORTS */ 1001 break; 1002 #endif /* INET6 */ 1003 default: 1004 return (EINVAL); 1005 } 1006 1007 /* 1008 * insert temporary copy into node, perform lookup on 1009 * temporary, then restore pointer 1010 */ 1011 node = *rnode; 1012 tmp = *(int*)rnode->sysctl_data; 1013 node.sysctl_data = &tmp; 1014 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1015 if (error || newp == NULL) 1016 return (error); 1017 1018 /* 1019 * simple port range check 1020 */ 1021 if (tmp < 0 || tmp > 65535) 1022 return (EINVAL); 1023 1024 /* 1025 * per-node range checks 1026 */ 1027 switch (rnode->sysctl_num) { 1028 case IPCTL_ANONPORTMIN: 1029 if (tmp >= apmax) 1030 return (EINVAL); 1031 #ifndef IPNOPRIVPORTS 1032 if (tmp < IPPORT_RESERVED) 1033 return (EINVAL); 1034 #endif /* IPNOPRIVPORTS */ 1035 break; 1036 1037 case IPCTL_ANONPORTMAX: 1038 if (apmin >= tmp) 1039 return (EINVAL); 1040 #ifndef IPNOPRIVPORTS 1041 if (tmp < IPPORT_RESERVED) 1042 return (EINVAL); 1043 #endif /* IPNOPRIVPORTS */ 1044 break; 1045 1046 #ifndef IPNOPRIVPORTS 1047 case IPCTL_LOWPORTMIN: 1048 if (tmp >= lpmax || 1049 tmp > IPPORT_RESERVEDMAX || 1050 tmp < IPPORT_RESERVEDMIN) 1051 return (EINVAL); 1052 break; 1053 1054 case IPCTL_LOWPORTMAX: 1055 if (lpmin >= tmp || 1056 tmp > IPPORT_RESERVEDMAX || 1057 tmp < IPPORT_RESERVEDMIN) 1058 return (EINVAL); 1059 break; 1060 #endif /* IPNOPRIVPORTS */ 1061 1062 default: 1063 return (EINVAL); 1064 } 1065 1066 *(int*)rnode->sysctl_data = tmp; 1067 1068 return (0); 1069 } 1070 1071 /* 1072 * sysctl helper routine for the net.inet.tcp.ident and 1073 * net.inet6.tcp6.ident nodes. contains backwards compat code for the 1074 * old way of looking up the ident information for ipv4 which involves 1075 * stuffing the port/addr pairs into the mib lookup. 1076 */ 1077 static int 1078 sysctl_net_inet_tcp_ident(SYSCTLFN_ARGS) 1079 { 1080 #ifdef INET 1081 struct inpcb *inb; 1082 struct sockaddr_in *si4[2]; 1083 #endif /* INET */ 1084 #ifdef INET6 1085 struct in6pcb *in6b; 1086 struct sockaddr_in6 *si6[2]; 1087 #endif /* INET6 */ 1088 struct sockaddr_storage sa[2]; 1089 struct socket *sockp; 1090 size_t sz; 1091 uid_t uid; 1092 int error, pf; 1093 1094 if (namelen != 4 && namelen != 0) 1095 return (EINVAL); 1096 if (name[-2] != IPPROTO_TCP) 1097 return (EINVAL); 1098 pf = name[-3]; 1099 1100 /* old style lookup, ipv4 only */ 1101 if (namelen == 4) { 1102 #ifdef INET 1103 struct in_addr laddr, raddr; 1104 u_int lport, rport; 1105 1106 if (pf != PF_INET) 1107 return (EPROTONOSUPPORT); 1108 raddr.s_addr = (uint32_t)name[0]; 1109 rport = (u_int)name[1]; 1110 laddr.s_addr = (uint32_t)name[2]; 1111 lport = (u_int)name[3]; 1112 inb = in_pcblookup_connect(&tcbtable, raddr, rport, 1113 laddr, lport); 1114 if (inb == NULL || (sockp = inb->inp_socket) == NULL) 1115 return (ESRCH); 1116 uid = sockp->so_uidinfo->ui_uid; 1117 if (oldp) { 1118 sz = MIN(sizeof(uid), *oldlenp); 1119 error = copyout(&uid, oldp, sz); 1120 if (error) 1121 return (error); 1122 } 1123 *oldlenp = sizeof(uid); 1124 return (0); 1125 #else /* INET */ 1126 return (EINVAL); 1127 #endif /* INET */ 1128 } 1129 1130 if (newp == NULL || newlen != sizeof(sa)) 1131 return (EINVAL); 1132 error = copyin(newp, &sa, newlen); 1133 if (error) 1134 return (error); 1135 1136 /* 1137 * requested families must match 1138 */ 1139 if (pf != sa[0].ss_family || sa[0].ss_family != sa[1].ss_family) 1140 return (EINVAL); 1141 1142 switch (pf) { 1143 #ifdef INET 1144 case PF_INET: 1145 si4[0] = (struct sockaddr_in*)&sa[0]; 1146 si4[1] = (struct sockaddr_in*)&sa[1]; 1147 if (si4[0]->sin_len != sizeof(*si4[0]) || 1148 si4[0]->sin_len != si4[1]->sin_len) 1149 return (EINVAL); 1150 inb = in_pcblookup_connect(&tcbtable, 1151 si4[0]->sin_addr, si4[0]->sin_port, 1152 si4[1]->sin_addr, si4[1]->sin_port); 1153 if (inb == NULL || (sockp = inb->inp_socket) == NULL) 1154 return (ESRCH); 1155 break; 1156 #endif /* INET */ 1157 #ifdef INET6 1158 case PF_INET6: 1159 si6[0] = (struct sockaddr_in6*)&sa[0]; 1160 si6[1] = (struct sockaddr_in6*)&sa[1]; 1161 if (si6[0]->sin6_len != sizeof(*si6[0]) || 1162 si6[0]->sin6_len != si6[1]->sin6_len) 1163 return (EINVAL); 1164 in6b = in6_pcblookup_connect(&tcbtable, 1165 &si6[0]->sin6_addr, si6[0]->sin6_port, 1166 &si6[1]->sin6_addr, si6[1]->sin6_port, 0); 1167 if (in6b == NULL || (sockp = in6b->in6p_socket) == NULL) 1168 return (ESRCH); 1169 break; 1170 #endif /* INET6 */ 1171 default: 1172 return (EPROTONOSUPPORT); 1173 } 1174 *oldlenp = sizeof(uid); 1175 1176 uid = sockp->so_uidinfo->ui_uid; 1177 if (oldp) { 1178 sz = MIN(sizeof(uid), *oldlenp); 1179 error = copyout(&uid, oldp, sz); 1180 if (error) 1181 return (error); 1182 } 1183 *oldlenp = sizeof(uid); 1184 1185 return (0); 1186 } 1187 1188 /* 1189 * sysctl helper for the inet and inet6 pcblists. handles tcp/udp and 1190 * inet/inet6, as well as raw pcbs for each. specifically not 1191 * declared static so that raw sockets and udp/udp6 can use it as 1192 * well. 1193 */ 1194 int 1195 sysctl_inpcblist(SYSCTLFN_ARGS) 1196 { 1197 #ifdef INET 1198 struct sockaddr_in *in; 1199 const struct inpcb *inp; 1200 #endif 1201 #ifdef INET6 1202 struct sockaddr_in6 *in6; 1203 const struct in6pcb *in6p; 1204 #endif 1205 /* 1206 * sysctl_data is const, but CIRCLEQ_FOREACH can't use a const 1207 * struct inpcbtable pointer, so we have to discard const. :-/ 1208 */ 1209 struct inpcbtable *pcbtbl = __UNCONST(rnode->sysctl_data); 1210 const struct inpcb_hdr *inph; 1211 struct tcpcb *tp; 1212 struct kinfo_pcb pcb; 1213 char *dp; 1214 u_int op, arg; 1215 size_t len, needed, elem_size, out_size; 1216 int error, elem_count, pf, proto, pf2; 1217 1218 if (namelen != 4) 1219 return (EINVAL); 1220 1221 if (oldp != NULL) { 1222 len = *oldlenp; 1223 elem_size = name[2]; 1224 elem_count = name[3]; 1225 if (elem_size != sizeof(pcb)) 1226 return EINVAL; 1227 } else { 1228 len = 0; 1229 elem_count = INT_MAX; 1230 elem_size = sizeof(pcb); 1231 } 1232 error = 0; 1233 dp = oldp; 1234 op = name[0]; 1235 arg = name[1]; 1236 out_size = elem_size; 1237 needed = 0; 1238 1239 if (namelen == 1 && name[0] == CTL_QUERY) 1240 return (sysctl_query(SYSCTLFN_CALL(rnode))); 1241 1242 if (name - oname != 4) 1243 return (EINVAL); 1244 1245 pf = oname[1]; 1246 proto = oname[2]; 1247 pf2 = (oldp != NULL) ? pf : 0; 1248 1249 CIRCLEQ_FOREACH(inph, &pcbtbl->inpt_queue, inph_queue) { 1250 #ifdef INET 1251 inp = (const struct inpcb *)inph; 1252 #endif 1253 #ifdef INET6 1254 in6p = (const struct in6pcb *)inph; 1255 #endif 1256 1257 if (inph->inph_af != pf) 1258 continue; 1259 1260 #ifdef notyet 1261 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_CANSEE, 1262 inph->inph_socket->so_cred)) 1263 #endif 1264 if (kauth_cred_geteuid(l->l_cred) != 0 && 1265 kauth_cred_geteuid(l->l_cred) != 1266 inph->inph_socket->so_uidinfo->ui_uid) 1267 continue; 1268 1269 memset(&pcb, 0, sizeof(pcb)); 1270 1271 pcb.ki_family = pf; 1272 pcb.ki_type = proto; 1273 1274 switch (pf2) { 1275 case 0: 1276 /* just probing for size */ 1277 break; 1278 #ifdef INET 1279 case PF_INET: 1280 pcb.ki_family = inp->inp_socket->so_proto-> 1281 pr_domain->dom_family; 1282 pcb.ki_type = inp->inp_socket->so_proto-> 1283 pr_type; 1284 pcb.ki_protocol = inp->inp_socket->so_proto-> 1285 pr_protocol; 1286 pcb.ki_pflags = inp->inp_flags; 1287 1288 pcb.ki_sostate = inp->inp_socket->so_state; 1289 pcb.ki_prstate = inp->inp_state; 1290 if (proto == IPPROTO_TCP) { 1291 tp = intotcpcb(inp); 1292 pcb.ki_tstate = tp->t_state; 1293 pcb.ki_tflags = tp->t_flags; 1294 } 1295 1296 pcb.ki_pcbaddr = PTRTOUINT64(inp); 1297 pcb.ki_ppcbaddr = PTRTOUINT64(inp->inp_ppcb); 1298 pcb.ki_sockaddr = PTRTOUINT64(inp->inp_socket); 1299 1300 pcb.ki_rcvq = inp->inp_socket->so_rcv.sb_cc; 1301 pcb.ki_sndq = inp->inp_socket->so_snd.sb_cc; 1302 1303 in = satosin(&pcb.ki_src); 1304 in->sin_len = sizeof(*in); 1305 in->sin_family = pf; 1306 in->sin_port = inp->inp_lport; 1307 in->sin_addr = inp->inp_laddr; 1308 if (pcb.ki_prstate >= INP_CONNECTED) { 1309 in = satosin(&pcb.ki_dst); 1310 in->sin_len = sizeof(*in); 1311 in->sin_family = pf; 1312 in->sin_port = inp->inp_fport; 1313 in->sin_addr = inp->inp_faddr; 1314 } 1315 break; 1316 #endif 1317 #ifdef INET6 1318 case PF_INET6: 1319 pcb.ki_family = in6p->in6p_socket->so_proto-> 1320 pr_domain->dom_family; 1321 pcb.ki_type = in6p->in6p_socket->so_proto->pr_type; 1322 pcb.ki_protocol = in6p->in6p_socket->so_proto-> 1323 pr_protocol; 1324 pcb.ki_pflags = in6p->in6p_flags; 1325 1326 pcb.ki_sostate = in6p->in6p_socket->so_state; 1327 pcb.ki_prstate = in6p->in6p_state; 1328 if (proto == IPPROTO_TCP) { 1329 tp = in6totcpcb(in6p); 1330 pcb.ki_tstate = tp->t_state; 1331 pcb.ki_tflags = tp->t_flags; 1332 } 1333 1334 pcb.ki_pcbaddr = PTRTOUINT64(in6p); 1335 pcb.ki_ppcbaddr = PTRTOUINT64(in6p->in6p_ppcb); 1336 pcb.ki_sockaddr = PTRTOUINT64(in6p->in6p_socket); 1337 1338 pcb.ki_rcvq = in6p->in6p_socket->so_rcv.sb_cc; 1339 pcb.ki_sndq = in6p->in6p_socket->so_snd.sb_cc; 1340 1341 in6 = satosin6(&pcb.ki_src); 1342 in6->sin6_len = sizeof(*in6); 1343 in6->sin6_family = pf; 1344 in6->sin6_port = in6p->in6p_lport; 1345 in6->sin6_flowinfo = in6p->in6p_flowinfo; 1346 in6->sin6_addr = in6p->in6p_laddr; 1347 in6->sin6_scope_id = 0; /* XXX? */ 1348 1349 if (pcb.ki_prstate >= IN6P_CONNECTED) { 1350 in6 = satosin6(&pcb.ki_dst); 1351 in6->sin6_len = sizeof(*in6); 1352 in6->sin6_family = pf; 1353 in6->sin6_port = in6p->in6p_fport; 1354 in6->sin6_flowinfo = in6p->in6p_flowinfo; 1355 in6->sin6_addr = in6p->in6p_faddr; 1356 in6->sin6_scope_id = 0; /* XXX? */ 1357 } 1358 break; 1359 #endif 1360 } 1361 1362 if (len >= elem_size && elem_count > 0) { 1363 error = copyout(&pcb, dp, out_size); 1364 if (error) 1365 return (error); 1366 dp += elem_size; 1367 len -= elem_size; 1368 } 1369 if (elem_count > 0) { 1370 needed += elem_size; 1371 if (elem_count != INT_MAX) 1372 elem_count--; 1373 } 1374 } 1375 1376 *oldlenp = needed; 1377 if (oldp == NULL) 1378 *oldlenp += PCB_SLOP * sizeof(struct kinfo_pcb); 1379 1380 return (error); 1381 } 1382 1383 /* 1384 * this (second stage) setup routine is a replacement for tcp_sysctl() 1385 * (which is currently used for ipv4 and ipv6) 1386 */ 1387 static void 1388 sysctl_net_inet_tcp_setup2(struct sysctllog **clog, int pf, const char *pfname, 1389 const char *tcpname) 1390 { 1391 int ecn_node; 1392 const struct sysctlnode *sack_node, *node; 1393 #ifdef TCP_DEBUG 1394 extern struct tcp_debug tcp_debug[TCP_NDEBUG]; 1395 extern int tcp_debx; 1396 #endif 1397 1398 sysctl_createv(clog, 0, NULL, NULL, 1399 CTLFLAG_PERMANENT, 1400 CTLTYPE_NODE, "net", NULL, 1401 NULL, 0, NULL, 0, 1402 CTL_NET, CTL_EOL); 1403 sysctl_createv(clog, 0, NULL, NULL, 1404 CTLFLAG_PERMANENT, 1405 CTLTYPE_NODE, pfname, NULL, 1406 NULL, 0, NULL, 0, 1407 CTL_NET, pf, CTL_EOL); 1408 sysctl_createv(clog, 0, NULL, NULL, 1409 CTLFLAG_PERMANENT, 1410 CTLTYPE_NODE, tcpname, 1411 SYSCTL_DESCR("TCP related settings"), 1412 NULL, 0, NULL, 0, 1413 CTL_NET, pf, IPPROTO_TCP, CTL_EOL); 1414 1415 sysctl_createv(clog, 0, NULL, NULL, 1416 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1417 CTLTYPE_INT, "rfc1323", 1418 SYSCTL_DESCR("Enable RFC1323 TCP extensions"), 1419 NULL, 0, &tcp_do_rfc1323, 0, 1420 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RFC1323, CTL_EOL); 1421 sysctl_createv(clog, 0, NULL, NULL, 1422 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1423 CTLTYPE_INT, "sendspace", 1424 SYSCTL_DESCR("Default TCP send buffer size"), 1425 NULL, 0, &tcp_sendspace, 0, 1426 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SENDSPACE, CTL_EOL); 1427 sysctl_createv(clog, 0, NULL, NULL, 1428 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1429 CTLTYPE_INT, "recvspace", 1430 SYSCTL_DESCR("Default TCP receive buffer size"), 1431 NULL, 0, &tcp_recvspace, 0, 1432 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RECVSPACE, CTL_EOL); 1433 sysctl_createv(clog, 0, NULL, NULL, 1434 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1435 CTLTYPE_INT, "mssdflt", 1436 SYSCTL_DESCR("Default maximum segment size"), 1437 sysctl_net_inet_tcp_mssdflt, 0, &tcp_mssdflt, 0, 1438 CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSSDFLT, CTL_EOL); 1439 sysctl_createv(clog, 0, NULL, NULL, 1440 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1441 CTLTYPE_INT, "syn_cache_limit", 1442 SYSCTL_DESCR("Maximum number of entries in the TCP " 1443 "compressed state engine"), 1444 NULL, 0, &tcp_syn_cache_limit, 0, 1445 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_CACHE_LIMIT, 1446 CTL_EOL); 1447 sysctl_createv(clog, 0, NULL, NULL, 1448 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1449 CTLTYPE_INT, "syn_bucket_limit", 1450 SYSCTL_DESCR("Maximum number of entries per hash " 1451 "bucket in the TCP compressed state " 1452 "engine"), 1453 NULL, 0, &tcp_syn_bucket_limit, 0, 1454 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_BUCKET_LIMIT, 1455 CTL_EOL); 1456 #if 0 /* obsoleted */ 1457 sysctl_createv(clog, 0, NULL, NULL, 1458 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1459 CTLTYPE_INT, "syn_cache_interval", 1460 SYSCTL_DESCR("TCP compressed state engine's timer interval"), 1461 NULL, 0, &tcp_syn_cache_interval, 0, 1462 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_CACHE_INTER, 1463 CTL_EOL); 1464 #endif 1465 sysctl_createv(clog, 0, NULL, NULL, 1466 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1467 CTLTYPE_INT, "init_win", 1468 SYSCTL_DESCR("Initial TCP congestion window"), 1469 NULL, 0, &tcp_init_win, 0, 1470 CTL_NET, pf, IPPROTO_TCP, TCPCTL_INIT_WIN, CTL_EOL); 1471 sysctl_createv(clog, 0, NULL, NULL, 1472 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1473 CTLTYPE_INT, "mss_ifmtu", 1474 SYSCTL_DESCR("Use interface MTU for calculating MSS"), 1475 NULL, 0, &tcp_mss_ifmtu, 0, 1476 CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSS_IFMTU, CTL_EOL); 1477 sysctl_createv(clog, 0, NULL, &sack_node, 1478 CTLFLAG_PERMANENT, 1479 CTLTYPE_NODE, "sack", 1480 SYSCTL_DESCR("RFC2018 Selective ACKnowledgement tunables"), 1481 NULL, 0, NULL, 0, 1482 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_EOL); 1483 sysctl_createv(clog, 0, NULL, &node, 1484 CTLFLAG_PERMANENT, 1485 CTLTYPE_NODE, "ecn", 1486 SYSCTL_DESCR("RFC3168 Explicit Congestion Notification"), 1487 NULL, 0, NULL, 0, 1488 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1489 ecn_node = node->sysctl_num; 1490 sysctl_createv(clog, 0, NULL, NULL, 1491 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1492 CTLTYPE_INT, "win_scale", 1493 SYSCTL_DESCR("Use RFC1323 window scale options"), 1494 NULL, 0, &tcp_do_win_scale, 0, 1495 CTL_NET, pf, IPPROTO_TCP, TCPCTL_WSCALE, CTL_EOL); 1496 sysctl_createv(clog, 0, NULL, NULL, 1497 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1498 CTLTYPE_INT, "timestamps", 1499 SYSCTL_DESCR("Use RFC1323 time stamp options"), 1500 NULL, 0, &tcp_do_timestamps, 0, 1501 CTL_NET, pf, IPPROTO_TCP, TCPCTL_TSTAMP, CTL_EOL); 1502 sysctl_createv(clog, 0, NULL, NULL, 1503 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1504 CTLTYPE_INT, "compat_42", 1505 SYSCTL_DESCR("Enable workarounds for 4.2BSD TCP bugs"), 1506 NULL, 0, &tcp_compat_42, 0, 1507 CTL_NET, pf, IPPROTO_TCP, TCPCTL_COMPAT_42, CTL_EOL); 1508 sysctl_createv(clog, 0, NULL, NULL, 1509 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1510 CTLTYPE_INT, "cwm", 1511 SYSCTL_DESCR("Hughes/Touch/Heidemann Congestion Window " 1512 "Monitoring"), 1513 NULL, 0, &tcp_cwm, 0, 1514 CTL_NET, pf, IPPROTO_TCP, TCPCTL_CWM, CTL_EOL); 1515 sysctl_createv(clog, 0, NULL, NULL, 1516 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1517 CTLTYPE_INT, "cwm_burstsize", 1518 SYSCTL_DESCR("Congestion Window Monitoring allowed " 1519 "burst count in packets"), 1520 NULL, 0, &tcp_cwm_burstsize, 0, 1521 CTL_NET, pf, IPPROTO_TCP, TCPCTL_CWM_BURSTSIZE, 1522 CTL_EOL); 1523 sysctl_createv(clog, 0, NULL, NULL, 1524 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1525 CTLTYPE_INT, "ack_on_push", 1526 SYSCTL_DESCR("Immediately return ACK when PSH is " 1527 "received"), 1528 NULL, 0, &tcp_ack_on_push, 0, 1529 CTL_NET, pf, IPPROTO_TCP, TCPCTL_ACK_ON_PUSH, CTL_EOL); 1530 sysctl_createv(clog, 0, NULL, NULL, 1531 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1532 CTLTYPE_INT, "keepidle", 1533 SYSCTL_DESCR("Allowed connection idle ticks before a " 1534 "keepalive probe is sent"), 1535 NULL, 0, &tcp_keepidle, 0, 1536 CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPIDLE, CTL_EOL); 1537 sysctl_createv(clog, 0, NULL, NULL, 1538 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1539 CTLTYPE_INT, "keepintvl", 1540 SYSCTL_DESCR("Ticks before next keepalive probe is sent"), 1541 NULL, 0, &tcp_keepintvl, 0, 1542 CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPINTVL, CTL_EOL); 1543 sysctl_createv(clog, 0, NULL, NULL, 1544 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1545 CTLTYPE_INT, "keepcnt", 1546 SYSCTL_DESCR("Number of keepalive probes to send"), 1547 NULL, 0, &tcp_keepcnt, 0, 1548 CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPCNT, CTL_EOL); 1549 sysctl_createv(clog, 0, NULL, NULL, 1550 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 1551 CTLTYPE_INT, "slowhz", 1552 SYSCTL_DESCR("Keepalive ticks per second"), 1553 NULL, PR_SLOWHZ, NULL, 0, 1554 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SLOWHZ, CTL_EOL); 1555 sysctl_createv(clog, 0, NULL, NULL, 1556 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1557 CTLTYPE_INT, "newreno", 1558 SYSCTL_DESCR("NewReno congestion control algorithm"), 1559 NULL, 0, &tcp_do_newreno, 0, 1560 CTL_NET, pf, IPPROTO_TCP, TCPCTL_NEWRENO, CTL_EOL); 1561 sysctl_createv(clog, 0, NULL, NULL, 1562 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1563 CTLTYPE_INT, "log_refused", 1564 SYSCTL_DESCR("Log refused TCP connections"), 1565 NULL, 0, &tcp_log_refused, 0, 1566 CTL_NET, pf, IPPROTO_TCP, TCPCTL_LOG_REFUSED, CTL_EOL); 1567 #if 0 /* obsoleted */ 1568 sysctl_createv(clog, 0, NULL, NULL, 1569 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1570 CTLTYPE_INT, "rstratelimit", NULL, 1571 NULL, 0, &tcp_rst_ratelim, 0, 1572 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RSTRATELIMIT, CTL_EOL); 1573 #endif 1574 sysctl_createv(clog, 0, NULL, NULL, 1575 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1576 CTLTYPE_INT, "rstppslimit", 1577 SYSCTL_DESCR("Maximum number of RST packets to send " 1578 "per second"), 1579 NULL, 0, &tcp_rst_ppslim, 0, 1580 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RSTPPSLIMIT, CTL_EOL); 1581 sysctl_createv(clog, 0, NULL, NULL, 1582 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1583 CTLTYPE_INT, "delack_ticks", 1584 SYSCTL_DESCR("Number of ticks to delay sending an ACK"), 1585 NULL, 0, &tcp_delack_ticks, 0, 1586 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DELACK_TICKS, CTL_EOL); 1587 sysctl_createv(clog, 0, NULL, NULL, 1588 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1589 CTLTYPE_INT, "init_win_local", 1590 SYSCTL_DESCR("Initial TCP window size (in segments)"), 1591 NULL, 0, &tcp_init_win_local, 0, 1592 CTL_NET, pf, IPPROTO_TCP, TCPCTL_INIT_WIN_LOCAL, 1593 CTL_EOL); 1594 sysctl_createv(clog, 0, NULL, NULL, 1595 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1596 CTLTYPE_STRUCT, "ident", 1597 SYSCTL_DESCR("RFC1413 Identification Protocol lookups"), 1598 sysctl_net_inet_tcp_ident, 0, NULL, sizeof(uid_t), 1599 CTL_NET, pf, IPPROTO_TCP, TCPCTL_IDENT, CTL_EOL); 1600 sysctl_createv(clog, 0, NULL, NULL, 1601 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1602 CTLTYPE_INT, "do_loopback_cksum", 1603 SYSCTL_DESCR("Perform TCP checksum on loopback"), 1604 NULL, 0, &tcp_do_loopback_cksum, 0, 1605 CTL_NET, pf, IPPROTO_TCP, TCPCTL_LOOPBACKCKSUM, 1606 CTL_EOL); 1607 sysctl_createv(clog, 0, NULL, NULL, 1608 CTLFLAG_PERMANENT, 1609 CTLTYPE_STRUCT, "pcblist", 1610 SYSCTL_DESCR("TCP protocol control block list"), 1611 sysctl_inpcblist, 0, &tcbtable, 0, 1612 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, 1613 CTL_EOL); 1614 1615 sysctl_createv(clog, 0, NULL, NULL, 1616 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1617 CTLTYPE_INT, "enable", 1618 SYSCTL_DESCR("Enable TCP Explicit Congestion " 1619 "Notification"), 1620 NULL, 0, &tcp_do_ecn, 0, 1621 CTL_NET, pf, IPPROTO_TCP, ecn_node, 1622 CTL_CREATE, CTL_EOL); 1623 sysctl_createv(clog, 0, NULL, NULL, 1624 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1625 CTLTYPE_INT, "maxretries", 1626 SYSCTL_DESCR("Number of times to retry ECN setup " 1627 "before disabling ECN on the connection"), 1628 NULL, 0, &tcp_ecn_maxretries, 0, 1629 CTL_NET, pf, IPPROTO_TCP, ecn_node, 1630 CTL_CREATE, CTL_EOL); 1631 1632 /* SACK gets it's own little subtree. */ 1633 sysctl_createv(clog, 0, NULL, &sack_node, 1634 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1635 CTLTYPE_INT, "enable", 1636 SYSCTL_DESCR("Enable RFC2018 Selective ACKnowledgement"), 1637 NULL, 0, &tcp_do_sack, 0, 1638 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 1639 sysctl_createv(clog, 0, NULL, &sack_node, 1640 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1641 CTLTYPE_INT, "maxholes", 1642 SYSCTL_DESCR("Maximum number of TCP SACK holes allowed per connection"), 1643 NULL, 0, &tcp_sack_tp_maxholes, 0, 1644 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 1645 sysctl_createv(clog, 0, NULL, &sack_node, 1646 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1647 CTLTYPE_INT, "globalmaxholes", 1648 SYSCTL_DESCR("Global maximum number of TCP SACK holes"), 1649 NULL, 0, &tcp_sack_globalmaxholes, 0, 1650 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 1651 sysctl_createv(clog, 0, NULL, &sack_node, 1652 CTLFLAG_PERMANENT, 1653 CTLTYPE_INT, "globalholes", 1654 SYSCTL_DESCR("Global number of TCP SACK holes"), 1655 NULL, 0, &tcp_sack_globalholes, 0, 1656 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 1657 1658 sysctl_createv(clog, 0, NULL, NULL, 1659 CTLFLAG_PERMANENT, 1660 CTLTYPE_STRUCT, "stats", 1661 SYSCTL_DESCR("TCP statistics"), 1662 NULL, 0, &tcpstat, sizeof(tcpstat), 1663 CTL_NET, pf, IPPROTO_TCP, TCPCTL_STATS, 1664 CTL_EOL); 1665 #ifdef TCP_DEBUG 1666 sysctl_createv(clog, 0, NULL, NULL, 1667 CTLFLAG_PERMANENT, 1668 CTLTYPE_STRUCT, "debug", 1669 SYSCTL_DESCR("TCP sockets debug information"), 1670 NULL, 0, &tcp_debug, sizeof(tcp_debug), 1671 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DEBUG, 1672 CTL_EOL); 1673 sysctl_createv(clog, 0, NULL, NULL, 1674 CTLFLAG_PERMANENT, 1675 CTLTYPE_INT, "debx", 1676 SYSCTL_DESCR("Number of TCP debug sockets messages"), 1677 NULL, 0, &tcp_debx, sizeof(tcp_debx), 1678 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DEBX, 1679 CTL_EOL); 1680 #endif 1681 1682 } 1683 1684 /* 1685 * Sysctl for tcp variables. 1686 */ 1687 #ifdef INET 1688 SYSCTL_SETUP(sysctl_net_inet_tcp_setup, "sysctl net.inet.tcp subtree setup") 1689 { 1690 1691 sysctl_net_inet_tcp_setup2(clog, PF_INET, "inet", "tcp"); 1692 } 1693 #endif /* INET */ 1694 1695 #ifdef INET6 1696 SYSCTL_SETUP(sysctl_net_inet6_tcp6_setup, "sysctl net.inet6.tcp6 subtree setup") 1697 { 1698 1699 sysctl_net_inet_tcp_setup2(clog, PF_INET6, "inet6", "tcp6"); 1700 } 1701 #endif /* INET6 */ 1702