1 /* $NetBSD: tcp_usrreq.c,v 1.123 2006/10/05 17:35:19 tls Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1997, 1998, 2005, 2006 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to The NetBSD Foundation 37 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation 38 * Facility, NASA Ames Research Center. 39 * This code is derived from software contributed to The NetBSD Foundation 40 * by Charles M. Hannum. 41 * This code is derived from software contributed to The NetBSD Foundation 42 * by Rui Paulo. 43 * 44 * Redistribution and use in source and binary forms, with or without 45 * modification, are permitted provided that the following conditions 46 * are met: 47 * 1. Redistributions of source code must retain the above copyright 48 * notice, this list of conditions and the following disclaimer. 49 * 2. Redistributions in binary form must reproduce the above copyright 50 * notice, this list of conditions and the following disclaimer in the 51 * documentation and/or other materials provided with the distribution. 52 * 3. All advertising materials mentioning features or use of this software 53 * must display the following acknowledgement: 54 * This product includes software developed by the NetBSD 55 * Foundation, Inc. and its contributors. 56 * 4. Neither the name of The NetBSD Foundation nor the names of its 57 * contributors may be used to endorse or promote products derived 58 * from this software without specific prior written permission. 59 * 60 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 61 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 62 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 63 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 64 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 65 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 66 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 67 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 68 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 69 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 70 * POSSIBILITY OF SUCH DAMAGE. 71 */ 72 73 /* 74 * Copyright (c) 1982, 1986, 1988, 1993, 1995 75 * The Regents of the University of California. All rights reserved. 76 * 77 * Redistribution and use in source and binary forms, with or without 78 * modification, are permitted provided that the following conditions 79 * are met: 80 * 1. Redistributions of source code must retain the above copyright 81 * notice, this list of conditions and the following disclaimer. 82 * 2. Redistributions in binary form must reproduce the above copyright 83 * notice, this list of conditions and the following disclaimer in the 84 * documentation and/or other materials provided with the distribution. 85 * 3. Neither the name of the University nor the names of its contributors 86 * may be used to endorse or promote products derived from this software 87 * without specific prior written permission. 88 * 89 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 90 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 91 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 92 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 93 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 94 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 95 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 96 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 97 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 98 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 99 * SUCH DAMAGE. 100 * 101 * @(#)tcp_usrreq.c 8.5 (Berkeley) 6/21/95 102 */ 103 104 #include <sys/cdefs.h> 105 __KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.123 2006/10/05 17:35:19 tls Exp $"); 106 107 #include "opt_inet.h" 108 #include "opt_ipsec.h" 109 #include "opt_tcp_debug.h" 110 #include "opt_mbuftrace.h" 111 112 #include <sys/param.h> 113 #include <sys/systm.h> 114 #include <sys/kernel.h> 115 #include <sys/malloc.h> 116 #include <sys/mbuf.h> 117 #include <sys/socket.h> 118 #include <sys/socketvar.h> 119 #include <sys/protosw.h> 120 #include <sys/errno.h> 121 #include <sys/stat.h> 122 #include <sys/proc.h> 123 #include <sys/domain.h> 124 #include <sys/sysctl.h> 125 #include <sys/kauth.h> 126 127 #include <net/if.h> 128 #include <net/route.h> 129 130 #include <netinet/in.h> 131 #include <netinet/in_systm.h> 132 #include <netinet/in_var.h> 133 #include <netinet/ip.h> 134 #include <netinet/in_pcb.h> 135 #include <netinet/ip_var.h> 136 #include <netinet/in_offload.h> 137 138 #ifdef INET6 139 #ifndef INET 140 #include <netinet/in.h> 141 #endif 142 #include <netinet/ip6.h> 143 #include <netinet6/in6_pcb.h> 144 #include <netinet6/ip6_var.h> 145 #endif 146 147 #include <netinet/tcp.h> 148 #include <netinet/tcp_fsm.h> 149 #include <netinet/tcp_seq.h> 150 #include <netinet/tcp_timer.h> 151 #include <netinet/tcp_var.h> 152 #include <netinet/tcpip.h> 153 #include <netinet/tcp_debug.h> 154 155 #include "opt_tcp_space.h" 156 157 #ifdef IPSEC 158 #include <netinet6/ipsec.h> 159 #endif /*IPSEC*/ 160 161 /* 162 * TCP protocol interface to socket abstraction. 163 */ 164 165 /* 166 * Process a TCP user request for TCP tb. If this is a send request 167 * then m is the mbuf chain of send data. If this is a timer expiration 168 * (called from the software clock routine), then timertype tells which timer. 169 */ 170 /*ARGSUSED*/ 171 int 172 tcp_usrreq(struct socket *so, int req, 173 struct mbuf *m, struct mbuf *nam, struct mbuf *control, struct lwp *l) 174 { 175 struct inpcb *inp; 176 #ifdef INET6 177 struct in6pcb *in6p; 178 #endif 179 struct tcpcb *tp = NULL; 180 int s; 181 int error = 0; 182 #ifdef TCP_DEBUG 183 int ostate = 0; 184 #endif 185 int family; /* family of the socket */ 186 187 family = so->so_proto->pr_domain->dom_family; 188 189 if (req == PRU_CONTROL) { 190 switch (family) { 191 #ifdef INET 192 case PF_INET: 193 return (in_control(so, (long)m, (caddr_t)nam, 194 (struct ifnet *)control, l)); 195 #endif 196 #ifdef INET6 197 case PF_INET6: 198 return (in6_control(so, (long)m, (caddr_t)nam, 199 (struct ifnet *)control, l)); 200 #endif 201 default: 202 return EAFNOSUPPORT; 203 } 204 } 205 206 s = splsoftnet(); 207 208 if (req == PRU_PURGEIF) { 209 switch (family) { 210 #ifdef INET 211 case PF_INET: 212 in_pcbpurgeif0(&tcbtable, (struct ifnet *)control); 213 in_purgeif((struct ifnet *)control); 214 in_pcbpurgeif(&tcbtable, (struct ifnet *)control); 215 break; 216 #endif 217 #ifdef INET6 218 case PF_INET6: 219 in6_pcbpurgeif0(&tcbtable, (struct ifnet *)control); 220 in6_purgeif((struct ifnet *)control); 221 in6_pcbpurgeif(&tcbtable, (struct ifnet *)control); 222 break; 223 #endif 224 default: 225 splx(s); 226 return (EAFNOSUPPORT); 227 } 228 splx(s); 229 return (0); 230 } 231 232 switch (family) { 233 #ifdef INET 234 case PF_INET: 235 inp = sotoinpcb(so); 236 #ifdef INET6 237 in6p = NULL; 238 #endif 239 break; 240 #endif 241 #ifdef INET6 242 case PF_INET6: 243 inp = NULL; 244 in6p = sotoin6pcb(so); 245 break; 246 #endif 247 default: 248 splx(s); 249 return EAFNOSUPPORT; 250 } 251 252 #ifdef DIAGNOSTIC 253 #ifdef INET6 254 if (inp && in6p) 255 panic("tcp_usrreq: both inp and in6p set to non-NULL"); 256 #endif 257 if (req != PRU_SEND && req != PRU_SENDOOB && control) 258 panic("tcp_usrreq: unexpected control mbuf"); 259 #endif 260 /* 261 * When a TCP is attached to a socket, then there will be 262 * a (struct inpcb) pointed at by the socket, and this 263 * structure will point at a subsidary (struct tcpcb). 264 */ 265 #ifndef INET6 266 if (inp == 0 && req != PRU_ATTACH) 267 #else 268 if ((inp == 0 && in6p == 0) && req != PRU_ATTACH) 269 #endif 270 { 271 error = EINVAL; 272 goto release; 273 } 274 #ifdef INET 275 if (inp) { 276 tp = intotcpcb(inp); 277 /* WHAT IF TP IS 0? */ 278 #ifdef KPROF 279 tcp_acounts[tp->t_state][req]++; 280 #endif 281 #ifdef TCP_DEBUG 282 ostate = tp->t_state; 283 #endif 284 } 285 #endif 286 #ifdef INET6 287 if (in6p) { 288 tp = in6totcpcb(in6p); 289 /* WHAT IF TP IS 0? */ 290 #ifdef KPROF 291 tcp_acounts[tp->t_state][req]++; 292 #endif 293 #ifdef TCP_DEBUG 294 ostate = tp->t_state; 295 #endif 296 } 297 #endif 298 299 switch (req) { 300 301 /* 302 * TCP attaches to socket via PRU_ATTACH, reserving space, 303 * and an internet control block. 304 */ 305 case PRU_ATTACH: 306 #ifndef INET6 307 if (inp != 0) 308 #else 309 if (inp != 0 || in6p != 0) 310 #endif 311 { 312 error = EISCONN; 313 break; 314 } 315 error = tcp_attach(so); 316 if (error) 317 break; 318 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 319 so->so_linger = TCP_LINGERTIME; 320 tp = sototcpcb(so); 321 break; 322 323 /* 324 * PRU_DETACH detaches the TCP protocol from the socket. 325 */ 326 case PRU_DETACH: 327 tp = tcp_disconnect(tp); 328 break; 329 330 /* 331 * Give the socket an address. 332 */ 333 case PRU_BIND: 334 switch (family) { 335 #ifdef INET 336 case PF_INET: 337 error = in_pcbbind(inp, nam, l); 338 break; 339 #endif 340 #ifdef INET6 341 case PF_INET6: 342 error = in6_pcbbind(in6p, nam, l); 343 if (!error) { 344 /* mapped addr case */ 345 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr)) 346 tp->t_family = AF_INET; 347 else 348 tp->t_family = AF_INET6; 349 } 350 break; 351 #endif 352 } 353 break; 354 355 /* 356 * Prepare to accept connections. 357 */ 358 case PRU_LISTEN: 359 #ifdef INET 360 if (inp && inp->inp_lport == 0) { 361 error = in_pcbbind(inp, (struct mbuf *)0, 362 (struct lwp *)0); 363 if (error) 364 break; 365 } 366 #endif 367 #ifdef INET6 368 if (in6p && in6p->in6p_lport == 0) { 369 error = in6_pcbbind(in6p, (struct mbuf *)0, 370 (struct lwp *)0); 371 if (error) 372 break; 373 } 374 #endif 375 tp->t_state = TCPS_LISTEN; 376 break; 377 378 /* 379 * Initiate connection to peer. 380 * Create a template for use in transmissions on this connection. 381 * Enter SYN_SENT state, and mark socket as connecting. 382 * Start keep-alive timer, and seed output sequence space. 383 * Send initial segment on connection. 384 */ 385 case PRU_CONNECT: 386 #ifdef INET 387 if (inp) { 388 if (inp->inp_lport == 0) { 389 error = in_pcbbind(inp, (struct mbuf *)0, 390 (struct lwp *)0); 391 if (error) 392 break; 393 } 394 error = in_pcbconnect(inp, nam, l); 395 } 396 #endif 397 #ifdef INET6 398 if (in6p) { 399 if (in6p->in6p_lport == 0) { 400 error = in6_pcbbind(in6p, (struct mbuf *)0, 401 (struct lwp *)0); 402 if (error) 403 break; 404 } 405 error = in6_pcbconnect(in6p, nam, l); 406 if (!error) { 407 /* mapped addr case */ 408 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr)) 409 tp->t_family = AF_INET; 410 else 411 tp->t_family = AF_INET6; 412 } 413 } 414 #endif 415 if (error) 416 break; 417 tp->t_template = tcp_template(tp); 418 if (tp->t_template == 0) { 419 #ifdef INET 420 if (inp) 421 in_pcbdisconnect(inp); 422 #endif 423 #ifdef INET6 424 if (in6p) 425 in6_pcbdisconnect(in6p); 426 #endif 427 error = ENOBUFS; 428 break; 429 } 430 /* Compute window scaling to request. */ 431 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 432 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 433 tp->request_r_scale++; 434 soisconnecting(so); 435 tcpstat.tcps_connattempt++; 436 tp->t_state = TCPS_SYN_SENT; 437 TCP_TIMER_ARM(tp, TCPT_KEEP, TCPTV_KEEP_INIT); 438 tp->iss = tcp_new_iss(tp, 0); 439 tcp_sendseqinit(tp); 440 error = tcp_output(tp); 441 break; 442 443 /* 444 * Create a TCP connection between two sockets. 445 */ 446 case PRU_CONNECT2: 447 error = EOPNOTSUPP; 448 break; 449 450 /* 451 * Initiate disconnect from peer. 452 * If connection never passed embryonic stage, just drop; 453 * else if don't need to let data drain, then can just drop anyways, 454 * else have to begin TCP shutdown process: mark socket disconnecting, 455 * drain unread data, state switch to reflect user close, and 456 * send segment (e.g. FIN) to peer. Socket will be really disconnected 457 * when peer sends FIN and acks ours. 458 * 459 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 460 */ 461 case PRU_DISCONNECT: 462 tp = tcp_disconnect(tp); 463 break; 464 465 /* 466 * Accept a connection. Essentially all the work is 467 * done at higher levels; just return the address 468 * of the peer, storing through addr. 469 */ 470 case PRU_ACCEPT: 471 #ifdef INET 472 if (inp) 473 in_setpeeraddr(inp, nam); 474 #endif 475 #ifdef INET6 476 if (in6p) 477 in6_setpeeraddr(in6p, nam); 478 #endif 479 break; 480 481 /* 482 * Mark the connection as being incapable of further output. 483 */ 484 case PRU_SHUTDOWN: 485 socantsendmore(so); 486 tp = tcp_usrclosed(tp); 487 if (tp) 488 error = tcp_output(tp); 489 break; 490 491 /* 492 * After a receive, possibly send window update to peer. 493 */ 494 case PRU_RCVD: 495 /* 496 * soreceive() calls this function when a user receives 497 * ancillary data on a listening socket. We don't call 498 * tcp_output in such a case, since there is no header 499 * template for a listening socket and hence the kernel 500 * will panic. 501 */ 502 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0) 503 (void) tcp_output(tp); 504 break; 505 506 /* 507 * Do a send by putting data in output queue and updating urgent 508 * marker if URG set. Possibly send more data. 509 */ 510 case PRU_SEND: 511 if (control && control->m_len) { 512 m_freem(control); 513 m_freem(m); 514 error = EINVAL; 515 break; 516 } 517 sbappendstream(&so->so_snd, m); 518 error = tcp_output(tp); 519 break; 520 521 /* 522 * Abort the TCP. 523 */ 524 case PRU_ABORT: 525 tp = tcp_drop(tp, ECONNABORTED); 526 break; 527 528 case PRU_SENSE: 529 /* 530 * stat: don't bother with a blocksize. 531 */ 532 splx(s); 533 return (0); 534 535 case PRU_RCVOOB: 536 if (control && control->m_len) { 537 m_freem(control); 538 m_freem(m); 539 error = EINVAL; 540 break; 541 } 542 if ((so->so_oobmark == 0 && 543 (so->so_state & SS_RCVATMARK) == 0) || 544 so->so_options & SO_OOBINLINE || 545 tp->t_oobflags & TCPOOB_HADDATA) { 546 error = EINVAL; 547 break; 548 } 549 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 550 error = EWOULDBLOCK; 551 break; 552 } 553 m->m_len = 1; 554 *mtod(m, caddr_t) = tp->t_iobc; 555 if (((long)nam & MSG_PEEK) == 0) 556 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 557 break; 558 559 case PRU_SENDOOB: 560 if (sbspace(&so->so_snd) < -512) { 561 m_freem(m); 562 error = ENOBUFS; 563 break; 564 } 565 /* 566 * According to RFC961 (Assigned Protocols), 567 * the urgent pointer points to the last octet 568 * of urgent data. We continue, however, 569 * to consider it to indicate the first octet 570 * of data past the urgent section. 571 * Otherwise, snd_up should be one lower. 572 */ 573 sbappendstream(&so->so_snd, m); 574 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 575 tp->t_force = 1; 576 error = tcp_output(tp); 577 tp->t_force = 0; 578 break; 579 580 case PRU_SOCKADDR: 581 #ifdef INET 582 if (inp) 583 in_setsockaddr(inp, nam); 584 #endif 585 #ifdef INET6 586 if (in6p) 587 in6_setsockaddr(in6p, nam); 588 #endif 589 break; 590 591 case PRU_PEERADDR: 592 #ifdef INET 593 if (inp) 594 in_setpeeraddr(inp, nam); 595 #endif 596 #ifdef INET6 597 if (in6p) 598 in6_setpeeraddr(in6p, nam); 599 #endif 600 break; 601 602 default: 603 panic("tcp_usrreq"); 604 } 605 #ifdef TCP_DEBUG 606 if (tp && (so->so_options & SO_DEBUG)) 607 tcp_trace(TA_USER, ostate, tp, NULL, req); 608 #endif 609 610 release: 611 splx(s); 612 return (error); 613 } 614 615 int 616 tcp_ctloutput(int op, struct socket *so, int level, int optname, 617 struct mbuf **mp) 618 { 619 int error = 0, s; 620 struct inpcb *inp; 621 #ifdef INET6 622 struct in6pcb *in6p; 623 #endif 624 struct tcpcb *tp; 625 struct mbuf *m; 626 int i; 627 int family; /* family of the socket */ 628 629 family = so->so_proto->pr_domain->dom_family; 630 631 s = splsoftnet(); 632 switch (family) { 633 #ifdef INET 634 case PF_INET: 635 inp = sotoinpcb(so); 636 #ifdef INET6 637 in6p = NULL; 638 #endif 639 break; 640 #endif 641 #ifdef INET6 642 case PF_INET6: 643 inp = NULL; 644 in6p = sotoin6pcb(so); 645 break; 646 #endif 647 default: 648 splx(s); 649 return EAFNOSUPPORT; 650 } 651 #ifndef INET6 652 if (inp == NULL) 653 #else 654 if (inp == NULL && in6p == NULL) 655 #endif 656 { 657 splx(s); 658 if (op == PRCO_SETOPT && *mp) 659 (void) m_free(*mp); 660 return (ECONNRESET); 661 } 662 if (level != IPPROTO_TCP) { 663 switch (family) { 664 #ifdef INET 665 case PF_INET: 666 error = ip_ctloutput(op, so, level, optname, mp); 667 break; 668 #endif 669 #ifdef INET6 670 case PF_INET6: 671 error = ip6_ctloutput(op, so, level, optname, mp); 672 break; 673 #endif 674 } 675 splx(s); 676 return (error); 677 } 678 if (inp) 679 tp = intotcpcb(inp); 680 #ifdef INET6 681 else if (in6p) 682 tp = in6totcpcb(in6p); 683 #endif 684 else 685 tp = NULL; 686 687 switch (op) { 688 689 case PRCO_SETOPT: 690 m = *mp; 691 switch (optname) { 692 693 #ifdef TCP_SIGNATURE 694 case TCP_MD5SIG: 695 if (m == NULL || m->m_len < sizeof (int)) 696 error = EINVAL; 697 if (error) 698 break; 699 if (*mtod(m, int *) > 0) 700 tp->t_flags |= TF_SIGNATURE; 701 else 702 tp->t_flags &= ~TF_SIGNATURE; 703 break; 704 #endif /* TCP_SIGNATURE */ 705 706 case TCP_NODELAY: 707 if (m == NULL || m->m_len < sizeof (int)) 708 error = EINVAL; 709 else if (*mtod(m, int *)) 710 tp->t_flags |= TF_NODELAY; 711 else 712 tp->t_flags &= ~TF_NODELAY; 713 break; 714 715 case TCP_MAXSEG: 716 if (m && (i = *mtod(m, int *)) > 0 && 717 i <= tp->t_peermss) 718 tp->t_peermss = i; /* limit on send size */ 719 else 720 error = EINVAL; 721 break; 722 723 default: 724 error = ENOPROTOOPT; 725 break; 726 } 727 if (m) 728 (void) m_free(m); 729 break; 730 731 case PRCO_GETOPT: 732 *mp = m = m_get(M_WAIT, MT_SOOPTS); 733 m->m_len = sizeof(int); 734 MCLAIM(m, so->so_mowner); 735 736 switch (optname) { 737 #ifdef TCP_SIGNATURE 738 case TCP_MD5SIG: 739 *mtod(m, int *) = (tp->t_flags & TF_SIGNATURE) ? 1 : 0; 740 break; 741 #endif 742 case TCP_NODELAY: 743 *mtod(m, int *) = tp->t_flags & TF_NODELAY; 744 break; 745 case TCP_MAXSEG: 746 *mtod(m, int *) = tp->t_peermss; 747 break; 748 default: 749 error = ENOPROTOOPT; 750 break; 751 } 752 break; 753 } 754 splx(s); 755 return (error); 756 } 757 758 #ifndef TCP_SENDSPACE 759 #define TCP_SENDSPACE 1024*32 760 #endif 761 int tcp_sendspace = TCP_SENDSPACE; 762 #ifndef TCP_RECVSPACE 763 #define TCP_RECVSPACE 1024*32 764 #endif 765 int tcp_recvspace = TCP_RECVSPACE; 766 767 /* 768 * Attach TCP protocol to socket, allocating 769 * internet protocol control block, tcp control block, 770 * bufer space, and entering LISTEN state if to accept connections. 771 */ 772 int 773 tcp_attach(struct socket *so) 774 { 775 struct tcpcb *tp; 776 struct inpcb *inp; 777 #ifdef INET6 778 struct in6pcb *in6p; 779 #endif 780 int error; 781 int family; /* family of the socket */ 782 783 family = so->so_proto->pr_domain->dom_family; 784 785 #ifdef MBUFTRACE 786 so->so_mowner = &tcp_mowner; 787 so->so_rcv.sb_mowner = &tcp_rx_mowner; 788 so->so_snd.sb_mowner = &tcp_tx_mowner; 789 #endif 790 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 791 error = soreserve(so, tcp_sendspace, tcp_recvspace); 792 if (error) 793 return (error); 794 } 795 switch (family) { 796 #ifdef INET 797 case PF_INET: 798 error = in_pcballoc(so, &tcbtable); 799 if (error) 800 return (error); 801 inp = sotoinpcb(so); 802 #ifdef INET6 803 in6p = NULL; 804 #endif 805 break; 806 #endif 807 #ifdef INET6 808 case PF_INET6: 809 error = in6_pcballoc(so, &tcbtable); 810 if (error) 811 return (error); 812 inp = NULL; 813 in6p = sotoin6pcb(so); 814 break; 815 #endif 816 default: 817 return EAFNOSUPPORT; 818 } 819 if (inp) 820 tp = tcp_newtcpcb(family, (void *)inp); 821 #ifdef INET6 822 else if (in6p) 823 tp = tcp_newtcpcb(family, (void *)in6p); 824 #endif 825 else 826 tp = NULL; 827 828 if (tp == 0) { 829 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 830 831 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 832 #ifdef INET 833 if (inp) 834 in_pcbdetach(inp); 835 #endif 836 #ifdef INET6 837 if (in6p) 838 in6_pcbdetach(in6p); 839 #endif 840 so->so_state |= nofd; 841 return (ENOBUFS); 842 } 843 tp->t_state = TCPS_CLOSED; 844 return (0); 845 } 846 847 /* 848 * Initiate (or continue) disconnect. 849 * If embryonic state, just send reset (once). 850 * If in ``let data drain'' option and linger null, just drop. 851 * Otherwise (hard), mark socket disconnecting and drop 852 * current input data; switch states based on user close, and 853 * send segment to peer (with FIN). 854 */ 855 struct tcpcb * 856 tcp_disconnect(struct tcpcb *tp) 857 { 858 struct socket *so; 859 860 if (tp->t_inpcb) 861 so = tp->t_inpcb->inp_socket; 862 #ifdef INET6 863 else if (tp->t_in6pcb) 864 so = tp->t_in6pcb->in6p_socket; 865 #endif 866 else 867 so = NULL; 868 869 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 870 tp = tcp_close(tp); 871 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 872 tp = tcp_drop(tp, 0); 873 else { 874 soisdisconnecting(so); 875 sbflush(&so->so_rcv); 876 tp = tcp_usrclosed(tp); 877 if (tp) 878 (void) tcp_output(tp); 879 } 880 return (tp); 881 } 882 883 /* 884 * User issued close, and wish to trail through shutdown states: 885 * if never received SYN, just forget it. If got a SYN from peer, 886 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 887 * If already got a FIN from peer, then almost done; go to LAST_ACK 888 * state. In all other cases, have already sent FIN to peer (e.g. 889 * after PRU_SHUTDOWN), and just have to play tedious game waiting 890 * for peer to send FIN or not respond to keep-alives, etc. 891 * We can let the user exit from the close as soon as the FIN is acked. 892 */ 893 struct tcpcb * 894 tcp_usrclosed(struct tcpcb *tp) 895 { 896 897 switch (tp->t_state) { 898 899 case TCPS_CLOSED: 900 case TCPS_LISTEN: 901 case TCPS_SYN_SENT: 902 tp->t_state = TCPS_CLOSED; 903 tp = tcp_close(tp); 904 break; 905 906 case TCPS_SYN_RECEIVED: 907 case TCPS_ESTABLISHED: 908 tp->t_state = TCPS_FIN_WAIT_1; 909 break; 910 911 case TCPS_CLOSE_WAIT: 912 tp->t_state = TCPS_LAST_ACK; 913 break; 914 } 915 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 916 struct socket *so; 917 if (tp->t_inpcb) 918 so = tp->t_inpcb->inp_socket; 919 #ifdef INET6 920 else if (tp->t_in6pcb) 921 so = tp->t_in6pcb->in6p_socket; 922 #endif 923 else 924 so = NULL; 925 if (so) 926 soisdisconnected(so); 927 /* 928 * If we are in FIN_WAIT_2, we arrived here because the 929 * application did a shutdown of the send side. Like the 930 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after 931 * a full close, we start a timer to make sure sockets are 932 * not left in FIN_WAIT_2 forever. 933 */ 934 if ((tp->t_state == TCPS_FIN_WAIT_2) && (tcp_maxidle > 0)) 935 TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle); 936 } 937 return (tp); 938 } 939 940 /* 941 * sysctl helper routine for net.inet.ip.mssdflt. it can't be less 942 * than 32. 943 */ 944 static int 945 sysctl_net_inet_tcp_mssdflt(SYSCTLFN_ARGS) 946 { 947 int error, mssdflt; 948 struct sysctlnode node; 949 950 mssdflt = tcp_mssdflt; 951 node = *rnode; 952 node.sysctl_data = &mssdflt; 953 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 954 if (error || newp == NULL) 955 return (error); 956 957 if (mssdflt < 32) 958 return (EINVAL); 959 tcp_mssdflt = mssdflt; 960 961 return (0); 962 } 963 964 /* 965 * sysctl helper routine for setting port related values under 966 * net.inet.ip and net.inet6.ip6. does basic range checking and does 967 * additional checks for each type. this code has placed in 968 * tcp_input.c since INET and INET6 both use the same tcp code. 969 * 970 * this helper is not static so that both inet and inet6 can use it. 971 */ 972 int 973 sysctl_net_inet_ip_ports(SYSCTLFN_ARGS) 974 { 975 int error, tmp; 976 int apmin, apmax; 977 #ifndef IPNOPRIVPORTS 978 int lpmin, lpmax; 979 #endif /* IPNOPRIVPORTS */ 980 struct sysctlnode node; 981 982 if (namelen != 0) 983 return (EINVAL); 984 985 switch (name[-3]) { 986 #ifdef INET 987 case PF_INET: 988 apmin = anonportmin; 989 apmax = anonportmax; 990 #ifndef IPNOPRIVPORTS 991 lpmin = lowportmin; 992 lpmax = lowportmax; 993 #endif /* IPNOPRIVPORTS */ 994 break; 995 #endif /* INET */ 996 #ifdef INET6 997 case PF_INET6: 998 apmin = ip6_anonportmin; 999 apmax = ip6_anonportmax; 1000 #ifndef IPNOPRIVPORTS 1001 lpmin = ip6_lowportmin; 1002 lpmax = ip6_lowportmax; 1003 #endif /* IPNOPRIVPORTS */ 1004 break; 1005 #endif /* INET6 */ 1006 default: 1007 return (EINVAL); 1008 } 1009 1010 /* 1011 * insert temporary copy into node, perform lookup on 1012 * temporary, then restore pointer 1013 */ 1014 node = *rnode; 1015 tmp = *(int*)rnode->sysctl_data; 1016 node.sysctl_data = &tmp; 1017 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1018 if (error || newp == NULL) 1019 return (error); 1020 1021 /* 1022 * simple port range check 1023 */ 1024 if (tmp < 0 || tmp > 65535) 1025 return (EINVAL); 1026 1027 /* 1028 * per-node range checks 1029 */ 1030 switch (rnode->sysctl_num) { 1031 case IPCTL_ANONPORTMIN: 1032 if (tmp >= apmax) 1033 return (EINVAL); 1034 #ifndef IPNOPRIVPORTS 1035 if (tmp < IPPORT_RESERVED) 1036 return (EINVAL); 1037 #endif /* IPNOPRIVPORTS */ 1038 break; 1039 1040 case IPCTL_ANONPORTMAX: 1041 if (apmin >= tmp) 1042 return (EINVAL); 1043 #ifndef IPNOPRIVPORTS 1044 if (tmp < IPPORT_RESERVED) 1045 return (EINVAL); 1046 #endif /* IPNOPRIVPORTS */ 1047 break; 1048 1049 #ifndef IPNOPRIVPORTS 1050 case IPCTL_LOWPORTMIN: 1051 if (tmp >= lpmax || 1052 tmp > IPPORT_RESERVEDMAX || 1053 tmp < IPPORT_RESERVEDMIN) 1054 return (EINVAL); 1055 break; 1056 1057 case IPCTL_LOWPORTMAX: 1058 if (lpmin >= tmp || 1059 tmp > IPPORT_RESERVEDMAX || 1060 tmp < IPPORT_RESERVEDMIN) 1061 return (EINVAL); 1062 break; 1063 #endif /* IPNOPRIVPORTS */ 1064 1065 default: 1066 return (EINVAL); 1067 } 1068 1069 *(int*)rnode->sysctl_data = tmp; 1070 1071 return (0); 1072 } 1073 1074 /* 1075 * sysctl helper routine for the net.inet.tcp.ident and 1076 * net.inet6.tcp6.ident nodes. contains backwards compat code for the 1077 * old way of looking up the ident information for ipv4 which involves 1078 * stuffing the port/addr pairs into the mib lookup. 1079 */ 1080 static int 1081 sysctl_net_inet_tcp_ident(SYSCTLFN_ARGS) 1082 { 1083 #ifdef INET 1084 struct inpcb *inb; 1085 struct sockaddr_in *si4[2]; 1086 #endif /* INET */ 1087 #ifdef INET6 1088 struct in6pcb *in6b; 1089 struct sockaddr_in6 *si6[2]; 1090 #endif /* INET6 */ 1091 struct sockaddr_storage sa[2]; 1092 struct socket *sockp; 1093 size_t sz; 1094 uid_t uid; 1095 int error, pf; 1096 1097 if (namelen != 4 && namelen != 0) 1098 return (EINVAL); 1099 if (name[-2] != IPPROTO_TCP) 1100 return (EINVAL); 1101 pf = name[-3]; 1102 1103 /* old style lookup, ipv4 only */ 1104 if (namelen == 4) { 1105 #ifdef INET 1106 struct in_addr laddr, raddr; 1107 u_int lport, rport; 1108 1109 if (pf != PF_INET) 1110 return (EPROTONOSUPPORT); 1111 raddr.s_addr = (uint32_t)name[0]; 1112 rport = (u_int)name[1]; 1113 laddr.s_addr = (uint32_t)name[2]; 1114 lport = (u_int)name[3]; 1115 inb = in_pcblookup_connect(&tcbtable, raddr, rport, 1116 laddr, lport); 1117 if (inb == NULL || (sockp = inb->inp_socket) == NULL) 1118 return (ESRCH); 1119 uid = sockp->so_uidinfo->ui_uid; 1120 if (oldp) { 1121 sz = MIN(sizeof(uid), *oldlenp); 1122 error = copyout(&uid, oldp, sz); 1123 if (error) 1124 return (error); 1125 } 1126 *oldlenp = sizeof(uid); 1127 return (0); 1128 #else /* INET */ 1129 return (EINVAL); 1130 #endif /* INET */ 1131 } 1132 1133 if (newp == NULL || newlen != sizeof(sa)) 1134 return (EINVAL); 1135 error = copyin(newp, &sa, newlen); 1136 if (error) 1137 return (error); 1138 1139 /* 1140 * requested families must match 1141 */ 1142 if (pf != sa[0].ss_family || sa[0].ss_family != sa[1].ss_family) 1143 return (EINVAL); 1144 1145 switch (pf) { 1146 #ifdef INET 1147 case PF_INET: 1148 si4[0] = (struct sockaddr_in*)&sa[0]; 1149 si4[1] = (struct sockaddr_in*)&sa[1]; 1150 if (si4[0]->sin_len != sizeof(*si4[0]) || 1151 si4[0]->sin_len != si4[1]->sin_len) 1152 return (EINVAL); 1153 inb = in_pcblookup_connect(&tcbtable, 1154 si4[0]->sin_addr, si4[0]->sin_port, 1155 si4[1]->sin_addr, si4[1]->sin_port); 1156 if (inb == NULL || (sockp = inb->inp_socket) == NULL) 1157 return (ESRCH); 1158 break; 1159 #endif /* INET */ 1160 #ifdef INET6 1161 case PF_INET6: 1162 si6[0] = (struct sockaddr_in6*)&sa[0]; 1163 si6[1] = (struct sockaddr_in6*)&sa[1]; 1164 if (si6[0]->sin6_len != sizeof(*si6[0]) || 1165 si6[0]->sin6_len != si6[1]->sin6_len) 1166 return (EINVAL); 1167 in6b = in6_pcblookup_connect(&tcbtable, 1168 &si6[0]->sin6_addr, si6[0]->sin6_port, 1169 &si6[1]->sin6_addr, si6[1]->sin6_port, 0); 1170 if (in6b == NULL || (sockp = in6b->in6p_socket) == NULL) 1171 return (ESRCH); 1172 break; 1173 #endif /* INET6 */ 1174 default: 1175 return (EPROTONOSUPPORT); 1176 } 1177 *oldlenp = sizeof(uid); 1178 1179 uid = sockp->so_uidinfo->ui_uid; 1180 if (oldp) { 1181 sz = MIN(sizeof(uid), *oldlenp); 1182 error = copyout(&uid, oldp, sz); 1183 if (error) 1184 return (error); 1185 } 1186 *oldlenp = sizeof(uid); 1187 1188 return (0); 1189 } 1190 1191 /* 1192 * sysctl helper for the inet and inet6 pcblists. handles tcp/udp and 1193 * inet/inet6, as well as raw pcbs for each. specifically not 1194 * declared static so that raw sockets and udp/udp6 can use it as 1195 * well. 1196 */ 1197 int 1198 sysctl_inpcblist(SYSCTLFN_ARGS) 1199 { 1200 #ifdef INET 1201 struct sockaddr_in *in; 1202 const struct inpcb *inp; 1203 #endif 1204 #ifdef INET6 1205 struct sockaddr_in6 *in6; 1206 const struct in6pcb *in6p; 1207 #endif 1208 /* 1209 * sysctl_data is const, but CIRCLEQ_FOREACH can't use a const 1210 * struct inpcbtable pointer, so we have to discard const. :-/ 1211 */ 1212 struct inpcbtable *pcbtbl = __UNCONST(rnode->sysctl_data); 1213 const struct inpcb_hdr *inph; 1214 struct tcpcb *tp; 1215 struct kinfo_pcb pcb; 1216 char *dp; 1217 u_int op, arg; 1218 size_t len, needed, elem_size, out_size; 1219 int error, elem_count, pf, proto, pf2; 1220 1221 if (namelen != 4) 1222 return (EINVAL); 1223 1224 if (oldp != NULL) { 1225 len = *oldlenp; 1226 elem_size = name[2]; 1227 elem_count = name[3]; 1228 if (elem_size != sizeof(pcb)) 1229 return EINVAL; 1230 } else { 1231 len = 0; 1232 elem_count = INT_MAX; 1233 elem_size = sizeof(pcb); 1234 } 1235 error = 0; 1236 dp = oldp; 1237 op = name[0]; 1238 arg = name[1]; 1239 out_size = elem_size; 1240 needed = 0; 1241 1242 if (namelen == 1 && name[0] == CTL_QUERY) 1243 return (sysctl_query(SYSCTLFN_CALL(rnode))); 1244 1245 if (name - oname != 4) 1246 return (EINVAL); 1247 1248 pf = oname[1]; 1249 proto = oname[2]; 1250 pf2 = (oldp != NULL) ? pf : 0; 1251 1252 CIRCLEQ_FOREACH(inph, &pcbtbl->inpt_queue, inph_queue) { 1253 #ifdef INET 1254 inp = (const struct inpcb *)inph; 1255 #endif 1256 #ifdef INET6 1257 in6p = (const struct in6pcb *)inph; 1258 #endif 1259 1260 if (inph->inph_af != pf) 1261 continue; 1262 1263 #ifdef notyet 1264 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_CANSEE, 1265 inph->inph_socket->so_cred)) 1266 #endif 1267 if (kauth_cred_geteuid(l->l_cred) != 0 && 1268 kauth_cred_geteuid(l->l_cred) != 1269 inph->inph_socket->so_uidinfo->ui_uid) 1270 continue; 1271 1272 memset(&pcb, 0, sizeof(pcb)); 1273 1274 pcb.ki_family = pf; 1275 pcb.ki_type = proto; 1276 1277 switch (pf2) { 1278 case 0: 1279 /* just probing for size */ 1280 break; 1281 #ifdef INET 1282 case PF_INET: 1283 pcb.ki_family = inp->inp_socket->so_proto-> 1284 pr_domain->dom_family; 1285 pcb.ki_type = inp->inp_socket->so_proto-> 1286 pr_type; 1287 pcb.ki_protocol = inp->inp_socket->so_proto-> 1288 pr_protocol; 1289 pcb.ki_pflags = inp->inp_flags; 1290 1291 pcb.ki_sostate = inp->inp_socket->so_state; 1292 pcb.ki_prstate = inp->inp_state; 1293 if (proto == IPPROTO_TCP) { 1294 tp = intotcpcb(inp); 1295 pcb.ki_tstate = tp->t_state; 1296 pcb.ki_tflags = tp->t_flags; 1297 } 1298 1299 pcb.ki_pcbaddr = PTRTOUINT64(inp); 1300 pcb.ki_ppcbaddr = PTRTOUINT64(inp->inp_ppcb); 1301 pcb.ki_sockaddr = PTRTOUINT64(inp->inp_socket); 1302 1303 pcb.ki_rcvq = inp->inp_socket->so_rcv.sb_cc; 1304 pcb.ki_sndq = inp->inp_socket->so_snd.sb_cc; 1305 1306 in = satosin(&pcb.ki_src); 1307 in->sin_len = sizeof(*in); 1308 in->sin_family = pf; 1309 in->sin_port = inp->inp_lport; 1310 in->sin_addr = inp->inp_laddr; 1311 if (pcb.ki_prstate >= INP_CONNECTED) { 1312 in = satosin(&pcb.ki_dst); 1313 in->sin_len = sizeof(*in); 1314 in->sin_family = pf; 1315 in->sin_port = inp->inp_fport; 1316 in->sin_addr = inp->inp_faddr; 1317 } 1318 break; 1319 #endif 1320 #ifdef INET6 1321 case PF_INET6: 1322 pcb.ki_family = in6p->in6p_socket->so_proto-> 1323 pr_domain->dom_family; 1324 pcb.ki_type = in6p->in6p_socket->so_proto->pr_type; 1325 pcb.ki_protocol = in6p->in6p_socket->so_proto-> 1326 pr_protocol; 1327 pcb.ki_pflags = in6p->in6p_flags; 1328 1329 pcb.ki_sostate = in6p->in6p_socket->so_state; 1330 pcb.ki_prstate = in6p->in6p_state; 1331 if (proto == IPPROTO_TCP) { 1332 tp = in6totcpcb(in6p); 1333 pcb.ki_tstate = tp->t_state; 1334 pcb.ki_tflags = tp->t_flags; 1335 } 1336 1337 pcb.ki_pcbaddr = PTRTOUINT64(in6p); 1338 pcb.ki_ppcbaddr = PTRTOUINT64(in6p->in6p_ppcb); 1339 pcb.ki_sockaddr = PTRTOUINT64(in6p->in6p_socket); 1340 1341 pcb.ki_rcvq = in6p->in6p_socket->so_rcv.sb_cc; 1342 pcb.ki_sndq = in6p->in6p_socket->so_snd.sb_cc; 1343 1344 in6 = satosin6(&pcb.ki_src); 1345 in6->sin6_len = sizeof(*in6); 1346 in6->sin6_family = pf; 1347 in6->sin6_port = in6p->in6p_lport; 1348 in6->sin6_flowinfo = in6p->in6p_flowinfo; 1349 in6->sin6_addr = in6p->in6p_laddr; 1350 in6->sin6_scope_id = 0; /* XXX? */ 1351 1352 if (pcb.ki_prstate >= IN6P_CONNECTED) { 1353 in6 = satosin6(&pcb.ki_dst); 1354 in6->sin6_len = sizeof(*in6); 1355 in6->sin6_family = pf; 1356 in6->sin6_port = in6p->in6p_fport; 1357 in6->sin6_flowinfo = in6p->in6p_flowinfo; 1358 in6->sin6_addr = in6p->in6p_faddr; 1359 in6->sin6_scope_id = 0; /* XXX? */ 1360 } 1361 break; 1362 #endif 1363 } 1364 1365 if (len >= elem_size && elem_count > 0) { 1366 error = copyout(&pcb, dp, out_size); 1367 if (error) 1368 return (error); 1369 dp += elem_size; 1370 len -= elem_size; 1371 } 1372 if (elem_count > 0) { 1373 needed += elem_size; 1374 if (elem_count != INT_MAX) 1375 elem_count--; 1376 } 1377 } 1378 1379 *oldlenp = needed; 1380 if (oldp == NULL) 1381 *oldlenp += PCB_SLOP * sizeof(struct kinfo_pcb); 1382 1383 return (error); 1384 } 1385 1386 /* 1387 * this (second stage) setup routine is a replacement for tcp_sysctl() 1388 * (which is currently used for ipv4 and ipv6) 1389 */ 1390 static void 1391 sysctl_net_inet_tcp_setup2(struct sysctllog **clog, int pf, const char *pfname, 1392 const char *tcpname) 1393 { 1394 int ecn_node; 1395 const struct sysctlnode *sack_node, *node; 1396 #ifdef TCP_DEBUG 1397 extern struct tcp_debug tcp_debug[TCP_NDEBUG]; 1398 extern int tcp_debx; 1399 #endif 1400 1401 sysctl_createv(clog, 0, NULL, NULL, 1402 CTLFLAG_PERMANENT, 1403 CTLTYPE_NODE, "net", NULL, 1404 NULL, 0, NULL, 0, 1405 CTL_NET, CTL_EOL); 1406 sysctl_createv(clog, 0, NULL, NULL, 1407 CTLFLAG_PERMANENT, 1408 CTLTYPE_NODE, pfname, NULL, 1409 NULL, 0, NULL, 0, 1410 CTL_NET, pf, CTL_EOL); 1411 sysctl_createv(clog, 0, NULL, NULL, 1412 CTLFLAG_PERMANENT, 1413 CTLTYPE_NODE, tcpname, 1414 SYSCTL_DESCR("TCP related settings"), 1415 NULL, 0, NULL, 0, 1416 CTL_NET, pf, IPPROTO_TCP, CTL_EOL); 1417 1418 sysctl_createv(clog, 0, NULL, NULL, 1419 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1420 CTLTYPE_INT, "rfc1323", 1421 SYSCTL_DESCR("Enable RFC1323 TCP extensions"), 1422 NULL, 0, &tcp_do_rfc1323, 0, 1423 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RFC1323, CTL_EOL); 1424 sysctl_createv(clog, 0, NULL, NULL, 1425 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1426 CTLTYPE_INT, "sendspace", 1427 SYSCTL_DESCR("Default TCP send buffer size"), 1428 NULL, 0, &tcp_sendspace, 0, 1429 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SENDSPACE, CTL_EOL); 1430 sysctl_createv(clog, 0, NULL, NULL, 1431 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1432 CTLTYPE_INT, "recvspace", 1433 SYSCTL_DESCR("Default TCP receive buffer size"), 1434 NULL, 0, &tcp_recvspace, 0, 1435 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RECVSPACE, CTL_EOL); 1436 sysctl_createv(clog, 0, NULL, NULL, 1437 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1438 CTLTYPE_INT, "mssdflt", 1439 SYSCTL_DESCR("Default maximum segment size"), 1440 sysctl_net_inet_tcp_mssdflt, 0, &tcp_mssdflt, 0, 1441 CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSSDFLT, CTL_EOL); 1442 sysctl_createv(clog, 0, NULL, NULL, 1443 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1444 CTLTYPE_INT, "syn_cache_limit", 1445 SYSCTL_DESCR("Maximum number of entries in the TCP " 1446 "compressed state engine"), 1447 NULL, 0, &tcp_syn_cache_limit, 0, 1448 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_CACHE_LIMIT, 1449 CTL_EOL); 1450 sysctl_createv(clog, 0, NULL, NULL, 1451 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1452 CTLTYPE_INT, "syn_bucket_limit", 1453 SYSCTL_DESCR("Maximum number of entries per hash " 1454 "bucket in the TCP compressed state " 1455 "engine"), 1456 NULL, 0, &tcp_syn_bucket_limit, 0, 1457 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_BUCKET_LIMIT, 1458 CTL_EOL); 1459 #if 0 /* obsoleted */ 1460 sysctl_createv(clog, 0, NULL, NULL, 1461 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1462 CTLTYPE_INT, "syn_cache_interval", 1463 SYSCTL_DESCR("TCP compressed state engine's timer interval"), 1464 NULL, 0, &tcp_syn_cache_interval, 0, 1465 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_CACHE_INTER, 1466 CTL_EOL); 1467 #endif 1468 sysctl_createv(clog, 0, NULL, NULL, 1469 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1470 CTLTYPE_INT, "init_win", 1471 SYSCTL_DESCR("Initial TCP congestion window"), 1472 NULL, 0, &tcp_init_win, 0, 1473 CTL_NET, pf, IPPROTO_TCP, TCPCTL_INIT_WIN, CTL_EOL); 1474 sysctl_createv(clog, 0, NULL, NULL, 1475 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1476 CTLTYPE_INT, "mss_ifmtu", 1477 SYSCTL_DESCR("Use interface MTU for calculating MSS"), 1478 NULL, 0, &tcp_mss_ifmtu, 0, 1479 CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSS_IFMTU, CTL_EOL); 1480 sysctl_createv(clog, 0, NULL, &sack_node, 1481 CTLFLAG_PERMANENT, 1482 CTLTYPE_NODE, "sack", 1483 SYSCTL_DESCR("RFC2018 Selective ACKnowledgement tunables"), 1484 NULL, 0, NULL, 0, 1485 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_EOL); 1486 sysctl_createv(clog, 0, NULL, &node, 1487 CTLFLAG_PERMANENT, 1488 CTLTYPE_NODE, "ecn", 1489 SYSCTL_DESCR("RFC3168 Explicit Congestion Notification"), 1490 NULL, 0, NULL, 0, 1491 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1492 ecn_node = node->sysctl_num; 1493 sysctl_createv(clog, 0, NULL, NULL, 1494 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1495 CTLTYPE_INT, "win_scale", 1496 SYSCTL_DESCR("Use RFC1323 window scale options"), 1497 NULL, 0, &tcp_do_win_scale, 0, 1498 CTL_NET, pf, IPPROTO_TCP, TCPCTL_WSCALE, CTL_EOL); 1499 sysctl_createv(clog, 0, NULL, NULL, 1500 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1501 CTLTYPE_INT, "timestamps", 1502 SYSCTL_DESCR("Use RFC1323 time stamp options"), 1503 NULL, 0, &tcp_do_timestamps, 0, 1504 CTL_NET, pf, IPPROTO_TCP, TCPCTL_TSTAMP, CTL_EOL); 1505 sysctl_createv(clog, 0, NULL, NULL, 1506 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1507 CTLTYPE_INT, "compat_42", 1508 SYSCTL_DESCR("Enable workarounds for 4.2BSD TCP bugs"), 1509 NULL, 0, &tcp_compat_42, 0, 1510 CTL_NET, pf, IPPROTO_TCP, TCPCTL_COMPAT_42, CTL_EOL); 1511 sysctl_createv(clog, 0, NULL, NULL, 1512 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1513 CTLTYPE_INT, "cwm", 1514 SYSCTL_DESCR("Hughes/Touch/Heidemann Congestion Window " 1515 "Monitoring"), 1516 NULL, 0, &tcp_cwm, 0, 1517 CTL_NET, pf, IPPROTO_TCP, TCPCTL_CWM, CTL_EOL); 1518 sysctl_createv(clog, 0, NULL, NULL, 1519 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1520 CTLTYPE_INT, "cwm_burstsize", 1521 SYSCTL_DESCR("Congestion Window Monitoring allowed " 1522 "burst count in packets"), 1523 NULL, 0, &tcp_cwm_burstsize, 0, 1524 CTL_NET, pf, IPPROTO_TCP, TCPCTL_CWM_BURSTSIZE, 1525 CTL_EOL); 1526 sysctl_createv(clog, 0, NULL, NULL, 1527 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1528 CTLTYPE_INT, "ack_on_push", 1529 SYSCTL_DESCR("Immediately return ACK when PSH is " 1530 "received"), 1531 NULL, 0, &tcp_ack_on_push, 0, 1532 CTL_NET, pf, IPPROTO_TCP, TCPCTL_ACK_ON_PUSH, CTL_EOL); 1533 sysctl_createv(clog, 0, NULL, NULL, 1534 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1535 CTLTYPE_INT, "keepidle", 1536 SYSCTL_DESCR("Allowed connection idle ticks before a " 1537 "keepalive probe is sent"), 1538 NULL, 0, &tcp_keepidle, 0, 1539 CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPIDLE, CTL_EOL); 1540 sysctl_createv(clog, 0, NULL, NULL, 1541 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1542 CTLTYPE_INT, "keepintvl", 1543 SYSCTL_DESCR("Ticks before next keepalive probe is sent"), 1544 NULL, 0, &tcp_keepintvl, 0, 1545 CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPINTVL, CTL_EOL); 1546 sysctl_createv(clog, 0, NULL, NULL, 1547 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1548 CTLTYPE_INT, "keepcnt", 1549 SYSCTL_DESCR("Number of keepalive probes to send"), 1550 NULL, 0, &tcp_keepcnt, 0, 1551 CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPCNT, CTL_EOL); 1552 sysctl_createv(clog, 0, NULL, NULL, 1553 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 1554 CTLTYPE_INT, "slowhz", 1555 SYSCTL_DESCR("Keepalive ticks per second"), 1556 NULL, PR_SLOWHZ, NULL, 0, 1557 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SLOWHZ, CTL_EOL); 1558 sysctl_createv(clog, 0, NULL, NULL, 1559 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1560 CTLTYPE_INT, "newreno", 1561 SYSCTL_DESCR("NewReno congestion control algorithm"), 1562 NULL, 0, &tcp_do_newreno, 0, 1563 CTL_NET, pf, IPPROTO_TCP, TCPCTL_NEWRENO, CTL_EOL); 1564 sysctl_createv(clog, 0, NULL, NULL, 1565 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1566 CTLTYPE_INT, "log_refused", 1567 SYSCTL_DESCR("Log refused TCP connections"), 1568 NULL, 0, &tcp_log_refused, 0, 1569 CTL_NET, pf, IPPROTO_TCP, TCPCTL_LOG_REFUSED, CTL_EOL); 1570 #if 0 /* obsoleted */ 1571 sysctl_createv(clog, 0, NULL, NULL, 1572 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1573 CTLTYPE_INT, "rstratelimit", NULL, 1574 NULL, 0, &tcp_rst_ratelim, 0, 1575 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RSTRATELIMIT, CTL_EOL); 1576 #endif 1577 sysctl_createv(clog, 0, NULL, NULL, 1578 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1579 CTLTYPE_INT, "rstppslimit", 1580 SYSCTL_DESCR("Maximum number of RST packets to send " 1581 "per second"), 1582 NULL, 0, &tcp_rst_ppslim, 0, 1583 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RSTPPSLIMIT, CTL_EOL); 1584 sysctl_createv(clog, 0, NULL, NULL, 1585 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1586 CTLTYPE_INT, "delack_ticks", 1587 SYSCTL_DESCR("Number of ticks to delay sending an ACK"), 1588 NULL, 0, &tcp_delack_ticks, 0, 1589 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DELACK_TICKS, CTL_EOL); 1590 sysctl_createv(clog, 0, NULL, NULL, 1591 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1592 CTLTYPE_INT, "init_win_local", 1593 SYSCTL_DESCR("Initial TCP window size (in segments)"), 1594 NULL, 0, &tcp_init_win_local, 0, 1595 CTL_NET, pf, IPPROTO_TCP, TCPCTL_INIT_WIN_LOCAL, 1596 CTL_EOL); 1597 sysctl_createv(clog, 0, NULL, NULL, 1598 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1599 CTLTYPE_STRUCT, "ident", 1600 SYSCTL_DESCR("RFC1413 Identification Protocol lookups"), 1601 sysctl_net_inet_tcp_ident, 0, NULL, sizeof(uid_t), 1602 CTL_NET, pf, IPPROTO_TCP, TCPCTL_IDENT, CTL_EOL); 1603 sysctl_createv(clog, 0, NULL, NULL, 1604 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1605 CTLTYPE_INT, "do_loopback_cksum", 1606 SYSCTL_DESCR("Perform TCP checksum on loopback"), 1607 NULL, 0, &tcp_do_loopback_cksum, 0, 1608 CTL_NET, pf, IPPROTO_TCP, TCPCTL_LOOPBACKCKSUM, 1609 CTL_EOL); 1610 sysctl_createv(clog, 0, NULL, NULL, 1611 CTLFLAG_PERMANENT, 1612 CTLTYPE_STRUCT, "pcblist", 1613 SYSCTL_DESCR("TCP protocol control block list"), 1614 sysctl_inpcblist, 0, &tcbtable, 0, 1615 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, 1616 CTL_EOL); 1617 1618 sysctl_createv(clog, 0, NULL, NULL, 1619 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1620 CTLTYPE_INT, "enable", 1621 SYSCTL_DESCR("Enable TCP Explicit Congestion " 1622 "Notification"), 1623 NULL, 0, &tcp_do_ecn, 0, 1624 CTL_NET, pf, IPPROTO_TCP, ecn_node, 1625 CTL_CREATE, CTL_EOL); 1626 sysctl_createv(clog, 0, NULL, NULL, 1627 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1628 CTLTYPE_INT, "maxretries", 1629 SYSCTL_DESCR("Number of times to retry ECN setup " 1630 "before disabling ECN on the connection"), 1631 NULL, 0, &tcp_ecn_maxretries, 0, 1632 CTL_NET, pf, IPPROTO_TCP, ecn_node, 1633 CTL_CREATE, CTL_EOL); 1634 1635 /* SACK gets it's own little subtree. */ 1636 sysctl_createv(clog, 0, NULL, &sack_node, 1637 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1638 CTLTYPE_INT, "enable", 1639 SYSCTL_DESCR("Enable RFC2018 Selective ACKnowledgement"), 1640 NULL, 0, &tcp_do_sack, 0, 1641 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 1642 sysctl_createv(clog, 0, NULL, &sack_node, 1643 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1644 CTLTYPE_INT, "maxholes", 1645 SYSCTL_DESCR("Maximum number of TCP SACK holes allowed per connection"), 1646 NULL, 0, &tcp_sack_tp_maxholes, 0, 1647 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 1648 sysctl_createv(clog, 0, NULL, &sack_node, 1649 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1650 CTLTYPE_INT, "globalmaxholes", 1651 SYSCTL_DESCR("Global maximum number of TCP SACK holes"), 1652 NULL, 0, &tcp_sack_globalmaxholes, 0, 1653 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 1654 sysctl_createv(clog, 0, NULL, &sack_node, 1655 CTLFLAG_PERMANENT, 1656 CTLTYPE_INT, "globalholes", 1657 SYSCTL_DESCR("Global number of TCP SACK holes"), 1658 NULL, 0, &tcp_sack_globalholes, 0, 1659 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 1660 1661 sysctl_createv(clog, 0, NULL, NULL, 1662 CTLFLAG_PERMANENT, 1663 CTLTYPE_STRUCT, "stats", 1664 SYSCTL_DESCR("TCP statistics"), 1665 NULL, 0, &tcpstat, sizeof(tcpstat), 1666 CTL_NET, pf, IPPROTO_TCP, TCPCTL_STATS, 1667 CTL_EOL); 1668 #ifdef TCP_DEBUG 1669 sysctl_createv(clog, 0, NULL, NULL, 1670 CTLFLAG_PERMANENT, 1671 CTLTYPE_STRUCT, "debug", 1672 SYSCTL_DESCR("TCP sockets debug information"), 1673 NULL, 0, &tcp_debug, sizeof(tcp_debug), 1674 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DEBUG, 1675 CTL_EOL); 1676 sysctl_createv(clog, 0, NULL, NULL, 1677 CTLFLAG_PERMANENT, 1678 CTLTYPE_INT, "debx", 1679 SYSCTL_DESCR("Number of TCP debug sockets messages"), 1680 NULL, 0, &tcp_debx, sizeof(tcp_debx), 1681 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DEBX, 1682 CTL_EOL); 1683 #endif 1684 1685 } 1686 1687 /* 1688 * Sysctl for tcp variables. 1689 */ 1690 #ifdef INET 1691 SYSCTL_SETUP(sysctl_net_inet_tcp_setup, "sysctl net.inet.tcp subtree setup") 1692 { 1693 1694 sysctl_net_inet_tcp_setup2(clog, PF_INET, "inet", "tcp"); 1695 } 1696 #endif /* INET */ 1697 1698 #ifdef INET6 1699 SYSCTL_SETUP(sysctl_net_inet6_tcp6_setup, "sysctl net.inet6.tcp6 subtree setup") 1700 { 1701 1702 sysctl_net_inet_tcp_setup2(clog, PF_INET6, "inet6", "tcp6"); 1703 } 1704 #endif /* INET6 */ 1705