1 /* $NetBSD: tcp_usrreq.c,v 1.106 2005/06/20 02:49:18 atatat Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1997, 1998, 2005 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to The NetBSD Foundation 37 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation 38 * Facility, NASA Ames Research Center. 39 * This code is derived from software contributed to The NetBSD Foundation 40 * by Charles M. Hannum. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. All advertising materials mentioning features or use of this software 51 * must display the following acknowledgement: 52 * This product includes software developed by the NetBSD 53 * Foundation, Inc. and its contributors. 54 * 4. Neither the name of The NetBSD Foundation nor the names of its 55 * contributors may be used to endorse or promote products derived 56 * from this software without specific prior written permission. 57 * 58 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 59 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 60 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 61 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 62 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 63 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 64 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 65 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 66 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 67 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 68 * POSSIBILITY OF SUCH DAMAGE. 69 */ 70 71 /* 72 * Copyright (c) 1982, 1986, 1988, 1993, 1995 73 * The Regents of the University of California. All rights reserved. 74 * 75 * Redistribution and use in source and binary forms, with or without 76 * modification, are permitted provided that the following conditions 77 * are met: 78 * 1. Redistributions of source code must retain the above copyright 79 * notice, this list of conditions and the following disclaimer. 80 * 2. Redistributions in binary form must reproduce the above copyright 81 * notice, this list of conditions and the following disclaimer in the 82 * documentation and/or other materials provided with the distribution. 83 * 3. Neither the name of the University nor the names of its contributors 84 * may be used to endorse or promote products derived from this software 85 * without specific prior written permission. 86 * 87 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 88 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 89 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 90 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 91 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 92 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 93 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 94 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 95 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 96 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 97 * SUCH DAMAGE. 98 * 99 * @(#)tcp_usrreq.c 8.5 (Berkeley) 6/21/95 100 */ 101 102 #include <sys/cdefs.h> 103 __KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.106 2005/06/20 02:49:18 atatat Exp $"); 104 105 #include "opt_inet.h" 106 #include "opt_ipsec.h" 107 #include "opt_tcp_debug.h" 108 #include "opt_mbuftrace.h" 109 110 #include <sys/param.h> 111 #include <sys/systm.h> 112 #include <sys/kernel.h> 113 #include <sys/malloc.h> 114 #include <sys/mbuf.h> 115 #include <sys/socket.h> 116 #include <sys/socketvar.h> 117 #include <sys/protosw.h> 118 #include <sys/errno.h> 119 #include <sys/stat.h> 120 #include <sys/proc.h> 121 #include <sys/domain.h> 122 #include <sys/sysctl.h> 123 124 #include <net/if.h> 125 #include <net/route.h> 126 127 #include <netinet/in.h> 128 #include <netinet/in_systm.h> 129 #include <netinet/in_var.h> 130 #include <netinet/ip.h> 131 #include <netinet/in_pcb.h> 132 #include <netinet/ip_var.h> 133 134 #ifdef INET6 135 #ifndef INET 136 #include <netinet/in.h> 137 #endif 138 #include <netinet/ip6.h> 139 #include <netinet6/in6_pcb.h> 140 #include <netinet6/ip6_var.h> 141 #endif 142 143 #include <netinet/tcp.h> 144 #include <netinet/tcp_fsm.h> 145 #include <netinet/tcp_seq.h> 146 #include <netinet/tcp_timer.h> 147 #include <netinet/tcp_var.h> 148 #include <netinet/tcpip.h> 149 #include <netinet/tcp_debug.h> 150 151 #include "opt_tcp_space.h" 152 153 #ifdef IPSEC 154 #include <netinet6/ipsec.h> 155 #endif /*IPSEC*/ 156 157 /* 158 * TCP protocol interface to socket abstraction. 159 */ 160 161 /* 162 * Process a TCP user request for TCP tb. If this is a send request 163 * then m is the mbuf chain of send data. If this is a timer expiration 164 * (called from the software clock routine), then timertype tells which timer. 165 */ 166 /*ARGSUSED*/ 167 int 168 tcp_usrreq(struct socket *so, int req, 169 struct mbuf *m, struct mbuf *nam, struct mbuf *control, struct proc *p) 170 { 171 struct inpcb *inp; 172 #ifdef INET6 173 struct in6pcb *in6p; 174 #endif 175 struct tcpcb *tp = NULL; 176 int s; 177 int error = 0; 178 #ifdef TCP_DEBUG 179 int ostate = 0; 180 #endif 181 int family; /* family of the socket */ 182 183 family = so->so_proto->pr_domain->dom_family; 184 185 if (req == PRU_CONTROL) { 186 switch (family) { 187 #ifdef INET 188 case PF_INET: 189 return (in_control(so, (long)m, (caddr_t)nam, 190 (struct ifnet *)control, p)); 191 #endif 192 #ifdef INET6 193 case PF_INET6: 194 return (in6_control(so, (long)m, (caddr_t)nam, 195 (struct ifnet *)control, p)); 196 #endif 197 default: 198 return EAFNOSUPPORT; 199 } 200 } 201 202 if (req == PRU_PURGEIF) { 203 switch (family) { 204 #ifdef INET 205 case PF_INET: 206 in_pcbpurgeif0(&tcbtable, (struct ifnet *)control); 207 in_purgeif((struct ifnet *)control); 208 in_pcbpurgeif(&tcbtable, (struct ifnet *)control); 209 break; 210 #endif 211 #ifdef INET6 212 case PF_INET6: 213 in6_pcbpurgeif0(&tcbtable, (struct ifnet *)control); 214 in6_purgeif((struct ifnet *)control); 215 in6_pcbpurgeif(&tcbtable, (struct ifnet *)control); 216 break; 217 #endif 218 default: 219 return (EAFNOSUPPORT); 220 } 221 return (0); 222 } 223 224 s = splsoftnet(); 225 switch (family) { 226 #ifdef INET 227 case PF_INET: 228 inp = sotoinpcb(so); 229 #ifdef INET6 230 in6p = NULL; 231 #endif 232 break; 233 #endif 234 #ifdef INET6 235 case PF_INET6: 236 inp = NULL; 237 in6p = sotoin6pcb(so); 238 break; 239 #endif 240 default: 241 splx(s); 242 return EAFNOSUPPORT; 243 } 244 245 #ifdef DIAGNOSTIC 246 #ifdef INET6 247 if (inp && in6p) 248 panic("tcp_usrreq: both inp and in6p set to non-NULL"); 249 #endif 250 if (req != PRU_SEND && req != PRU_SENDOOB && control) 251 panic("tcp_usrreq: unexpected control mbuf"); 252 #endif 253 /* 254 * When a TCP is attached to a socket, then there will be 255 * a (struct inpcb) pointed at by the socket, and this 256 * structure will point at a subsidary (struct tcpcb). 257 */ 258 #ifndef INET6 259 if (inp == 0 && req != PRU_ATTACH) 260 #else 261 if ((inp == 0 && in6p == 0) && req != PRU_ATTACH) 262 #endif 263 { 264 error = EINVAL; 265 goto release; 266 } 267 #ifdef INET 268 if (inp) { 269 tp = intotcpcb(inp); 270 /* WHAT IF TP IS 0? */ 271 #ifdef KPROF 272 tcp_acounts[tp->t_state][req]++; 273 #endif 274 #ifdef TCP_DEBUG 275 ostate = tp->t_state; 276 #endif 277 } 278 #endif 279 #ifdef INET6 280 if (in6p) { 281 tp = in6totcpcb(in6p); 282 /* WHAT IF TP IS 0? */ 283 #ifdef KPROF 284 tcp_acounts[tp->t_state][req]++; 285 #endif 286 #ifdef TCP_DEBUG 287 ostate = tp->t_state; 288 #endif 289 } 290 #endif 291 292 switch (req) { 293 294 /* 295 * TCP attaches to socket via PRU_ATTACH, reserving space, 296 * and an internet control block. 297 */ 298 case PRU_ATTACH: 299 #ifndef INET6 300 if (inp != 0) 301 #else 302 if (inp != 0 || in6p != 0) 303 #endif 304 { 305 error = EISCONN; 306 break; 307 } 308 error = tcp_attach(so); 309 if (error) 310 break; 311 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 312 so->so_linger = TCP_LINGERTIME; 313 tp = sototcpcb(so); 314 break; 315 316 /* 317 * PRU_DETACH detaches the TCP protocol from the socket. 318 */ 319 case PRU_DETACH: 320 tp = tcp_disconnect(tp); 321 break; 322 323 /* 324 * Give the socket an address. 325 */ 326 case PRU_BIND: 327 switch (family) { 328 #ifdef INET 329 case PF_INET: 330 error = in_pcbbind(inp, nam, p); 331 break; 332 #endif 333 #ifdef INET6 334 case PF_INET6: 335 error = in6_pcbbind(in6p, nam, p); 336 if (!error) { 337 /* mapped addr case */ 338 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr)) 339 tp->t_family = AF_INET; 340 else 341 tp->t_family = AF_INET6; 342 } 343 break; 344 #endif 345 } 346 break; 347 348 /* 349 * Prepare to accept connections. 350 */ 351 case PRU_LISTEN: 352 #ifdef INET 353 if (inp && inp->inp_lport == 0) { 354 error = in_pcbbind(inp, (struct mbuf *)0, 355 (struct proc *)0); 356 if (error) 357 break; 358 } 359 #endif 360 #ifdef INET6 361 if (in6p && in6p->in6p_lport == 0) { 362 error = in6_pcbbind(in6p, (struct mbuf *)0, 363 (struct proc *)0); 364 if (error) 365 break; 366 } 367 #endif 368 tp->t_state = TCPS_LISTEN; 369 break; 370 371 /* 372 * Initiate connection to peer. 373 * Create a template for use in transmissions on this connection. 374 * Enter SYN_SENT state, and mark socket as connecting. 375 * Start keep-alive timer, and seed output sequence space. 376 * Send initial segment on connection. 377 */ 378 case PRU_CONNECT: 379 #ifdef INET 380 if (inp) { 381 if (inp->inp_lport == 0) { 382 error = in_pcbbind(inp, (struct mbuf *)0, 383 (struct proc *)0); 384 if (error) 385 break; 386 } 387 error = in_pcbconnect(inp, nam); 388 } 389 #endif 390 #ifdef INET6 391 if (in6p) { 392 if (in6p->in6p_lport == 0) { 393 error = in6_pcbbind(in6p, (struct mbuf *)0, 394 (struct proc *)0); 395 if (error) 396 break; 397 } 398 error = in6_pcbconnect(in6p, nam); 399 if (!error) { 400 /* mapped addr case */ 401 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr)) 402 tp->t_family = AF_INET; 403 else 404 tp->t_family = AF_INET6; 405 } 406 } 407 #endif 408 if (error) 409 break; 410 tp->t_template = tcp_template(tp); 411 if (tp->t_template == 0) { 412 #ifdef INET 413 if (inp) 414 in_pcbdisconnect(inp); 415 #endif 416 #ifdef INET6 417 if (in6p) 418 in6_pcbdisconnect(in6p); 419 #endif 420 error = ENOBUFS; 421 break; 422 } 423 /* Compute window scaling to request. */ 424 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 425 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 426 tp->request_r_scale++; 427 soisconnecting(so); 428 tcpstat.tcps_connattempt++; 429 tp->t_state = TCPS_SYN_SENT; 430 TCP_TIMER_ARM(tp, TCPT_KEEP, TCPTV_KEEP_INIT); 431 tp->iss = tcp_new_iss(tp, 0); 432 tcp_sendseqinit(tp); 433 error = tcp_output(tp); 434 break; 435 436 /* 437 * Create a TCP connection between two sockets. 438 */ 439 case PRU_CONNECT2: 440 error = EOPNOTSUPP; 441 break; 442 443 /* 444 * Initiate disconnect from peer. 445 * If connection never passed embryonic stage, just drop; 446 * else if don't need to let data drain, then can just drop anyways, 447 * else have to begin TCP shutdown process: mark socket disconnecting, 448 * drain unread data, state switch to reflect user close, and 449 * send segment (e.g. FIN) to peer. Socket will be really disconnected 450 * when peer sends FIN and acks ours. 451 * 452 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 453 */ 454 case PRU_DISCONNECT: 455 tp = tcp_disconnect(tp); 456 break; 457 458 /* 459 * Accept a connection. Essentially all the work is 460 * done at higher levels; just return the address 461 * of the peer, storing through addr. 462 */ 463 case PRU_ACCEPT: 464 #ifdef INET 465 if (inp) 466 in_setpeeraddr(inp, nam); 467 #endif 468 #ifdef INET6 469 if (in6p) 470 in6_setpeeraddr(in6p, nam); 471 #endif 472 break; 473 474 /* 475 * Mark the connection as being incapable of further output. 476 */ 477 case PRU_SHUTDOWN: 478 socantsendmore(so); 479 tp = tcp_usrclosed(tp); 480 if (tp) 481 error = tcp_output(tp); 482 break; 483 484 /* 485 * After a receive, possibly send window update to peer. 486 */ 487 case PRU_RCVD: 488 /* 489 * soreceive() calls this function when a user receives 490 * ancillary data on a listening socket. We don't call 491 * tcp_output in such a case, since there is no header 492 * template for a listening socket and hence the kernel 493 * will panic. 494 */ 495 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0) 496 (void) tcp_output(tp); 497 break; 498 499 /* 500 * Do a send by putting data in output queue and updating urgent 501 * marker if URG set. Possibly send more data. 502 */ 503 case PRU_SEND: 504 if (control && control->m_len) { 505 m_freem(control); 506 m_freem(m); 507 error = EINVAL; 508 break; 509 } 510 sbappendstream(&so->so_snd, m); 511 error = tcp_output(tp); 512 break; 513 514 /* 515 * Abort the TCP. 516 */ 517 case PRU_ABORT: 518 tp = tcp_drop(tp, ECONNABORTED); 519 break; 520 521 case PRU_SENSE: 522 /* 523 * stat: don't bother with a blocksize. 524 */ 525 splx(s); 526 return (0); 527 528 case PRU_RCVOOB: 529 if (control && control->m_len) { 530 m_freem(control); 531 m_freem(m); 532 error = EINVAL; 533 break; 534 } 535 if ((so->so_oobmark == 0 && 536 (so->so_state & SS_RCVATMARK) == 0) || 537 so->so_options & SO_OOBINLINE || 538 tp->t_oobflags & TCPOOB_HADDATA) { 539 error = EINVAL; 540 break; 541 } 542 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 543 error = EWOULDBLOCK; 544 break; 545 } 546 m->m_len = 1; 547 *mtod(m, caddr_t) = tp->t_iobc; 548 if (((long)nam & MSG_PEEK) == 0) 549 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 550 break; 551 552 case PRU_SENDOOB: 553 if (sbspace(&so->so_snd) < -512) { 554 m_freem(m); 555 error = ENOBUFS; 556 break; 557 } 558 /* 559 * According to RFC961 (Assigned Protocols), 560 * the urgent pointer points to the last octet 561 * of urgent data. We continue, however, 562 * to consider it to indicate the first octet 563 * of data past the urgent section. 564 * Otherwise, snd_up should be one lower. 565 */ 566 sbappendstream(&so->so_snd, m); 567 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 568 tp->t_force = 1; 569 error = tcp_output(tp); 570 tp->t_force = 0; 571 break; 572 573 case PRU_SOCKADDR: 574 #ifdef INET 575 if (inp) 576 in_setsockaddr(inp, nam); 577 #endif 578 #ifdef INET6 579 if (in6p) 580 in6_setsockaddr(in6p, nam); 581 #endif 582 break; 583 584 case PRU_PEERADDR: 585 #ifdef INET 586 if (inp) 587 in_setpeeraddr(inp, nam); 588 #endif 589 #ifdef INET6 590 if (in6p) 591 in6_setpeeraddr(in6p, nam); 592 #endif 593 break; 594 595 default: 596 panic("tcp_usrreq"); 597 } 598 #ifdef TCP_DEBUG 599 if (tp && (so->so_options & SO_DEBUG)) 600 tcp_trace(TA_USER, ostate, tp, NULL, req); 601 #endif 602 603 release: 604 splx(s); 605 return (error); 606 } 607 608 int 609 tcp_ctloutput(int op, struct socket *so, int level, int optname, 610 struct mbuf **mp) 611 { 612 int error = 0, s; 613 struct inpcb *inp; 614 #ifdef INET6 615 struct in6pcb *in6p; 616 #endif 617 struct tcpcb *tp; 618 struct mbuf *m; 619 int i; 620 int family; /* family of the socket */ 621 622 family = so->so_proto->pr_domain->dom_family; 623 624 s = splsoftnet(); 625 switch (family) { 626 #ifdef INET 627 case PF_INET: 628 inp = sotoinpcb(so); 629 #ifdef INET6 630 in6p = NULL; 631 #endif 632 break; 633 #endif 634 #ifdef INET6 635 case PF_INET6: 636 inp = NULL; 637 in6p = sotoin6pcb(so); 638 break; 639 #endif 640 default: 641 splx(s); 642 return EAFNOSUPPORT; 643 } 644 #ifndef INET6 645 if (inp == NULL) 646 #else 647 if (inp == NULL && in6p == NULL) 648 #endif 649 { 650 splx(s); 651 if (op == PRCO_SETOPT && *mp) 652 (void) m_free(*mp); 653 return (ECONNRESET); 654 } 655 if (level != IPPROTO_TCP) { 656 switch (family) { 657 #ifdef INET 658 case PF_INET: 659 error = ip_ctloutput(op, so, level, optname, mp); 660 break; 661 #endif 662 #ifdef INET6 663 case PF_INET6: 664 error = ip6_ctloutput(op, so, level, optname, mp); 665 break; 666 #endif 667 } 668 splx(s); 669 return (error); 670 } 671 if (inp) 672 tp = intotcpcb(inp); 673 #ifdef INET6 674 else if (in6p) 675 tp = in6totcpcb(in6p); 676 #endif 677 else 678 tp = NULL; 679 680 switch (op) { 681 682 case PRCO_SETOPT: 683 m = *mp; 684 switch (optname) { 685 686 #ifdef TCP_SIGNATURE 687 case TCP_MD5SIG: 688 if (m == NULL || m->m_len < sizeof (int)) 689 error = EINVAL; 690 if (error) 691 break; 692 if (*mtod(m, int *) > 0) 693 tp->t_flags |= TF_SIGNATURE; 694 else 695 tp->t_flags &= ~TF_SIGNATURE; 696 break; 697 #endif /* TCP_SIGNATURE */ 698 699 case TCP_NODELAY: 700 if (m == NULL || m->m_len < sizeof (int)) 701 error = EINVAL; 702 else if (*mtod(m, int *)) 703 tp->t_flags |= TF_NODELAY; 704 else 705 tp->t_flags &= ~TF_NODELAY; 706 break; 707 708 case TCP_MAXSEG: 709 if (m && (i = *mtod(m, int *)) > 0 && 710 i <= tp->t_peermss) 711 tp->t_peermss = i; /* limit on send size */ 712 else 713 error = EINVAL; 714 break; 715 716 default: 717 error = ENOPROTOOPT; 718 break; 719 } 720 if (m) 721 (void) m_free(m); 722 break; 723 724 case PRCO_GETOPT: 725 *mp = m = m_get(M_WAIT, MT_SOOPTS); 726 m->m_len = sizeof(int); 727 MCLAIM(m, so->so_mowner); 728 729 switch (optname) { 730 #ifdef TCP_SIGNATURE 731 case TCP_MD5SIG: 732 *mtod(m, int *) = (tp->t_flags & TF_SIGNATURE) ? 1 : 0; 733 break; 734 #endif 735 case TCP_NODELAY: 736 *mtod(m, int *) = tp->t_flags & TF_NODELAY; 737 break; 738 case TCP_MAXSEG: 739 *mtod(m, int *) = tp->t_peermss; 740 break; 741 default: 742 error = ENOPROTOOPT; 743 break; 744 } 745 break; 746 } 747 splx(s); 748 return (error); 749 } 750 751 #ifndef TCP_SENDSPACE 752 #define TCP_SENDSPACE 1024*32 753 #endif 754 int tcp_sendspace = TCP_SENDSPACE; 755 #ifndef TCP_RECVSPACE 756 #define TCP_RECVSPACE 1024*32 757 #endif 758 int tcp_recvspace = TCP_RECVSPACE; 759 760 /* 761 * Attach TCP protocol to socket, allocating 762 * internet protocol control block, tcp control block, 763 * bufer space, and entering LISTEN state if to accept connections. 764 */ 765 int 766 tcp_attach(struct socket *so) 767 { 768 struct tcpcb *tp; 769 struct inpcb *inp; 770 #ifdef INET6 771 struct in6pcb *in6p; 772 #endif 773 int error; 774 int family; /* family of the socket */ 775 776 family = so->so_proto->pr_domain->dom_family; 777 778 #ifdef MBUFTRACE 779 so->so_mowner = &tcp_mowner; 780 so->so_rcv.sb_mowner = &tcp_rx_mowner; 781 so->so_snd.sb_mowner = &tcp_tx_mowner; 782 #endif 783 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 784 error = soreserve(so, tcp_sendspace, tcp_recvspace); 785 if (error) 786 return (error); 787 } 788 switch (family) { 789 #ifdef INET 790 case PF_INET: 791 error = in_pcballoc(so, &tcbtable); 792 if (error) 793 return (error); 794 inp = sotoinpcb(so); 795 #ifdef INET6 796 in6p = NULL; 797 #endif 798 break; 799 #endif 800 #ifdef INET6 801 case PF_INET6: 802 error = in6_pcballoc(so, &tcbtable); 803 if (error) 804 return (error); 805 inp = NULL; 806 in6p = sotoin6pcb(so); 807 break; 808 #endif 809 default: 810 return EAFNOSUPPORT; 811 } 812 if (inp) 813 tp = tcp_newtcpcb(family, (void *)inp); 814 #ifdef INET6 815 else if (in6p) 816 tp = tcp_newtcpcb(family, (void *)in6p); 817 #endif 818 else 819 tp = NULL; 820 821 if (tp == 0) { 822 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 823 824 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 825 #ifdef INET 826 if (inp) 827 in_pcbdetach(inp); 828 #endif 829 #ifdef INET6 830 if (in6p) 831 in6_pcbdetach(in6p); 832 #endif 833 so->so_state |= nofd; 834 return (ENOBUFS); 835 } 836 tp->t_state = TCPS_CLOSED; 837 return (0); 838 } 839 840 /* 841 * Initiate (or continue) disconnect. 842 * If embryonic state, just send reset (once). 843 * If in ``let data drain'' option and linger null, just drop. 844 * Otherwise (hard), mark socket disconnecting and drop 845 * current input data; switch states based on user close, and 846 * send segment to peer (with FIN). 847 */ 848 struct tcpcb * 849 tcp_disconnect(struct tcpcb *tp) 850 { 851 struct socket *so; 852 853 if (tp->t_inpcb) 854 so = tp->t_inpcb->inp_socket; 855 #ifdef INET6 856 else if (tp->t_in6pcb) 857 so = tp->t_in6pcb->in6p_socket; 858 #endif 859 else 860 so = NULL; 861 862 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 863 tp = tcp_close(tp); 864 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 865 tp = tcp_drop(tp, 0); 866 else { 867 soisdisconnecting(so); 868 sbflush(&so->so_rcv); 869 tp = tcp_usrclosed(tp); 870 if (tp) 871 (void) tcp_output(tp); 872 } 873 return (tp); 874 } 875 876 /* 877 * User issued close, and wish to trail through shutdown states: 878 * if never received SYN, just forget it. If got a SYN from peer, 879 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 880 * If already got a FIN from peer, then almost done; go to LAST_ACK 881 * state. In all other cases, have already sent FIN to peer (e.g. 882 * after PRU_SHUTDOWN), and just have to play tedious game waiting 883 * for peer to send FIN or not respond to keep-alives, etc. 884 * We can let the user exit from the close as soon as the FIN is acked. 885 */ 886 struct tcpcb * 887 tcp_usrclosed(struct tcpcb *tp) 888 { 889 890 switch (tp->t_state) { 891 892 case TCPS_CLOSED: 893 case TCPS_LISTEN: 894 case TCPS_SYN_SENT: 895 tp->t_state = TCPS_CLOSED; 896 tp = tcp_close(tp); 897 break; 898 899 case TCPS_SYN_RECEIVED: 900 case TCPS_ESTABLISHED: 901 tp->t_state = TCPS_FIN_WAIT_1; 902 break; 903 904 case TCPS_CLOSE_WAIT: 905 tp->t_state = TCPS_LAST_ACK; 906 break; 907 } 908 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 909 struct socket *so; 910 if (tp->t_inpcb) 911 so = tp->t_inpcb->inp_socket; 912 #ifdef INET6 913 else if (tp->t_in6pcb) 914 so = tp->t_in6pcb->in6p_socket; 915 #endif 916 else 917 so = NULL; 918 soisdisconnected(so); 919 /* 920 * If we are in FIN_WAIT_2, we arrived here because the 921 * application did a shutdown of the send side. Like the 922 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after 923 * a full close, we start a timer to make sure sockets are 924 * not left in FIN_WAIT_2 forever. 925 */ 926 if ((tp->t_state == TCPS_FIN_WAIT_2) && (tcp_maxidle > 0)) 927 TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle); 928 } 929 return (tp); 930 } 931 932 /* 933 * sysctl helper routine for net.inet.ip.mssdflt. it can't be less 934 * than 32. 935 */ 936 static int 937 sysctl_net_inet_tcp_mssdflt(SYSCTLFN_ARGS) 938 { 939 int error, mssdflt; 940 struct sysctlnode node; 941 942 mssdflt = tcp_mssdflt; 943 node = *rnode; 944 node.sysctl_data = &mssdflt; 945 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 946 if (error || newp == NULL) 947 return (error); 948 949 if (mssdflt < 32) 950 return (EINVAL); 951 tcp_mssdflt = mssdflt; 952 953 return (0); 954 } 955 956 /* 957 * sysctl helper routine for setting port related values under 958 * net.inet.ip and net.inet6.ip6. does basic range checking and does 959 * additional checks for each type. this code has placed in 960 * tcp_input.c since INET and INET6 both use the same tcp code. 961 * 962 * this helper is not static so that both inet and inet6 can use it. 963 */ 964 int 965 sysctl_net_inet_ip_ports(SYSCTLFN_ARGS) 966 { 967 int error, tmp; 968 int apmin, apmax; 969 #ifndef IPNOPRIVPORTS 970 int lpmin, lpmax; 971 #endif /* IPNOPRIVPORTS */ 972 struct sysctlnode node; 973 974 if (namelen != 0) 975 return (EINVAL); 976 977 switch (name[-3]) { 978 #ifdef INET 979 case PF_INET: 980 apmin = anonportmin; 981 apmax = anonportmax; 982 #ifndef IPNOPRIVPORTS 983 lpmin = lowportmin; 984 lpmax = lowportmax; 985 #endif /* IPNOPRIVPORTS */ 986 break; 987 #endif /* INET */ 988 #ifdef INET6 989 case PF_INET6: 990 apmin = ip6_anonportmin; 991 apmax = ip6_anonportmax; 992 #ifndef IPNOPRIVPORTS 993 lpmin = ip6_lowportmin; 994 lpmax = ip6_lowportmax; 995 #endif /* IPNOPRIVPORTS */ 996 break; 997 #endif /* INET6 */ 998 default: 999 return (EINVAL); 1000 } 1001 1002 /* 1003 * insert temporary copy into node, perform lookup on 1004 * temporary, then restore pointer 1005 */ 1006 node = *rnode; 1007 tmp = *(int*)rnode->sysctl_data; 1008 node.sysctl_data = &tmp; 1009 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1010 if (error || newp == NULL) 1011 return (error); 1012 1013 /* 1014 * simple port range check 1015 */ 1016 if (tmp < 0 || tmp > 65535) 1017 return (EINVAL); 1018 1019 /* 1020 * per-node range checks 1021 */ 1022 switch (rnode->sysctl_num) { 1023 case IPCTL_ANONPORTMIN: 1024 if (tmp >= apmax) 1025 return (EINVAL); 1026 #ifndef IPNOPRIVPORTS 1027 if (tmp < IPPORT_RESERVED) 1028 return (EINVAL); 1029 #endif /* IPNOPRIVPORTS */ 1030 break; 1031 1032 case IPCTL_ANONPORTMAX: 1033 if (apmin >= tmp) 1034 return (EINVAL); 1035 #ifndef IPNOPRIVPORTS 1036 if (tmp < IPPORT_RESERVED) 1037 return (EINVAL); 1038 #endif /* IPNOPRIVPORTS */ 1039 break; 1040 1041 #ifndef IPNOPRIVPORTS 1042 case IPCTL_LOWPORTMIN: 1043 if (tmp >= lpmax || 1044 tmp > IPPORT_RESERVEDMAX || 1045 tmp < IPPORT_RESERVEDMIN) 1046 return (EINVAL); 1047 break; 1048 1049 case IPCTL_LOWPORTMAX: 1050 if (lpmin >= tmp || 1051 tmp > IPPORT_RESERVEDMAX || 1052 tmp < IPPORT_RESERVEDMIN) 1053 return (EINVAL); 1054 break; 1055 #endif /* IPNOPRIVPORTS */ 1056 1057 default: 1058 return (EINVAL); 1059 } 1060 1061 *(int*)rnode->sysctl_data = tmp; 1062 1063 return (0); 1064 } 1065 1066 /* 1067 * sysctl helper routine for the net.inet.tcp.ident and 1068 * net.inet6.tcp6.ident nodes. contains backwards compat code for the 1069 * old way of looking up the ident information for ipv4 which involves 1070 * stuffing the port/addr pairs into the mib lookup. 1071 */ 1072 static int 1073 sysctl_net_inet_tcp_ident(SYSCTLFN_ARGS) 1074 { 1075 #ifdef INET 1076 struct inpcb *inb; 1077 struct sockaddr_in *si4[2]; 1078 #endif /* INET */ 1079 #ifdef INET6 1080 struct in6pcb *in6b; 1081 struct sockaddr_in6 *si6[2]; 1082 #endif /* INET6 */ 1083 struct sockaddr_storage sa[2]; 1084 struct socket *sockp; 1085 size_t sz; 1086 uid_t uid; 1087 int error, pf; 1088 1089 if (namelen != 4 && namelen != 0) 1090 return (EINVAL); 1091 if (name[-2] != IPPROTO_TCP) 1092 return (EINVAL); 1093 pf = name[-3]; 1094 1095 /* old style lookup, ipv4 only */ 1096 if (namelen == 4) { 1097 #ifdef INET 1098 struct in_addr laddr, raddr; 1099 u_int lport, rport; 1100 1101 if (pf != PF_INET) 1102 return (EPROTONOSUPPORT); 1103 raddr.s_addr = (uint32_t)name[0]; 1104 rport = (u_int)name[1]; 1105 laddr.s_addr = (uint32_t)name[2]; 1106 lport = (u_int)name[3]; 1107 inb = in_pcblookup_connect(&tcbtable, raddr, rport, 1108 laddr, lport); 1109 if (inb == NULL || (sockp = inb->inp_socket) == NULL) 1110 return (ESRCH); 1111 uid = sockp->so_uidinfo->ui_uid; 1112 if (oldp) { 1113 sz = MIN(sizeof(uid), *oldlenp); 1114 error = copyout(&uid, oldp, sz); 1115 if (error) 1116 return (error); 1117 } 1118 *oldlenp = sizeof(uid); 1119 return (0); 1120 #else /* INET */ 1121 return (EINVAL); 1122 #endif /* INET */ 1123 } 1124 1125 if (newp == NULL || newlen != sizeof(sa)) 1126 return (EINVAL); 1127 error = copyin(newp, &sa, newlen); 1128 if (error) 1129 return (error); 1130 1131 /* 1132 * requested families must match 1133 */ 1134 if (pf != sa[0].ss_family || sa[0].ss_family != sa[1].ss_family) 1135 return (EINVAL); 1136 1137 switch (pf) { 1138 #ifdef INET 1139 case PF_INET: 1140 si4[0] = (struct sockaddr_in*)&sa[0]; 1141 si4[1] = (struct sockaddr_in*)&sa[1]; 1142 if (si4[0]->sin_len != sizeof(*si4[0]) || 1143 si4[0]->sin_len != si4[1]->sin_len) 1144 return (EINVAL); 1145 inb = in_pcblookup_connect(&tcbtable, 1146 si4[0]->sin_addr, si4[0]->sin_port, 1147 si4[1]->sin_addr, si4[1]->sin_port); 1148 if (inb == NULL || (sockp = inb->inp_socket) == NULL) 1149 return (ESRCH); 1150 break; 1151 #endif /* INET */ 1152 #ifdef INET6 1153 case PF_INET6: 1154 si6[0] = (struct sockaddr_in6*)&sa[0]; 1155 si6[1] = (struct sockaddr_in6*)&sa[1]; 1156 if (si6[0]->sin6_len != sizeof(*si6[0]) || 1157 si6[0]->sin6_len != si6[1]->sin6_len) 1158 return (EINVAL); 1159 in6b = in6_pcblookup_connect(&tcbtable, 1160 &si6[0]->sin6_addr, si6[0]->sin6_port, 1161 &si6[1]->sin6_addr, si6[1]->sin6_port, 0); 1162 if (in6b == NULL || (sockp = in6b->in6p_socket) == NULL) 1163 return (ESRCH); 1164 break; 1165 #endif /* INET6 */ 1166 default: 1167 return (EPROTONOSUPPORT); 1168 } 1169 1170 uid = sockp->so_uidinfo->ui_uid; 1171 if (oldp) { 1172 sz = MIN(sizeof(uid), *oldlenp); 1173 error = copyout(&uid, oldp, sz); 1174 if (error) 1175 return (error); 1176 } 1177 *oldlenp = sizeof(uid); 1178 1179 return (0); 1180 } 1181 1182 /* 1183 * sysctl helper for the inet and inet6 pcblists. handles tcp/udp and 1184 * inet/inet6, as well as raw pcbs for each. specifically not 1185 * declared static so that raw sockets and udp/udp6 can use it as 1186 * well. 1187 */ 1188 int 1189 sysctl_inpcblist(SYSCTLFN_ARGS) 1190 { 1191 #ifdef INET 1192 struct sockaddr_in *in; 1193 const struct inpcb *inp; 1194 #endif 1195 #ifdef INET6 1196 struct sockaddr_in6 *in6; 1197 const struct in6pcb *in6p; 1198 #endif 1199 /* 1200 * sysctl_data is const, but CIRCLEQ_FOREACH can't use a const 1201 * struct inpcbtable pointer, so we have to discard const. :-/ 1202 */ 1203 struct inpcbtable *pcbtbl = __UNCONST(rnode->sysctl_data); 1204 const struct inpcb_hdr *inph; 1205 struct tcpcb *tp; 1206 struct kinfo_pcb pcb; 1207 char *dp; 1208 u_int op, arg; 1209 size_t len, needed, elem_size, out_size; 1210 int error, elem_count, pf, proto, pf2; 1211 1212 if (namelen != 4) 1213 return (EINVAL); 1214 1215 error = 0; 1216 dp = oldp; 1217 len = (oldp != NULL) ? *oldlenp : 0; 1218 op = name[0]; 1219 arg = name[1]; 1220 elem_size = name[2]; 1221 elem_count = name[3]; 1222 out_size = MIN(sizeof(pcb), elem_size); 1223 needed = 0; 1224 1225 elem_count = INT_MAX; 1226 elem_size = out_size = sizeof(pcb); 1227 1228 if (namelen == 1 && name[0] == CTL_QUERY) 1229 return (sysctl_query(SYSCTLFN_CALL(rnode))); 1230 1231 if (name - oname != 4) 1232 return (EINVAL); 1233 1234 pf = oname[1]; 1235 proto = oname[2]; 1236 pf2 = (oldp == NULL) ? 0 : pf; 1237 1238 CIRCLEQ_FOREACH(inph, &pcbtbl->inpt_queue, inph_queue) { 1239 #ifdef INET 1240 inp = (const struct inpcb *)inph; 1241 #endif 1242 #ifdef INET6 1243 in6p = (const struct in6pcb *)inph; 1244 #endif 1245 1246 if (inph->inph_af != pf) 1247 continue; 1248 1249 memset(&pcb, 0, sizeof(pcb)); 1250 1251 pcb.ki_family = pf; 1252 pcb.ki_type = proto; 1253 1254 switch (pf2) { 1255 case 0: 1256 /* just probing for size */ 1257 break; 1258 #ifdef INET 1259 case PF_INET: 1260 pcb.ki_family = inp->inp_socket->so_proto-> 1261 pr_domain->dom_family; 1262 pcb.ki_type = inp->inp_socket->so_proto-> 1263 pr_type; 1264 pcb.ki_protocol = inp->inp_socket->so_proto-> 1265 pr_protocol; 1266 pcb.ki_pflags = inp->inp_flags; 1267 1268 pcb.ki_sostate = inp->inp_socket->so_state; 1269 pcb.ki_prstate = inp->inp_state; 1270 if (proto == IPPROTO_TCP) { 1271 tp = intotcpcb(inp); 1272 pcb.ki_tstate = tp->t_state; 1273 pcb.ki_tflags = tp->t_flags; 1274 } 1275 1276 pcb.ki_pcbaddr = PTRTOUINT64(inp); 1277 pcb.ki_ppcbaddr = PTRTOUINT64(inp->inp_ppcb); 1278 pcb.ki_sockaddr = PTRTOUINT64(inp->inp_socket); 1279 1280 pcb.ki_rcvq = inp->inp_socket->so_rcv.sb_cc; 1281 pcb.ki_sndq = inp->inp_socket->so_snd.sb_cc; 1282 1283 in = satosin(&pcb.ki_src); 1284 in->sin_len = sizeof(*in); 1285 in->sin_family = pf; 1286 in->sin_port = inp->inp_lport; 1287 in->sin_addr = inp->inp_laddr; 1288 if (pcb.ki_prstate >= INP_CONNECTED) { 1289 in = satosin(&pcb.ki_dst); 1290 in->sin_len = sizeof(*in); 1291 in->sin_family = pf; 1292 in->sin_port = inp->inp_fport; 1293 in->sin_addr = inp->inp_faddr; 1294 } 1295 break; 1296 #endif 1297 #ifdef INET6 1298 case PF_INET6: 1299 pcb.ki_family = in6p->in6p_socket->so_proto-> 1300 pr_domain->dom_family; 1301 pcb.ki_type = in6p->in6p_socket->so_proto->pr_type; 1302 pcb.ki_protocol = in6p->in6p_socket->so_proto-> 1303 pr_protocol; 1304 pcb.ki_pflags = in6p->in6p_flags; 1305 1306 pcb.ki_sostate = in6p->in6p_socket->so_state; 1307 pcb.ki_prstate = in6p->in6p_state; 1308 if (proto == IPPROTO_TCP) { 1309 tp = in6totcpcb(in6p); 1310 pcb.ki_tstate = tp->t_state; 1311 pcb.ki_tflags = tp->t_flags; 1312 } 1313 1314 pcb.ki_pcbaddr = PTRTOUINT64(in6p); 1315 pcb.ki_ppcbaddr = PTRTOUINT64(in6p->in6p_ppcb); 1316 pcb.ki_sockaddr = PTRTOUINT64(in6p->in6p_socket); 1317 1318 pcb.ki_rcvq = in6p->in6p_socket->so_rcv.sb_cc; 1319 pcb.ki_sndq = in6p->in6p_socket->so_snd.sb_cc; 1320 1321 in6 = satosin6(&pcb.ki_src); 1322 in6->sin6_len = sizeof(*in6); 1323 in6->sin6_family = pf; 1324 in6->sin6_port = in6p->in6p_lport; 1325 in6->sin6_flowinfo = in6p->in6p_flowinfo; 1326 in6->sin6_addr = in6p->in6p_laddr; 1327 in6->sin6_scope_id = 0; /* XXX? */ 1328 1329 if (pcb.ki_prstate >= IN6P_CONNECTED) { 1330 in6 = satosin6(&pcb.ki_dst); 1331 in6->sin6_len = sizeof(*in6); 1332 in6->sin6_family = pf; 1333 in6->sin6_port = in6p->in6p_fport; 1334 in6->sin6_flowinfo = in6p->in6p_flowinfo; 1335 in6->sin6_addr = in6p->in6p_faddr; 1336 in6->sin6_scope_id = 0; /* XXX? */ 1337 } 1338 break; 1339 #endif 1340 } 1341 1342 if (len >= elem_size && elem_count > 0) { 1343 error = copyout(&pcb, dp, out_size); 1344 if (error) 1345 return (error); 1346 dp += elem_size; 1347 len -= elem_size; 1348 } 1349 if (elem_count > 0) { 1350 needed += elem_size; 1351 if (elem_count != INT_MAX) 1352 elem_count--; 1353 } 1354 } 1355 1356 *oldlenp = needed; 1357 if (oldp == NULL) 1358 *oldlenp += PCB_SLOP * sizeof(struct kinfo_pcb); 1359 1360 return (error); 1361 } 1362 1363 /* 1364 * this (second stage) setup routine is a replacement for tcp_sysctl() 1365 * (which is currently used for ipv4 and ipv6) 1366 */ 1367 static void 1368 sysctl_net_inet_tcp_setup2(struct sysctllog **clog, int pf, const char *pfname, 1369 const char *tcpname) 1370 { 1371 const struct sysctlnode *sack_node; 1372 1373 sysctl_createv(clog, 0, NULL, NULL, 1374 CTLFLAG_PERMANENT, 1375 CTLTYPE_NODE, "net", NULL, 1376 NULL, 0, NULL, 0, 1377 CTL_NET, CTL_EOL); 1378 sysctl_createv(clog, 0, NULL, NULL, 1379 CTLFLAG_PERMANENT, 1380 CTLTYPE_NODE, pfname, NULL, 1381 NULL, 0, NULL, 0, 1382 CTL_NET, pf, CTL_EOL); 1383 sysctl_createv(clog, 0, NULL, NULL, 1384 CTLFLAG_PERMANENT, 1385 CTLTYPE_NODE, tcpname, 1386 SYSCTL_DESCR("TCP related settings"), 1387 NULL, 0, NULL, 0, 1388 CTL_NET, pf, IPPROTO_TCP, CTL_EOL); 1389 1390 sysctl_createv(clog, 0, NULL, NULL, 1391 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1392 CTLTYPE_INT, "rfc1323", 1393 SYSCTL_DESCR("Enable RFC1323 TCP extensions"), 1394 NULL, 0, &tcp_do_rfc1323, 0, 1395 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RFC1323, CTL_EOL); 1396 sysctl_createv(clog, 0, NULL, NULL, 1397 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1398 CTLTYPE_INT, "sendspace", 1399 SYSCTL_DESCR("Default TCP send buffer size"), 1400 NULL, 0, &tcp_sendspace, 0, 1401 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SENDSPACE, CTL_EOL); 1402 sysctl_createv(clog, 0, NULL, NULL, 1403 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1404 CTLTYPE_INT, "recvspace", 1405 SYSCTL_DESCR("Default TCP receive buffer size"), 1406 NULL, 0, &tcp_recvspace, 0, 1407 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RECVSPACE, CTL_EOL); 1408 sysctl_createv(clog, 0, NULL, NULL, 1409 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1410 CTLTYPE_INT, "mssdflt", 1411 SYSCTL_DESCR("Default maximum segment size"), 1412 sysctl_net_inet_tcp_mssdflt, 0, &tcp_mssdflt, 0, 1413 CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSSDFLT, CTL_EOL); 1414 sysctl_createv(clog, 0, NULL, NULL, 1415 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1416 CTLTYPE_INT, "syn_cache_limit", 1417 SYSCTL_DESCR("Maximum number of entries in the TCP " 1418 "compressed state engine"), 1419 NULL, 0, &tcp_syn_cache_limit, 0, 1420 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_CACHE_LIMIT, 1421 CTL_EOL); 1422 sysctl_createv(clog, 0, NULL, NULL, 1423 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1424 CTLTYPE_INT, "syn_bucket_limit", 1425 SYSCTL_DESCR("Maximum number of entries per hash " 1426 "bucket in the TCP compressed state " 1427 "engine"), 1428 NULL, 0, &tcp_syn_bucket_limit, 0, 1429 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_BUCKET_LIMIT, 1430 CTL_EOL); 1431 #if 0 /* obsoleted */ 1432 sysctl_createv(clog, 0, NULL, NULL, 1433 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1434 CTLTYPE_INT, "syn_cache_interval", 1435 SYSCTL_DESCR("TCP compressed state engine's timer interval"), 1436 NULL, 0, &tcp_syn_cache_interval, 0, 1437 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_CACHE_INTER, 1438 CTL_EOL); 1439 #endif 1440 sysctl_createv(clog, 0, NULL, NULL, 1441 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1442 CTLTYPE_INT, "init_win", 1443 SYSCTL_DESCR("Initial TCP congestion window"), 1444 NULL, 0, &tcp_init_win, 0, 1445 CTL_NET, pf, IPPROTO_TCP, TCPCTL_INIT_WIN, CTL_EOL); 1446 sysctl_createv(clog, 0, NULL, NULL, 1447 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1448 CTLTYPE_INT, "mss_ifmtu", 1449 SYSCTL_DESCR("Use interface MTU for calculating MSS"), 1450 NULL, 0, &tcp_mss_ifmtu, 0, 1451 CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSS_IFMTU, CTL_EOL); 1452 sysctl_createv(clog, 0, NULL, &sack_node, 1453 CTLFLAG_PERMANENT, 1454 CTLTYPE_NODE, "sack", 1455 SYSCTL_DESCR("RFC2018 Selective ACKnowledgement tunables"), 1456 NULL, 0, NULL, 0, 1457 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_EOL); 1458 sysctl_createv(clog, 0, NULL, NULL, 1459 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1460 CTLTYPE_INT, "win_scale", 1461 SYSCTL_DESCR("Use RFC1323 window scale options"), 1462 NULL, 0, &tcp_do_win_scale, 0, 1463 CTL_NET, pf, IPPROTO_TCP, TCPCTL_WSCALE, CTL_EOL); 1464 sysctl_createv(clog, 0, NULL, NULL, 1465 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1466 CTLTYPE_INT, "timestamps", 1467 SYSCTL_DESCR("Use RFC1323 time stamp options"), 1468 NULL, 0, &tcp_do_timestamps, 0, 1469 CTL_NET, pf, IPPROTO_TCP, TCPCTL_TSTAMP, CTL_EOL); 1470 sysctl_createv(clog, 0, NULL, NULL, 1471 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1472 CTLTYPE_INT, "compat_42", 1473 SYSCTL_DESCR("Enable workarounds for 4.2BSD TCP bugs"), 1474 NULL, 0, &tcp_compat_42, 0, 1475 CTL_NET, pf, IPPROTO_TCP, TCPCTL_COMPAT_42, CTL_EOL); 1476 sysctl_createv(clog, 0, NULL, NULL, 1477 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1478 CTLTYPE_INT, "cwm", 1479 SYSCTL_DESCR("Hughes/Touch/Heidemann Congestion Window " 1480 "Monitoring"), 1481 NULL, 0, &tcp_cwm, 0, 1482 CTL_NET, pf, IPPROTO_TCP, TCPCTL_CWM, CTL_EOL); 1483 sysctl_createv(clog, 0, NULL, NULL, 1484 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1485 CTLTYPE_INT, "cwm_burstsize", 1486 SYSCTL_DESCR("Congestion Window Monitoring allowed " 1487 "burst count in packets"), 1488 NULL, 0, &tcp_cwm_burstsize, 0, 1489 CTL_NET, pf, IPPROTO_TCP, TCPCTL_CWM_BURSTSIZE, 1490 CTL_EOL); 1491 sysctl_createv(clog, 0, NULL, NULL, 1492 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1493 CTLTYPE_INT, "ack_on_push", 1494 SYSCTL_DESCR("Immediately return ACK when PSH is " 1495 "received"), 1496 NULL, 0, &tcp_ack_on_push, 0, 1497 CTL_NET, pf, IPPROTO_TCP, TCPCTL_ACK_ON_PUSH, CTL_EOL); 1498 sysctl_createv(clog, 0, NULL, NULL, 1499 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1500 CTLTYPE_INT, "keepidle", 1501 SYSCTL_DESCR("Allowed connection idle ticks before a " 1502 "keepalive probe is sent"), 1503 NULL, 0, &tcp_keepidle, 0, 1504 CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPIDLE, CTL_EOL); 1505 sysctl_createv(clog, 0, NULL, NULL, 1506 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1507 CTLTYPE_INT, "keepintvl", 1508 SYSCTL_DESCR("Ticks before next keepalive probe is sent"), 1509 NULL, 0, &tcp_keepintvl, 0, 1510 CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPINTVL, CTL_EOL); 1511 sysctl_createv(clog, 0, NULL, NULL, 1512 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1513 CTLTYPE_INT, "keepcnt", 1514 SYSCTL_DESCR("Number of keepalive probes to send"), 1515 NULL, 0, &tcp_keepcnt, 0, 1516 CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPCNT, CTL_EOL); 1517 sysctl_createv(clog, 0, NULL, NULL, 1518 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 1519 CTLTYPE_INT, "slowhz", 1520 SYSCTL_DESCR("Keepalive ticks per second"), 1521 NULL, PR_SLOWHZ, NULL, 0, 1522 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SLOWHZ, CTL_EOL); 1523 sysctl_createv(clog, 0, NULL, NULL, 1524 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1525 CTLTYPE_INT, "newreno", 1526 SYSCTL_DESCR("NewReno congestion control algorithm"), 1527 NULL, 0, &tcp_do_newreno, 0, 1528 CTL_NET, pf, IPPROTO_TCP, TCPCTL_NEWRENO, CTL_EOL); 1529 sysctl_createv(clog, 0, NULL, NULL, 1530 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1531 CTLTYPE_INT, "log_refused", 1532 SYSCTL_DESCR("Log refused TCP connections"), 1533 NULL, 0, &tcp_log_refused, 0, 1534 CTL_NET, pf, IPPROTO_TCP, TCPCTL_LOG_REFUSED, CTL_EOL); 1535 #if 0 /* obsoleted */ 1536 sysctl_createv(clog, 0, NULL, NULL, 1537 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1538 CTLTYPE_INT, "rstratelimit", NULL, 1539 NULL, 0, &tcp_rst_ratelim, 0, 1540 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RSTRATELIMIT, CTL_EOL); 1541 #endif 1542 sysctl_createv(clog, 0, NULL, NULL, 1543 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1544 CTLTYPE_INT, "rstppslimit", 1545 SYSCTL_DESCR("Maximum number of RST packets to send " 1546 "per second"), 1547 NULL, 0, &tcp_rst_ppslim, 0, 1548 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RSTPPSLIMIT, CTL_EOL); 1549 sysctl_createv(clog, 0, NULL, NULL, 1550 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1551 CTLTYPE_INT, "delack_ticks", 1552 SYSCTL_DESCR("Number of ticks to delay sending an ACK"), 1553 NULL, 0, &tcp_delack_ticks, 0, 1554 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DELACK_TICKS, CTL_EOL); 1555 sysctl_createv(clog, 0, NULL, NULL, 1556 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1557 CTLTYPE_INT, "init_win_local", 1558 SYSCTL_DESCR("Initial TCP window size (in segments)"), 1559 NULL, 0, &tcp_init_win_local, 0, 1560 CTL_NET, pf, IPPROTO_TCP, TCPCTL_INIT_WIN_LOCAL, 1561 CTL_EOL); 1562 sysctl_createv(clog, 0, NULL, NULL, 1563 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1564 CTLTYPE_STRUCT, "ident", 1565 SYSCTL_DESCR("RFC1413 Identification Protocol lookups"), 1566 sysctl_net_inet_tcp_ident, 0, NULL, sizeof(uid_t), 1567 CTL_NET, pf, IPPROTO_TCP, TCPCTL_IDENT, CTL_EOL); 1568 sysctl_createv(clog, 0, NULL, NULL, 1569 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1570 CTLTYPE_INT, "do_loopback_cksum", 1571 SYSCTL_DESCR("Perform TCP checksum on loopback"), 1572 NULL, 0, &tcp_do_loopback_cksum, 0, 1573 CTL_NET, pf, IPPROTO_TCP, TCPCTL_LOOPBACKCKSUM, 1574 CTL_EOL); 1575 sysctl_createv(clog, 0, NULL, NULL, 1576 CTLFLAG_PERMANENT, 1577 CTLTYPE_STRUCT, "pcblist", 1578 SYSCTL_DESCR("TCP protocol control block list"), 1579 sysctl_inpcblist, 0, &tcbtable, 0, 1580 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, 1581 CTL_EOL); 1582 1583 /* SACK gets it's own little subtree. */ 1584 sysctl_createv(clog, 0, NULL, &sack_node, 1585 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1586 CTLTYPE_INT, "enable", 1587 SYSCTL_DESCR("Enable RFC2018 Selective ACKnowledgement"), 1588 NULL, 0, &tcp_do_sack, 0, 1589 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 1590 sysctl_createv(clog, 0, NULL, &sack_node, 1591 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1592 CTLTYPE_INT, "maxholes", 1593 SYSCTL_DESCR("Maximum number of TCP SACK holes allowed per connection"), 1594 NULL, 0, &tcp_sack_tp_maxholes, 0, 1595 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 1596 sysctl_createv(clog, 0, NULL, &sack_node, 1597 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1598 CTLTYPE_INT, "globalmaxholes", 1599 SYSCTL_DESCR("Global maximum number of TCP SACK holes"), 1600 NULL, 0, &tcp_sack_globalmaxholes, 0, 1601 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 1602 sysctl_createv(clog, 0, NULL, &sack_node, 1603 CTLFLAG_PERMANENT, 1604 CTLTYPE_INT, "globalholes", 1605 SYSCTL_DESCR("Global number of TCP SACK holes"), 1606 NULL, 0, &tcp_sack_globalholes, 0, 1607 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 1608 } 1609 1610 /* 1611 * Sysctl for tcp variables. 1612 */ 1613 #ifdef INET 1614 SYSCTL_SETUP(sysctl_net_inet_tcp_setup, "sysctl net.inet.tcp subtree setup") 1615 { 1616 1617 sysctl_net_inet_tcp_setup2(clog, PF_INET, "inet", "tcp"); 1618 } 1619 #endif /* INET */ 1620 1621 #ifdef INET6 1622 SYSCTL_SETUP(sysctl_net_inet6_tcp6_setup, "sysctl net.inet6.tcp6 subtree setup") 1623 { 1624 1625 sysctl_net_inet_tcp_setup2(clog, PF_INET6, "inet6", "tcp6"); 1626 } 1627 #endif /* INET6 */ 1628