1 /* $NetBSD: tcp_usrreq.c,v 1.144 2008/04/24 11:38:38 ad Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1997, 1998, 2005, 2006 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to The NetBSD Foundation 37 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation 38 * Facility, NASA Ames Research Center. 39 * This code is derived from software contributed to The NetBSD Foundation 40 * by Charles M. Hannum. 41 * This code is derived from software contributed to The NetBSD Foundation 42 * by Rui Paulo. 43 * 44 * Redistribution and use in source and binary forms, with or without 45 * modification, are permitted provided that the following conditions 46 * are met: 47 * 1. Redistributions of source code must retain the above copyright 48 * notice, this list of conditions and the following disclaimer. 49 * 2. Redistributions in binary form must reproduce the above copyright 50 * notice, this list of conditions and the following disclaimer in the 51 * documentation and/or other materials provided with the distribution. 52 * 3. All advertising materials mentioning features or use of this software 53 * must display the following acknowledgement: 54 * This product includes software developed by the NetBSD 55 * Foundation, Inc. and its contributors. 56 * 4. Neither the name of The NetBSD Foundation nor the names of its 57 * contributors may be used to endorse or promote products derived 58 * from this software without specific prior written permission. 59 * 60 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 61 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 62 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 63 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 64 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 65 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 66 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 67 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 68 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 69 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 70 * POSSIBILITY OF SUCH DAMAGE. 71 */ 72 73 /* 74 * Copyright (c) 1982, 1986, 1988, 1993, 1995 75 * The Regents of the University of California. All rights reserved. 76 * 77 * Redistribution and use in source and binary forms, with or without 78 * modification, are permitted provided that the following conditions 79 * are met: 80 * 1. Redistributions of source code must retain the above copyright 81 * notice, this list of conditions and the following disclaimer. 82 * 2. Redistributions in binary form must reproduce the above copyright 83 * notice, this list of conditions and the following disclaimer in the 84 * documentation and/or other materials provided with the distribution. 85 * 3. Neither the name of the University nor the names of its contributors 86 * may be used to endorse or promote products derived from this software 87 * without specific prior written permission. 88 * 89 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 90 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 91 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 92 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 93 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 94 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 95 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 96 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 97 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 98 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 99 * SUCH DAMAGE. 100 * 101 * @(#)tcp_usrreq.c 8.5 (Berkeley) 6/21/95 102 */ 103 104 #include <sys/cdefs.h> 105 __KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.144 2008/04/24 11:38:38 ad Exp $"); 106 107 #include "opt_inet.h" 108 #include "opt_ipsec.h" 109 #include "opt_tcp_debug.h" 110 #include "opt_mbuftrace.h" 111 #include "rnd.h" 112 113 #include <sys/param.h> 114 #include <sys/systm.h> 115 #include <sys/kernel.h> 116 #include <sys/malloc.h> 117 #include <sys/mbuf.h> 118 #include <sys/socket.h> 119 #include <sys/socketvar.h> 120 #include <sys/protosw.h> 121 #include <sys/errno.h> 122 #include <sys/stat.h> 123 #include <sys/proc.h> 124 #include <sys/domain.h> 125 #include <sys/sysctl.h> 126 #include <sys/kauth.h> 127 128 #include <net/if.h> 129 #include <net/route.h> 130 131 #include <netinet/in.h> 132 #include <netinet/in_systm.h> 133 #include <netinet/in_var.h> 134 #include <netinet/ip.h> 135 #include <netinet/in_pcb.h> 136 #include <netinet/ip_var.h> 137 #include <netinet/in_offload.h> 138 139 #ifdef INET6 140 #ifndef INET 141 #include <netinet/in.h> 142 #endif 143 #include <netinet/ip6.h> 144 #include <netinet6/in6_pcb.h> 145 #include <netinet6/ip6_var.h> 146 #include <netinet6/scope6_var.h> 147 #endif 148 149 #include <netinet/tcp.h> 150 #include <netinet/tcp_fsm.h> 151 #include <netinet/tcp_seq.h> 152 #include <netinet/tcp_timer.h> 153 #include <netinet/tcp_var.h> 154 #include <netinet/tcp_private.h> 155 #include <netinet/tcp_congctl.h> 156 #include <netinet/tcpip.h> 157 #include <netinet/tcp_debug.h> 158 159 #include "opt_tcp_space.h" 160 161 #ifdef IPSEC 162 #include <netinet6/ipsec.h> 163 #endif /*IPSEC*/ 164 165 /* 166 * TCP protocol interface to socket abstraction. 167 */ 168 169 /* 170 * Process a TCP user request for TCP tb. If this is a send request 171 * then m is the mbuf chain of send data. If this is a timer expiration 172 * (called from the software clock routine), then timertype tells which timer. 173 */ 174 /*ARGSUSED*/ 175 int 176 tcp_usrreq(struct socket *so, int req, 177 struct mbuf *m, struct mbuf *nam, struct mbuf *control, struct lwp *l) 178 { 179 struct inpcb *inp; 180 #ifdef INET6 181 struct in6pcb *in6p; 182 #endif 183 struct tcpcb *tp = NULL; 184 int s; 185 int error = 0; 186 #ifdef TCP_DEBUG 187 int ostate = 0; 188 #endif 189 int family; /* family of the socket */ 190 191 family = so->so_proto->pr_domain->dom_family; 192 193 if (req == PRU_CONTROL) { 194 switch (family) { 195 #ifdef INET 196 case PF_INET: 197 return (in_control(so, (long)m, (void *)nam, 198 (struct ifnet *)control, l)); 199 #endif 200 #ifdef INET6 201 case PF_INET6: 202 return (in6_control(so, (long)m, (void *)nam, 203 (struct ifnet *)control, l)); 204 #endif 205 default: 206 return EAFNOSUPPORT; 207 } 208 } 209 210 s = splsoftnet(); 211 212 if (req == PRU_PURGEIF) { 213 mutex_enter(softnet_lock); 214 switch (family) { 215 #ifdef INET 216 case PF_INET: 217 in_pcbpurgeif0(&tcbtable, (struct ifnet *)control); 218 in_purgeif((struct ifnet *)control); 219 in_pcbpurgeif(&tcbtable, (struct ifnet *)control); 220 break; 221 #endif 222 #ifdef INET6 223 case PF_INET6: 224 in6_pcbpurgeif0(&tcbtable, (struct ifnet *)control); 225 in6_purgeif((struct ifnet *)control); 226 in6_pcbpurgeif(&tcbtable, (struct ifnet *)control); 227 break; 228 #endif 229 default: 230 mutex_exit(softnet_lock); 231 splx(s); 232 return (EAFNOSUPPORT); 233 } 234 mutex_exit(softnet_lock); 235 splx(s); 236 return (0); 237 } 238 239 if (req == PRU_ATTACH) 240 sosetlock(so); 241 242 switch (family) { 243 #ifdef INET 244 case PF_INET: 245 inp = sotoinpcb(so); 246 #ifdef INET6 247 in6p = NULL; 248 #endif 249 break; 250 #endif 251 #ifdef INET6 252 case PF_INET6: 253 inp = NULL; 254 in6p = sotoin6pcb(so); 255 break; 256 #endif 257 default: 258 splx(s); 259 return EAFNOSUPPORT; 260 } 261 262 #ifdef DIAGNOSTIC 263 #ifdef INET6 264 if (inp && in6p) 265 panic("tcp_usrreq: both inp and in6p set to non-NULL"); 266 #endif 267 if (req != PRU_SEND && req != PRU_SENDOOB && control) 268 panic("tcp_usrreq: unexpected control mbuf"); 269 #endif 270 /* 271 * When a TCP is attached to a socket, then there will be 272 * a (struct inpcb) pointed at by the socket, and this 273 * structure will point at a subsidary (struct tcpcb). 274 */ 275 #ifndef INET6 276 if (inp == 0 && req != PRU_ATTACH) 277 #else 278 if ((inp == 0 && in6p == 0) && req != PRU_ATTACH) 279 #endif 280 { 281 error = EINVAL; 282 goto release; 283 } 284 #ifdef INET 285 if (inp) { 286 tp = intotcpcb(inp); 287 /* WHAT IF TP IS 0? */ 288 #ifdef KPROF 289 tcp_acounts[tp->t_state][req]++; 290 #endif 291 #ifdef TCP_DEBUG 292 ostate = tp->t_state; 293 #endif 294 } 295 #endif 296 #ifdef INET6 297 if (in6p) { 298 tp = in6totcpcb(in6p); 299 /* WHAT IF TP IS 0? */ 300 #ifdef KPROF 301 tcp_acounts[tp->t_state][req]++; 302 #endif 303 #ifdef TCP_DEBUG 304 ostate = tp->t_state; 305 #endif 306 } 307 #endif 308 309 switch (req) { 310 311 /* 312 * TCP attaches to socket via PRU_ATTACH, reserving space, 313 * and an internet control block. 314 */ 315 case PRU_ATTACH: 316 #ifndef INET6 317 if (inp != 0) 318 #else 319 if (inp != 0 || in6p != 0) 320 #endif 321 { 322 error = EISCONN; 323 break; 324 } 325 error = tcp_attach(so); 326 if (error) 327 break; 328 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 329 so->so_linger = TCP_LINGERTIME; 330 tp = sototcpcb(so); 331 break; 332 333 /* 334 * PRU_DETACH detaches the TCP protocol from the socket. 335 */ 336 case PRU_DETACH: 337 tp = tcp_disconnect(tp); 338 break; 339 340 /* 341 * Give the socket an address. 342 */ 343 case PRU_BIND: 344 switch (family) { 345 #ifdef INET 346 case PF_INET: 347 error = in_pcbbind(inp, nam, l); 348 break; 349 #endif 350 #ifdef INET6 351 case PF_INET6: 352 error = in6_pcbbind(in6p, nam, l); 353 if (!error) { 354 /* mapped addr case */ 355 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr)) 356 tp->t_family = AF_INET; 357 else 358 tp->t_family = AF_INET6; 359 } 360 break; 361 #endif 362 } 363 break; 364 365 /* 366 * Prepare to accept connections. 367 */ 368 case PRU_LISTEN: 369 #ifdef INET 370 if (inp && inp->inp_lport == 0) { 371 error = in_pcbbind(inp, (struct mbuf *)0, l); 372 if (error) 373 break; 374 } 375 #endif 376 #ifdef INET6 377 if (in6p && in6p->in6p_lport == 0) { 378 error = in6_pcbbind(in6p, (struct mbuf *)0, 379 (struct lwp *)0); 380 if (error) 381 break; 382 } 383 #endif 384 tp->t_state = TCPS_LISTEN; 385 break; 386 387 /* 388 * Initiate connection to peer. 389 * Create a template for use in transmissions on this connection. 390 * Enter SYN_SENT state, and mark socket as connecting. 391 * Start keep-alive timer, and seed output sequence space. 392 * Send initial segment on connection. 393 */ 394 case PRU_CONNECT: 395 #ifdef INET 396 if (inp) { 397 if (inp->inp_lport == 0) { 398 error = in_pcbbind(inp, (struct mbuf *)0, l); 399 if (error) 400 break; 401 } 402 error = in_pcbconnect(inp, nam, l); 403 } 404 #endif 405 #ifdef INET6 406 if (in6p) { 407 if (in6p->in6p_lport == 0) { 408 error = in6_pcbbind(in6p, (struct mbuf *)0, 409 (struct lwp *)0); 410 if (error) 411 break; 412 } 413 error = in6_pcbconnect(in6p, nam, l); 414 if (!error) { 415 /* mapped addr case */ 416 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr)) 417 tp->t_family = AF_INET; 418 else 419 tp->t_family = AF_INET6; 420 } 421 } 422 #endif 423 if (error) 424 break; 425 tp->t_template = tcp_template(tp); 426 if (tp->t_template == 0) { 427 #ifdef INET 428 if (inp) 429 in_pcbdisconnect(inp); 430 #endif 431 #ifdef INET6 432 if (in6p) 433 in6_pcbdisconnect(in6p); 434 #endif 435 error = ENOBUFS; 436 break; 437 } 438 /* 439 * Compute window scaling to request. 440 * XXX: This should be moved to tcp_output(). 441 */ 442 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 443 (TCP_MAXWIN << tp->request_r_scale) < sb_max) 444 tp->request_r_scale++; 445 soisconnecting(so); 446 TCP_STATINC(TCP_STAT_CONNATTEMPT); 447 tp->t_state = TCPS_SYN_SENT; 448 TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepinit); 449 tp->iss = tcp_new_iss(tp, 0); 450 tcp_sendseqinit(tp); 451 error = tcp_output(tp); 452 break; 453 454 /* 455 * Create a TCP connection between two sockets. 456 */ 457 case PRU_CONNECT2: 458 error = EOPNOTSUPP; 459 break; 460 461 /* 462 * Initiate disconnect from peer. 463 * If connection never passed embryonic stage, just drop; 464 * else if don't need to let data drain, then can just drop anyways, 465 * else have to begin TCP shutdown process: mark socket disconnecting, 466 * drain unread data, state switch to reflect user close, and 467 * send segment (e.g. FIN) to peer. Socket will be really disconnected 468 * when peer sends FIN and acks ours. 469 * 470 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 471 */ 472 case PRU_DISCONNECT: 473 tp = tcp_disconnect(tp); 474 break; 475 476 /* 477 * Accept a connection. Essentially all the work is 478 * done at higher levels; just return the address 479 * of the peer, storing through addr. 480 */ 481 case PRU_ACCEPT: 482 #ifdef INET 483 if (inp) 484 in_setpeeraddr(inp, nam); 485 #endif 486 #ifdef INET6 487 if (in6p) 488 in6_setpeeraddr(in6p, nam); 489 #endif 490 break; 491 492 /* 493 * Mark the connection as being incapable of further output. 494 */ 495 case PRU_SHUTDOWN: 496 socantsendmore(so); 497 tp = tcp_usrclosed(tp); 498 if (tp) 499 error = tcp_output(tp); 500 break; 501 502 /* 503 * After a receive, possibly send window update to peer. 504 */ 505 case PRU_RCVD: 506 /* 507 * soreceive() calls this function when a user receives 508 * ancillary data on a listening socket. We don't call 509 * tcp_output in such a case, since there is no header 510 * template for a listening socket and hence the kernel 511 * will panic. 512 */ 513 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0) 514 (void) tcp_output(tp); 515 break; 516 517 /* 518 * Do a send by putting data in output queue and updating urgent 519 * marker if URG set. Possibly send more data. 520 */ 521 case PRU_SEND: 522 if (control && control->m_len) { 523 m_freem(control); 524 m_freem(m); 525 error = EINVAL; 526 break; 527 } 528 sbappendstream(&so->so_snd, m); 529 error = tcp_output(tp); 530 break; 531 532 /* 533 * Abort the TCP. 534 */ 535 case PRU_ABORT: 536 tp = tcp_drop(tp, ECONNABORTED); 537 break; 538 539 case PRU_SENSE: 540 /* 541 * stat: don't bother with a blocksize. 542 */ 543 splx(s); 544 return (0); 545 546 case PRU_RCVOOB: 547 if (control && control->m_len) { 548 m_freem(control); 549 m_freem(m); 550 error = EINVAL; 551 break; 552 } 553 if ((so->so_oobmark == 0 && 554 (so->so_state & SS_RCVATMARK) == 0) || 555 so->so_options & SO_OOBINLINE || 556 tp->t_oobflags & TCPOOB_HADDATA) { 557 error = EINVAL; 558 break; 559 } 560 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 561 error = EWOULDBLOCK; 562 break; 563 } 564 m->m_len = 1; 565 *mtod(m, char *) = tp->t_iobc; 566 if (((long)nam & MSG_PEEK) == 0) 567 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 568 break; 569 570 case PRU_SENDOOB: 571 if (sbspace(&so->so_snd) < -512) { 572 m_freem(m); 573 error = ENOBUFS; 574 break; 575 } 576 /* 577 * According to RFC961 (Assigned Protocols), 578 * the urgent pointer points to the last octet 579 * of urgent data. We continue, however, 580 * to consider it to indicate the first octet 581 * of data past the urgent section. 582 * Otherwise, snd_up should be one lower. 583 */ 584 sbappendstream(&so->so_snd, m); 585 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 586 tp->t_force = 1; 587 error = tcp_output(tp); 588 tp->t_force = 0; 589 break; 590 591 case PRU_SOCKADDR: 592 #ifdef INET 593 if (inp) 594 in_setsockaddr(inp, nam); 595 #endif 596 #ifdef INET6 597 if (in6p) 598 in6_setsockaddr(in6p, nam); 599 #endif 600 break; 601 602 case PRU_PEERADDR: 603 #ifdef INET 604 if (inp) 605 in_setpeeraddr(inp, nam); 606 #endif 607 #ifdef INET6 608 if (in6p) 609 in6_setpeeraddr(in6p, nam); 610 #endif 611 break; 612 613 default: 614 panic("tcp_usrreq"); 615 } 616 #ifdef TCP_DEBUG 617 if (tp && (so->so_options & SO_DEBUG)) 618 tcp_trace(TA_USER, ostate, tp, NULL, req); 619 #endif 620 621 release: 622 splx(s); 623 return (error); 624 } 625 626 static void 627 change_keepalive(struct socket *so, struct tcpcb *tp) 628 { 629 tp->t_maxidle = tp->t_keepcnt * tp->t_keepintvl; 630 TCP_TIMER_DISARM(tp, TCPT_KEEP); 631 TCP_TIMER_DISARM(tp, TCPT_2MSL); 632 633 if (tp->t_state == TCPS_SYN_RECEIVED || 634 tp->t_state == TCPS_SYN_SENT) { 635 TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepinit); 636 } else if (so->so_options & SO_KEEPALIVE && 637 tp->t_state <= TCPS_CLOSE_WAIT) { 638 TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepintvl); 639 } else { 640 TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepidle); 641 } 642 643 if ((tp->t_state == TCPS_FIN_WAIT_2) && (tp->t_maxidle > 0)) 644 TCP_TIMER_ARM(tp, TCPT_2MSL, tp->t_maxidle); 645 } 646 647 648 int 649 tcp_ctloutput(int op, struct socket *so, int level, int optname, 650 struct mbuf **mp) 651 { 652 int error = 0, s; 653 struct inpcb *inp; 654 #ifdef INET6 655 struct in6pcb *in6p; 656 #endif 657 struct tcpcb *tp; 658 struct mbuf *m; 659 int i; 660 u_int ui; 661 int family; /* family of the socket */ 662 663 family = so->so_proto->pr_domain->dom_family; 664 665 s = splsoftnet(); 666 switch (family) { 667 #ifdef INET 668 case PF_INET: 669 inp = sotoinpcb(so); 670 #ifdef INET6 671 in6p = NULL; 672 #endif 673 break; 674 #endif 675 #ifdef INET6 676 case PF_INET6: 677 inp = NULL; 678 in6p = sotoin6pcb(so); 679 break; 680 #endif 681 default: 682 splx(s); 683 panic("%s: af %d", __func__, family); 684 } 685 #ifndef INET6 686 if (inp == NULL) 687 #else 688 if (inp == NULL && in6p == NULL) 689 #endif 690 { 691 splx(s); 692 if (op == PRCO_SETOPT && *mp) 693 (void) m_free(*mp); 694 return (ECONNRESET); 695 } 696 if (level != IPPROTO_TCP) { 697 switch (family) { 698 #ifdef INET 699 case PF_INET: 700 error = ip_ctloutput(op, so, level, optname, mp); 701 break; 702 #endif 703 #ifdef INET6 704 case PF_INET6: 705 error = ip6_ctloutput(op, so, level, optname, mp); 706 break; 707 #endif 708 } 709 splx(s); 710 return (error); 711 } 712 if (inp) 713 tp = intotcpcb(inp); 714 #ifdef INET6 715 else if (in6p) 716 tp = in6totcpcb(in6p); 717 #endif 718 else 719 tp = NULL; 720 721 switch (op) { 722 723 case PRCO_SETOPT: 724 m = *mp; 725 switch (optname) { 726 727 #ifdef TCP_SIGNATURE 728 case TCP_MD5SIG: 729 if (m == NULL || m->m_len != sizeof(int)) 730 error = EINVAL; 731 if (error) 732 break; 733 if (*mtod(m, int *) > 0) 734 tp->t_flags |= TF_SIGNATURE; 735 else 736 tp->t_flags &= ~TF_SIGNATURE; 737 break; 738 #endif /* TCP_SIGNATURE */ 739 740 case TCP_NODELAY: 741 if (m == NULL || m->m_len != sizeof(int)) 742 error = EINVAL; 743 else if (*mtod(m, int *)) 744 tp->t_flags |= TF_NODELAY; 745 else 746 tp->t_flags &= ~TF_NODELAY; 747 break; 748 749 case TCP_MAXSEG: 750 if (m && m->m_len == sizeof(int) && 751 (i = *mtod(m, int *)) > 0 && 752 i <= tp->t_peermss) 753 tp->t_peermss = i; /* limit on send size */ 754 else 755 error = EINVAL; 756 break; 757 #ifdef notyet 758 case TCP_CONGCTL: 759 if (m == NULL) 760 error = EINVAL; 761 error = tcp_congctl_select(tp, mtod(m, char *)); 762 #endif 763 break; 764 765 case TCP_KEEPIDLE: 766 if (m && m->m_len == sizeof(u_int) && 767 (ui = *mtod(m, u_int *)) > 0) { 768 tp->t_keepidle = ui; 769 change_keepalive(so, tp); 770 } else 771 error = EINVAL; 772 break; 773 774 case TCP_KEEPINTVL: 775 if (m && m->m_len == sizeof(u_int) && 776 (ui = *mtod(m, u_int *)) > 0) { 777 tp->t_keepintvl = ui; 778 change_keepalive(so, tp); 779 } else 780 error = EINVAL; 781 break; 782 783 case TCP_KEEPCNT: 784 if (m && m->m_len == sizeof(u_int) && 785 (ui = *mtod(m, u_int *)) > 0) { 786 tp->t_keepcnt = ui; 787 change_keepalive(so, tp); 788 } else 789 error = EINVAL; 790 break; 791 792 case TCP_KEEPINIT: 793 if (m && m->m_len == sizeof(u_int) && 794 (ui = *mtod(m, u_int *)) > 0) { 795 tp->t_keepinit = ui; 796 change_keepalive(so, tp); 797 } else 798 error = EINVAL; 799 break; 800 801 default: 802 error = ENOPROTOOPT; 803 break; 804 } 805 if (m) 806 (void) m_free(m); 807 break; 808 809 case PRCO_GETOPT: 810 *mp = m = m_intopt(so, 0); 811 812 switch (optname) { 813 #ifdef TCP_SIGNATURE 814 case TCP_MD5SIG: 815 *mtod(m, int *) = (tp->t_flags & TF_SIGNATURE) ? 1 : 0; 816 break; 817 #endif 818 case TCP_NODELAY: 819 *mtod(m, int *) = tp->t_flags & TF_NODELAY; 820 break; 821 case TCP_MAXSEG: 822 *mtod(m, int *) = tp->t_peermss; 823 break; 824 #ifdef notyet 825 case TCP_CONGCTL: 826 break; 827 #endif 828 default: 829 error = ENOPROTOOPT; 830 break; 831 } 832 break; 833 } 834 splx(s); 835 return (error); 836 } 837 838 #ifndef TCP_SENDSPACE 839 #define TCP_SENDSPACE 1024*32 840 #endif 841 int tcp_sendspace = TCP_SENDSPACE; 842 #ifndef TCP_RECVSPACE 843 #define TCP_RECVSPACE 1024*32 844 #endif 845 int tcp_recvspace = TCP_RECVSPACE; 846 847 /* 848 * Attach TCP protocol to socket, allocating 849 * internet protocol control block, tcp control block, 850 * bufer space, and entering LISTEN state if to accept connections. 851 */ 852 int 853 tcp_attach(struct socket *so) 854 { 855 struct tcpcb *tp; 856 struct inpcb *inp; 857 #ifdef INET6 858 struct in6pcb *in6p; 859 #endif 860 int error; 861 int family; /* family of the socket */ 862 863 family = so->so_proto->pr_domain->dom_family; 864 865 #ifdef MBUFTRACE 866 so->so_mowner = &tcp_sock_mowner; 867 so->so_rcv.sb_mowner = &tcp_sock_rx_mowner; 868 so->so_snd.sb_mowner = &tcp_sock_tx_mowner; 869 #endif 870 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 871 error = soreserve(so, tcp_sendspace, tcp_recvspace); 872 if (error) 873 return (error); 874 } 875 876 so->so_rcv.sb_flags |= SB_AUTOSIZE; 877 so->so_snd.sb_flags |= SB_AUTOSIZE; 878 879 switch (family) { 880 #ifdef INET 881 case PF_INET: 882 error = in_pcballoc(so, &tcbtable); 883 if (error) 884 return (error); 885 inp = sotoinpcb(so); 886 #ifdef INET6 887 in6p = NULL; 888 #endif 889 break; 890 #endif 891 #ifdef INET6 892 case PF_INET6: 893 error = in6_pcballoc(so, &tcbtable); 894 if (error) 895 return (error); 896 inp = NULL; 897 in6p = sotoin6pcb(so); 898 break; 899 #endif 900 default: 901 return EAFNOSUPPORT; 902 } 903 if (inp) 904 tp = tcp_newtcpcb(family, (void *)inp); 905 #ifdef INET6 906 else if (in6p) 907 tp = tcp_newtcpcb(family, (void *)in6p); 908 #endif 909 else 910 tp = NULL; 911 912 if (tp == 0) { 913 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 914 915 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 916 #ifdef INET 917 if (inp) 918 in_pcbdetach(inp); 919 #endif 920 #ifdef INET6 921 if (in6p) 922 in6_pcbdetach(in6p); 923 #endif 924 so->so_state |= nofd; 925 return (ENOBUFS); 926 } 927 tp->t_state = TCPS_CLOSED; 928 return (0); 929 } 930 931 /* 932 * Initiate (or continue) disconnect. 933 * If embryonic state, just send reset (once). 934 * If in ``let data drain'' option and linger null, just drop. 935 * Otherwise (hard), mark socket disconnecting and drop 936 * current input data; switch states based on user close, and 937 * send segment to peer (with FIN). 938 */ 939 struct tcpcb * 940 tcp_disconnect(struct tcpcb *tp) 941 { 942 struct socket *so; 943 944 if (tp->t_inpcb) 945 so = tp->t_inpcb->inp_socket; 946 #ifdef INET6 947 else if (tp->t_in6pcb) 948 so = tp->t_in6pcb->in6p_socket; 949 #endif 950 else 951 so = NULL; 952 953 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 954 tp = tcp_close(tp); 955 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 956 tp = tcp_drop(tp, 0); 957 else { 958 soisdisconnecting(so); 959 sbflush(&so->so_rcv); 960 tp = tcp_usrclosed(tp); 961 if (tp) 962 (void) tcp_output(tp); 963 } 964 return (tp); 965 } 966 967 /* 968 * User issued close, and wish to trail through shutdown states: 969 * if never received SYN, just forget it. If got a SYN from peer, 970 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 971 * If already got a FIN from peer, then almost done; go to LAST_ACK 972 * state. In all other cases, have already sent FIN to peer (e.g. 973 * after PRU_SHUTDOWN), and just have to play tedious game waiting 974 * for peer to send FIN or not respond to keep-alives, etc. 975 * We can let the user exit from the close as soon as the FIN is acked. 976 */ 977 struct tcpcb * 978 tcp_usrclosed(struct tcpcb *tp) 979 { 980 981 switch (tp->t_state) { 982 983 case TCPS_CLOSED: 984 case TCPS_LISTEN: 985 case TCPS_SYN_SENT: 986 tp->t_state = TCPS_CLOSED; 987 tp = tcp_close(tp); 988 break; 989 990 case TCPS_SYN_RECEIVED: 991 case TCPS_ESTABLISHED: 992 tp->t_state = TCPS_FIN_WAIT_1; 993 break; 994 995 case TCPS_CLOSE_WAIT: 996 tp->t_state = TCPS_LAST_ACK; 997 break; 998 } 999 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 1000 struct socket *so; 1001 if (tp->t_inpcb) 1002 so = tp->t_inpcb->inp_socket; 1003 #ifdef INET6 1004 else if (tp->t_in6pcb) 1005 so = tp->t_in6pcb->in6p_socket; 1006 #endif 1007 else 1008 so = NULL; 1009 if (so) 1010 soisdisconnected(so); 1011 /* 1012 * If we are in FIN_WAIT_2, we arrived here because the 1013 * application did a shutdown of the send side. Like the 1014 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after 1015 * a full close, we start a timer to make sure sockets are 1016 * not left in FIN_WAIT_2 forever. 1017 */ 1018 if ((tp->t_state == TCPS_FIN_WAIT_2) && (tp->t_maxidle > 0)) 1019 TCP_TIMER_ARM(tp, TCPT_2MSL, tp->t_maxidle); 1020 } 1021 return (tp); 1022 } 1023 1024 /* 1025 * sysctl helper routine for net.inet.ip.mssdflt. it can't be less 1026 * than 32. 1027 */ 1028 static int 1029 sysctl_net_inet_tcp_mssdflt(SYSCTLFN_ARGS) 1030 { 1031 int error, mssdflt; 1032 struct sysctlnode node; 1033 1034 mssdflt = tcp_mssdflt; 1035 node = *rnode; 1036 node.sysctl_data = &mssdflt; 1037 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1038 if (error || newp == NULL) 1039 return (error); 1040 1041 if (mssdflt < 32) 1042 return (EINVAL); 1043 tcp_mssdflt = mssdflt; 1044 1045 return (0); 1046 } 1047 1048 /* 1049 * sysctl helper routine for setting port related values under 1050 * net.inet.ip and net.inet6.ip6. does basic range checking and does 1051 * additional checks for each type. this code has placed in 1052 * tcp_input.c since INET and INET6 both use the same tcp code. 1053 * 1054 * this helper is not static so that both inet and inet6 can use it. 1055 */ 1056 int 1057 sysctl_net_inet_ip_ports(SYSCTLFN_ARGS) 1058 { 1059 int error, tmp; 1060 int apmin, apmax; 1061 #ifndef IPNOPRIVPORTS 1062 int lpmin, lpmax; 1063 #endif /* IPNOPRIVPORTS */ 1064 struct sysctlnode node; 1065 1066 if (namelen != 0) 1067 return (EINVAL); 1068 1069 switch (name[-3]) { 1070 #ifdef INET 1071 case PF_INET: 1072 apmin = anonportmin; 1073 apmax = anonportmax; 1074 #ifndef IPNOPRIVPORTS 1075 lpmin = lowportmin; 1076 lpmax = lowportmax; 1077 #endif /* IPNOPRIVPORTS */ 1078 break; 1079 #endif /* INET */ 1080 #ifdef INET6 1081 case PF_INET6: 1082 apmin = ip6_anonportmin; 1083 apmax = ip6_anonportmax; 1084 #ifndef IPNOPRIVPORTS 1085 lpmin = ip6_lowportmin; 1086 lpmax = ip6_lowportmax; 1087 #endif /* IPNOPRIVPORTS */ 1088 break; 1089 #endif /* INET6 */ 1090 default: 1091 return (EINVAL); 1092 } 1093 1094 /* 1095 * insert temporary copy into node, perform lookup on 1096 * temporary, then restore pointer 1097 */ 1098 node = *rnode; 1099 tmp = *(int*)rnode->sysctl_data; 1100 node.sysctl_data = &tmp; 1101 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1102 if (error || newp == NULL) 1103 return (error); 1104 1105 /* 1106 * simple port range check 1107 */ 1108 if (tmp < 0 || tmp > 65535) 1109 return (EINVAL); 1110 1111 /* 1112 * per-node range checks 1113 */ 1114 switch (rnode->sysctl_num) { 1115 case IPCTL_ANONPORTMIN: 1116 if (tmp >= apmax) 1117 return (EINVAL); 1118 #ifndef IPNOPRIVPORTS 1119 if (tmp < IPPORT_RESERVED) 1120 return (EINVAL); 1121 #endif /* IPNOPRIVPORTS */ 1122 break; 1123 1124 case IPCTL_ANONPORTMAX: 1125 if (apmin >= tmp) 1126 return (EINVAL); 1127 #ifndef IPNOPRIVPORTS 1128 if (tmp < IPPORT_RESERVED) 1129 return (EINVAL); 1130 #endif /* IPNOPRIVPORTS */ 1131 break; 1132 1133 #ifndef IPNOPRIVPORTS 1134 case IPCTL_LOWPORTMIN: 1135 if (tmp >= lpmax || 1136 tmp > IPPORT_RESERVEDMAX || 1137 tmp < IPPORT_RESERVEDMIN) 1138 return (EINVAL); 1139 break; 1140 1141 case IPCTL_LOWPORTMAX: 1142 if (lpmin >= tmp || 1143 tmp > IPPORT_RESERVEDMAX || 1144 tmp < IPPORT_RESERVEDMIN) 1145 return (EINVAL); 1146 break; 1147 #endif /* IPNOPRIVPORTS */ 1148 1149 default: 1150 return (EINVAL); 1151 } 1152 1153 *(int*)rnode->sysctl_data = tmp; 1154 1155 return (0); 1156 } 1157 1158 /* 1159 * The superuser can drop any connection. Normal users can only drop 1160 * their own connections. 1161 */ 1162 static inline int 1163 check_sockuid(struct socket *sockp, kauth_cred_t cred) 1164 { 1165 uid_t sockuid; 1166 1167 sockuid = sockp->so_uidinfo->ui_uid; 1168 if (kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, NULL) == 0 || 1169 sockuid == kauth_cred_getuid(cred) || 1170 sockuid == kauth_cred_geteuid(cred)) 1171 return 0; 1172 return EACCES; 1173 } 1174 1175 static inline int 1176 copyout_uid(struct socket *sockp, void *oldp, size_t *oldlenp) 1177 { 1178 size_t sz; 1179 int error; 1180 uid_t uid; 1181 1182 uid = sockp->so_uidinfo->ui_uid; 1183 if (oldp) { 1184 sz = MIN(sizeof(uid), *oldlenp); 1185 error = copyout(&uid, oldp, sz); 1186 if (error) 1187 return error; 1188 } 1189 *oldlenp = sizeof(uid); 1190 return 0; 1191 } 1192 1193 static inline int 1194 inet4_ident_core(struct in_addr raddr, u_int rport, 1195 struct in_addr laddr, u_int lport, 1196 void *oldp, size_t *oldlenp, 1197 struct lwp *l, int dodrop) 1198 { 1199 struct inpcb *inp; 1200 struct socket *sockp; 1201 1202 inp = in_pcblookup_connect(&tcbtable, raddr, rport, laddr, lport); 1203 1204 if (inp == NULL || (sockp = inp->inp_socket) == NULL) 1205 return ESRCH; 1206 1207 if (dodrop) { 1208 struct tcpcb *tp; 1209 1210 if (inp == NULL || (tp = intotcpcb(inp)) == NULL || 1211 (inp->inp_socket->so_options & SO_ACCEPTCONN) != 0) 1212 return ESRCH; 1213 1214 if (check_sockuid(inp->inp_socket, l->l_cred) != 0) 1215 return EACCES; 1216 1217 (void)tcp_drop(tp, ECONNABORTED); 1218 return 0; 1219 } 1220 else 1221 return copyout_uid(sockp, oldp, oldlenp); 1222 } 1223 1224 #ifdef INET6 1225 static inline int 1226 inet6_ident_core(struct in6_addr *raddr, u_int rport, 1227 struct in6_addr *laddr, u_int lport, 1228 void *oldp, size_t *oldlenp, 1229 struct lwp *l, int dodrop) 1230 { 1231 struct in6pcb *in6p; 1232 struct socket *sockp; 1233 1234 in6p = in6_pcblookup_connect(&tcbtable, raddr, rport, laddr, lport, 0); 1235 1236 if (in6p == NULL || (sockp = in6p->in6p_socket) == NULL) 1237 return ESRCH; 1238 1239 if (dodrop) { 1240 struct tcpcb *tp; 1241 1242 if (in6p == NULL || (tp = in6totcpcb(in6p)) == NULL || 1243 (in6p->in6p_socket->so_options & SO_ACCEPTCONN) != 0) 1244 return ESRCH; 1245 1246 if (check_sockuid(in6p->in6p_socket, l->l_cred) != 0) 1247 return EACCES; 1248 1249 (void)tcp_drop(tp, ECONNABORTED); 1250 return 0; 1251 } 1252 else 1253 return copyout_uid(sockp, oldp, oldlenp); 1254 } 1255 #endif 1256 1257 /* 1258 * sysctl helper routine for the net.inet.tcp.drop and 1259 * net.inet6.tcp6.drop nodes. 1260 */ 1261 #define sysctl_net_inet_tcp_drop sysctl_net_inet_tcp_ident 1262 1263 /* 1264 * sysctl helper routine for the net.inet.tcp.ident and 1265 * net.inet6.tcp6.ident nodes. contains backwards compat code for the 1266 * old way of looking up the ident information for ipv4 which involves 1267 * stuffing the port/addr pairs into the mib lookup. 1268 */ 1269 static int 1270 sysctl_net_inet_tcp_ident(SYSCTLFN_ARGS) 1271 { 1272 #ifdef INET 1273 struct sockaddr_in *si4[2]; 1274 #endif /* INET */ 1275 #ifdef INET6 1276 struct sockaddr_in6 *si6[2]; 1277 #endif /* INET6 */ 1278 struct sockaddr_storage sa[2]; 1279 int error, pf, dodrop, s; 1280 1281 dodrop = name[-1] == TCPCTL_DROP; 1282 if (dodrop) { 1283 if (oldp != NULL || *oldlenp != 0) 1284 return EINVAL; 1285 if (newp == NULL) 1286 return EPERM; 1287 if (newlen < sizeof(sa)) 1288 return ENOMEM; 1289 } 1290 if (namelen != 4 && namelen != 0) 1291 return EINVAL; 1292 if (name[-2] != IPPROTO_TCP) 1293 return EINVAL; 1294 pf = name[-3]; 1295 1296 /* old style lookup, ipv4 only */ 1297 if (namelen == 4) { 1298 #ifdef INET 1299 struct in_addr laddr, raddr; 1300 u_int lport, rport; 1301 1302 if (pf != PF_INET) 1303 return EPROTONOSUPPORT; 1304 raddr.s_addr = (uint32_t)name[0]; 1305 rport = (u_int)name[1]; 1306 laddr.s_addr = (uint32_t)name[2]; 1307 lport = (u_int)name[3]; 1308 1309 s = splsoftnet(); 1310 error = inet4_ident_core(raddr, rport, laddr, lport, 1311 oldp, oldlenp, l, dodrop); 1312 splx(s); 1313 return error; 1314 #else /* INET */ 1315 return EINVAL; 1316 #endif /* INET */ 1317 } 1318 1319 if (newp == NULL || newlen != sizeof(sa)) 1320 return EINVAL; 1321 error = copyin(newp, &sa, newlen); 1322 if (error) 1323 return error; 1324 1325 /* 1326 * requested families must match 1327 */ 1328 if (pf != sa[0].ss_family || sa[0].ss_family != sa[1].ss_family) 1329 return EINVAL; 1330 1331 switch (pf) { 1332 #ifdef INET6 1333 case PF_INET6: 1334 si6[0] = (struct sockaddr_in6*)&sa[0]; 1335 si6[1] = (struct sockaddr_in6*)&sa[1]; 1336 if (si6[0]->sin6_len != sizeof(*si6[0]) || 1337 si6[1]->sin6_len != sizeof(*si6[1])) 1338 return EINVAL; 1339 1340 if (!IN6_IS_ADDR_V4MAPPED(&si6[0]->sin6_addr) && 1341 !IN6_IS_ADDR_V4MAPPED(&si6[1]->sin6_addr)) { 1342 error = sa6_embedscope(si6[0], ip6_use_defzone); 1343 if (error) 1344 return error; 1345 error = sa6_embedscope(si6[1], ip6_use_defzone); 1346 if (error) 1347 return error; 1348 1349 s = splsoftnet(); 1350 error = inet6_ident_core(&si6[0]->sin6_addr, 1351 si6[0]->sin6_port, &si6[1]->sin6_addr, 1352 si6[1]->sin6_port, oldp, oldlenp, l, dodrop); 1353 splx(s); 1354 return error; 1355 } 1356 1357 if (IN6_IS_ADDR_V4MAPPED(&si6[0]->sin6_addr) != 1358 IN6_IS_ADDR_V4MAPPED(&si6[1]->sin6_addr)) 1359 return EINVAL; 1360 1361 in6_sin6_2_sin_in_sock((struct sockaddr *)&sa[0]); 1362 in6_sin6_2_sin_in_sock((struct sockaddr *)&sa[1]); 1363 /*FALLTHROUGH*/ 1364 #endif /* INET6 */ 1365 #ifdef INET 1366 case PF_INET: 1367 si4[0] = (struct sockaddr_in*)&sa[0]; 1368 si4[1] = (struct sockaddr_in*)&sa[1]; 1369 if (si4[0]->sin_len != sizeof(*si4[0]) || 1370 si4[0]->sin_len != sizeof(*si4[1])) 1371 return EINVAL; 1372 1373 s = splsoftnet(); 1374 error = inet4_ident_core(si4[0]->sin_addr, si4[0]->sin_port, 1375 si4[1]->sin_addr, si4[1]->sin_port, 1376 oldp, oldlenp, l, dodrop); 1377 splx(s); 1378 return error; 1379 #endif /* INET */ 1380 default: 1381 return EPROTONOSUPPORT; 1382 } 1383 } 1384 1385 /* 1386 * sysctl helper for the inet and inet6 pcblists. handles tcp/udp and 1387 * inet/inet6, as well as raw pcbs for each. specifically not 1388 * declared static so that raw sockets and udp/udp6 can use it as 1389 * well. 1390 */ 1391 int 1392 sysctl_inpcblist(SYSCTLFN_ARGS) 1393 { 1394 #ifdef INET 1395 struct sockaddr_in *in; 1396 const struct inpcb *inp; 1397 #endif 1398 #ifdef INET6 1399 struct sockaddr_in6 *in6; 1400 const struct in6pcb *in6p; 1401 #endif 1402 /* 1403 * sysctl_data is const, but CIRCLEQ_FOREACH can't use a const 1404 * struct inpcbtable pointer, so we have to discard const. :-/ 1405 */ 1406 struct inpcbtable *pcbtbl = __UNCONST(rnode->sysctl_data); 1407 const struct inpcb_hdr *inph; 1408 struct tcpcb *tp; 1409 struct kinfo_pcb pcb; 1410 char *dp; 1411 u_int op, arg; 1412 size_t len, needed, elem_size, out_size; 1413 int error, elem_count, pf, proto, pf2; 1414 1415 if (namelen != 4) 1416 return (EINVAL); 1417 1418 if (oldp != NULL) { 1419 len = *oldlenp; 1420 elem_size = name[2]; 1421 elem_count = name[3]; 1422 if (elem_size != sizeof(pcb)) 1423 return EINVAL; 1424 } else { 1425 len = 0; 1426 elem_count = INT_MAX; 1427 elem_size = sizeof(pcb); 1428 } 1429 error = 0; 1430 dp = oldp; 1431 op = name[0]; 1432 arg = name[1]; 1433 out_size = elem_size; 1434 needed = 0; 1435 1436 if (namelen == 1 && name[0] == CTL_QUERY) 1437 return (sysctl_query(SYSCTLFN_CALL(rnode))); 1438 1439 if (name - oname != 4) 1440 return (EINVAL); 1441 1442 pf = oname[1]; 1443 proto = oname[2]; 1444 pf2 = (oldp != NULL) ? pf : 0; 1445 1446 CIRCLEQ_FOREACH(inph, &pcbtbl->inpt_queue, inph_queue) { 1447 #ifdef INET 1448 inp = (const struct inpcb *)inph; 1449 #endif 1450 #ifdef INET6 1451 in6p = (const struct in6pcb *)inph; 1452 #endif 1453 1454 if (inph->inph_af != pf) 1455 continue; 1456 1457 if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_SOCKET, 1458 KAUTH_REQ_NETWORK_SOCKET_CANSEE, inph->inph_socket, NULL, 1459 NULL) != 0) 1460 continue; 1461 1462 memset(&pcb, 0, sizeof(pcb)); 1463 1464 pcb.ki_family = pf; 1465 pcb.ki_type = proto; 1466 1467 switch (pf2) { 1468 case 0: 1469 /* just probing for size */ 1470 break; 1471 #ifdef INET 1472 case PF_INET: 1473 pcb.ki_family = inp->inp_socket->so_proto-> 1474 pr_domain->dom_family; 1475 pcb.ki_type = inp->inp_socket->so_proto-> 1476 pr_type; 1477 pcb.ki_protocol = inp->inp_socket->so_proto-> 1478 pr_protocol; 1479 pcb.ki_pflags = inp->inp_flags; 1480 1481 pcb.ki_sostate = inp->inp_socket->so_state; 1482 pcb.ki_prstate = inp->inp_state; 1483 if (proto == IPPROTO_TCP) { 1484 tp = intotcpcb(inp); 1485 pcb.ki_tstate = tp->t_state; 1486 pcb.ki_tflags = tp->t_flags; 1487 } 1488 1489 pcb.ki_pcbaddr = PTRTOUINT64(inp); 1490 pcb.ki_ppcbaddr = PTRTOUINT64(inp->inp_ppcb); 1491 pcb.ki_sockaddr = PTRTOUINT64(inp->inp_socket); 1492 1493 pcb.ki_rcvq = inp->inp_socket->so_rcv.sb_cc; 1494 pcb.ki_sndq = inp->inp_socket->so_snd.sb_cc; 1495 1496 in = satosin(&pcb.ki_src); 1497 in->sin_len = sizeof(*in); 1498 in->sin_family = pf; 1499 in->sin_port = inp->inp_lport; 1500 in->sin_addr = inp->inp_laddr; 1501 if (pcb.ki_prstate >= INP_CONNECTED) { 1502 in = satosin(&pcb.ki_dst); 1503 in->sin_len = sizeof(*in); 1504 in->sin_family = pf; 1505 in->sin_port = inp->inp_fport; 1506 in->sin_addr = inp->inp_faddr; 1507 } 1508 break; 1509 #endif 1510 #ifdef INET6 1511 case PF_INET6: 1512 pcb.ki_family = in6p->in6p_socket->so_proto-> 1513 pr_domain->dom_family; 1514 pcb.ki_type = in6p->in6p_socket->so_proto->pr_type; 1515 pcb.ki_protocol = in6p->in6p_socket->so_proto-> 1516 pr_protocol; 1517 pcb.ki_pflags = in6p->in6p_flags; 1518 1519 pcb.ki_sostate = in6p->in6p_socket->so_state; 1520 pcb.ki_prstate = in6p->in6p_state; 1521 if (proto == IPPROTO_TCP) { 1522 tp = in6totcpcb(in6p); 1523 pcb.ki_tstate = tp->t_state; 1524 pcb.ki_tflags = tp->t_flags; 1525 } 1526 1527 pcb.ki_pcbaddr = PTRTOUINT64(in6p); 1528 pcb.ki_ppcbaddr = PTRTOUINT64(in6p->in6p_ppcb); 1529 pcb.ki_sockaddr = PTRTOUINT64(in6p->in6p_socket); 1530 1531 pcb.ki_rcvq = in6p->in6p_socket->so_rcv.sb_cc; 1532 pcb.ki_sndq = in6p->in6p_socket->so_snd.sb_cc; 1533 1534 in6 = satosin6(&pcb.ki_src); 1535 in6->sin6_len = sizeof(*in6); 1536 in6->sin6_family = pf; 1537 in6->sin6_port = in6p->in6p_lport; 1538 in6->sin6_flowinfo = in6p->in6p_flowinfo; 1539 in6->sin6_addr = in6p->in6p_laddr; 1540 in6->sin6_scope_id = 0; /* XXX? */ 1541 1542 if (pcb.ki_prstate >= IN6P_CONNECTED) { 1543 in6 = satosin6(&pcb.ki_dst); 1544 in6->sin6_len = sizeof(*in6); 1545 in6->sin6_family = pf; 1546 in6->sin6_port = in6p->in6p_fport; 1547 in6->sin6_flowinfo = in6p->in6p_flowinfo; 1548 in6->sin6_addr = in6p->in6p_faddr; 1549 in6->sin6_scope_id = 0; /* XXX? */ 1550 } 1551 break; 1552 #endif 1553 } 1554 1555 if (len >= elem_size && elem_count > 0) { 1556 error = copyout(&pcb, dp, out_size); 1557 if (error) 1558 return (error); 1559 dp += elem_size; 1560 len -= elem_size; 1561 } 1562 if (elem_count > 0) { 1563 needed += elem_size; 1564 if (elem_count != INT_MAX) 1565 elem_count--; 1566 } 1567 } 1568 1569 *oldlenp = needed; 1570 if (oldp == NULL) 1571 *oldlenp += PCB_SLOP * sizeof(struct kinfo_pcb); 1572 1573 return (error); 1574 } 1575 1576 static int 1577 sysctl_tcp_congctl(SYSCTLFN_ARGS) 1578 { 1579 struct sysctlnode node; 1580 int error, r; 1581 char newname[TCPCC_MAXLEN]; 1582 1583 strlcpy(newname, tcp_congctl_global_name, sizeof(newname) - 1); 1584 1585 node = *rnode; 1586 node.sysctl_data = newname; 1587 node.sysctl_size = sizeof(newname); 1588 1589 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1590 1591 if (error || 1592 newp == NULL || 1593 strncmp(newname, tcp_congctl_global_name, sizeof(newname)) == 0) 1594 return error; 1595 1596 if ((r = tcp_congctl_select(NULL, newname))) 1597 return r; 1598 1599 return error; 1600 } 1601 1602 static int 1603 sysctl_tcp_keep(SYSCTLFN_ARGS) 1604 { 1605 int error; 1606 u_int tmp; 1607 struct sysctlnode node; 1608 1609 node = *rnode; 1610 tmp = *(u_int *)rnode->sysctl_data; 1611 node.sysctl_data = &tmp; 1612 1613 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1614 if (error || newp == NULL) 1615 return error; 1616 1617 *(u_int *)rnode->sysctl_data = tmp; 1618 tcp_tcpcb_template(); /* update the template */ 1619 return 0; 1620 } 1621 1622 static int 1623 sysctl_net_inet_tcp_stats(SYSCTLFN_ARGS) 1624 { 1625 netstat_sysctl_context ctx; 1626 uint64_t tcps[TCP_NSTATS]; 1627 1628 ctx.ctx_stat = tcpstat_percpu; 1629 ctx.ctx_counters = tcps; 1630 ctx.ctx_ncounters = TCP_NSTATS; 1631 return (NETSTAT_SYSCTL(&ctx)); 1632 } 1633 1634 /* 1635 * this (second stage) setup routine is a replacement for tcp_sysctl() 1636 * (which is currently used for ipv4 and ipv6) 1637 */ 1638 static void 1639 sysctl_net_inet_tcp_setup2(struct sysctllog **clog, int pf, const char *pfname, 1640 const char *tcpname) 1641 { 1642 const struct sysctlnode *sack_node; 1643 const struct sysctlnode *abc_node; 1644 const struct sysctlnode *ecn_node; 1645 const struct sysctlnode *congctl_node; 1646 #ifdef TCP_DEBUG 1647 extern struct tcp_debug tcp_debug[TCP_NDEBUG]; 1648 extern int tcp_debx; 1649 #endif 1650 1651 sysctl_createv(clog, 0, NULL, NULL, 1652 CTLFLAG_PERMANENT, 1653 CTLTYPE_NODE, "net", NULL, 1654 NULL, 0, NULL, 0, 1655 CTL_NET, CTL_EOL); 1656 sysctl_createv(clog, 0, NULL, NULL, 1657 CTLFLAG_PERMANENT, 1658 CTLTYPE_NODE, pfname, NULL, 1659 NULL, 0, NULL, 0, 1660 CTL_NET, pf, CTL_EOL); 1661 sysctl_createv(clog, 0, NULL, NULL, 1662 CTLFLAG_PERMANENT, 1663 CTLTYPE_NODE, tcpname, 1664 SYSCTL_DESCR("TCP related settings"), 1665 NULL, 0, NULL, 0, 1666 CTL_NET, pf, IPPROTO_TCP, CTL_EOL); 1667 1668 sysctl_createv(clog, 0, NULL, NULL, 1669 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1670 CTLTYPE_INT, "rfc1323", 1671 SYSCTL_DESCR("Enable RFC1323 TCP extensions"), 1672 NULL, 0, &tcp_do_rfc1323, 0, 1673 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RFC1323, CTL_EOL); 1674 sysctl_createv(clog, 0, NULL, NULL, 1675 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1676 CTLTYPE_INT, "sendspace", 1677 SYSCTL_DESCR("Default TCP send buffer size"), 1678 NULL, 0, &tcp_sendspace, 0, 1679 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SENDSPACE, CTL_EOL); 1680 sysctl_createv(clog, 0, NULL, NULL, 1681 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1682 CTLTYPE_INT, "recvspace", 1683 SYSCTL_DESCR("Default TCP receive buffer size"), 1684 NULL, 0, &tcp_recvspace, 0, 1685 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RECVSPACE, CTL_EOL); 1686 sysctl_createv(clog, 0, NULL, NULL, 1687 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1688 CTLTYPE_INT, "mssdflt", 1689 SYSCTL_DESCR("Default maximum segment size"), 1690 sysctl_net_inet_tcp_mssdflt, 0, &tcp_mssdflt, 0, 1691 CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSSDFLT, CTL_EOL); 1692 sysctl_createv(clog, 0, NULL, NULL, 1693 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1694 CTLTYPE_INT, "minmss", 1695 SYSCTL_DESCR("Lower limit for TCP maximum segment size"), 1696 NULL, 0, &tcp_minmss, 0, 1697 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1698 sysctl_createv(clog, 0, NULL, NULL, 1699 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1700 CTLTYPE_INT, "syn_cache_limit", 1701 SYSCTL_DESCR("Maximum number of entries in the TCP " 1702 "compressed state engine"), 1703 NULL, 0, &tcp_syn_cache_limit, 0, 1704 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_CACHE_LIMIT, 1705 CTL_EOL); 1706 sysctl_createv(clog, 0, NULL, NULL, 1707 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1708 CTLTYPE_INT, "syn_bucket_limit", 1709 SYSCTL_DESCR("Maximum number of entries per hash " 1710 "bucket in the TCP compressed state " 1711 "engine"), 1712 NULL, 0, &tcp_syn_bucket_limit, 0, 1713 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_BUCKET_LIMIT, 1714 CTL_EOL); 1715 #if 0 /* obsoleted */ 1716 sysctl_createv(clog, 0, NULL, NULL, 1717 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1718 CTLTYPE_INT, "syn_cache_interval", 1719 SYSCTL_DESCR("TCP compressed state engine's timer interval"), 1720 NULL, 0, &tcp_syn_cache_interval, 0, 1721 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_CACHE_INTER, 1722 CTL_EOL); 1723 #endif 1724 sysctl_createv(clog, 0, NULL, NULL, 1725 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1726 CTLTYPE_INT, "init_win", 1727 SYSCTL_DESCR("Initial TCP congestion window"), 1728 NULL, 0, &tcp_init_win, 0, 1729 CTL_NET, pf, IPPROTO_TCP, TCPCTL_INIT_WIN, CTL_EOL); 1730 sysctl_createv(clog, 0, NULL, NULL, 1731 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1732 CTLTYPE_INT, "mss_ifmtu", 1733 SYSCTL_DESCR("Use interface MTU for calculating MSS"), 1734 NULL, 0, &tcp_mss_ifmtu, 0, 1735 CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSS_IFMTU, CTL_EOL); 1736 sysctl_createv(clog, 0, NULL, &sack_node, 1737 CTLFLAG_PERMANENT, 1738 CTLTYPE_NODE, "sack", 1739 SYSCTL_DESCR("RFC2018 Selective ACKnowledgement tunables"), 1740 NULL, 0, NULL, 0, 1741 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_EOL); 1742 1743 /* Congctl subtree */ 1744 sysctl_createv(clog, 0, NULL, &congctl_node, 1745 CTLFLAG_PERMANENT, 1746 CTLTYPE_NODE, "congctl", 1747 SYSCTL_DESCR("TCP Congestion Control"), 1748 NULL, 0, NULL, 0, 1749 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1750 sysctl_createv(clog, 0, &congctl_node, NULL, 1751 CTLFLAG_PERMANENT, 1752 CTLTYPE_STRING, "available", 1753 SYSCTL_DESCR("Available Congestion Control Mechanisms"), 1754 NULL, 0, &tcp_congctl_avail, 0, CTL_CREATE, CTL_EOL); 1755 sysctl_createv(clog, 0, &congctl_node, NULL, 1756 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1757 CTLTYPE_STRING, "selected", 1758 SYSCTL_DESCR("Selected Congestion Control Mechanism"), 1759 sysctl_tcp_congctl, 0, NULL, TCPCC_MAXLEN, 1760 CTL_CREATE, CTL_EOL); 1761 1762 sysctl_createv(clog, 0, NULL, NULL, 1763 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1764 CTLTYPE_INT, "win_scale", 1765 SYSCTL_DESCR("Use RFC1323 window scale options"), 1766 NULL, 0, &tcp_do_win_scale, 0, 1767 CTL_NET, pf, IPPROTO_TCP, TCPCTL_WSCALE, CTL_EOL); 1768 sysctl_createv(clog, 0, NULL, NULL, 1769 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1770 CTLTYPE_INT, "timestamps", 1771 SYSCTL_DESCR("Use RFC1323 time stamp options"), 1772 NULL, 0, &tcp_do_timestamps, 0, 1773 CTL_NET, pf, IPPROTO_TCP, TCPCTL_TSTAMP, CTL_EOL); 1774 sysctl_createv(clog, 0, NULL, NULL, 1775 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1776 CTLTYPE_INT, "compat_42", 1777 SYSCTL_DESCR("Enable workarounds for 4.2BSD TCP bugs"), 1778 NULL, 0, &tcp_compat_42, 0, 1779 CTL_NET, pf, IPPROTO_TCP, TCPCTL_COMPAT_42, CTL_EOL); 1780 sysctl_createv(clog, 0, NULL, NULL, 1781 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1782 CTLTYPE_INT, "cwm", 1783 SYSCTL_DESCR("Hughes/Touch/Heidemann Congestion Window " 1784 "Monitoring"), 1785 NULL, 0, &tcp_cwm, 0, 1786 CTL_NET, pf, IPPROTO_TCP, TCPCTL_CWM, CTL_EOL); 1787 sysctl_createv(clog, 0, NULL, NULL, 1788 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1789 CTLTYPE_INT, "cwm_burstsize", 1790 SYSCTL_DESCR("Congestion Window Monitoring allowed " 1791 "burst count in packets"), 1792 NULL, 0, &tcp_cwm_burstsize, 0, 1793 CTL_NET, pf, IPPROTO_TCP, TCPCTL_CWM_BURSTSIZE, 1794 CTL_EOL); 1795 sysctl_createv(clog, 0, NULL, NULL, 1796 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1797 CTLTYPE_INT, "ack_on_push", 1798 SYSCTL_DESCR("Immediately return ACK when PSH is " 1799 "received"), 1800 NULL, 0, &tcp_ack_on_push, 0, 1801 CTL_NET, pf, IPPROTO_TCP, TCPCTL_ACK_ON_PUSH, CTL_EOL); 1802 sysctl_createv(clog, 0, NULL, NULL, 1803 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1804 CTLTYPE_INT, "keepidle", 1805 SYSCTL_DESCR("Allowed connection idle ticks before a " 1806 "keepalive probe is sent"), 1807 sysctl_tcp_keep, 0, &tcp_keepidle, 0, 1808 CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPIDLE, CTL_EOL); 1809 sysctl_createv(clog, 0, NULL, NULL, 1810 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1811 CTLTYPE_INT, "keepintvl", 1812 SYSCTL_DESCR("Ticks before next keepalive probe is sent"), 1813 sysctl_tcp_keep, 0, &tcp_keepintvl, 0, 1814 CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPINTVL, CTL_EOL); 1815 sysctl_createv(clog, 0, NULL, NULL, 1816 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1817 CTLTYPE_INT, "keepcnt", 1818 SYSCTL_DESCR("Number of keepalive probes to send"), 1819 sysctl_tcp_keep, 0, &tcp_keepcnt, 0, 1820 CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPCNT, CTL_EOL); 1821 sysctl_createv(clog, 0, NULL, NULL, 1822 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 1823 CTLTYPE_INT, "slowhz", 1824 SYSCTL_DESCR("Keepalive ticks per second"), 1825 NULL, PR_SLOWHZ, NULL, 0, 1826 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SLOWHZ, CTL_EOL); 1827 sysctl_createv(clog, 0, NULL, NULL, 1828 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1829 CTLTYPE_INT, "log_refused", 1830 SYSCTL_DESCR("Log refused TCP connections"), 1831 NULL, 0, &tcp_log_refused, 0, 1832 CTL_NET, pf, IPPROTO_TCP, TCPCTL_LOG_REFUSED, CTL_EOL); 1833 #if 0 /* obsoleted */ 1834 sysctl_createv(clog, 0, NULL, NULL, 1835 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1836 CTLTYPE_INT, "rstratelimit", NULL, 1837 NULL, 0, &tcp_rst_ratelim, 0, 1838 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RSTRATELIMIT, CTL_EOL); 1839 #endif 1840 sysctl_createv(clog, 0, NULL, NULL, 1841 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1842 CTLTYPE_INT, "rstppslimit", 1843 SYSCTL_DESCR("Maximum number of RST packets to send " 1844 "per second"), 1845 NULL, 0, &tcp_rst_ppslim, 0, 1846 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RSTPPSLIMIT, CTL_EOL); 1847 sysctl_createv(clog, 0, NULL, NULL, 1848 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1849 CTLTYPE_INT, "delack_ticks", 1850 SYSCTL_DESCR("Number of ticks to delay sending an ACK"), 1851 NULL, 0, &tcp_delack_ticks, 0, 1852 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DELACK_TICKS, CTL_EOL); 1853 sysctl_createv(clog, 0, NULL, NULL, 1854 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1855 CTLTYPE_INT, "init_win_local", 1856 SYSCTL_DESCR("Initial TCP window size (in segments)"), 1857 NULL, 0, &tcp_init_win_local, 0, 1858 CTL_NET, pf, IPPROTO_TCP, TCPCTL_INIT_WIN_LOCAL, 1859 CTL_EOL); 1860 sysctl_createv(clog, 0, NULL, NULL, 1861 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1862 CTLTYPE_STRUCT, "ident", 1863 SYSCTL_DESCR("RFC1413 Identification Protocol lookups"), 1864 sysctl_net_inet_tcp_ident, 0, NULL, sizeof(uid_t), 1865 CTL_NET, pf, IPPROTO_TCP, TCPCTL_IDENT, CTL_EOL); 1866 sysctl_createv(clog, 0, NULL, NULL, 1867 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1868 CTLTYPE_INT, "do_loopback_cksum", 1869 SYSCTL_DESCR("Perform TCP checksum on loopback"), 1870 NULL, 0, &tcp_do_loopback_cksum, 0, 1871 CTL_NET, pf, IPPROTO_TCP, TCPCTL_LOOPBACKCKSUM, 1872 CTL_EOL); 1873 sysctl_createv(clog, 0, NULL, NULL, 1874 CTLFLAG_PERMANENT, 1875 CTLTYPE_STRUCT, "pcblist", 1876 SYSCTL_DESCR("TCP protocol control block list"), 1877 sysctl_inpcblist, 0, &tcbtable, 0, 1878 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, 1879 CTL_EOL); 1880 sysctl_createv(clog, 0, NULL, NULL, 1881 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1882 CTLTYPE_INT, "keepinit", 1883 SYSCTL_DESCR("Ticks before initial tcp connection times out"), 1884 sysctl_tcp_keep, 0, &tcp_keepinit, 0, 1885 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1886 1887 /* TCP socket buffers auto-sizing nodes */ 1888 sysctl_createv(clog, 0, NULL, NULL, 1889 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1890 CTLTYPE_INT, "recvbuf_auto", 1891 SYSCTL_DESCR("Enable automatic receive " 1892 "buffer sizing (experimental)"), 1893 NULL, 0, &tcp_do_autorcvbuf, 0, 1894 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1895 sysctl_createv(clog, 0, NULL, NULL, 1896 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1897 CTLTYPE_INT, "recvbuf_inc", 1898 SYSCTL_DESCR("Incrementor step size of " 1899 "automatic receive buffer"), 1900 NULL, 0, &tcp_autorcvbuf_inc, 0, 1901 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1902 sysctl_createv(clog, 0, NULL, NULL, 1903 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1904 CTLTYPE_INT, "recvbuf_max", 1905 SYSCTL_DESCR("Max size of automatic receive buffer"), 1906 NULL, 0, &tcp_autorcvbuf_max, 0, 1907 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1908 1909 sysctl_createv(clog, 0, NULL, NULL, 1910 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1911 CTLTYPE_INT, "sendbuf_auto", 1912 SYSCTL_DESCR("Enable automatic send " 1913 "buffer sizing (experimental)"), 1914 NULL, 0, &tcp_do_autosndbuf, 0, 1915 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1916 sysctl_createv(clog, 0, NULL, NULL, 1917 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1918 CTLTYPE_INT, "sendbuf_inc", 1919 SYSCTL_DESCR("Incrementor step size of " 1920 "automatic send buffer"), 1921 NULL, 0, &tcp_autosndbuf_inc, 0, 1922 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1923 sysctl_createv(clog, 0, NULL, NULL, 1924 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1925 CTLTYPE_INT, "sendbuf_max", 1926 SYSCTL_DESCR("Max size of automatic send buffer"), 1927 NULL, 0, &tcp_autosndbuf_max, 0, 1928 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1929 1930 /* ECN subtree */ 1931 sysctl_createv(clog, 0, NULL, &ecn_node, 1932 CTLFLAG_PERMANENT, 1933 CTLTYPE_NODE, "ecn", 1934 SYSCTL_DESCR("RFC3168 Explicit Congestion Notification"), 1935 NULL, 0, NULL, 0, 1936 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1937 sysctl_createv(clog, 0, &ecn_node, NULL, 1938 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1939 CTLTYPE_INT, "enable", 1940 SYSCTL_DESCR("Enable TCP Explicit Congestion " 1941 "Notification"), 1942 NULL, 0, &tcp_do_ecn, 0, CTL_CREATE, CTL_EOL); 1943 sysctl_createv(clog, 0, &ecn_node, NULL, 1944 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1945 CTLTYPE_INT, "maxretries", 1946 SYSCTL_DESCR("Number of times to retry ECN setup " 1947 "before disabling ECN on the connection"), 1948 NULL, 0, &tcp_ecn_maxretries, 0, CTL_CREATE, CTL_EOL); 1949 1950 /* SACK gets it's own little subtree. */ 1951 sysctl_createv(clog, 0, NULL, &sack_node, 1952 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1953 CTLTYPE_INT, "enable", 1954 SYSCTL_DESCR("Enable RFC2018 Selective ACKnowledgement"), 1955 NULL, 0, &tcp_do_sack, 0, 1956 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 1957 sysctl_createv(clog, 0, NULL, &sack_node, 1958 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1959 CTLTYPE_INT, "maxholes", 1960 SYSCTL_DESCR("Maximum number of TCP SACK holes allowed per connection"), 1961 NULL, 0, &tcp_sack_tp_maxholes, 0, 1962 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 1963 sysctl_createv(clog, 0, NULL, &sack_node, 1964 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1965 CTLTYPE_INT, "globalmaxholes", 1966 SYSCTL_DESCR("Global maximum number of TCP SACK holes"), 1967 NULL, 0, &tcp_sack_globalmaxholes, 0, 1968 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 1969 sysctl_createv(clog, 0, NULL, &sack_node, 1970 CTLFLAG_PERMANENT, 1971 CTLTYPE_INT, "globalholes", 1972 SYSCTL_DESCR("Global number of TCP SACK holes"), 1973 NULL, 0, &tcp_sack_globalholes, 0, 1974 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 1975 1976 sysctl_createv(clog, 0, NULL, NULL, 1977 CTLFLAG_PERMANENT, 1978 CTLTYPE_STRUCT, "stats", 1979 SYSCTL_DESCR("TCP statistics"), 1980 sysctl_net_inet_tcp_stats, 0, NULL, 0, 1981 CTL_NET, pf, IPPROTO_TCP, TCPCTL_STATS, 1982 CTL_EOL); 1983 #ifdef TCP_DEBUG 1984 sysctl_createv(clog, 0, NULL, NULL, 1985 CTLFLAG_PERMANENT, 1986 CTLTYPE_STRUCT, "debug", 1987 SYSCTL_DESCR("TCP sockets debug information"), 1988 NULL, 0, &tcp_debug, sizeof(tcp_debug), 1989 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DEBUG, 1990 CTL_EOL); 1991 sysctl_createv(clog, 0, NULL, NULL, 1992 CTLFLAG_PERMANENT, 1993 CTLTYPE_INT, "debx", 1994 SYSCTL_DESCR("Number of TCP debug sockets messages"), 1995 NULL, 0, &tcp_debx, sizeof(tcp_debx), 1996 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DEBX, 1997 CTL_EOL); 1998 #endif 1999 sysctl_createv(clog, 0, NULL, NULL, 2000 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2001 CTLTYPE_STRUCT, "drop", 2002 SYSCTL_DESCR("TCP drop connection"), 2003 sysctl_net_inet_tcp_drop, 0, NULL, 0, 2004 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DROP, CTL_EOL); 2005 #if NRND > 0 2006 sysctl_createv(clog, 0, NULL, NULL, 2007 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2008 CTLTYPE_INT, "iss_hash", 2009 SYSCTL_DESCR("Enable RFC 1948 ISS by cryptographic " 2010 "hash computation"), 2011 NULL, 0, &tcp_do_rfc1948, sizeof(tcp_do_rfc1948), 2012 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, 2013 CTL_EOL); 2014 #endif 2015 2016 /* ABC subtree */ 2017 2018 sysctl_createv(clog, 0, NULL, &abc_node, 2019 CTLFLAG_PERMANENT, CTLTYPE_NODE, "abc", 2020 SYSCTL_DESCR("RFC3465 Appropriate Byte Counting (ABC)"), 2021 NULL, 0, NULL, 0, 2022 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2023 sysctl_createv(clog, 0, &abc_node, NULL, 2024 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2025 CTLTYPE_INT, "enable", 2026 SYSCTL_DESCR("Enable RFC3465 Appropriate Byte Counting"), 2027 NULL, 0, &tcp_do_abc, 0, CTL_CREATE, CTL_EOL); 2028 sysctl_createv(clog, 0, &abc_node, NULL, 2029 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2030 CTLTYPE_INT, "aggressive", 2031 SYSCTL_DESCR("1: L=2*SMSS 0: L=1*SMSS"), 2032 NULL, 0, &tcp_abc_aggressive, 0, CTL_CREATE, CTL_EOL); 2033 } 2034 2035 /* 2036 * Sysctl for tcp variables. 2037 */ 2038 #ifdef INET 2039 SYSCTL_SETUP(sysctl_net_inet_tcp_setup, "sysctl net.inet.tcp subtree setup") 2040 { 2041 2042 sysctl_net_inet_tcp_setup2(clog, PF_INET, "inet", "tcp"); 2043 } 2044 #endif /* INET */ 2045 2046 #ifdef INET6 2047 SYSCTL_SETUP(sysctl_net_inet6_tcp6_setup, "sysctl net.inet6.tcp6 subtree setup") 2048 { 2049 2050 sysctl_net_inet_tcp_setup2(clog, PF_INET6, "inet6", "tcp6"); 2051 } 2052 #endif /* INET6 */ 2053