1 /* $NetBSD: tcp_usrreq.c,v 1.177 2014/05/22 00:28:32 rmind Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1997, 1998, 2005, 2006 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to The NetBSD Foundation 37 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation 38 * Facility, NASA Ames Research Center. 39 * This code is derived from software contributed to The NetBSD Foundation 40 * by Charles M. Hannum. 41 * This code is derived from software contributed to The NetBSD Foundation 42 * by Rui Paulo. 43 * 44 * Redistribution and use in source and binary forms, with or without 45 * modification, are permitted provided that the following conditions 46 * are met: 47 * 1. Redistributions of source code must retain the above copyright 48 * notice, this list of conditions and the following disclaimer. 49 * 2. Redistributions in binary form must reproduce the above copyright 50 * notice, this list of conditions and the following disclaimer in the 51 * documentation and/or other materials provided with the distribution. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 54 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 56 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 57 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 58 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 59 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 60 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 61 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 62 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 63 * POSSIBILITY OF SUCH DAMAGE. 64 */ 65 66 /* 67 * Copyright (c) 1982, 1986, 1988, 1993, 1995 68 * The Regents of the University of California. All rights reserved. 69 * 70 * Redistribution and use in source and binary forms, with or without 71 * modification, are permitted provided that the following conditions 72 * are met: 73 * 1. Redistributions of source code must retain the above copyright 74 * notice, this list of conditions and the following disclaimer. 75 * 2. Redistributions in binary form must reproduce the above copyright 76 * notice, this list of conditions and the following disclaimer in the 77 * documentation and/or other materials provided with the distribution. 78 * 3. Neither the name of the University nor the names of its contributors 79 * may be used to endorse or promote products derived from this software 80 * without specific prior written permission. 81 * 82 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 83 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 84 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 85 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 86 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 87 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 88 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 89 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 90 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 91 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 92 * SUCH DAMAGE. 93 * 94 * @(#)tcp_usrreq.c 8.5 (Berkeley) 6/21/95 95 */ 96 97 /* 98 * TCP protocol interface to socket abstraction. 99 */ 100 101 #include <sys/cdefs.h> 102 __KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.177 2014/05/22 00:28:32 rmind Exp $"); 103 104 #include "opt_inet.h" 105 #include "opt_ipsec.h" 106 #include "opt_tcp_debug.h" 107 #include "opt_mbuftrace.h" 108 109 #include <sys/param.h> 110 #include <sys/systm.h> 111 #include <sys/kernel.h> 112 #include <sys/malloc.h> 113 #include <sys/mbuf.h> 114 #include <sys/socket.h> 115 #include <sys/socketvar.h> 116 #include <sys/protosw.h> 117 #include <sys/errno.h> 118 #include <sys/stat.h> 119 #include <sys/proc.h> 120 #include <sys/domain.h> 121 #include <sys/sysctl.h> 122 #include <sys/kauth.h> 123 #include <sys/uidinfo.h> 124 125 #include <net/if.h> 126 #include <net/route.h> 127 128 #include <netinet/in.h> 129 #include <netinet/in_systm.h> 130 #include <netinet/in_var.h> 131 #include <netinet/ip.h> 132 #include <netinet/in_pcb.h> 133 #include <netinet/ip_var.h> 134 #include <netinet/in_offload.h> 135 136 #ifdef INET6 137 #ifndef INET 138 #include <netinet/in.h> 139 #endif 140 #include <netinet/ip6.h> 141 #include <netinet6/in6_pcb.h> 142 #include <netinet6/ip6_var.h> 143 #include <netinet6/scope6_var.h> 144 #endif 145 146 #include <netinet/tcp.h> 147 #include <netinet/tcp_fsm.h> 148 #include <netinet/tcp_seq.h> 149 #include <netinet/tcp_timer.h> 150 #include <netinet/tcp_var.h> 151 #include <netinet/tcp_private.h> 152 #include <netinet/tcp_congctl.h> 153 #include <netinet/tcpip.h> 154 #include <netinet/tcp_debug.h> 155 #include <netinet/tcp_vtw.h> 156 157 #include "opt_tcp_space.h" 158 159 /* 160 * Process a TCP user request for TCP tb. If this is a send request 161 * then m is the mbuf chain of send data. If this is a timer expiration 162 * (called from the software clock routine), then timertype tells which timer. 163 */ 164 static int 165 tcp_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 166 struct mbuf *control, struct lwp *l) 167 { 168 struct inpcb *inp; 169 #ifdef INET6 170 struct in6pcb *in6p; 171 #endif 172 struct tcpcb *tp = NULL; 173 int s; 174 int error = 0; 175 #ifdef TCP_DEBUG 176 int ostate = 0; 177 #endif 178 int family; /* family of the socket */ 179 180 KASSERT(req != PRU_ATTACH); 181 KASSERT(req != PRU_DETACH); 182 183 family = so->so_proto->pr_domain->dom_family; 184 185 if (req == PRU_CONTROL) { 186 switch (family) { 187 #ifdef INET 188 case PF_INET: 189 return (in_control(so, (long)m, (void *)nam, 190 (struct ifnet *)control, l)); 191 #endif 192 #ifdef INET6 193 case PF_INET6: 194 return (in6_control(so, (long)m, (void *)nam, 195 (struct ifnet *)control, l)); 196 #endif 197 default: 198 return EAFNOSUPPORT; 199 } 200 } 201 202 s = splsoftnet(); 203 204 if (req == PRU_PURGEIF) { 205 mutex_enter(softnet_lock); 206 switch (family) { 207 #ifdef INET 208 case PF_INET: 209 in_pcbpurgeif0(&tcbtable, (struct ifnet *)control); 210 in_purgeif((struct ifnet *)control); 211 in_pcbpurgeif(&tcbtable, (struct ifnet *)control); 212 break; 213 #endif 214 #ifdef INET6 215 case PF_INET6: 216 in6_pcbpurgeif0(&tcbtable, (struct ifnet *)control); 217 in6_purgeif((struct ifnet *)control); 218 in6_pcbpurgeif(&tcbtable, (struct ifnet *)control); 219 break; 220 #endif 221 default: 222 mutex_exit(softnet_lock); 223 splx(s); 224 return (EAFNOSUPPORT); 225 } 226 mutex_exit(softnet_lock); 227 splx(s); 228 return (0); 229 } 230 231 KASSERT(solocked(so)); 232 233 switch (family) { 234 #ifdef INET 235 case PF_INET: 236 inp = sotoinpcb(so); 237 #ifdef INET6 238 in6p = NULL; 239 #endif 240 break; 241 #endif 242 #ifdef INET6 243 case PF_INET6: 244 inp = NULL; 245 in6p = sotoin6pcb(so); 246 break; 247 #endif 248 default: 249 splx(s); 250 return EAFNOSUPPORT; 251 } 252 KASSERT(!control || (req == PRU_SEND || req == PRU_SENDOOB)); 253 #ifdef INET6 254 /* XXX: KASSERT((inp != NULL) ^ (in6p != NULL)); */ 255 #endif 256 /* 257 * When a TCP is attached to a socket, then there will be 258 * a (struct inpcb) pointed at by the socket, and this 259 * structure will point at a subsidary (struct tcpcb). 260 */ 261 if ((inp == NULL 262 #ifdef INET6 263 && in6p == NULL 264 #endif 265 ) && req != PRU_SENSE) 266 { 267 error = EINVAL; 268 goto release; 269 } 270 #ifdef INET 271 if (inp) { 272 tp = intotcpcb(inp); 273 /* WHAT IF TP IS 0? */ 274 #ifdef KPROF 275 tcp_acounts[tp->t_state][req]++; 276 #endif 277 #ifdef TCP_DEBUG 278 ostate = tp->t_state; 279 #endif 280 } 281 #endif 282 #ifdef INET6 283 if (in6p) { 284 tp = in6totcpcb(in6p); 285 /* WHAT IF TP IS 0? */ 286 #ifdef KPROF 287 tcp_acounts[tp->t_state][req]++; 288 #endif 289 #ifdef TCP_DEBUG 290 ostate = tp->t_state; 291 #endif 292 } 293 #endif 294 295 switch (req) { 296 297 /* 298 * Give the socket an address. 299 */ 300 case PRU_BIND: 301 switch (family) { 302 #ifdef INET 303 case PF_INET: 304 error = in_pcbbind(inp, nam, l); 305 break; 306 #endif 307 #ifdef INET6 308 case PF_INET6: 309 error = in6_pcbbind(in6p, nam, l); 310 if (!error) { 311 /* mapped addr case */ 312 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr)) 313 tp->t_family = AF_INET; 314 else 315 tp->t_family = AF_INET6; 316 } 317 break; 318 #endif 319 } 320 break; 321 322 /* 323 * Prepare to accept connections. 324 */ 325 case PRU_LISTEN: 326 #ifdef INET 327 if (inp && inp->inp_lport == 0) { 328 error = in_pcbbind(inp, NULL, l); 329 if (error) 330 break; 331 } 332 #endif 333 #ifdef INET6 334 if (in6p && in6p->in6p_lport == 0) { 335 error = in6_pcbbind(in6p, NULL, l); 336 if (error) 337 break; 338 } 339 #endif 340 tp->t_state = TCPS_LISTEN; 341 break; 342 343 /* 344 * Initiate connection to peer. 345 * Create a template for use in transmissions on this connection. 346 * Enter SYN_SENT state, and mark socket as connecting. 347 * Start keep-alive timer, and seed output sequence space. 348 * Send initial segment on connection. 349 */ 350 case PRU_CONNECT: 351 #ifdef INET 352 if (inp) { 353 if (inp->inp_lport == 0) { 354 error = in_pcbbind(inp, NULL, l); 355 if (error) 356 break; 357 } 358 error = in_pcbconnect(inp, nam, l); 359 } 360 #endif 361 #ifdef INET6 362 if (in6p) { 363 if (in6p->in6p_lport == 0) { 364 error = in6_pcbbind(in6p, NULL, l); 365 if (error) 366 break; 367 } 368 error = in6_pcbconnect(in6p, nam, l); 369 if (!error) { 370 /* mapped addr case */ 371 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr)) 372 tp->t_family = AF_INET; 373 else 374 tp->t_family = AF_INET6; 375 } 376 } 377 #endif 378 if (error) 379 break; 380 tp->t_template = tcp_template(tp); 381 if (tp->t_template == 0) { 382 #ifdef INET 383 if (inp) 384 in_pcbdisconnect(inp); 385 #endif 386 #ifdef INET6 387 if (in6p) 388 in6_pcbdisconnect(in6p); 389 #endif 390 error = ENOBUFS; 391 break; 392 } 393 /* 394 * Compute window scaling to request. 395 * XXX: This should be moved to tcp_output(). 396 */ 397 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 398 (TCP_MAXWIN << tp->request_r_scale) < sb_max) 399 tp->request_r_scale++; 400 soisconnecting(so); 401 TCP_STATINC(TCP_STAT_CONNATTEMPT); 402 tp->t_state = TCPS_SYN_SENT; 403 TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepinit); 404 tp->iss = tcp_new_iss(tp, 0); 405 tcp_sendseqinit(tp); 406 error = tcp_output(tp); 407 break; 408 409 /* 410 * Create a TCP connection between two sockets. 411 */ 412 case PRU_CONNECT2: 413 error = EOPNOTSUPP; 414 break; 415 416 /* 417 * Initiate disconnect from peer. 418 * If connection never passed embryonic stage, just drop; 419 * else if don't need to let data drain, then can just drop anyways, 420 * else have to begin TCP shutdown process: mark socket disconnecting, 421 * drain unread data, state switch to reflect user close, and 422 * send segment (e.g. FIN) to peer. Socket will be really disconnected 423 * when peer sends FIN and acks ours. 424 * 425 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 426 */ 427 case PRU_DISCONNECT: 428 tp = tcp_disconnect(tp); 429 break; 430 431 /* 432 * Accept a connection. Essentially all the work is 433 * done at higher levels; just return the address 434 * of the peer, storing through addr. 435 */ 436 case PRU_ACCEPT: 437 #ifdef INET 438 if (inp) 439 in_setpeeraddr(inp, nam); 440 #endif 441 #ifdef INET6 442 if (in6p) 443 in6_setpeeraddr(in6p, nam); 444 #endif 445 break; 446 447 /* 448 * Mark the connection as being incapable of further output. 449 */ 450 case PRU_SHUTDOWN: 451 socantsendmore(so); 452 tp = tcp_usrclosed(tp); 453 if (tp) 454 error = tcp_output(tp); 455 break; 456 457 /* 458 * After a receive, possibly send window update to peer. 459 */ 460 case PRU_RCVD: 461 /* 462 * soreceive() calls this function when a user receives 463 * ancillary data on a listening socket. We don't call 464 * tcp_output in such a case, since there is no header 465 * template for a listening socket and hence the kernel 466 * will panic. 467 */ 468 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0) 469 (void) tcp_output(tp); 470 break; 471 472 /* 473 * Do a send by putting data in output queue and updating urgent 474 * marker if URG set. Possibly send more data. 475 */ 476 case PRU_SEND: 477 if (control && control->m_len) { 478 m_freem(control); 479 m_freem(m); 480 error = EINVAL; 481 break; 482 } 483 sbappendstream(&so->so_snd, m); 484 error = tcp_output(tp); 485 break; 486 487 /* 488 * Abort the TCP. 489 */ 490 case PRU_ABORT: 491 tp = tcp_drop(tp, ECONNABORTED); 492 break; 493 494 case PRU_SENSE: 495 /* 496 * stat: don't bother with a blocksize. 497 */ 498 splx(s); 499 return (0); 500 501 case PRU_RCVOOB: 502 if (control && control->m_len) { 503 m_freem(control); 504 m_freem(m); 505 error = EINVAL; 506 break; 507 } 508 if ((so->so_oobmark == 0 && 509 (so->so_state & SS_RCVATMARK) == 0) || 510 so->so_options & SO_OOBINLINE || 511 tp->t_oobflags & TCPOOB_HADDATA) { 512 error = EINVAL; 513 break; 514 } 515 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 516 error = EWOULDBLOCK; 517 break; 518 } 519 m->m_len = 1; 520 *mtod(m, char *) = tp->t_iobc; 521 if (((long)nam & MSG_PEEK) == 0) 522 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 523 break; 524 525 case PRU_SENDOOB: 526 if (sbspace(&so->so_snd) < -512) { 527 m_freem(m); 528 error = ENOBUFS; 529 break; 530 } 531 /* 532 * According to RFC961 (Assigned Protocols), 533 * the urgent pointer points to the last octet 534 * of urgent data. We continue, however, 535 * to consider it to indicate the first octet 536 * of data past the urgent section. 537 * Otherwise, snd_up should be one lower. 538 */ 539 sbappendstream(&so->so_snd, m); 540 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 541 tp->t_force = 1; 542 error = tcp_output(tp); 543 tp->t_force = 0; 544 break; 545 546 case PRU_SOCKADDR: 547 #ifdef INET 548 if (inp) 549 in_setsockaddr(inp, nam); 550 #endif 551 #ifdef INET6 552 if (in6p) 553 in6_setsockaddr(in6p, nam); 554 #endif 555 break; 556 557 case PRU_PEERADDR: 558 #ifdef INET 559 if (inp) 560 in_setpeeraddr(inp, nam); 561 #endif 562 #ifdef INET6 563 if (in6p) 564 in6_setpeeraddr(in6p, nam); 565 #endif 566 break; 567 568 default: 569 panic("tcp_usrreq"); 570 } 571 #ifdef TCP_DEBUG 572 if (tp && (so->so_options & SO_DEBUG)) 573 tcp_trace(TA_USER, ostate, tp, NULL, req); 574 #endif 575 576 release: 577 splx(s); 578 return (error); 579 } 580 581 static void 582 change_keepalive(struct socket *so, struct tcpcb *tp) 583 { 584 tp->t_maxidle = tp->t_keepcnt * tp->t_keepintvl; 585 TCP_TIMER_DISARM(tp, TCPT_KEEP); 586 TCP_TIMER_DISARM(tp, TCPT_2MSL); 587 588 if (tp->t_state == TCPS_SYN_RECEIVED || 589 tp->t_state == TCPS_SYN_SENT) { 590 TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepinit); 591 } else if (so->so_options & SO_KEEPALIVE && 592 tp->t_state <= TCPS_CLOSE_WAIT) { 593 TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepintvl); 594 } else { 595 TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepidle); 596 } 597 598 if ((tp->t_state == TCPS_FIN_WAIT_2) && (tp->t_maxidle > 0)) 599 TCP_TIMER_ARM(tp, TCPT_2MSL, tp->t_maxidle); 600 } 601 602 603 int 604 tcp_ctloutput(int op, struct socket *so, struct sockopt *sopt) 605 { 606 int error = 0, s; 607 struct inpcb *inp; 608 #ifdef INET6 609 struct in6pcb *in6p; 610 #endif 611 struct tcpcb *tp; 612 u_int ui; 613 int family; /* family of the socket */ 614 int level, optname, optval; 615 616 level = sopt->sopt_level; 617 optname = sopt->sopt_name; 618 619 family = so->so_proto->pr_domain->dom_family; 620 621 s = splsoftnet(); 622 switch (family) { 623 #ifdef INET 624 case PF_INET: 625 inp = sotoinpcb(so); 626 #ifdef INET6 627 in6p = NULL; 628 #endif 629 break; 630 #endif 631 #ifdef INET6 632 case PF_INET6: 633 inp = NULL; 634 in6p = sotoin6pcb(so); 635 break; 636 #endif 637 default: 638 splx(s); 639 panic("%s: af %d", __func__, family); 640 } 641 #ifndef INET6 642 if (inp == NULL) 643 #else 644 if (inp == NULL && in6p == NULL) 645 #endif 646 { 647 splx(s); 648 return (ECONNRESET); 649 } 650 if (level != IPPROTO_TCP) { 651 switch (family) { 652 #ifdef INET 653 case PF_INET: 654 error = ip_ctloutput(op, so, sopt); 655 break; 656 #endif 657 #ifdef INET6 658 case PF_INET6: 659 error = ip6_ctloutput(op, so, sopt); 660 break; 661 #endif 662 } 663 splx(s); 664 return (error); 665 } 666 if (inp) 667 tp = intotcpcb(inp); 668 #ifdef INET6 669 else if (in6p) 670 tp = in6totcpcb(in6p); 671 #endif 672 else 673 tp = NULL; 674 675 switch (op) { 676 case PRCO_SETOPT: 677 switch (optname) { 678 #ifdef TCP_SIGNATURE 679 case TCP_MD5SIG: 680 error = sockopt_getint(sopt, &optval); 681 if (error) 682 break; 683 if (optval > 0) 684 tp->t_flags |= TF_SIGNATURE; 685 else 686 tp->t_flags &= ~TF_SIGNATURE; 687 break; 688 #endif /* TCP_SIGNATURE */ 689 690 case TCP_NODELAY: 691 error = sockopt_getint(sopt, &optval); 692 if (error) 693 break; 694 if (optval) 695 tp->t_flags |= TF_NODELAY; 696 else 697 tp->t_flags &= ~TF_NODELAY; 698 break; 699 700 case TCP_MAXSEG: 701 error = sockopt_getint(sopt, &optval); 702 if (error) 703 break; 704 if (optval > 0 && optval <= tp->t_peermss) 705 tp->t_peermss = optval; /* limit on send size */ 706 else 707 error = EINVAL; 708 break; 709 #ifdef notyet 710 case TCP_CONGCTL: 711 /* XXX string overflow XXX */ 712 error = tcp_congctl_select(tp, sopt->sopt_data); 713 break; 714 #endif 715 716 case TCP_KEEPIDLE: 717 error = sockopt_get(sopt, &ui, sizeof(ui)); 718 if (error) 719 break; 720 if (ui > 0) { 721 tp->t_keepidle = ui; 722 change_keepalive(so, tp); 723 } else 724 error = EINVAL; 725 break; 726 727 case TCP_KEEPINTVL: 728 error = sockopt_get(sopt, &ui, sizeof(ui)); 729 if (error) 730 break; 731 if (ui > 0) { 732 tp->t_keepintvl = ui; 733 change_keepalive(so, tp); 734 } else 735 error = EINVAL; 736 break; 737 738 case TCP_KEEPCNT: 739 error = sockopt_get(sopt, &ui, sizeof(ui)); 740 if (error) 741 break; 742 if (ui > 0) { 743 tp->t_keepcnt = ui; 744 change_keepalive(so, tp); 745 } else 746 error = EINVAL; 747 break; 748 749 case TCP_KEEPINIT: 750 error = sockopt_get(sopt, &ui, sizeof(ui)); 751 if (error) 752 break; 753 if (ui > 0) { 754 tp->t_keepinit = ui; 755 change_keepalive(so, tp); 756 } else 757 error = EINVAL; 758 break; 759 760 default: 761 error = ENOPROTOOPT; 762 break; 763 } 764 break; 765 766 case PRCO_GETOPT: 767 switch (optname) { 768 #ifdef TCP_SIGNATURE 769 case TCP_MD5SIG: 770 optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0; 771 error = sockopt_set(sopt, &optval, sizeof(optval)); 772 break; 773 #endif 774 case TCP_NODELAY: 775 optval = tp->t_flags & TF_NODELAY; 776 error = sockopt_set(sopt, &optval, sizeof(optval)); 777 break; 778 case TCP_MAXSEG: 779 optval = tp->t_peermss; 780 error = sockopt_set(sopt, &optval, sizeof(optval)); 781 break; 782 #ifdef notyet 783 case TCP_CONGCTL: 784 break; 785 #endif 786 default: 787 error = ENOPROTOOPT; 788 break; 789 } 790 break; 791 } 792 splx(s); 793 return (error); 794 } 795 796 #ifndef TCP_SENDSPACE 797 #define TCP_SENDSPACE 1024*32 798 #endif 799 int tcp_sendspace = TCP_SENDSPACE; 800 #ifndef TCP_RECVSPACE 801 #define TCP_RECVSPACE 1024*32 802 #endif 803 int tcp_recvspace = TCP_RECVSPACE; 804 805 /* 806 * tcp_attach: attach TCP protocol to socket, allocating internet protocol 807 * control block, TCP control block, buffer space and entering LISTEN state 808 * if to accept connections. 809 */ 810 static int 811 tcp_attach(struct socket *so, int proto) 812 { 813 struct tcpcb *tp; 814 struct inpcb *inp; 815 #ifdef INET6 816 struct in6pcb *in6p; 817 #endif 818 int s, error, family; 819 820 /* Assign the lock (must happen even if we will error out). */ 821 s = splsoftnet(); 822 sosetlock(so); 823 KASSERT(solocked(so)); 824 825 family = so->so_proto->pr_domain->dom_family; 826 switch (family) { 827 #ifdef INET 828 case PF_INET: 829 inp = sotoinpcb(so); 830 #ifdef INET6 831 in6p = NULL; 832 #endif 833 break; 834 #endif 835 #ifdef INET6 836 case PF_INET6: 837 inp = NULL; 838 in6p = sotoin6pcb(so); 839 break; 840 #endif 841 default: 842 error = EAFNOSUPPORT; 843 goto out; 844 } 845 846 KASSERT(inp == NULL); 847 #ifdef INET6 848 KASSERT(in6p == NULL); 849 #endif 850 851 #ifdef MBUFTRACE 852 so->so_mowner = &tcp_sock_mowner; 853 so->so_rcv.sb_mowner = &tcp_sock_rx_mowner; 854 so->so_snd.sb_mowner = &tcp_sock_tx_mowner; 855 #endif 856 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 857 error = soreserve(so, tcp_sendspace, tcp_recvspace); 858 if (error) 859 goto out; 860 } 861 862 so->so_rcv.sb_flags |= SB_AUTOSIZE; 863 so->so_snd.sb_flags |= SB_AUTOSIZE; 864 865 switch (family) { 866 #ifdef INET 867 case PF_INET: 868 error = in_pcballoc(so, &tcbtable); 869 if (error) 870 goto out; 871 inp = sotoinpcb(so); 872 #ifdef INET6 873 in6p = NULL; 874 #endif 875 break; 876 #endif 877 #ifdef INET6 878 case PF_INET6: 879 error = in6_pcballoc(so, &tcbtable); 880 if (error) 881 goto out; 882 inp = NULL; 883 in6p = sotoin6pcb(so); 884 break; 885 #endif 886 default: 887 error = EAFNOSUPPORT; 888 goto out; 889 } 890 if (inp) 891 tp = tcp_newtcpcb(family, (void *)inp); 892 #ifdef INET6 893 else if (in6p) 894 tp = tcp_newtcpcb(family, (void *)in6p); 895 #endif 896 else 897 tp = NULL; 898 899 if (tp == NULL) { 900 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 901 902 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 903 #ifdef INET 904 if (inp) 905 in_pcbdetach(inp); 906 #endif 907 #ifdef INET6 908 if (in6p) 909 in6_pcbdetach(in6p); 910 #endif 911 so->so_state |= nofd; 912 error = ENOBUFS; 913 goto out; 914 } 915 tp->t_state = TCPS_CLOSED; 916 if ((so->so_options & SO_LINGER) && so->so_linger == 0) { 917 so->so_linger = TCP_LINGERTIME; 918 } 919 out: 920 KASSERT(solocked(so)); 921 splx(s); 922 return error; 923 } 924 925 static void 926 tcp_detach(struct socket *so) 927 { 928 struct inpcb *inp; 929 #ifdef INET6 930 struct in6pcb *in6p; 931 #endif 932 struct tcpcb *tp = NULL; 933 int s, family; 934 935 KASSERT(solocked(so)); 936 937 s = splsoftnet(); 938 family = so->so_proto->pr_domain->dom_family; 939 switch (family) { 940 #ifdef INET 941 case PF_INET: 942 inp = sotoinpcb(so); 943 tp = intotcpcb(inp); 944 break; 945 #endif 946 #ifdef INET6 947 case PF_INET6: 948 in6p = sotoin6pcb(so); 949 tp = in6totcpcb(in6p); 950 break; 951 #endif 952 default: 953 splx(s); 954 return; 955 } 956 KASSERT(tp != NULL); 957 (void)tcp_disconnect(tp); 958 splx(s); 959 } 960 961 /* 962 * Initiate (or continue) disconnect. 963 * If embryonic state, just send reset (once). 964 * If in ``let data drain'' option and linger null, just drop. 965 * Otherwise (hard), mark socket disconnecting and drop 966 * current input data; switch states based on user close, and 967 * send segment to peer (with FIN). 968 */ 969 struct tcpcb * 970 tcp_disconnect(struct tcpcb *tp) 971 { 972 struct socket *so; 973 974 if (tp->t_inpcb) 975 so = tp->t_inpcb->inp_socket; 976 #ifdef INET6 977 else if (tp->t_in6pcb) 978 so = tp->t_in6pcb->in6p_socket; 979 #endif 980 else 981 so = NULL; 982 983 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 984 tp = tcp_close(tp); 985 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 986 tp = tcp_drop(tp, 0); 987 else { 988 soisdisconnecting(so); 989 sbflush(&so->so_rcv); 990 tp = tcp_usrclosed(tp); 991 if (tp) 992 (void) tcp_output(tp); 993 } 994 return (tp); 995 } 996 997 /* 998 * User issued close, and wish to trail through shutdown states: 999 * if never received SYN, just forget it. If got a SYN from peer, 1000 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 1001 * If already got a FIN from peer, then almost done; go to LAST_ACK 1002 * state. In all other cases, have already sent FIN to peer (e.g. 1003 * after PRU_SHUTDOWN), and just have to play tedious game waiting 1004 * for peer to send FIN or not respond to keep-alives, etc. 1005 * We can let the user exit from the close as soon as the FIN is acked. 1006 */ 1007 struct tcpcb * 1008 tcp_usrclosed(struct tcpcb *tp) 1009 { 1010 1011 switch (tp->t_state) { 1012 1013 case TCPS_CLOSED: 1014 case TCPS_LISTEN: 1015 case TCPS_SYN_SENT: 1016 tp->t_state = TCPS_CLOSED; 1017 tp = tcp_close(tp); 1018 break; 1019 1020 case TCPS_SYN_RECEIVED: 1021 case TCPS_ESTABLISHED: 1022 tp->t_state = TCPS_FIN_WAIT_1; 1023 break; 1024 1025 case TCPS_CLOSE_WAIT: 1026 tp->t_state = TCPS_LAST_ACK; 1027 break; 1028 } 1029 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 1030 struct socket *so; 1031 if (tp->t_inpcb) 1032 so = tp->t_inpcb->inp_socket; 1033 #ifdef INET6 1034 else if (tp->t_in6pcb) 1035 so = tp->t_in6pcb->in6p_socket; 1036 #endif 1037 else 1038 so = NULL; 1039 if (so) 1040 soisdisconnected(so); 1041 /* 1042 * If we are in FIN_WAIT_2, we arrived here because the 1043 * application did a shutdown of the send side. Like the 1044 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after 1045 * a full close, we start a timer to make sure sockets are 1046 * not left in FIN_WAIT_2 forever. 1047 */ 1048 if ((tp->t_state == TCPS_FIN_WAIT_2) && (tp->t_maxidle > 0)) 1049 TCP_TIMER_ARM(tp, TCPT_2MSL, tp->t_maxidle); 1050 else if (tp->t_state == TCPS_TIME_WAIT 1051 && ((tp->t_inpcb 1052 && (tcp4_vtw_enable & 1) 1053 && vtw_add(AF_INET, tp)) 1054 || 1055 (tp->t_in6pcb 1056 && (tcp6_vtw_enable & 1) 1057 && vtw_add(AF_INET6, tp)))) { 1058 tp = 0; 1059 } 1060 } 1061 return (tp); 1062 } 1063 1064 /* 1065 * sysctl helper routine for net.inet.ip.mssdflt. it can't be less 1066 * than 32. 1067 */ 1068 static int 1069 sysctl_net_inet_tcp_mssdflt(SYSCTLFN_ARGS) 1070 { 1071 int error, mssdflt; 1072 struct sysctlnode node; 1073 1074 mssdflt = tcp_mssdflt; 1075 node = *rnode; 1076 node.sysctl_data = &mssdflt; 1077 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1078 if (error || newp == NULL) 1079 return (error); 1080 1081 if (mssdflt < 32) 1082 return (EINVAL); 1083 tcp_mssdflt = mssdflt; 1084 1085 mutex_enter(softnet_lock); 1086 tcp_tcpcb_template(); 1087 mutex_exit(softnet_lock); 1088 1089 return (0); 1090 } 1091 1092 /* 1093 * sysctl helper for TCP CB template update 1094 */ 1095 static int 1096 sysctl_update_tcpcb_template(SYSCTLFN_ARGS) 1097 { 1098 int t, error; 1099 struct sysctlnode node; 1100 1101 /* follow procedures in sysctl(9) manpage */ 1102 t = *(int *)rnode->sysctl_data; 1103 node = *rnode; 1104 node.sysctl_data = &t; 1105 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1106 if (error || newp == NULL) 1107 return error; 1108 1109 if (t < 0) 1110 return EINVAL; 1111 1112 *(int *)rnode->sysctl_data = t; 1113 1114 mutex_enter(softnet_lock); 1115 tcp_tcpcb_template(); 1116 mutex_exit(softnet_lock); 1117 1118 return 0; 1119 } 1120 1121 /* 1122 * sysctl helper routine for setting port related values under 1123 * net.inet.ip and net.inet6.ip6. does basic range checking and does 1124 * additional checks for each type. this code has placed in 1125 * tcp_input.c since INET and INET6 both use the same tcp code. 1126 * 1127 * this helper is not static so that both inet and inet6 can use it. 1128 */ 1129 int 1130 sysctl_net_inet_ip_ports(SYSCTLFN_ARGS) 1131 { 1132 int error, tmp; 1133 int apmin, apmax; 1134 #ifndef IPNOPRIVPORTS 1135 int lpmin, lpmax; 1136 #endif /* IPNOPRIVPORTS */ 1137 struct sysctlnode node; 1138 1139 if (namelen != 0) 1140 return (EINVAL); 1141 1142 switch (name[-3]) { 1143 #ifdef INET 1144 case PF_INET: 1145 apmin = anonportmin; 1146 apmax = anonportmax; 1147 #ifndef IPNOPRIVPORTS 1148 lpmin = lowportmin; 1149 lpmax = lowportmax; 1150 #endif /* IPNOPRIVPORTS */ 1151 break; 1152 #endif /* INET */ 1153 #ifdef INET6 1154 case PF_INET6: 1155 apmin = ip6_anonportmin; 1156 apmax = ip6_anonportmax; 1157 #ifndef IPNOPRIVPORTS 1158 lpmin = ip6_lowportmin; 1159 lpmax = ip6_lowportmax; 1160 #endif /* IPNOPRIVPORTS */ 1161 break; 1162 #endif /* INET6 */ 1163 default: 1164 return (EINVAL); 1165 } 1166 1167 /* 1168 * insert temporary copy into node, perform lookup on 1169 * temporary, then restore pointer 1170 */ 1171 node = *rnode; 1172 tmp = *(int*)rnode->sysctl_data; 1173 node.sysctl_data = &tmp; 1174 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1175 if (error || newp == NULL) 1176 return (error); 1177 1178 /* 1179 * simple port range check 1180 */ 1181 if (tmp < 0 || tmp > 65535) 1182 return (EINVAL); 1183 1184 /* 1185 * per-node range checks 1186 */ 1187 switch (rnode->sysctl_num) { 1188 case IPCTL_ANONPORTMIN: 1189 case IPV6CTL_ANONPORTMIN: 1190 if (tmp >= apmax) 1191 return (EINVAL); 1192 #ifndef IPNOPRIVPORTS 1193 if (tmp < IPPORT_RESERVED) 1194 return (EINVAL); 1195 #endif /* IPNOPRIVPORTS */ 1196 break; 1197 1198 case IPCTL_ANONPORTMAX: 1199 case IPV6CTL_ANONPORTMAX: 1200 if (apmin >= tmp) 1201 return (EINVAL); 1202 #ifndef IPNOPRIVPORTS 1203 if (tmp < IPPORT_RESERVED) 1204 return (EINVAL); 1205 #endif /* IPNOPRIVPORTS */ 1206 break; 1207 1208 #ifndef IPNOPRIVPORTS 1209 case IPCTL_LOWPORTMIN: 1210 case IPV6CTL_LOWPORTMIN: 1211 if (tmp >= lpmax || 1212 tmp > IPPORT_RESERVEDMAX || 1213 tmp < IPPORT_RESERVEDMIN) 1214 return (EINVAL); 1215 break; 1216 1217 case IPCTL_LOWPORTMAX: 1218 case IPV6CTL_LOWPORTMAX: 1219 if (lpmin >= tmp || 1220 tmp > IPPORT_RESERVEDMAX || 1221 tmp < IPPORT_RESERVEDMIN) 1222 return (EINVAL); 1223 break; 1224 #endif /* IPNOPRIVPORTS */ 1225 1226 default: 1227 return (EINVAL); 1228 } 1229 1230 *(int*)rnode->sysctl_data = tmp; 1231 1232 return (0); 1233 } 1234 1235 static inline int 1236 copyout_uid(struct socket *sockp, void *oldp, size_t *oldlenp) 1237 { 1238 if (oldp) { 1239 size_t sz; 1240 uid_t uid; 1241 int error; 1242 1243 if (sockp->so_cred == NULL) 1244 return EPERM; 1245 1246 uid = kauth_cred_geteuid(sockp->so_cred); 1247 sz = MIN(sizeof(uid), *oldlenp); 1248 if ((error = copyout(&uid, oldp, sz)) != 0) 1249 return error; 1250 } 1251 *oldlenp = sizeof(uid_t); 1252 return 0; 1253 } 1254 1255 static inline int 1256 inet4_ident_core(struct in_addr raddr, u_int rport, 1257 struct in_addr laddr, u_int lport, 1258 void *oldp, size_t *oldlenp, 1259 struct lwp *l, int dodrop) 1260 { 1261 struct inpcb *inp; 1262 struct socket *sockp; 1263 1264 inp = in_pcblookup_connect(&tcbtable, raddr, rport, laddr, lport, 0); 1265 1266 if (inp == NULL || (sockp = inp->inp_socket) == NULL) 1267 return ESRCH; 1268 1269 if (dodrop) { 1270 struct tcpcb *tp; 1271 int error; 1272 1273 if (inp == NULL || (tp = intotcpcb(inp)) == NULL || 1274 (inp->inp_socket->so_options & SO_ACCEPTCONN) != 0) 1275 return ESRCH; 1276 1277 error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_SOCKET, 1278 KAUTH_REQ_NETWORK_SOCKET_DROP, inp->inp_socket, tp, NULL); 1279 if (error) 1280 return (error); 1281 1282 (void)tcp_drop(tp, ECONNABORTED); 1283 return 0; 1284 } 1285 else 1286 return copyout_uid(sockp, oldp, oldlenp); 1287 } 1288 1289 #ifdef INET6 1290 static inline int 1291 inet6_ident_core(struct in6_addr *raddr, u_int rport, 1292 struct in6_addr *laddr, u_int lport, 1293 void *oldp, size_t *oldlenp, 1294 struct lwp *l, int dodrop) 1295 { 1296 struct in6pcb *in6p; 1297 struct socket *sockp; 1298 1299 in6p = in6_pcblookup_connect(&tcbtable, raddr, rport, laddr, lport, 0, 0); 1300 1301 if (in6p == NULL || (sockp = in6p->in6p_socket) == NULL) 1302 return ESRCH; 1303 1304 if (dodrop) { 1305 struct tcpcb *tp; 1306 int error; 1307 1308 if (in6p == NULL || (tp = in6totcpcb(in6p)) == NULL || 1309 (in6p->in6p_socket->so_options & SO_ACCEPTCONN) != 0) 1310 return ESRCH; 1311 1312 error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_SOCKET, 1313 KAUTH_REQ_NETWORK_SOCKET_DROP, in6p->in6p_socket, tp, NULL); 1314 if (error) 1315 return (error); 1316 1317 (void)tcp_drop(tp, ECONNABORTED); 1318 return 0; 1319 } 1320 else 1321 return copyout_uid(sockp, oldp, oldlenp); 1322 } 1323 #endif 1324 1325 /* 1326 * sysctl helper routine for the net.inet.tcp.drop and 1327 * net.inet6.tcp6.drop nodes. 1328 */ 1329 #define sysctl_net_inet_tcp_drop sysctl_net_inet_tcp_ident 1330 1331 /* 1332 * sysctl helper routine for the net.inet.tcp.ident and 1333 * net.inet6.tcp6.ident nodes. contains backwards compat code for the 1334 * old way of looking up the ident information for ipv4 which involves 1335 * stuffing the port/addr pairs into the mib lookup. 1336 */ 1337 static int 1338 sysctl_net_inet_tcp_ident(SYSCTLFN_ARGS) 1339 { 1340 #ifdef INET 1341 struct sockaddr_in *si4[2]; 1342 #endif /* INET */ 1343 #ifdef INET6 1344 struct sockaddr_in6 *si6[2]; 1345 #endif /* INET6 */ 1346 struct sockaddr_storage sa[2]; 1347 int error, pf, dodrop; 1348 1349 dodrop = name[-1] == TCPCTL_DROP; 1350 if (dodrop) { 1351 if (oldp != NULL || *oldlenp != 0) 1352 return EINVAL; 1353 if (newp == NULL) 1354 return EPERM; 1355 if (newlen < sizeof(sa)) 1356 return ENOMEM; 1357 } 1358 if (namelen != 4 && namelen != 0) 1359 return EINVAL; 1360 if (name[-2] != IPPROTO_TCP) 1361 return EINVAL; 1362 pf = name[-3]; 1363 1364 /* old style lookup, ipv4 only */ 1365 if (namelen == 4) { 1366 #ifdef INET 1367 struct in_addr laddr, raddr; 1368 u_int lport, rport; 1369 1370 if (pf != PF_INET) 1371 return EPROTONOSUPPORT; 1372 raddr.s_addr = (uint32_t)name[0]; 1373 rport = (u_int)name[1]; 1374 laddr.s_addr = (uint32_t)name[2]; 1375 lport = (u_int)name[3]; 1376 1377 mutex_enter(softnet_lock); 1378 error = inet4_ident_core(raddr, rport, laddr, lport, 1379 oldp, oldlenp, l, dodrop); 1380 mutex_exit(softnet_lock); 1381 return error; 1382 #else /* INET */ 1383 return EINVAL; 1384 #endif /* INET */ 1385 } 1386 1387 if (newp == NULL || newlen != sizeof(sa)) 1388 return EINVAL; 1389 error = copyin(newp, &sa, newlen); 1390 if (error) 1391 return error; 1392 1393 /* 1394 * requested families must match 1395 */ 1396 if (pf != sa[0].ss_family || sa[0].ss_family != sa[1].ss_family) 1397 return EINVAL; 1398 1399 switch (pf) { 1400 #ifdef INET6 1401 case PF_INET6: 1402 si6[0] = (struct sockaddr_in6*)&sa[0]; 1403 si6[1] = (struct sockaddr_in6*)&sa[1]; 1404 if (si6[0]->sin6_len != sizeof(*si6[0]) || 1405 si6[1]->sin6_len != sizeof(*si6[1])) 1406 return EINVAL; 1407 1408 if (!IN6_IS_ADDR_V4MAPPED(&si6[0]->sin6_addr) && 1409 !IN6_IS_ADDR_V4MAPPED(&si6[1]->sin6_addr)) { 1410 error = sa6_embedscope(si6[0], ip6_use_defzone); 1411 if (error) 1412 return error; 1413 error = sa6_embedscope(si6[1], ip6_use_defzone); 1414 if (error) 1415 return error; 1416 1417 mutex_enter(softnet_lock); 1418 error = inet6_ident_core(&si6[0]->sin6_addr, 1419 si6[0]->sin6_port, &si6[1]->sin6_addr, 1420 si6[1]->sin6_port, oldp, oldlenp, l, dodrop); 1421 mutex_exit(softnet_lock); 1422 return error; 1423 } 1424 1425 if (IN6_IS_ADDR_V4MAPPED(&si6[0]->sin6_addr) != 1426 IN6_IS_ADDR_V4MAPPED(&si6[1]->sin6_addr)) 1427 return EINVAL; 1428 1429 in6_sin6_2_sin_in_sock((struct sockaddr *)&sa[0]); 1430 in6_sin6_2_sin_in_sock((struct sockaddr *)&sa[1]); 1431 /*FALLTHROUGH*/ 1432 #endif /* INET6 */ 1433 #ifdef INET 1434 case PF_INET: 1435 si4[0] = (struct sockaddr_in*)&sa[0]; 1436 si4[1] = (struct sockaddr_in*)&sa[1]; 1437 if (si4[0]->sin_len != sizeof(*si4[0]) || 1438 si4[0]->sin_len != sizeof(*si4[1])) 1439 return EINVAL; 1440 1441 mutex_enter(softnet_lock); 1442 error = inet4_ident_core(si4[0]->sin_addr, si4[0]->sin_port, 1443 si4[1]->sin_addr, si4[1]->sin_port, 1444 oldp, oldlenp, l, dodrop); 1445 mutex_exit(softnet_lock); 1446 return error; 1447 #endif /* INET */ 1448 default: 1449 return EPROTONOSUPPORT; 1450 } 1451 } 1452 1453 /* 1454 * sysctl helper for the inet and inet6 pcblists. handles tcp/udp and 1455 * inet/inet6, as well as raw pcbs for each. specifically not 1456 * declared static so that raw sockets and udp/udp6 can use it as 1457 * well. 1458 */ 1459 int 1460 sysctl_inpcblist(SYSCTLFN_ARGS) 1461 { 1462 #ifdef INET 1463 struct sockaddr_in *in; 1464 const struct inpcb *inp; 1465 #endif 1466 #ifdef INET6 1467 struct sockaddr_in6 *in6; 1468 const struct in6pcb *in6p; 1469 #endif 1470 struct inpcbtable *pcbtbl = __UNCONST(rnode->sysctl_data); 1471 const struct inpcb_hdr *inph; 1472 struct tcpcb *tp; 1473 struct kinfo_pcb pcb; 1474 char *dp; 1475 size_t len, needed, elem_size, out_size; 1476 int error, elem_count, pf, proto, pf2; 1477 1478 if (namelen != 4) 1479 return (EINVAL); 1480 1481 if (oldp != NULL) { 1482 len = *oldlenp; 1483 elem_size = name[2]; 1484 elem_count = name[3]; 1485 if (elem_size != sizeof(pcb)) 1486 return EINVAL; 1487 } else { 1488 len = 0; 1489 elem_count = INT_MAX; 1490 elem_size = sizeof(pcb); 1491 } 1492 error = 0; 1493 dp = oldp; 1494 out_size = elem_size; 1495 needed = 0; 1496 1497 if (namelen == 1 && name[0] == CTL_QUERY) 1498 return (sysctl_query(SYSCTLFN_CALL(rnode))); 1499 1500 if (name - oname != 4) 1501 return (EINVAL); 1502 1503 pf = oname[1]; 1504 proto = oname[2]; 1505 pf2 = (oldp != NULL) ? pf : 0; 1506 1507 mutex_enter(softnet_lock); 1508 1509 TAILQ_FOREACH(inph, &pcbtbl->inpt_queue, inph_queue) { 1510 #ifdef INET 1511 inp = (const struct inpcb *)inph; 1512 #endif 1513 #ifdef INET6 1514 in6p = (const struct in6pcb *)inph; 1515 #endif 1516 1517 if (inph->inph_af != pf) 1518 continue; 1519 1520 if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_SOCKET, 1521 KAUTH_REQ_NETWORK_SOCKET_CANSEE, inph->inph_socket, NULL, 1522 NULL) != 0) 1523 continue; 1524 1525 memset(&pcb, 0, sizeof(pcb)); 1526 1527 pcb.ki_family = pf; 1528 pcb.ki_type = proto; 1529 1530 switch (pf2) { 1531 case 0: 1532 /* just probing for size */ 1533 break; 1534 #ifdef INET 1535 case PF_INET: 1536 pcb.ki_family = inp->inp_socket->so_proto-> 1537 pr_domain->dom_family; 1538 pcb.ki_type = inp->inp_socket->so_proto-> 1539 pr_type; 1540 pcb.ki_protocol = inp->inp_socket->so_proto-> 1541 pr_protocol; 1542 pcb.ki_pflags = inp->inp_flags; 1543 1544 pcb.ki_sostate = inp->inp_socket->so_state; 1545 pcb.ki_prstate = inp->inp_state; 1546 if (proto == IPPROTO_TCP) { 1547 tp = intotcpcb(inp); 1548 pcb.ki_tstate = tp->t_state; 1549 pcb.ki_tflags = tp->t_flags; 1550 } 1551 1552 pcb.ki_pcbaddr = PTRTOUINT64(inp); 1553 pcb.ki_ppcbaddr = PTRTOUINT64(inp->inp_ppcb); 1554 pcb.ki_sockaddr = PTRTOUINT64(inp->inp_socket); 1555 1556 pcb.ki_rcvq = inp->inp_socket->so_rcv.sb_cc; 1557 pcb.ki_sndq = inp->inp_socket->so_snd.sb_cc; 1558 1559 in = satosin(&pcb.ki_src); 1560 in->sin_len = sizeof(*in); 1561 in->sin_family = pf; 1562 in->sin_port = inp->inp_lport; 1563 in->sin_addr = inp->inp_laddr; 1564 if (pcb.ki_prstate >= INP_CONNECTED) { 1565 in = satosin(&pcb.ki_dst); 1566 in->sin_len = sizeof(*in); 1567 in->sin_family = pf; 1568 in->sin_port = inp->inp_fport; 1569 in->sin_addr = inp->inp_faddr; 1570 } 1571 break; 1572 #endif 1573 #ifdef INET6 1574 case PF_INET6: 1575 pcb.ki_family = in6p->in6p_socket->so_proto-> 1576 pr_domain->dom_family; 1577 pcb.ki_type = in6p->in6p_socket->so_proto->pr_type; 1578 pcb.ki_protocol = in6p->in6p_socket->so_proto-> 1579 pr_protocol; 1580 pcb.ki_pflags = in6p->in6p_flags; 1581 1582 pcb.ki_sostate = in6p->in6p_socket->so_state; 1583 pcb.ki_prstate = in6p->in6p_state; 1584 if (proto == IPPROTO_TCP) { 1585 tp = in6totcpcb(in6p); 1586 pcb.ki_tstate = tp->t_state; 1587 pcb.ki_tflags = tp->t_flags; 1588 } 1589 1590 pcb.ki_pcbaddr = PTRTOUINT64(in6p); 1591 pcb.ki_ppcbaddr = PTRTOUINT64(in6p->in6p_ppcb); 1592 pcb.ki_sockaddr = PTRTOUINT64(in6p->in6p_socket); 1593 1594 pcb.ki_rcvq = in6p->in6p_socket->so_rcv.sb_cc; 1595 pcb.ki_sndq = in6p->in6p_socket->so_snd.sb_cc; 1596 1597 in6 = satosin6(&pcb.ki_src); 1598 in6->sin6_len = sizeof(*in6); 1599 in6->sin6_family = pf; 1600 in6->sin6_port = in6p->in6p_lport; 1601 in6->sin6_flowinfo = in6p->in6p_flowinfo; 1602 in6->sin6_addr = in6p->in6p_laddr; 1603 in6->sin6_scope_id = 0; /* XXX? */ 1604 1605 if (pcb.ki_prstate >= IN6P_CONNECTED) { 1606 in6 = satosin6(&pcb.ki_dst); 1607 in6->sin6_len = sizeof(*in6); 1608 in6->sin6_family = pf; 1609 in6->sin6_port = in6p->in6p_fport; 1610 in6->sin6_flowinfo = in6p->in6p_flowinfo; 1611 in6->sin6_addr = in6p->in6p_faddr; 1612 in6->sin6_scope_id = 0; /* XXX? */ 1613 } 1614 break; 1615 #endif 1616 } 1617 1618 if (len >= elem_size && elem_count > 0) { 1619 error = copyout(&pcb, dp, out_size); 1620 if (error) { 1621 mutex_exit(softnet_lock); 1622 return (error); 1623 } 1624 dp += elem_size; 1625 len -= elem_size; 1626 } 1627 needed += elem_size; 1628 if (elem_count > 0 && elem_count != INT_MAX) 1629 elem_count--; 1630 } 1631 1632 *oldlenp = needed; 1633 if (oldp == NULL) 1634 *oldlenp += PCB_SLOP * sizeof(struct kinfo_pcb); 1635 1636 mutex_exit(softnet_lock); 1637 1638 return (error); 1639 } 1640 1641 static int 1642 sysctl_tcp_congctl(SYSCTLFN_ARGS) 1643 { 1644 struct sysctlnode node; 1645 int error; 1646 char newname[TCPCC_MAXLEN]; 1647 1648 strlcpy(newname, tcp_congctl_global_name, sizeof(newname) - 1); 1649 1650 node = *rnode; 1651 node.sysctl_data = newname; 1652 node.sysctl_size = sizeof(newname); 1653 1654 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1655 1656 if (error || 1657 newp == NULL || 1658 strncmp(newname, tcp_congctl_global_name, sizeof(newname)) == 0) 1659 return error; 1660 1661 mutex_enter(softnet_lock); 1662 error = tcp_congctl_select(NULL, newname); 1663 mutex_exit(softnet_lock); 1664 1665 return error; 1666 } 1667 1668 static int 1669 sysctl_tcp_init_win(SYSCTLFN_ARGS) 1670 { 1671 int error; 1672 u_int iw; 1673 struct sysctlnode node; 1674 1675 iw = *(u_int *)rnode->sysctl_data; 1676 node = *rnode; 1677 node.sysctl_data = &iw; 1678 node.sysctl_size = sizeof(iw); 1679 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1680 if (error || newp == NULL) 1681 return error; 1682 1683 if (iw >= __arraycount(tcp_init_win_max)) 1684 return EINVAL; 1685 *(u_int *)rnode->sysctl_data = iw; 1686 return 0; 1687 } 1688 1689 static int 1690 sysctl_tcp_keep(SYSCTLFN_ARGS) 1691 { 1692 int error; 1693 u_int tmp; 1694 struct sysctlnode node; 1695 1696 node = *rnode; 1697 tmp = *(u_int *)rnode->sysctl_data; 1698 node.sysctl_data = &tmp; 1699 1700 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1701 if (error || newp == NULL) 1702 return error; 1703 1704 mutex_enter(softnet_lock); 1705 1706 *(u_int *)rnode->sysctl_data = tmp; 1707 tcp_tcpcb_template(); /* update the template */ 1708 1709 mutex_exit(softnet_lock); 1710 return 0; 1711 } 1712 1713 static int 1714 sysctl_net_inet_tcp_stats(SYSCTLFN_ARGS) 1715 { 1716 1717 return (NETSTAT_SYSCTL(tcpstat_percpu, TCP_NSTATS)); 1718 } 1719 1720 /* 1721 * this (second stage) setup routine is a replacement for tcp_sysctl() 1722 * (which is currently used for ipv4 and ipv6) 1723 */ 1724 static void 1725 sysctl_net_inet_tcp_setup2(struct sysctllog **clog, int pf, const char *pfname, 1726 const char *tcpname) 1727 { 1728 const struct sysctlnode *sack_node; 1729 const struct sysctlnode *abc_node; 1730 const struct sysctlnode *ecn_node; 1731 const struct sysctlnode *congctl_node; 1732 const struct sysctlnode *mslt_node; 1733 const struct sysctlnode *vtw_node; 1734 #ifdef TCP_DEBUG 1735 extern struct tcp_debug tcp_debug[TCP_NDEBUG]; 1736 extern int tcp_debx; 1737 #endif 1738 1739 sysctl_createv(clog, 0, NULL, NULL, 1740 CTLFLAG_PERMANENT, 1741 CTLTYPE_NODE, pfname, NULL, 1742 NULL, 0, NULL, 0, 1743 CTL_NET, pf, CTL_EOL); 1744 sysctl_createv(clog, 0, NULL, NULL, 1745 CTLFLAG_PERMANENT, 1746 CTLTYPE_NODE, tcpname, 1747 SYSCTL_DESCR("TCP related settings"), 1748 NULL, 0, NULL, 0, 1749 CTL_NET, pf, IPPROTO_TCP, CTL_EOL); 1750 1751 sysctl_createv(clog, 0, NULL, NULL, 1752 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1753 CTLTYPE_INT, "rfc1323", 1754 SYSCTL_DESCR("Enable RFC1323 TCP extensions"), 1755 sysctl_update_tcpcb_template, 0, &tcp_do_rfc1323, 0, 1756 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RFC1323, CTL_EOL); 1757 sysctl_createv(clog, 0, NULL, NULL, 1758 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1759 CTLTYPE_INT, "sendspace", 1760 SYSCTL_DESCR("Default TCP send buffer size"), 1761 NULL, 0, &tcp_sendspace, 0, 1762 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SENDSPACE, CTL_EOL); 1763 sysctl_createv(clog, 0, NULL, NULL, 1764 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1765 CTLTYPE_INT, "recvspace", 1766 SYSCTL_DESCR("Default TCP receive buffer size"), 1767 NULL, 0, &tcp_recvspace, 0, 1768 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RECVSPACE, CTL_EOL); 1769 sysctl_createv(clog, 0, NULL, NULL, 1770 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1771 CTLTYPE_INT, "mssdflt", 1772 SYSCTL_DESCR("Default maximum segment size"), 1773 sysctl_net_inet_tcp_mssdflt, 0, &tcp_mssdflt, 0, 1774 CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSSDFLT, CTL_EOL); 1775 sysctl_createv(clog, 0, NULL, NULL, 1776 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1777 CTLTYPE_INT, "minmss", 1778 SYSCTL_DESCR("Lower limit for TCP maximum segment size"), 1779 NULL, 0, &tcp_minmss, 0, 1780 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1781 sysctl_createv(clog, 0, NULL, NULL, 1782 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1783 CTLTYPE_INT, "msl", 1784 SYSCTL_DESCR("Maximum Segment Life"), 1785 NULL, 0, &tcp_msl, 0, 1786 CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSL, CTL_EOL); 1787 sysctl_createv(clog, 0, NULL, NULL, 1788 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1789 CTLTYPE_INT, "syn_cache_limit", 1790 SYSCTL_DESCR("Maximum number of entries in the TCP " 1791 "compressed state engine"), 1792 NULL, 0, &tcp_syn_cache_limit, 0, 1793 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_CACHE_LIMIT, 1794 CTL_EOL); 1795 sysctl_createv(clog, 0, NULL, NULL, 1796 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1797 CTLTYPE_INT, "syn_bucket_limit", 1798 SYSCTL_DESCR("Maximum number of entries per hash " 1799 "bucket in the TCP compressed state " 1800 "engine"), 1801 NULL, 0, &tcp_syn_bucket_limit, 0, 1802 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_BUCKET_LIMIT, 1803 CTL_EOL); 1804 #if 0 /* obsoleted */ 1805 sysctl_createv(clog, 0, NULL, NULL, 1806 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1807 CTLTYPE_INT, "syn_cache_interval", 1808 SYSCTL_DESCR("TCP compressed state engine's timer interval"), 1809 NULL, 0, &tcp_syn_cache_interval, 0, 1810 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_CACHE_INTER, 1811 CTL_EOL); 1812 #endif 1813 sysctl_createv(clog, 0, NULL, NULL, 1814 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1815 CTLTYPE_INT, "init_win", 1816 SYSCTL_DESCR("Initial TCP congestion window"), 1817 sysctl_tcp_init_win, 0, &tcp_init_win, 0, 1818 CTL_NET, pf, IPPROTO_TCP, TCPCTL_INIT_WIN, CTL_EOL); 1819 sysctl_createv(clog, 0, NULL, NULL, 1820 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1821 CTLTYPE_INT, "mss_ifmtu", 1822 SYSCTL_DESCR("Use interface MTU for calculating MSS"), 1823 NULL, 0, &tcp_mss_ifmtu, 0, 1824 CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSS_IFMTU, CTL_EOL); 1825 sysctl_createv(clog, 0, NULL, &sack_node, 1826 CTLFLAG_PERMANENT, 1827 CTLTYPE_NODE, "sack", 1828 SYSCTL_DESCR("RFC2018 Selective ACKnowledgement tunables"), 1829 NULL, 0, NULL, 0, 1830 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_EOL); 1831 1832 /* Congctl subtree */ 1833 sysctl_createv(clog, 0, NULL, &congctl_node, 1834 CTLFLAG_PERMANENT, 1835 CTLTYPE_NODE, "congctl", 1836 SYSCTL_DESCR("TCP Congestion Control"), 1837 NULL, 0, NULL, 0, 1838 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1839 sysctl_createv(clog, 0, &congctl_node, NULL, 1840 CTLFLAG_PERMANENT, 1841 CTLTYPE_STRING, "available", 1842 SYSCTL_DESCR("Available Congestion Control Mechanisms"), 1843 NULL, 0, tcp_congctl_avail, 0, CTL_CREATE, CTL_EOL); 1844 sysctl_createv(clog, 0, &congctl_node, NULL, 1845 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1846 CTLTYPE_STRING, "selected", 1847 SYSCTL_DESCR("Selected Congestion Control Mechanism"), 1848 sysctl_tcp_congctl, 0, NULL, TCPCC_MAXLEN, 1849 CTL_CREATE, CTL_EOL); 1850 1851 sysctl_createv(clog, 0, NULL, NULL, 1852 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1853 CTLTYPE_INT, "win_scale", 1854 SYSCTL_DESCR("Use RFC1323 window scale options"), 1855 sysctl_update_tcpcb_template, 0, &tcp_do_win_scale, 0, 1856 CTL_NET, pf, IPPROTO_TCP, TCPCTL_WSCALE, CTL_EOL); 1857 sysctl_createv(clog, 0, NULL, NULL, 1858 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1859 CTLTYPE_INT, "timestamps", 1860 SYSCTL_DESCR("Use RFC1323 time stamp options"), 1861 sysctl_update_tcpcb_template, 0, &tcp_do_timestamps, 0, 1862 CTL_NET, pf, IPPROTO_TCP, TCPCTL_TSTAMP, CTL_EOL); 1863 sysctl_createv(clog, 0, NULL, NULL, 1864 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1865 CTLTYPE_INT, "compat_42", 1866 SYSCTL_DESCR("Enable workarounds for 4.2BSD TCP bugs"), 1867 NULL, 0, &tcp_compat_42, 0, 1868 CTL_NET, pf, IPPROTO_TCP, TCPCTL_COMPAT_42, CTL_EOL); 1869 sysctl_createv(clog, 0, NULL, NULL, 1870 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1871 CTLTYPE_INT, "cwm", 1872 SYSCTL_DESCR("Hughes/Touch/Heidemann Congestion Window " 1873 "Monitoring"), 1874 NULL, 0, &tcp_cwm, 0, 1875 CTL_NET, pf, IPPROTO_TCP, TCPCTL_CWM, CTL_EOL); 1876 sysctl_createv(clog, 0, NULL, NULL, 1877 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1878 CTLTYPE_INT, "cwm_burstsize", 1879 SYSCTL_DESCR("Congestion Window Monitoring allowed " 1880 "burst count in packets"), 1881 NULL, 0, &tcp_cwm_burstsize, 0, 1882 CTL_NET, pf, IPPROTO_TCP, TCPCTL_CWM_BURSTSIZE, 1883 CTL_EOL); 1884 sysctl_createv(clog, 0, NULL, NULL, 1885 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1886 CTLTYPE_INT, "ack_on_push", 1887 SYSCTL_DESCR("Immediately return ACK when PSH is " 1888 "received"), 1889 NULL, 0, &tcp_ack_on_push, 0, 1890 CTL_NET, pf, IPPROTO_TCP, TCPCTL_ACK_ON_PUSH, CTL_EOL); 1891 sysctl_createv(clog, 0, NULL, NULL, 1892 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1893 CTLTYPE_INT, "keepidle", 1894 SYSCTL_DESCR("Allowed connection idle ticks before a " 1895 "keepalive probe is sent"), 1896 sysctl_tcp_keep, 0, &tcp_keepidle, 0, 1897 CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPIDLE, CTL_EOL); 1898 sysctl_createv(clog, 0, NULL, NULL, 1899 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1900 CTLTYPE_INT, "keepintvl", 1901 SYSCTL_DESCR("Ticks before next keepalive probe is sent"), 1902 sysctl_tcp_keep, 0, &tcp_keepintvl, 0, 1903 CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPINTVL, CTL_EOL); 1904 sysctl_createv(clog, 0, NULL, NULL, 1905 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1906 CTLTYPE_INT, "keepcnt", 1907 SYSCTL_DESCR("Number of keepalive probes to send"), 1908 sysctl_tcp_keep, 0, &tcp_keepcnt, 0, 1909 CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPCNT, CTL_EOL); 1910 sysctl_createv(clog, 0, NULL, NULL, 1911 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 1912 CTLTYPE_INT, "slowhz", 1913 SYSCTL_DESCR("Keepalive ticks per second"), 1914 NULL, PR_SLOWHZ, NULL, 0, 1915 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SLOWHZ, CTL_EOL); 1916 sysctl_createv(clog, 0, NULL, NULL, 1917 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1918 CTLTYPE_INT, "log_refused", 1919 SYSCTL_DESCR("Log refused TCP connections"), 1920 NULL, 0, &tcp_log_refused, 0, 1921 CTL_NET, pf, IPPROTO_TCP, TCPCTL_LOG_REFUSED, CTL_EOL); 1922 #if 0 /* obsoleted */ 1923 sysctl_createv(clog, 0, NULL, NULL, 1924 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1925 CTLTYPE_INT, "rstratelimit", NULL, 1926 NULL, 0, &tcp_rst_ratelim, 0, 1927 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RSTRATELIMIT, CTL_EOL); 1928 #endif 1929 sysctl_createv(clog, 0, NULL, NULL, 1930 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1931 CTLTYPE_INT, "rstppslimit", 1932 SYSCTL_DESCR("Maximum number of RST packets to send " 1933 "per second"), 1934 NULL, 0, &tcp_rst_ppslim, 0, 1935 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RSTPPSLIMIT, CTL_EOL); 1936 sysctl_createv(clog, 0, NULL, NULL, 1937 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1938 CTLTYPE_INT, "delack_ticks", 1939 SYSCTL_DESCR("Number of ticks to delay sending an ACK"), 1940 NULL, 0, &tcp_delack_ticks, 0, 1941 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DELACK_TICKS, CTL_EOL); 1942 sysctl_createv(clog, 0, NULL, NULL, 1943 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1944 CTLTYPE_INT, "init_win_local", 1945 SYSCTL_DESCR("Initial TCP window size (in segments)"), 1946 sysctl_tcp_init_win, 0, &tcp_init_win_local, 0, 1947 CTL_NET, pf, IPPROTO_TCP, TCPCTL_INIT_WIN_LOCAL, 1948 CTL_EOL); 1949 sysctl_createv(clog, 0, NULL, NULL, 1950 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1951 CTLTYPE_STRUCT, "ident", 1952 SYSCTL_DESCR("RFC1413 Identification Protocol lookups"), 1953 sysctl_net_inet_tcp_ident, 0, NULL, sizeof(uid_t), 1954 CTL_NET, pf, IPPROTO_TCP, TCPCTL_IDENT, CTL_EOL); 1955 sysctl_createv(clog, 0, NULL, NULL, 1956 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1957 CTLTYPE_INT, "do_loopback_cksum", 1958 SYSCTL_DESCR("Perform TCP checksum on loopback"), 1959 NULL, 0, &tcp_do_loopback_cksum, 0, 1960 CTL_NET, pf, IPPROTO_TCP, TCPCTL_LOOPBACKCKSUM, 1961 CTL_EOL); 1962 sysctl_createv(clog, 0, NULL, NULL, 1963 CTLFLAG_PERMANENT, 1964 CTLTYPE_STRUCT, "pcblist", 1965 SYSCTL_DESCR("TCP protocol control block list"), 1966 sysctl_inpcblist, 0, &tcbtable, 0, 1967 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, 1968 CTL_EOL); 1969 sysctl_createv(clog, 0, NULL, NULL, 1970 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1971 CTLTYPE_INT, "keepinit", 1972 SYSCTL_DESCR("Ticks before initial tcp connection times out"), 1973 sysctl_tcp_keep, 0, &tcp_keepinit, 0, 1974 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1975 1976 /* TCP socket buffers auto-sizing nodes */ 1977 sysctl_createv(clog, 0, NULL, NULL, 1978 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1979 CTLTYPE_INT, "recvbuf_auto", 1980 SYSCTL_DESCR("Enable automatic receive " 1981 "buffer sizing (experimental)"), 1982 NULL, 0, &tcp_do_autorcvbuf, 0, 1983 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1984 sysctl_createv(clog, 0, NULL, NULL, 1985 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1986 CTLTYPE_INT, "recvbuf_inc", 1987 SYSCTL_DESCR("Incrementor step size of " 1988 "automatic receive buffer"), 1989 NULL, 0, &tcp_autorcvbuf_inc, 0, 1990 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1991 sysctl_createv(clog, 0, NULL, NULL, 1992 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1993 CTLTYPE_INT, "recvbuf_max", 1994 SYSCTL_DESCR("Max size of automatic receive buffer"), 1995 NULL, 0, &tcp_autorcvbuf_max, 0, 1996 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1997 1998 sysctl_createv(clog, 0, NULL, NULL, 1999 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2000 CTLTYPE_INT, "sendbuf_auto", 2001 SYSCTL_DESCR("Enable automatic send " 2002 "buffer sizing (experimental)"), 2003 NULL, 0, &tcp_do_autosndbuf, 0, 2004 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2005 sysctl_createv(clog, 0, NULL, NULL, 2006 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2007 CTLTYPE_INT, "sendbuf_inc", 2008 SYSCTL_DESCR("Incrementor step size of " 2009 "automatic send buffer"), 2010 NULL, 0, &tcp_autosndbuf_inc, 0, 2011 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2012 sysctl_createv(clog, 0, NULL, NULL, 2013 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2014 CTLTYPE_INT, "sendbuf_max", 2015 SYSCTL_DESCR("Max size of automatic send buffer"), 2016 NULL, 0, &tcp_autosndbuf_max, 0, 2017 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2018 2019 /* ECN subtree */ 2020 sysctl_createv(clog, 0, NULL, &ecn_node, 2021 CTLFLAG_PERMANENT, 2022 CTLTYPE_NODE, "ecn", 2023 SYSCTL_DESCR("RFC3168 Explicit Congestion Notification"), 2024 NULL, 0, NULL, 0, 2025 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2026 sysctl_createv(clog, 0, &ecn_node, NULL, 2027 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2028 CTLTYPE_INT, "enable", 2029 SYSCTL_DESCR("Enable TCP Explicit Congestion " 2030 "Notification"), 2031 NULL, 0, &tcp_do_ecn, 0, CTL_CREATE, CTL_EOL); 2032 sysctl_createv(clog, 0, &ecn_node, NULL, 2033 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2034 CTLTYPE_INT, "maxretries", 2035 SYSCTL_DESCR("Number of times to retry ECN setup " 2036 "before disabling ECN on the connection"), 2037 NULL, 0, &tcp_ecn_maxretries, 0, CTL_CREATE, CTL_EOL); 2038 2039 /* SACK gets it's own little subtree. */ 2040 sysctl_createv(clog, 0, NULL, &sack_node, 2041 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2042 CTLTYPE_INT, "enable", 2043 SYSCTL_DESCR("Enable RFC2018 Selective ACKnowledgement"), 2044 NULL, 0, &tcp_do_sack, 0, 2045 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 2046 sysctl_createv(clog, 0, NULL, &sack_node, 2047 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2048 CTLTYPE_INT, "maxholes", 2049 SYSCTL_DESCR("Maximum number of TCP SACK holes allowed per connection"), 2050 NULL, 0, &tcp_sack_tp_maxholes, 0, 2051 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 2052 sysctl_createv(clog, 0, NULL, &sack_node, 2053 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2054 CTLTYPE_INT, "globalmaxholes", 2055 SYSCTL_DESCR("Global maximum number of TCP SACK holes"), 2056 NULL, 0, &tcp_sack_globalmaxholes, 0, 2057 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 2058 sysctl_createv(clog, 0, NULL, &sack_node, 2059 CTLFLAG_PERMANENT, 2060 CTLTYPE_INT, "globalholes", 2061 SYSCTL_DESCR("Global number of TCP SACK holes"), 2062 NULL, 0, &tcp_sack_globalholes, 0, 2063 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 2064 2065 sysctl_createv(clog, 0, NULL, NULL, 2066 CTLFLAG_PERMANENT, 2067 CTLTYPE_STRUCT, "stats", 2068 SYSCTL_DESCR("TCP statistics"), 2069 sysctl_net_inet_tcp_stats, 0, NULL, 0, 2070 CTL_NET, pf, IPPROTO_TCP, TCPCTL_STATS, 2071 CTL_EOL); 2072 sysctl_createv(clog, 0, NULL, NULL, 2073 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2074 CTLTYPE_INT, "local_by_rtt", 2075 SYSCTL_DESCR("Use RTT estimator to decide which hosts " 2076 "are local"), 2077 NULL, 0, &tcp_rttlocal, 0, 2078 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2079 #ifdef TCP_DEBUG 2080 sysctl_createv(clog, 0, NULL, NULL, 2081 CTLFLAG_PERMANENT, 2082 CTLTYPE_STRUCT, "debug", 2083 SYSCTL_DESCR("TCP sockets debug information"), 2084 NULL, 0, &tcp_debug, sizeof(tcp_debug), 2085 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DEBUG, 2086 CTL_EOL); 2087 sysctl_createv(clog, 0, NULL, NULL, 2088 CTLFLAG_PERMANENT, 2089 CTLTYPE_INT, "debx", 2090 SYSCTL_DESCR("Number of TCP debug sockets messages"), 2091 NULL, 0, &tcp_debx, sizeof(tcp_debx), 2092 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DEBX, 2093 CTL_EOL); 2094 #endif 2095 sysctl_createv(clog, 0, NULL, NULL, 2096 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2097 CTLTYPE_STRUCT, "drop", 2098 SYSCTL_DESCR("TCP drop connection"), 2099 sysctl_net_inet_tcp_drop, 0, NULL, 0, 2100 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DROP, CTL_EOL); 2101 sysctl_createv(clog, 0, NULL, NULL, 2102 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2103 CTLTYPE_INT, "iss_hash", 2104 SYSCTL_DESCR("Enable RFC 1948 ISS by cryptographic " 2105 "hash computation"), 2106 NULL, 0, &tcp_do_rfc1948, sizeof(tcp_do_rfc1948), 2107 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, 2108 CTL_EOL); 2109 2110 /* ABC subtree */ 2111 2112 sysctl_createv(clog, 0, NULL, &abc_node, 2113 CTLFLAG_PERMANENT, CTLTYPE_NODE, "abc", 2114 SYSCTL_DESCR("RFC3465 Appropriate Byte Counting (ABC)"), 2115 NULL, 0, NULL, 0, 2116 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2117 sysctl_createv(clog, 0, &abc_node, NULL, 2118 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2119 CTLTYPE_INT, "enable", 2120 SYSCTL_DESCR("Enable RFC3465 Appropriate Byte Counting"), 2121 NULL, 0, &tcp_do_abc, 0, CTL_CREATE, CTL_EOL); 2122 sysctl_createv(clog, 0, &abc_node, NULL, 2123 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2124 CTLTYPE_INT, "aggressive", 2125 SYSCTL_DESCR("1: L=2*SMSS 0: L=1*SMSS"), 2126 NULL, 0, &tcp_abc_aggressive, 0, CTL_CREATE, CTL_EOL); 2127 2128 /* MSL tuning subtree */ 2129 2130 sysctl_createv(clog, 0, NULL, &mslt_node, 2131 CTLFLAG_PERMANENT, CTLTYPE_NODE, "mslt", 2132 SYSCTL_DESCR("MSL Tuning for TIME_WAIT truncation"), 2133 NULL, 0, NULL, 0, 2134 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2135 sysctl_createv(clog, 0, &mslt_node, NULL, 2136 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2137 CTLTYPE_INT, "enable", 2138 SYSCTL_DESCR("Enable TIME_WAIT truncation"), 2139 NULL, 0, &tcp_msl_enable, 0, CTL_CREATE, CTL_EOL); 2140 sysctl_createv(clog, 0, &mslt_node, NULL, 2141 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2142 CTLTYPE_INT, "loopback", 2143 SYSCTL_DESCR("MSL value to use for loopback connections"), 2144 NULL, 0, &tcp_msl_loop, 0, CTL_CREATE, CTL_EOL); 2145 sysctl_createv(clog, 0, &mslt_node, NULL, 2146 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2147 CTLTYPE_INT, "local", 2148 SYSCTL_DESCR("MSL value to use for local connections"), 2149 NULL, 0, &tcp_msl_local, 0, CTL_CREATE, CTL_EOL); 2150 sysctl_createv(clog, 0, &mslt_node, NULL, 2151 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2152 CTLTYPE_INT, "remote", 2153 SYSCTL_DESCR("MSL value to use for remote connections"), 2154 NULL, 0, &tcp_msl_remote, 0, CTL_CREATE, CTL_EOL); 2155 sysctl_createv(clog, 0, &mslt_node, NULL, 2156 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2157 CTLTYPE_INT, "remote_threshold", 2158 SYSCTL_DESCR("RTT estimate value to promote local to remote"), 2159 NULL, 0, &tcp_msl_remote_threshold, 0, CTL_CREATE, CTL_EOL); 2160 2161 /* vestigial TIME_WAIT tuning subtree */ 2162 2163 sysctl_createv(clog, 0, NULL, &vtw_node, 2164 CTLFLAG_PERMANENT, CTLTYPE_NODE, "vtw", 2165 SYSCTL_DESCR("Tuning for Vestigial TIME_WAIT"), 2166 NULL, 0, NULL, 0, 2167 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2168 sysctl_createv(clog, 0, &vtw_node, NULL, 2169 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2170 CTLTYPE_INT, "enable", 2171 SYSCTL_DESCR("Enable Vestigial TIME_WAIT"), 2172 sysctl_tcp_vtw_enable, 0, 2173 (pf == AF_INET) ? &tcp4_vtw_enable : &tcp6_vtw_enable, 2174 0, CTL_CREATE, CTL_EOL); 2175 sysctl_createv(clog, 0, &vtw_node, NULL, 2176 CTLFLAG_PERMANENT|CTLFLAG_READONLY, 2177 CTLTYPE_INT, "entries", 2178 SYSCTL_DESCR("Maximum number of vestigial TIME_WAIT entries"), 2179 NULL, 0, &tcp_vtw_entries, 0, CTL_CREATE, CTL_EOL); 2180 } 2181 2182 void 2183 tcp_usrreq_init(void) 2184 { 2185 2186 #ifdef INET 2187 sysctl_net_inet_tcp_setup2(NULL, PF_INET, "inet", "tcp"); 2188 #endif 2189 #ifdef INET6 2190 sysctl_net_inet_tcp_setup2(NULL, PF_INET6, "inet6", "tcp6"); 2191 #endif 2192 } 2193 2194 PR_WRAP_USRREQS(tcp) 2195 #define tcp_attach tcp_attach_wrapper 2196 #define tcp_detach tcp_detach_wrapper 2197 #define tcp_usrreq tcp_usrreq_wrapper 2198 2199 const struct pr_usrreqs tcp_usrreqs = { 2200 .pr_attach = tcp_attach, 2201 .pr_detach = tcp_detach, 2202 .pr_generic = tcp_usrreq, 2203 }; 2204