1 /* $NetBSD: tcp_usrreq.c,v 1.180 2014/07/01 05:49:18 rtr Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1997, 1998, 2005, 2006 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to The NetBSD Foundation 37 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation 38 * Facility, NASA Ames Research Center. 39 * This code is derived from software contributed to The NetBSD Foundation 40 * by Charles M. Hannum. 41 * This code is derived from software contributed to The NetBSD Foundation 42 * by Rui Paulo. 43 * 44 * Redistribution and use in source and binary forms, with or without 45 * modification, are permitted provided that the following conditions 46 * are met: 47 * 1. Redistributions of source code must retain the above copyright 48 * notice, this list of conditions and the following disclaimer. 49 * 2. Redistributions in binary form must reproduce the above copyright 50 * notice, this list of conditions and the following disclaimer in the 51 * documentation and/or other materials provided with the distribution. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 54 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 56 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 57 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 58 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 59 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 60 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 61 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 62 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 63 * POSSIBILITY OF SUCH DAMAGE. 64 */ 65 66 /* 67 * Copyright (c) 1982, 1986, 1988, 1993, 1995 68 * The Regents of the University of California. All rights reserved. 69 * 70 * Redistribution and use in source and binary forms, with or without 71 * modification, are permitted provided that the following conditions 72 * are met: 73 * 1. Redistributions of source code must retain the above copyright 74 * notice, this list of conditions and the following disclaimer. 75 * 2. Redistributions in binary form must reproduce the above copyright 76 * notice, this list of conditions and the following disclaimer in the 77 * documentation and/or other materials provided with the distribution. 78 * 3. Neither the name of the University nor the names of its contributors 79 * may be used to endorse or promote products derived from this software 80 * without specific prior written permission. 81 * 82 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 83 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 84 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 85 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 86 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 87 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 88 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 89 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 90 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 91 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 92 * SUCH DAMAGE. 93 * 94 * @(#)tcp_usrreq.c 8.5 (Berkeley) 6/21/95 95 */ 96 97 /* 98 * TCP protocol interface to socket abstraction. 99 */ 100 101 #include <sys/cdefs.h> 102 __KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.180 2014/07/01 05:49:18 rtr Exp $"); 103 104 #include "opt_inet.h" 105 #include "opt_ipsec.h" 106 #include "opt_tcp_debug.h" 107 #include "opt_mbuftrace.h" 108 109 #include <sys/param.h> 110 #include <sys/systm.h> 111 #include <sys/kernel.h> 112 #include <sys/malloc.h> 113 #include <sys/mbuf.h> 114 #include <sys/socket.h> 115 #include <sys/socketvar.h> 116 #include <sys/protosw.h> 117 #include <sys/errno.h> 118 #include <sys/stat.h> 119 #include <sys/proc.h> 120 #include <sys/domain.h> 121 #include <sys/sysctl.h> 122 #include <sys/kauth.h> 123 #include <sys/uidinfo.h> 124 125 #include <net/if.h> 126 #include <net/route.h> 127 128 #include <netinet/in.h> 129 #include <netinet/in_systm.h> 130 #include <netinet/in_var.h> 131 #include <netinet/ip.h> 132 #include <netinet/in_pcb.h> 133 #include <netinet/ip_var.h> 134 #include <netinet/in_offload.h> 135 136 #ifdef INET6 137 #ifndef INET 138 #include <netinet/in.h> 139 #endif 140 #include <netinet/ip6.h> 141 #include <netinet6/in6_pcb.h> 142 #include <netinet6/ip6_var.h> 143 #include <netinet6/scope6_var.h> 144 #endif 145 146 #include <netinet/tcp.h> 147 #include <netinet/tcp_fsm.h> 148 #include <netinet/tcp_seq.h> 149 #include <netinet/tcp_timer.h> 150 #include <netinet/tcp_var.h> 151 #include <netinet/tcp_private.h> 152 #include <netinet/tcp_congctl.h> 153 #include <netinet/tcpip.h> 154 #include <netinet/tcp_debug.h> 155 #include <netinet/tcp_vtw.h> 156 157 #include "opt_tcp_space.h" 158 159 /* 160 * Process a TCP user request for TCP tb. If this is a send request 161 * then m is the mbuf chain of send data. If this is a timer expiration 162 * (called from the software clock routine), then timertype tells which timer. 163 */ 164 static int 165 tcp_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 166 struct mbuf *control, struct lwp *l) 167 { 168 struct inpcb *inp; 169 #ifdef INET6 170 struct in6pcb *in6p; 171 #endif 172 struct tcpcb *tp = NULL; 173 int s; 174 int error = 0; 175 #ifdef TCP_DEBUG 176 int ostate = 0; 177 #endif 178 int family; /* family of the socket */ 179 180 KASSERT(req != PRU_ATTACH); 181 KASSERT(req != PRU_DETACH); 182 KASSERT(req != PRU_CONTROL); 183 184 family = so->so_proto->pr_domain->dom_family; 185 186 s = splsoftnet(); 187 188 if (req == PRU_PURGEIF) { 189 mutex_enter(softnet_lock); 190 switch (family) { 191 #ifdef INET 192 case PF_INET: 193 in_pcbpurgeif0(&tcbtable, (struct ifnet *)control); 194 in_purgeif((struct ifnet *)control); 195 in_pcbpurgeif(&tcbtable, (struct ifnet *)control); 196 break; 197 #endif 198 #ifdef INET6 199 case PF_INET6: 200 in6_pcbpurgeif0(&tcbtable, (struct ifnet *)control); 201 in6_purgeif((struct ifnet *)control); 202 in6_pcbpurgeif(&tcbtable, (struct ifnet *)control); 203 break; 204 #endif 205 default: 206 mutex_exit(softnet_lock); 207 splx(s); 208 return (EAFNOSUPPORT); 209 } 210 mutex_exit(softnet_lock); 211 splx(s); 212 return (0); 213 } 214 215 KASSERT(solocked(so)); 216 217 switch (family) { 218 #ifdef INET 219 case PF_INET: 220 inp = sotoinpcb(so); 221 #ifdef INET6 222 in6p = NULL; 223 #endif 224 break; 225 #endif 226 #ifdef INET6 227 case PF_INET6: 228 inp = NULL; 229 in6p = sotoin6pcb(so); 230 break; 231 #endif 232 default: 233 splx(s); 234 return EAFNOSUPPORT; 235 } 236 KASSERT(!control || (req == PRU_SEND || req == PRU_SENDOOB)); 237 #ifdef INET6 238 /* XXX: KASSERT((inp != NULL) ^ (in6p != NULL)); */ 239 #endif 240 /* 241 * When a TCP is attached to a socket, then there will be 242 * a (struct inpcb) pointed at by the socket, and this 243 * structure will point at a subsidary (struct tcpcb). 244 */ 245 if ((inp == NULL 246 #ifdef INET6 247 && in6p == NULL 248 #endif 249 ) && req != PRU_SENSE) 250 { 251 error = EINVAL; 252 goto release; 253 } 254 #ifdef INET 255 if (inp) { 256 tp = intotcpcb(inp); 257 /* WHAT IF TP IS 0? */ 258 #ifdef KPROF 259 tcp_acounts[tp->t_state][req]++; 260 #endif 261 #ifdef TCP_DEBUG 262 ostate = tp->t_state; 263 #endif 264 } 265 #endif 266 #ifdef INET6 267 if (in6p) { 268 tp = in6totcpcb(in6p); 269 /* WHAT IF TP IS 0? */ 270 #ifdef KPROF 271 tcp_acounts[tp->t_state][req]++; 272 #endif 273 #ifdef TCP_DEBUG 274 ostate = tp->t_state; 275 #endif 276 } 277 #endif 278 279 switch (req) { 280 281 /* 282 * Give the socket an address. 283 */ 284 case PRU_BIND: 285 switch (family) { 286 #ifdef INET 287 case PF_INET: 288 error = in_pcbbind(inp, nam, l); 289 break; 290 #endif 291 #ifdef INET6 292 case PF_INET6: 293 error = in6_pcbbind(in6p, nam, l); 294 if (!error) { 295 /* mapped addr case */ 296 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr)) 297 tp->t_family = AF_INET; 298 else 299 tp->t_family = AF_INET6; 300 } 301 break; 302 #endif 303 } 304 break; 305 306 /* 307 * Prepare to accept connections. 308 */ 309 case PRU_LISTEN: 310 #ifdef INET 311 if (inp && inp->inp_lport == 0) { 312 error = in_pcbbind(inp, NULL, l); 313 if (error) 314 break; 315 } 316 #endif 317 #ifdef INET6 318 if (in6p && in6p->in6p_lport == 0) { 319 error = in6_pcbbind(in6p, NULL, l); 320 if (error) 321 break; 322 } 323 #endif 324 tp->t_state = TCPS_LISTEN; 325 break; 326 327 /* 328 * Initiate connection to peer. 329 * Create a template for use in transmissions on this connection. 330 * Enter SYN_SENT state, and mark socket as connecting. 331 * Start keep-alive timer, and seed output sequence space. 332 * Send initial segment on connection. 333 */ 334 case PRU_CONNECT: 335 #ifdef INET 336 if (inp) { 337 if (inp->inp_lport == 0) { 338 error = in_pcbbind(inp, NULL, l); 339 if (error) 340 break; 341 } 342 error = in_pcbconnect(inp, nam, l); 343 } 344 #endif 345 #ifdef INET6 346 if (in6p) { 347 if (in6p->in6p_lport == 0) { 348 error = in6_pcbbind(in6p, NULL, l); 349 if (error) 350 break; 351 } 352 error = in6_pcbconnect(in6p, nam, l); 353 if (!error) { 354 /* mapped addr case */ 355 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr)) 356 tp->t_family = AF_INET; 357 else 358 tp->t_family = AF_INET6; 359 } 360 } 361 #endif 362 if (error) 363 break; 364 tp->t_template = tcp_template(tp); 365 if (tp->t_template == 0) { 366 #ifdef INET 367 if (inp) 368 in_pcbdisconnect(inp); 369 #endif 370 #ifdef INET6 371 if (in6p) 372 in6_pcbdisconnect(in6p); 373 #endif 374 error = ENOBUFS; 375 break; 376 } 377 /* 378 * Compute window scaling to request. 379 * XXX: This should be moved to tcp_output(). 380 */ 381 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 382 (TCP_MAXWIN << tp->request_r_scale) < sb_max) 383 tp->request_r_scale++; 384 soisconnecting(so); 385 TCP_STATINC(TCP_STAT_CONNATTEMPT); 386 tp->t_state = TCPS_SYN_SENT; 387 TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepinit); 388 tp->iss = tcp_new_iss(tp, 0); 389 tcp_sendseqinit(tp); 390 error = tcp_output(tp); 391 break; 392 393 /* 394 * Create a TCP connection between two sockets. 395 */ 396 case PRU_CONNECT2: 397 error = EOPNOTSUPP; 398 break; 399 400 /* 401 * Initiate disconnect from peer. 402 * If connection never passed embryonic stage, just drop; 403 * else if don't need to let data drain, then can just drop anyways, 404 * else have to begin TCP shutdown process: mark socket disconnecting, 405 * drain unread data, state switch to reflect user close, and 406 * send segment (e.g. FIN) to peer. Socket will be really disconnected 407 * when peer sends FIN and acks ours. 408 * 409 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 410 */ 411 case PRU_DISCONNECT: 412 tp = tcp_disconnect(tp); 413 break; 414 415 /* 416 * Accept a connection. Essentially all the work is 417 * done at higher levels; just return the address 418 * of the peer, storing through addr. 419 */ 420 case PRU_ACCEPT: 421 #ifdef INET 422 if (inp) 423 in_setpeeraddr(inp, nam); 424 #endif 425 #ifdef INET6 426 if (in6p) 427 in6_setpeeraddr(in6p, nam); 428 #endif 429 break; 430 431 /* 432 * Mark the connection as being incapable of further output. 433 */ 434 case PRU_SHUTDOWN: 435 socantsendmore(so); 436 tp = tcp_usrclosed(tp); 437 if (tp) 438 error = tcp_output(tp); 439 break; 440 441 /* 442 * After a receive, possibly send window update to peer. 443 */ 444 case PRU_RCVD: 445 /* 446 * soreceive() calls this function when a user receives 447 * ancillary data on a listening socket. We don't call 448 * tcp_output in such a case, since there is no header 449 * template for a listening socket and hence the kernel 450 * will panic. 451 */ 452 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0) 453 (void) tcp_output(tp); 454 break; 455 456 /* 457 * Do a send by putting data in output queue and updating urgent 458 * marker if URG set. Possibly send more data. 459 */ 460 case PRU_SEND: 461 if (control && control->m_len) { 462 m_freem(control); 463 m_freem(m); 464 error = EINVAL; 465 break; 466 } 467 sbappendstream(&so->so_snd, m); 468 error = tcp_output(tp); 469 break; 470 471 /* 472 * Abort the TCP. 473 */ 474 case PRU_ABORT: 475 tp = tcp_drop(tp, ECONNABORTED); 476 break; 477 478 case PRU_SENSE: 479 /* 480 * stat: don't bother with a blocksize. 481 */ 482 splx(s); 483 return (0); 484 485 case PRU_RCVOOB: 486 if (control && control->m_len) { 487 m_freem(control); 488 m_freem(m); 489 error = EINVAL; 490 break; 491 } 492 if ((so->so_oobmark == 0 && 493 (so->so_state & SS_RCVATMARK) == 0) || 494 so->so_options & SO_OOBINLINE || 495 tp->t_oobflags & TCPOOB_HADDATA) { 496 error = EINVAL; 497 break; 498 } 499 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 500 error = EWOULDBLOCK; 501 break; 502 } 503 m->m_len = 1; 504 *mtod(m, char *) = tp->t_iobc; 505 if (((long)nam & MSG_PEEK) == 0) 506 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 507 break; 508 509 case PRU_SENDOOB: 510 if (sbspace(&so->so_snd) < -512) { 511 m_freem(m); 512 error = ENOBUFS; 513 break; 514 } 515 /* 516 * According to RFC961 (Assigned Protocols), 517 * the urgent pointer points to the last octet 518 * of urgent data. We continue, however, 519 * to consider it to indicate the first octet 520 * of data past the urgent section. 521 * Otherwise, snd_up should be one lower. 522 */ 523 sbappendstream(&so->so_snd, m); 524 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 525 tp->t_force = 1; 526 error = tcp_output(tp); 527 tp->t_force = 0; 528 break; 529 530 case PRU_SOCKADDR: 531 #ifdef INET 532 if (inp) 533 in_setsockaddr(inp, nam); 534 #endif 535 #ifdef INET6 536 if (in6p) 537 in6_setsockaddr(in6p, nam); 538 #endif 539 break; 540 541 case PRU_PEERADDR: 542 #ifdef INET 543 if (inp) 544 in_setpeeraddr(inp, nam); 545 #endif 546 #ifdef INET6 547 if (in6p) 548 in6_setpeeraddr(in6p, nam); 549 #endif 550 break; 551 552 default: 553 panic("tcp_usrreq"); 554 } 555 #ifdef TCP_DEBUG 556 if (tp && (so->so_options & SO_DEBUG)) 557 tcp_trace(TA_USER, ostate, tp, NULL, req); 558 #endif 559 560 release: 561 splx(s); 562 return (error); 563 } 564 565 static void 566 change_keepalive(struct socket *so, struct tcpcb *tp) 567 { 568 tp->t_maxidle = tp->t_keepcnt * tp->t_keepintvl; 569 TCP_TIMER_DISARM(tp, TCPT_KEEP); 570 TCP_TIMER_DISARM(tp, TCPT_2MSL); 571 572 if (tp->t_state == TCPS_SYN_RECEIVED || 573 tp->t_state == TCPS_SYN_SENT) { 574 TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepinit); 575 } else if (so->so_options & SO_KEEPALIVE && 576 tp->t_state <= TCPS_CLOSE_WAIT) { 577 TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepintvl); 578 } else { 579 TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepidle); 580 } 581 582 if ((tp->t_state == TCPS_FIN_WAIT_2) && (tp->t_maxidle > 0)) 583 TCP_TIMER_ARM(tp, TCPT_2MSL, tp->t_maxidle); 584 } 585 586 587 int 588 tcp_ctloutput(int op, struct socket *so, struct sockopt *sopt) 589 { 590 int error = 0, s; 591 struct inpcb *inp; 592 #ifdef INET6 593 struct in6pcb *in6p; 594 #endif 595 struct tcpcb *tp; 596 u_int ui; 597 int family; /* family of the socket */ 598 int level, optname, optval; 599 600 level = sopt->sopt_level; 601 optname = sopt->sopt_name; 602 603 family = so->so_proto->pr_domain->dom_family; 604 605 s = splsoftnet(); 606 switch (family) { 607 #ifdef INET 608 case PF_INET: 609 inp = sotoinpcb(so); 610 #ifdef INET6 611 in6p = NULL; 612 #endif 613 break; 614 #endif 615 #ifdef INET6 616 case PF_INET6: 617 inp = NULL; 618 in6p = sotoin6pcb(so); 619 break; 620 #endif 621 default: 622 splx(s); 623 panic("%s: af %d", __func__, family); 624 } 625 #ifndef INET6 626 if (inp == NULL) 627 #else 628 if (inp == NULL && in6p == NULL) 629 #endif 630 { 631 splx(s); 632 return (ECONNRESET); 633 } 634 if (level != IPPROTO_TCP) { 635 switch (family) { 636 #ifdef INET 637 case PF_INET: 638 error = ip_ctloutput(op, so, sopt); 639 break; 640 #endif 641 #ifdef INET6 642 case PF_INET6: 643 error = ip6_ctloutput(op, so, sopt); 644 break; 645 #endif 646 } 647 splx(s); 648 return (error); 649 } 650 if (inp) 651 tp = intotcpcb(inp); 652 #ifdef INET6 653 else if (in6p) 654 tp = in6totcpcb(in6p); 655 #endif 656 else 657 tp = NULL; 658 659 switch (op) { 660 case PRCO_SETOPT: 661 switch (optname) { 662 #ifdef TCP_SIGNATURE 663 case TCP_MD5SIG: 664 error = sockopt_getint(sopt, &optval); 665 if (error) 666 break; 667 if (optval > 0) 668 tp->t_flags |= TF_SIGNATURE; 669 else 670 tp->t_flags &= ~TF_SIGNATURE; 671 break; 672 #endif /* TCP_SIGNATURE */ 673 674 case TCP_NODELAY: 675 error = sockopt_getint(sopt, &optval); 676 if (error) 677 break; 678 if (optval) 679 tp->t_flags |= TF_NODELAY; 680 else 681 tp->t_flags &= ~TF_NODELAY; 682 break; 683 684 case TCP_MAXSEG: 685 error = sockopt_getint(sopt, &optval); 686 if (error) 687 break; 688 if (optval > 0 && optval <= tp->t_peermss) 689 tp->t_peermss = optval; /* limit on send size */ 690 else 691 error = EINVAL; 692 break; 693 #ifdef notyet 694 case TCP_CONGCTL: 695 /* XXX string overflow XXX */ 696 error = tcp_congctl_select(tp, sopt->sopt_data); 697 break; 698 #endif 699 700 case TCP_KEEPIDLE: 701 error = sockopt_get(sopt, &ui, sizeof(ui)); 702 if (error) 703 break; 704 if (ui > 0) { 705 tp->t_keepidle = ui; 706 change_keepalive(so, tp); 707 } else 708 error = EINVAL; 709 break; 710 711 case TCP_KEEPINTVL: 712 error = sockopt_get(sopt, &ui, sizeof(ui)); 713 if (error) 714 break; 715 if (ui > 0) { 716 tp->t_keepintvl = ui; 717 change_keepalive(so, tp); 718 } else 719 error = EINVAL; 720 break; 721 722 case TCP_KEEPCNT: 723 error = sockopt_get(sopt, &ui, sizeof(ui)); 724 if (error) 725 break; 726 if (ui > 0) { 727 tp->t_keepcnt = ui; 728 change_keepalive(so, tp); 729 } else 730 error = EINVAL; 731 break; 732 733 case TCP_KEEPINIT: 734 error = sockopt_get(sopt, &ui, sizeof(ui)); 735 if (error) 736 break; 737 if (ui > 0) { 738 tp->t_keepinit = ui; 739 change_keepalive(so, tp); 740 } else 741 error = EINVAL; 742 break; 743 744 default: 745 error = ENOPROTOOPT; 746 break; 747 } 748 break; 749 750 case PRCO_GETOPT: 751 switch (optname) { 752 #ifdef TCP_SIGNATURE 753 case TCP_MD5SIG: 754 optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0; 755 error = sockopt_set(sopt, &optval, sizeof(optval)); 756 break; 757 #endif 758 case TCP_NODELAY: 759 optval = tp->t_flags & TF_NODELAY; 760 error = sockopt_set(sopt, &optval, sizeof(optval)); 761 break; 762 case TCP_MAXSEG: 763 optval = tp->t_peermss; 764 error = sockopt_set(sopt, &optval, sizeof(optval)); 765 break; 766 #ifdef notyet 767 case TCP_CONGCTL: 768 break; 769 #endif 770 default: 771 error = ENOPROTOOPT; 772 break; 773 } 774 break; 775 } 776 splx(s); 777 return (error); 778 } 779 780 #ifndef TCP_SENDSPACE 781 #define TCP_SENDSPACE 1024*32 782 #endif 783 int tcp_sendspace = TCP_SENDSPACE; 784 #ifndef TCP_RECVSPACE 785 #define TCP_RECVSPACE 1024*32 786 #endif 787 int tcp_recvspace = TCP_RECVSPACE; 788 789 /* 790 * tcp_attach: attach TCP protocol to socket, allocating internet protocol 791 * control block, TCP control block, buffer space and entering LISTEN state 792 * if to accept connections. 793 */ 794 static int 795 tcp_attach(struct socket *so, int proto) 796 { 797 struct tcpcb *tp; 798 struct inpcb *inp; 799 #ifdef INET6 800 struct in6pcb *in6p; 801 #endif 802 int s, error, family; 803 804 /* Assign the lock (must happen even if we will error out). */ 805 s = splsoftnet(); 806 sosetlock(so); 807 KASSERT(solocked(so)); 808 809 family = so->so_proto->pr_domain->dom_family; 810 switch (family) { 811 #ifdef INET 812 case PF_INET: 813 inp = sotoinpcb(so); 814 #ifdef INET6 815 in6p = NULL; 816 #endif 817 break; 818 #endif 819 #ifdef INET6 820 case PF_INET6: 821 inp = NULL; 822 in6p = sotoin6pcb(so); 823 break; 824 #endif 825 default: 826 error = EAFNOSUPPORT; 827 goto out; 828 } 829 830 KASSERT(inp == NULL); 831 #ifdef INET6 832 KASSERT(in6p == NULL); 833 #endif 834 835 #ifdef MBUFTRACE 836 so->so_mowner = &tcp_sock_mowner; 837 so->so_rcv.sb_mowner = &tcp_sock_rx_mowner; 838 so->so_snd.sb_mowner = &tcp_sock_tx_mowner; 839 #endif 840 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 841 error = soreserve(so, tcp_sendspace, tcp_recvspace); 842 if (error) 843 goto out; 844 } 845 846 so->so_rcv.sb_flags |= SB_AUTOSIZE; 847 so->so_snd.sb_flags |= SB_AUTOSIZE; 848 849 switch (family) { 850 #ifdef INET 851 case PF_INET: 852 error = in_pcballoc(so, &tcbtable); 853 if (error) 854 goto out; 855 inp = sotoinpcb(so); 856 #ifdef INET6 857 in6p = NULL; 858 #endif 859 break; 860 #endif 861 #ifdef INET6 862 case PF_INET6: 863 error = in6_pcballoc(so, &tcbtable); 864 if (error) 865 goto out; 866 inp = NULL; 867 in6p = sotoin6pcb(so); 868 break; 869 #endif 870 default: 871 error = EAFNOSUPPORT; 872 goto out; 873 } 874 if (inp) 875 tp = tcp_newtcpcb(family, (void *)inp); 876 #ifdef INET6 877 else if (in6p) 878 tp = tcp_newtcpcb(family, (void *)in6p); 879 #endif 880 else 881 tp = NULL; 882 883 if (tp == NULL) { 884 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 885 886 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 887 #ifdef INET 888 if (inp) 889 in_pcbdetach(inp); 890 #endif 891 #ifdef INET6 892 if (in6p) 893 in6_pcbdetach(in6p); 894 #endif 895 so->so_state |= nofd; 896 error = ENOBUFS; 897 goto out; 898 } 899 tp->t_state = TCPS_CLOSED; 900 if ((so->so_options & SO_LINGER) && so->so_linger == 0) { 901 so->so_linger = TCP_LINGERTIME; 902 } 903 out: 904 KASSERT(solocked(so)); 905 splx(s); 906 return error; 907 } 908 909 static void 910 tcp_detach(struct socket *so) 911 { 912 struct inpcb *inp; 913 #ifdef INET6 914 struct in6pcb *in6p; 915 #endif 916 struct tcpcb *tp = NULL; 917 int s, family; 918 919 KASSERT(solocked(so)); 920 921 s = splsoftnet(); 922 family = so->so_proto->pr_domain->dom_family; 923 switch (family) { 924 #ifdef INET 925 case PF_INET: 926 inp = sotoinpcb(so); 927 tp = intotcpcb(inp); 928 break; 929 #endif 930 #ifdef INET6 931 case PF_INET6: 932 in6p = sotoin6pcb(so); 933 tp = in6totcpcb(in6p); 934 break; 935 #endif 936 default: 937 splx(s); 938 return; 939 } 940 KASSERT(tp != NULL); 941 (void)tcp_disconnect(tp); 942 splx(s); 943 } 944 945 static int 946 tcp_ioctl(struct socket *so, u_long cmd, void *nam, struct ifnet *ifp) 947 { 948 switch (so->so_proto->pr_domain->dom_family) { 949 #ifdef INET 950 case PF_INET: 951 return in_control(so, cmd, nam, ifp); 952 #endif 953 #ifdef INET6 954 case PF_INET6: 955 return in6_control(so, cmd, nam, ifp); 956 #endif 957 default: 958 return EAFNOSUPPORT; 959 } 960 } 961 962 /* 963 * Initiate (or continue) disconnect. 964 * If embryonic state, just send reset (once). 965 * If in ``let data drain'' option and linger null, just drop. 966 * Otherwise (hard), mark socket disconnecting and drop 967 * current input data; switch states based on user close, and 968 * send segment to peer (with FIN). 969 */ 970 struct tcpcb * 971 tcp_disconnect(struct tcpcb *tp) 972 { 973 struct socket *so; 974 975 if (tp->t_inpcb) 976 so = tp->t_inpcb->inp_socket; 977 #ifdef INET6 978 else if (tp->t_in6pcb) 979 so = tp->t_in6pcb->in6p_socket; 980 #endif 981 else 982 so = NULL; 983 984 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 985 tp = tcp_close(tp); 986 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 987 tp = tcp_drop(tp, 0); 988 else { 989 soisdisconnecting(so); 990 sbflush(&so->so_rcv); 991 tp = tcp_usrclosed(tp); 992 if (tp) 993 (void) tcp_output(tp); 994 } 995 return (tp); 996 } 997 998 /* 999 * User issued close, and wish to trail through shutdown states: 1000 * if never received SYN, just forget it. If got a SYN from peer, 1001 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 1002 * If already got a FIN from peer, then almost done; go to LAST_ACK 1003 * state. In all other cases, have already sent FIN to peer (e.g. 1004 * after PRU_SHUTDOWN), and just have to play tedious game waiting 1005 * for peer to send FIN or not respond to keep-alives, etc. 1006 * We can let the user exit from the close as soon as the FIN is acked. 1007 */ 1008 struct tcpcb * 1009 tcp_usrclosed(struct tcpcb *tp) 1010 { 1011 1012 switch (tp->t_state) { 1013 1014 case TCPS_CLOSED: 1015 case TCPS_LISTEN: 1016 case TCPS_SYN_SENT: 1017 tp->t_state = TCPS_CLOSED; 1018 tp = tcp_close(tp); 1019 break; 1020 1021 case TCPS_SYN_RECEIVED: 1022 case TCPS_ESTABLISHED: 1023 tp->t_state = TCPS_FIN_WAIT_1; 1024 break; 1025 1026 case TCPS_CLOSE_WAIT: 1027 tp->t_state = TCPS_LAST_ACK; 1028 break; 1029 } 1030 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 1031 struct socket *so; 1032 if (tp->t_inpcb) 1033 so = tp->t_inpcb->inp_socket; 1034 #ifdef INET6 1035 else if (tp->t_in6pcb) 1036 so = tp->t_in6pcb->in6p_socket; 1037 #endif 1038 else 1039 so = NULL; 1040 if (so) 1041 soisdisconnected(so); 1042 /* 1043 * If we are in FIN_WAIT_2, we arrived here because the 1044 * application did a shutdown of the send side. Like the 1045 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after 1046 * a full close, we start a timer to make sure sockets are 1047 * not left in FIN_WAIT_2 forever. 1048 */ 1049 if ((tp->t_state == TCPS_FIN_WAIT_2) && (tp->t_maxidle > 0)) 1050 TCP_TIMER_ARM(tp, TCPT_2MSL, tp->t_maxidle); 1051 else if (tp->t_state == TCPS_TIME_WAIT 1052 && ((tp->t_inpcb 1053 && (tcp4_vtw_enable & 1) 1054 && vtw_add(AF_INET, tp)) 1055 || 1056 (tp->t_in6pcb 1057 && (tcp6_vtw_enable & 1) 1058 && vtw_add(AF_INET6, tp)))) { 1059 tp = 0; 1060 } 1061 } 1062 return (tp); 1063 } 1064 1065 /* 1066 * sysctl helper routine for net.inet.ip.mssdflt. it can't be less 1067 * than 32. 1068 */ 1069 static int 1070 sysctl_net_inet_tcp_mssdflt(SYSCTLFN_ARGS) 1071 { 1072 int error, mssdflt; 1073 struct sysctlnode node; 1074 1075 mssdflt = tcp_mssdflt; 1076 node = *rnode; 1077 node.sysctl_data = &mssdflt; 1078 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1079 if (error || newp == NULL) 1080 return (error); 1081 1082 if (mssdflt < 32) 1083 return (EINVAL); 1084 tcp_mssdflt = mssdflt; 1085 1086 mutex_enter(softnet_lock); 1087 tcp_tcpcb_template(); 1088 mutex_exit(softnet_lock); 1089 1090 return (0); 1091 } 1092 1093 /* 1094 * sysctl helper for TCP CB template update 1095 */ 1096 static int 1097 sysctl_update_tcpcb_template(SYSCTLFN_ARGS) 1098 { 1099 int t, error; 1100 struct sysctlnode node; 1101 1102 /* follow procedures in sysctl(9) manpage */ 1103 t = *(int *)rnode->sysctl_data; 1104 node = *rnode; 1105 node.sysctl_data = &t; 1106 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1107 if (error || newp == NULL) 1108 return error; 1109 1110 if (t < 0) 1111 return EINVAL; 1112 1113 *(int *)rnode->sysctl_data = t; 1114 1115 mutex_enter(softnet_lock); 1116 tcp_tcpcb_template(); 1117 mutex_exit(softnet_lock); 1118 1119 return 0; 1120 } 1121 1122 /* 1123 * sysctl helper routine for setting port related values under 1124 * net.inet.ip and net.inet6.ip6. does basic range checking and does 1125 * additional checks for each type. this code has placed in 1126 * tcp_input.c since INET and INET6 both use the same tcp code. 1127 * 1128 * this helper is not static so that both inet and inet6 can use it. 1129 */ 1130 int 1131 sysctl_net_inet_ip_ports(SYSCTLFN_ARGS) 1132 { 1133 int error, tmp; 1134 int apmin, apmax; 1135 #ifndef IPNOPRIVPORTS 1136 int lpmin, lpmax; 1137 #endif /* IPNOPRIVPORTS */ 1138 struct sysctlnode node; 1139 1140 if (namelen != 0) 1141 return (EINVAL); 1142 1143 switch (name[-3]) { 1144 #ifdef INET 1145 case PF_INET: 1146 apmin = anonportmin; 1147 apmax = anonportmax; 1148 #ifndef IPNOPRIVPORTS 1149 lpmin = lowportmin; 1150 lpmax = lowportmax; 1151 #endif /* IPNOPRIVPORTS */ 1152 break; 1153 #endif /* INET */ 1154 #ifdef INET6 1155 case PF_INET6: 1156 apmin = ip6_anonportmin; 1157 apmax = ip6_anonportmax; 1158 #ifndef IPNOPRIVPORTS 1159 lpmin = ip6_lowportmin; 1160 lpmax = ip6_lowportmax; 1161 #endif /* IPNOPRIVPORTS */ 1162 break; 1163 #endif /* INET6 */ 1164 default: 1165 return (EINVAL); 1166 } 1167 1168 /* 1169 * insert temporary copy into node, perform lookup on 1170 * temporary, then restore pointer 1171 */ 1172 node = *rnode; 1173 tmp = *(int*)rnode->sysctl_data; 1174 node.sysctl_data = &tmp; 1175 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1176 if (error || newp == NULL) 1177 return (error); 1178 1179 /* 1180 * simple port range check 1181 */ 1182 if (tmp < 0 || tmp > 65535) 1183 return (EINVAL); 1184 1185 /* 1186 * per-node range checks 1187 */ 1188 switch (rnode->sysctl_num) { 1189 case IPCTL_ANONPORTMIN: 1190 case IPV6CTL_ANONPORTMIN: 1191 if (tmp >= apmax) 1192 return (EINVAL); 1193 #ifndef IPNOPRIVPORTS 1194 if (tmp < IPPORT_RESERVED) 1195 return (EINVAL); 1196 #endif /* IPNOPRIVPORTS */ 1197 break; 1198 1199 case IPCTL_ANONPORTMAX: 1200 case IPV6CTL_ANONPORTMAX: 1201 if (apmin >= tmp) 1202 return (EINVAL); 1203 #ifndef IPNOPRIVPORTS 1204 if (tmp < IPPORT_RESERVED) 1205 return (EINVAL); 1206 #endif /* IPNOPRIVPORTS */ 1207 break; 1208 1209 #ifndef IPNOPRIVPORTS 1210 case IPCTL_LOWPORTMIN: 1211 case IPV6CTL_LOWPORTMIN: 1212 if (tmp >= lpmax || 1213 tmp > IPPORT_RESERVEDMAX || 1214 tmp < IPPORT_RESERVEDMIN) 1215 return (EINVAL); 1216 break; 1217 1218 case IPCTL_LOWPORTMAX: 1219 case IPV6CTL_LOWPORTMAX: 1220 if (lpmin >= tmp || 1221 tmp > IPPORT_RESERVEDMAX || 1222 tmp < IPPORT_RESERVEDMIN) 1223 return (EINVAL); 1224 break; 1225 #endif /* IPNOPRIVPORTS */ 1226 1227 default: 1228 return (EINVAL); 1229 } 1230 1231 *(int*)rnode->sysctl_data = tmp; 1232 1233 return (0); 1234 } 1235 1236 static inline int 1237 copyout_uid(struct socket *sockp, void *oldp, size_t *oldlenp) 1238 { 1239 if (oldp) { 1240 size_t sz; 1241 uid_t uid; 1242 int error; 1243 1244 if (sockp->so_cred == NULL) 1245 return EPERM; 1246 1247 uid = kauth_cred_geteuid(sockp->so_cred); 1248 sz = MIN(sizeof(uid), *oldlenp); 1249 if ((error = copyout(&uid, oldp, sz)) != 0) 1250 return error; 1251 } 1252 *oldlenp = sizeof(uid_t); 1253 return 0; 1254 } 1255 1256 static inline int 1257 inet4_ident_core(struct in_addr raddr, u_int rport, 1258 struct in_addr laddr, u_int lport, 1259 void *oldp, size_t *oldlenp, 1260 struct lwp *l, int dodrop) 1261 { 1262 struct inpcb *inp; 1263 struct socket *sockp; 1264 1265 inp = in_pcblookup_connect(&tcbtable, raddr, rport, laddr, lport, 0); 1266 1267 if (inp == NULL || (sockp = inp->inp_socket) == NULL) 1268 return ESRCH; 1269 1270 if (dodrop) { 1271 struct tcpcb *tp; 1272 int error; 1273 1274 if (inp == NULL || (tp = intotcpcb(inp)) == NULL || 1275 (inp->inp_socket->so_options & SO_ACCEPTCONN) != 0) 1276 return ESRCH; 1277 1278 error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_SOCKET, 1279 KAUTH_REQ_NETWORK_SOCKET_DROP, inp->inp_socket, tp, NULL); 1280 if (error) 1281 return (error); 1282 1283 (void)tcp_drop(tp, ECONNABORTED); 1284 return 0; 1285 } 1286 else 1287 return copyout_uid(sockp, oldp, oldlenp); 1288 } 1289 1290 #ifdef INET6 1291 static inline int 1292 inet6_ident_core(struct in6_addr *raddr, u_int rport, 1293 struct in6_addr *laddr, u_int lport, 1294 void *oldp, size_t *oldlenp, 1295 struct lwp *l, int dodrop) 1296 { 1297 struct in6pcb *in6p; 1298 struct socket *sockp; 1299 1300 in6p = in6_pcblookup_connect(&tcbtable, raddr, rport, laddr, lport, 0, 0); 1301 1302 if (in6p == NULL || (sockp = in6p->in6p_socket) == NULL) 1303 return ESRCH; 1304 1305 if (dodrop) { 1306 struct tcpcb *tp; 1307 int error; 1308 1309 if (in6p == NULL || (tp = in6totcpcb(in6p)) == NULL || 1310 (in6p->in6p_socket->so_options & SO_ACCEPTCONN) != 0) 1311 return ESRCH; 1312 1313 error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_SOCKET, 1314 KAUTH_REQ_NETWORK_SOCKET_DROP, in6p->in6p_socket, tp, NULL); 1315 if (error) 1316 return (error); 1317 1318 (void)tcp_drop(tp, ECONNABORTED); 1319 return 0; 1320 } 1321 else 1322 return copyout_uid(sockp, oldp, oldlenp); 1323 } 1324 #endif 1325 1326 /* 1327 * sysctl helper routine for the net.inet.tcp.drop and 1328 * net.inet6.tcp6.drop nodes. 1329 */ 1330 #define sysctl_net_inet_tcp_drop sysctl_net_inet_tcp_ident 1331 1332 /* 1333 * sysctl helper routine for the net.inet.tcp.ident and 1334 * net.inet6.tcp6.ident nodes. contains backwards compat code for the 1335 * old way of looking up the ident information for ipv4 which involves 1336 * stuffing the port/addr pairs into the mib lookup. 1337 */ 1338 static int 1339 sysctl_net_inet_tcp_ident(SYSCTLFN_ARGS) 1340 { 1341 #ifdef INET 1342 struct sockaddr_in *si4[2]; 1343 #endif /* INET */ 1344 #ifdef INET6 1345 struct sockaddr_in6 *si6[2]; 1346 #endif /* INET6 */ 1347 struct sockaddr_storage sa[2]; 1348 int error, pf, dodrop; 1349 1350 dodrop = name[-1] == TCPCTL_DROP; 1351 if (dodrop) { 1352 if (oldp != NULL || *oldlenp != 0) 1353 return EINVAL; 1354 if (newp == NULL) 1355 return EPERM; 1356 if (newlen < sizeof(sa)) 1357 return ENOMEM; 1358 } 1359 if (namelen != 4 && namelen != 0) 1360 return EINVAL; 1361 if (name[-2] != IPPROTO_TCP) 1362 return EINVAL; 1363 pf = name[-3]; 1364 1365 /* old style lookup, ipv4 only */ 1366 if (namelen == 4) { 1367 #ifdef INET 1368 struct in_addr laddr, raddr; 1369 u_int lport, rport; 1370 1371 if (pf != PF_INET) 1372 return EPROTONOSUPPORT; 1373 raddr.s_addr = (uint32_t)name[0]; 1374 rport = (u_int)name[1]; 1375 laddr.s_addr = (uint32_t)name[2]; 1376 lport = (u_int)name[3]; 1377 1378 mutex_enter(softnet_lock); 1379 error = inet4_ident_core(raddr, rport, laddr, lport, 1380 oldp, oldlenp, l, dodrop); 1381 mutex_exit(softnet_lock); 1382 return error; 1383 #else /* INET */ 1384 return EINVAL; 1385 #endif /* INET */ 1386 } 1387 1388 if (newp == NULL || newlen != sizeof(sa)) 1389 return EINVAL; 1390 error = copyin(newp, &sa, newlen); 1391 if (error) 1392 return error; 1393 1394 /* 1395 * requested families must match 1396 */ 1397 if (pf != sa[0].ss_family || sa[0].ss_family != sa[1].ss_family) 1398 return EINVAL; 1399 1400 switch (pf) { 1401 #ifdef INET6 1402 case PF_INET6: 1403 si6[0] = (struct sockaddr_in6*)&sa[0]; 1404 si6[1] = (struct sockaddr_in6*)&sa[1]; 1405 if (si6[0]->sin6_len != sizeof(*si6[0]) || 1406 si6[1]->sin6_len != sizeof(*si6[1])) 1407 return EINVAL; 1408 1409 if (!IN6_IS_ADDR_V4MAPPED(&si6[0]->sin6_addr) && 1410 !IN6_IS_ADDR_V4MAPPED(&si6[1]->sin6_addr)) { 1411 error = sa6_embedscope(si6[0], ip6_use_defzone); 1412 if (error) 1413 return error; 1414 error = sa6_embedscope(si6[1], ip6_use_defzone); 1415 if (error) 1416 return error; 1417 1418 mutex_enter(softnet_lock); 1419 error = inet6_ident_core(&si6[0]->sin6_addr, 1420 si6[0]->sin6_port, &si6[1]->sin6_addr, 1421 si6[1]->sin6_port, oldp, oldlenp, l, dodrop); 1422 mutex_exit(softnet_lock); 1423 return error; 1424 } 1425 1426 if (IN6_IS_ADDR_V4MAPPED(&si6[0]->sin6_addr) != 1427 IN6_IS_ADDR_V4MAPPED(&si6[1]->sin6_addr)) 1428 return EINVAL; 1429 1430 in6_sin6_2_sin_in_sock((struct sockaddr *)&sa[0]); 1431 in6_sin6_2_sin_in_sock((struct sockaddr *)&sa[1]); 1432 /*FALLTHROUGH*/ 1433 #endif /* INET6 */ 1434 #ifdef INET 1435 case PF_INET: 1436 si4[0] = (struct sockaddr_in*)&sa[0]; 1437 si4[1] = (struct sockaddr_in*)&sa[1]; 1438 if (si4[0]->sin_len != sizeof(*si4[0]) || 1439 si4[0]->sin_len != sizeof(*si4[1])) 1440 return EINVAL; 1441 1442 mutex_enter(softnet_lock); 1443 error = inet4_ident_core(si4[0]->sin_addr, si4[0]->sin_port, 1444 si4[1]->sin_addr, si4[1]->sin_port, 1445 oldp, oldlenp, l, dodrop); 1446 mutex_exit(softnet_lock); 1447 return error; 1448 #endif /* INET */ 1449 default: 1450 return EPROTONOSUPPORT; 1451 } 1452 } 1453 1454 /* 1455 * sysctl helper for the inet and inet6 pcblists. handles tcp/udp and 1456 * inet/inet6, as well as raw pcbs for each. specifically not 1457 * declared static so that raw sockets and udp/udp6 can use it as 1458 * well. 1459 */ 1460 int 1461 sysctl_inpcblist(SYSCTLFN_ARGS) 1462 { 1463 #ifdef INET 1464 struct sockaddr_in *in; 1465 const struct inpcb *inp; 1466 #endif 1467 #ifdef INET6 1468 struct sockaddr_in6 *in6; 1469 const struct in6pcb *in6p; 1470 #endif 1471 struct inpcbtable *pcbtbl = __UNCONST(rnode->sysctl_data); 1472 const struct inpcb_hdr *inph; 1473 struct tcpcb *tp; 1474 struct kinfo_pcb pcb; 1475 char *dp; 1476 size_t len, needed, elem_size, out_size; 1477 int error, elem_count, pf, proto, pf2; 1478 1479 if (namelen != 4) 1480 return (EINVAL); 1481 1482 if (oldp != NULL) { 1483 len = *oldlenp; 1484 elem_size = name[2]; 1485 elem_count = name[3]; 1486 if (elem_size != sizeof(pcb)) 1487 return EINVAL; 1488 } else { 1489 len = 0; 1490 elem_count = INT_MAX; 1491 elem_size = sizeof(pcb); 1492 } 1493 error = 0; 1494 dp = oldp; 1495 out_size = elem_size; 1496 needed = 0; 1497 1498 if (namelen == 1 && name[0] == CTL_QUERY) 1499 return (sysctl_query(SYSCTLFN_CALL(rnode))); 1500 1501 if (name - oname != 4) 1502 return (EINVAL); 1503 1504 pf = oname[1]; 1505 proto = oname[2]; 1506 pf2 = (oldp != NULL) ? pf : 0; 1507 1508 mutex_enter(softnet_lock); 1509 1510 TAILQ_FOREACH(inph, &pcbtbl->inpt_queue, inph_queue) { 1511 #ifdef INET 1512 inp = (const struct inpcb *)inph; 1513 #endif 1514 #ifdef INET6 1515 in6p = (const struct in6pcb *)inph; 1516 #endif 1517 1518 if (inph->inph_af != pf) 1519 continue; 1520 1521 if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_SOCKET, 1522 KAUTH_REQ_NETWORK_SOCKET_CANSEE, inph->inph_socket, NULL, 1523 NULL) != 0) 1524 continue; 1525 1526 memset(&pcb, 0, sizeof(pcb)); 1527 1528 pcb.ki_family = pf; 1529 pcb.ki_type = proto; 1530 1531 switch (pf2) { 1532 case 0: 1533 /* just probing for size */ 1534 break; 1535 #ifdef INET 1536 case PF_INET: 1537 pcb.ki_family = inp->inp_socket->so_proto-> 1538 pr_domain->dom_family; 1539 pcb.ki_type = inp->inp_socket->so_proto-> 1540 pr_type; 1541 pcb.ki_protocol = inp->inp_socket->so_proto-> 1542 pr_protocol; 1543 pcb.ki_pflags = inp->inp_flags; 1544 1545 pcb.ki_sostate = inp->inp_socket->so_state; 1546 pcb.ki_prstate = inp->inp_state; 1547 if (proto == IPPROTO_TCP) { 1548 tp = intotcpcb(inp); 1549 pcb.ki_tstate = tp->t_state; 1550 pcb.ki_tflags = tp->t_flags; 1551 } 1552 1553 pcb.ki_pcbaddr = PTRTOUINT64(inp); 1554 pcb.ki_ppcbaddr = PTRTOUINT64(inp->inp_ppcb); 1555 pcb.ki_sockaddr = PTRTOUINT64(inp->inp_socket); 1556 1557 pcb.ki_rcvq = inp->inp_socket->so_rcv.sb_cc; 1558 pcb.ki_sndq = inp->inp_socket->so_snd.sb_cc; 1559 1560 in = satosin(&pcb.ki_src); 1561 in->sin_len = sizeof(*in); 1562 in->sin_family = pf; 1563 in->sin_port = inp->inp_lport; 1564 in->sin_addr = inp->inp_laddr; 1565 if (pcb.ki_prstate >= INP_CONNECTED) { 1566 in = satosin(&pcb.ki_dst); 1567 in->sin_len = sizeof(*in); 1568 in->sin_family = pf; 1569 in->sin_port = inp->inp_fport; 1570 in->sin_addr = inp->inp_faddr; 1571 } 1572 break; 1573 #endif 1574 #ifdef INET6 1575 case PF_INET6: 1576 pcb.ki_family = in6p->in6p_socket->so_proto-> 1577 pr_domain->dom_family; 1578 pcb.ki_type = in6p->in6p_socket->so_proto->pr_type; 1579 pcb.ki_protocol = in6p->in6p_socket->so_proto-> 1580 pr_protocol; 1581 pcb.ki_pflags = in6p->in6p_flags; 1582 1583 pcb.ki_sostate = in6p->in6p_socket->so_state; 1584 pcb.ki_prstate = in6p->in6p_state; 1585 if (proto == IPPROTO_TCP) { 1586 tp = in6totcpcb(in6p); 1587 pcb.ki_tstate = tp->t_state; 1588 pcb.ki_tflags = tp->t_flags; 1589 } 1590 1591 pcb.ki_pcbaddr = PTRTOUINT64(in6p); 1592 pcb.ki_ppcbaddr = PTRTOUINT64(in6p->in6p_ppcb); 1593 pcb.ki_sockaddr = PTRTOUINT64(in6p->in6p_socket); 1594 1595 pcb.ki_rcvq = in6p->in6p_socket->so_rcv.sb_cc; 1596 pcb.ki_sndq = in6p->in6p_socket->so_snd.sb_cc; 1597 1598 in6 = satosin6(&pcb.ki_src); 1599 in6->sin6_len = sizeof(*in6); 1600 in6->sin6_family = pf; 1601 in6->sin6_port = in6p->in6p_lport; 1602 in6->sin6_flowinfo = in6p->in6p_flowinfo; 1603 in6->sin6_addr = in6p->in6p_laddr; 1604 in6->sin6_scope_id = 0; /* XXX? */ 1605 1606 if (pcb.ki_prstate >= IN6P_CONNECTED) { 1607 in6 = satosin6(&pcb.ki_dst); 1608 in6->sin6_len = sizeof(*in6); 1609 in6->sin6_family = pf; 1610 in6->sin6_port = in6p->in6p_fport; 1611 in6->sin6_flowinfo = in6p->in6p_flowinfo; 1612 in6->sin6_addr = in6p->in6p_faddr; 1613 in6->sin6_scope_id = 0; /* XXX? */ 1614 } 1615 break; 1616 #endif 1617 } 1618 1619 if (len >= elem_size && elem_count > 0) { 1620 error = copyout(&pcb, dp, out_size); 1621 if (error) { 1622 mutex_exit(softnet_lock); 1623 return (error); 1624 } 1625 dp += elem_size; 1626 len -= elem_size; 1627 } 1628 needed += elem_size; 1629 if (elem_count > 0 && elem_count != INT_MAX) 1630 elem_count--; 1631 } 1632 1633 *oldlenp = needed; 1634 if (oldp == NULL) 1635 *oldlenp += PCB_SLOP * sizeof(struct kinfo_pcb); 1636 1637 mutex_exit(softnet_lock); 1638 1639 return (error); 1640 } 1641 1642 static int 1643 sysctl_tcp_congctl(SYSCTLFN_ARGS) 1644 { 1645 struct sysctlnode node; 1646 int error; 1647 char newname[TCPCC_MAXLEN]; 1648 1649 strlcpy(newname, tcp_congctl_global_name, sizeof(newname) - 1); 1650 1651 node = *rnode; 1652 node.sysctl_data = newname; 1653 node.sysctl_size = sizeof(newname); 1654 1655 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1656 1657 if (error || 1658 newp == NULL || 1659 strncmp(newname, tcp_congctl_global_name, sizeof(newname)) == 0) 1660 return error; 1661 1662 mutex_enter(softnet_lock); 1663 error = tcp_congctl_select(NULL, newname); 1664 mutex_exit(softnet_lock); 1665 1666 return error; 1667 } 1668 1669 static int 1670 sysctl_tcp_init_win(SYSCTLFN_ARGS) 1671 { 1672 int error; 1673 u_int iw; 1674 struct sysctlnode node; 1675 1676 iw = *(u_int *)rnode->sysctl_data; 1677 node = *rnode; 1678 node.sysctl_data = &iw; 1679 node.sysctl_size = sizeof(iw); 1680 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1681 if (error || newp == NULL) 1682 return error; 1683 1684 if (iw >= __arraycount(tcp_init_win_max)) 1685 return EINVAL; 1686 *(u_int *)rnode->sysctl_data = iw; 1687 return 0; 1688 } 1689 1690 static int 1691 sysctl_tcp_keep(SYSCTLFN_ARGS) 1692 { 1693 int error; 1694 u_int tmp; 1695 struct sysctlnode node; 1696 1697 node = *rnode; 1698 tmp = *(u_int *)rnode->sysctl_data; 1699 node.sysctl_data = &tmp; 1700 1701 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1702 if (error || newp == NULL) 1703 return error; 1704 1705 mutex_enter(softnet_lock); 1706 1707 *(u_int *)rnode->sysctl_data = tmp; 1708 tcp_tcpcb_template(); /* update the template */ 1709 1710 mutex_exit(softnet_lock); 1711 return 0; 1712 } 1713 1714 static int 1715 sysctl_net_inet_tcp_stats(SYSCTLFN_ARGS) 1716 { 1717 1718 return (NETSTAT_SYSCTL(tcpstat_percpu, TCP_NSTATS)); 1719 } 1720 1721 /* 1722 * this (second stage) setup routine is a replacement for tcp_sysctl() 1723 * (which is currently used for ipv4 and ipv6) 1724 */ 1725 static void 1726 sysctl_net_inet_tcp_setup2(struct sysctllog **clog, int pf, const char *pfname, 1727 const char *tcpname) 1728 { 1729 const struct sysctlnode *sack_node; 1730 const struct sysctlnode *abc_node; 1731 const struct sysctlnode *ecn_node; 1732 const struct sysctlnode *congctl_node; 1733 const struct sysctlnode *mslt_node; 1734 const struct sysctlnode *vtw_node; 1735 #ifdef TCP_DEBUG 1736 extern struct tcp_debug tcp_debug[TCP_NDEBUG]; 1737 extern int tcp_debx; 1738 #endif 1739 1740 sysctl_createv(clog, 0, NULL, NULL, 1741 CTLFLAG_PERMANENT, 1742 CTLTYPE_NODE, pfname, NULL, 1743 NULL, 0, NULL, 0, 1744 CTL_NET, pf, CTL_EOL); 1745 sysctl_createv(clog, 0, NULL, NULL, 1746 CTLFLAG_PERMANENT, 1747 CTLTYPE_NODE, tcpname, 1748 SYSCTL_DESCR("TCP related settings"), 1749 NULL, 0, NULL, 0, 1750 CTL_NET, pf, IPPROTO_TCP, CTL_EOL); 1751 1752 sysctl_createv(clog, 0, NULL, NULL, 1753 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1754 CTLTYPE_INT, "rfc1323", 1755 SYSCTL_DESCR("Enable RFC1323 TCP extensions"), 1756 sysctl_update_tcpcb_template, 0, &tcp_do_rfc1323, 0, 1757 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RFC1323, CTL_EOL); 1758 sysctl_createv(clog, 0, NULL, NULL, 1759 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1760 CTLTYPE_INT, "sendspace", 1761 SYSCTL_DESCR("Default TCP send buffer size"), 1762 NULL, 0, &tcp_sendspace, 0, 1763 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SENDSPACE, CTL_EOL); 1764 sysctl_createv(clog, 0, NULL, NULL, 1765 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1766 CTLTYPE_INT, "recvspace", 1767 SYSCTL_DESCR("Default TCP receive buffer size"), 1768 NULL, 0, &tcp_recvspace, 0, 1769 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RECVSPACE, CTL_EOL); 1770 sysctl_createv(clog, 0, NULL, NULL, 1771 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1772 CTLTYPE_INT, "mssdflt", 1773 SYSCTL_DESCR("Default maximum segment size"), 1774 sysctl_net_inet_tcp_mssdflt, 0, &tcp_mssdflt, 0, 1775 CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSSDFLT, CTL_EOL); 1776 sysctl_createv(clog, 0, NULL, NULL, 1777 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1778 CTLTYPE_INT, "minmss", 1779 SYSCTL_DESCR("Lower limit for TCP maximum segment size"), 1780 NULL, 0, &tcp_minmss, 0, 1781 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1782 sysctl_createv(clog, 0, NULL, NULL, 1783 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1784 CTLTYPE_INT, "msl", 1785 SYSCTL_DESCR("Maximum Segment Life"), 1786 NULL, 0, &tcp_msl, 0, 1787 CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSL, CTL_EOL); 1788 sysctl_createv(clog, 0, NULL, NULL, 1789 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1790 CTLTYPE_INT, "syn_cache_limit", 1791 SYSCTL_DESCR("Maximum number of entries in the TCP " 1792 "compressed state engine"), 1793 NULL, 0, &tcp_syn_cache_limit, 0, 1794 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_CACHE_LIMIT, 1795 CTL_EOL); 1796 sysctl_createv(clog, 0, NULL, NULL, 1797 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1798 CTLTYPE_INT, "syn_bucket_limit", 1799 SYSCTL_DESCR("Maximum number of entries per hash " 1800 "bucket in the TCP compressed state " 1801 "engine"), 1802 NULL, 0, &tcp_syn_bucket_limit, 0, 1803 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_BUCKET_LIMIT, 1804 CTL_EOL); 1805 #if 0 /* obsoleted */ 1806 sysctl_createv(clog, 0, NULL, NULL, 1807 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1808 CTLTYPE_INT, "syn_cache_interval", 1809 SYSCTL_DESCR("TCP compressed state engine's timer interval"), 1810 NULL, 0, &tcp_syn_cache_interval, 0, 1811 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_CACHE_INTER, 1812 CTL_EOL); 1813 #endif 1814 sysctl_createv(clog, 0, NULL, NULL, 1815 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1816 CTLTYPE_INT, "init_win", 1817 SYSCTL_DESCR("Initial TCP congestion window"), 1818 sysctl_tcp_init_win, 0, &tcp_init_win, 0, 1819 CTL_NET, pf, IPPROTO_TCP, TCPCTL_INIT_WIN, CTL_EOL); 1820 sysctl_createv(clog, 0, NULL, NULL, 1821 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1822 CTLTYPE_INT, "mss_ifmtu", 1823 SYSCTL_DESCR("Use interface MTU for calculating MSS"), 1824 NULL, 0, &tcp_mss_ifmtu, 0, 1825 CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSS_IFMTU, CTL_EOL); 1826 sysctl_createv(clog, 0, NULL, &sack_node, 1827 CTLFLAG_PERMANENT, 1828 CTLTYPE_NODE, "sack", 1829 SYSCTL_DESCR("RFC2018 Selective ACKnowledgement tunables"), 1830 NULL, 0, NULL, 0, 1831 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_EOL); 1832 1833 /* Congctl subtree */ 1834 sysctl_createv(clog, 0, NULL, &congctl_node, 1835 CTLFLAG_PERMANENT, 1836 CTLTYPE_NODE, "congctl", 1837 SYSCTL_DESCR("TCP Congestion Control"), 1838 NULL, 0, NULL, 0, 1839 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1840 sysctl_createv(clog, 0, &congctl_node, NULL, 1841 CTLFLAG_PERMANENT, 1842 CTLTYPE_STRING, "available", 1843 SYSCTL_DESCR("Available Congestion Control Mechanisms"), 1844 NULL, 0, tcp_congctl_avail, 0, CTL_CREATE, CTL_EOL); 1845 sysctl_createv(clog, 0, &congctl_node, NULL, 1846 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1847 CTLTYPE_STRING, "selected", 1848 SYSCTL_DESCR("Selected Congestion Control Mechanism"), 1849 sysctl_tcp_congctl, 0, NULL, TCPCC_MAXLEN, 1850 CTL_CREATE, CTL_EOL); 1851 1852 sysctl_createv(clog, 0, NULL, NULL, 1853 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1854 CTLTYPE_INT, "win_scale", 1855 SYSCTL_DESCR("Use RFC1323 window scale options"), 1856 sysctl_update_tcpcb_template, 0, &tcp_do_win_scale, 0, 1857 CTL_NET, pf, IPPROTO_TCP, TCPCTL_WSCALE, CTL_EOL); 1858 sysctl_createv(clog, 0, NULL, NULL, 1859 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1860 CTLTYPE_INT, "timestamps", 1861 SYSCTL_DESCR("Use RFC1323 time stamp options"), 1862 sysctl_update_tcpcb_template, 0, &tcp_do_timestamps, 0, 1863 CTL_NET, pf, IPPROTO_TCP, TCPCTL_TSTAMP, CTL_EOL); 1864 sysctl_createv(clog, 0, NULL, NULL, 1865 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1866 CTLTYPE_INT, "compat_42", 1867 SYSCTL_DESCR("Enable workarounds for 4.2BSD TCP bugs"), 1868 NULL, 0, &tcp_compat_42, 0, 1869 CTL_NET, pf, IPPROTO_TCP, TCPCTL_COMPAT_42, CTL_EOL); 1870 sysctl_createv(clog, 0, NULL, NULL, 1871 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1872 CTLTYPE_INT, "cwm", 1873 SYSCTL_DESCR("Hughes/Touch/Heidemann Congestion Window " 1874 "Monitoring"), 1875 NULL, 0, &tcp_cwm, 0, 1876 CTL_NET, pf, IPPROTO_TCP, TCPCTL_CWM, CTL_EOL); 1877 sysctl_createv(clog, 0, NULL, NULL, 1878 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1879 CTLTYPE_INT, "cwm_burstsize", 1880 SYSCTL_DESCR("Congestion Window Monitoring allowed " 1881 "burst count in packets"), 1882 NULL, 0, &tcp_cwm_burstsize, 0, 1883 CTL_NET, pf, IPPROTO_TCP, TCPCTL_CWM_BURSTSIZE, 1884 CTL_EOL); 1885 sysctl_createv(clog, 0, NULL, NULL, 1886 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1887 CTLTYPE_INT, "ack_on_push", 1888 SYSCTL_DESCR("Immediately return ACK when PSH is " 1889 "received"), 1890 NULL, 0, &tcp_ack_on_push, 0, 1891 CTL_NET, pf, IPPROTO_TCP, TCPCTL_ACK_ON_PUSH, CTL_EOL); 1892 sysctl_createv(clog, 0, NULL, NULL, 1893 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1894 CTLTYPE_INT, "keepidle", 1895 SYSCTL_DESCR("Allowed connection idle ticks before a " 1896 "keepalive probe is sent"), 1897 sysctl_tcp_keep, 0, &tcp_keepidle, 0, 1898 CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPIDLE, CTL_EOL); 1899 sysctl_createv(clog, 0, NULL, NULL, 1900 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1901 CTLTYPE_INT, "keepintvl", 1902 SYSCTL_DESCR("Ticks before next keepalive probe is sent"), 1903 sysctl_tcp_keep, 0, &tcp_keepintvl, 0, 1904 CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPINTVL, CTL_EOL); 1905 sysctl_createv(clog, 0, NULL, NULL, 1906 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1907 CTLTYPE_INT, "keepcnt", 1908 SYSCTL_DESCR("Number of keepalive probes to send"), 1909 sysctl_tcp_keep, 0, &tcp_keepcnt, 0, 1910 CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPCNT, CTL_EOL); 1911 sysctl_createv(clog, 0, NULL, NULL, 1912 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 1913 CTLTYPE_INT, "slowhz", 1914 SYSCTL_DESCR("Keepalive ticks per second"), 1915 NULL, PR_SLOWHZ, NULL, 0, 1916 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SLOWHZ, CTL_EOL); 1917 sysctl_createv(clog, 0, NULL, NULL, 1918 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1919 CTLTYPE_INT, "log_refused", 1920 SYSCTL_DESCR("Log refused TCP connections"), 1921 NULL, 0, &tcp_log_refused, 0, 1922 CTL_NET, pf, IPPROTO_TCP, TCPCTL_LOG_REFUSED, CTL_EOL); 1923 #if 0 /* obsoleted */ 1924 sysctl_createv(clog, 0, NULL, NULL, 1925 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1926 CTLTYPE_INT, "rstratelimit", NULL, 1927 NULL, 0, &tcp_rst_ratelim, 0, 1928 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RSTRATELIMIT, CTL_EOL); 1929 #endif 1930 sysctl_createv(clog, 0, NULL, NULL, 1931 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1932 CTLTYPE_INT, "rstppslimit", 1933 SYSCTL_DESCR("Maximum number of RST packets to send " 1934 "per second"), 1935 NULL, 0, &tcp_rst_ppslim, 0, 1936 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RSTPPSLIMIT, CTL_EOL); 1937 sysctl_createv(clog, 0, NULL, NULL, 1938 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1939 CTLTYPE_INT, "delack_ticks", 1940 SYSCTL_DESCR("Number of ticks to delay sending an ACK"), 1941 NULL, 0, &tcp_delack_ticks, 0, 1942 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DELACK_TICKS, CTL_EOL); 1943 sysctl_createv(clog, 0, NULL, NULL, 1944 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1945 CTLTYPE_INT, "init_win_local", 1946 SYSCTL_DESCR("Initial TCP window size (in segments)"), 1947 sysctl_tcp_init_win, 0, &tcp_init_win_local, 0, 1948 CTL_NET, pf, IPPROTO_TCP, TCPCTL_INIT_WIN_LOCAL, 1949 CTL_EOL); 1950 sysctl_createv(clog, 0, NULL, NULL, 1951 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1952 CTLTYPE_STRUCT, "ident", 1953 SYSCTL_DESCR("RFC1413 Identification Protocol lookups"), 1954 sysctl_net_inet_tcp_ident, 0, NULL, sizeof(uid_t), 1955 CTL_NET, pf, IPPROTO_TCP, TCPCTL_IDENT, CTL_EOL); 1956 sysctl_createv(clog, 0, NULL, NULL, 1957 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1958 CTLTYPE_INT, "do_loopback_cksum", 1959 SYSCTL_DESCR("Perform TCP checksum on loopback"), 1960 NULL, 0, &tcp_do_loopback_cksum, 0, 1961 CTL_NET, pf, IPPROTO_TCP, TCPCTL_LOOPBACKCKSUM, 1962 CTL_EOL); 1963 sysctl_createv(clog, 0, NULL, NULL, 1964 CTLFLAG_PERMANENT, 1965 CTLTYPE_STRUCT, "pcblist", 1966 SYSCTL_DESCR("TCP protocol control block list"), 1967 sysctl_inpcblist, 0, &tcbtable, 0, 1968 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, 1969 CTL_EOL); 1970 sysctl_createv(clog, 0, NULL, NULL, 1971 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1972 CTLTYPE_INT, "keepinit", 1973 SYSCTL_DESCR("Ticks before initial tcp connection times out"), 1974 sysctl_tcp_keep, 0, &tcp_keepinit, 0, 1975 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1976 1977 /* TCP socket buffers auto-sizing nodes */ 1978 sysctl_createv(clog, 0, NULL, NULL, 1979 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1980 CTLTYPE_INT, "recvbuf_auto", 1981 SYSCTL_DESCR("Enable automatic receive " 1982 "buffer sizing (experimental)"), 1983 NULL, 0, &tcp_do_autorcvbuf, 0, 1984 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1985 sysctl_createv(clog, 0, NULL, NULL, 1986 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1987 CTLTYPE_INT, "recvbuf_inc", 1988 SYSCTL_DESCR("Incrementor step size of " 1989 "automatic receive buffer"), 1990 NULL, 0, &tcp_autorcvbuf_inc, 0, 1991 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1992 sysctl_createv(clog, 0, NULL, NULL, 1993 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1994 CTLTYPE_INT, "recvbuf_max", 1995 SYSCTL_DESCR("Max size of automatic receive buffer"), 1996 NULL, 0, &tcp_autorcvbuf_max, 0, 1997 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1998 1999 sysctl_createv(clog, 0, NULL, NULL, 2000 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2001 CTLTYPE_INT, "sendbuf_auto", 2002 SYSCTL_DESCR("Enable automatic send " 2003 "buffer sizing (experimental)"), 2004 NULL, 0, &tcp_do_autosndbuf, 0, 2005 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2006 sysctl_createv(clog, 0, NULL, NULL, 2007 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2008 CTLTYPE_INT, "sendbuf_inc", 2009 SYSCTL_DESCR("Incrementor step size of " 2010 "automatic send buffer"), 2011 NULL, 0, &tcp_autosndbuf_inc, 0, 2012 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2013 sysctl_createv(clog, 0, NULL, NULL, 2014 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2015 CTLTYPE_INT, "sendbuf_max", 2016 SYSCTL_DESCR("Max size of automatic send buffer"), 2017 NULL, 0, &tcp_autosndbuf_max, 0, 2018 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2019 2020 /* ECN subtree */ 2021 sysctl_createv(clog, 0, NULL, &ecn_node, 2022 CTLFLAG_PERMANENT, 2023 CTLTYPE_NODE, "ecn", 2024 SYSCTL_DESCR("RFC3168 Explicit Congestion Notification"), 2025 NULL, 0, NULL, 0, 2026 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2027 sysctl_createv(clog, 0, &ecn_node, NULL, 2028 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2029 CTLTYPE_INT, "enable", 2030 SYSCTL_DESCR("Enable TCP Explicit Congestion " 2031 "Notification"), 2032 NULL, 0, &tcp_do_ecn, 0, CTL_CREATE, CTL_EOL); 2033 sysctl_createv(clog, 0, &ecn_node, NULL, 2034 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2035 CTLTYPE_INT, "maxretries", 2036 SYSCTL_DESCR("Number of times to retry ECN setup " 2037 "before disabling ECN on the connection"), 2038 NULL, 0, &tcp_ecn_maxretries, 0, CTL_CREATE, CTL_EOL); 2039 2040 /* SACK gets it's own little subtree. */ 2041 sysctl_createv(clog, 0, NULL, &sack_node, 2042 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2043 CTLTYPE_INT, "enable", 2044 SYSCTL_DESCR("Enable RFC2018 Selective ACKnowledgement"), 2045 NULL, 0, &tcp_do_sack, 0, 2046 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 2047 sysctl_createv(clog, 0, NULL, &sack_node, 2048 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2049 CTLTYPE_INT, "maxholes", 2050 SYSCTL_DESCR("Maximum number of TCP SACK holes allowed per connection"), 2051 NULL, 0, &tcp_sack_tp_maxholes, 0, 2052 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 2053 sysctl_createv(clog, 0, NULL, &sack_node, 2054 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2055 CTLTYPE_INT, "globalmaxholes", 2056 SYSCTL_DESCR("Global maximum number of TCP SACK holes"), 2057 NULL, 0, &tcp_sack_globalmaxholes, 0, 2058 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 2059 sysctl_createv(clog, 0, NULL, &sack_node, 2060 CTLFLAG_PERMANENT, 2061 CTLTYPE_INT, "globalholes", 2062 SYSCTL_DESCR("Global number of TCP SACK holes"), 2063 NULL, 0, &tcp_sack_globalholes, 0, 2064 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 2065 2066 sysctl_createv(clog, 0, NULL, NULL, 2067 CTLFLAG_PERMANENT, 2068 CTLTYPE_STRUCT, "stats", 2069 SYSCTL_DESCR("TCP statistics"), 2070 sysctl_net_inet_tcp_stats, 0, NULL, 0, 2071 CTL_NET, pf, IPPROTO_TCP, TCPCTL_STATS, 2072 CTL_EOL); 2073 sysctl_createv(clog, 0, NULL, NULL, 2074 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2075 CTLTYPE_INT, "local_by_rtt", 2076 SYSCTL_DESCR("Use RTT estimator to decide which hosts " 2077 "are local"), 2078 NULL, 0, &tcp_rttlocal, 0, 2079 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2080 #ifdef TCP_DEBUG 2081 sysctl_createv(clog, 0, NULL, NULL, 2082 CTLFLAG_PERMANENT, 2083 CTLTYPE_STRUCT, "debug", 2084 SYSCTL_DESCR("TCP sockets debug information"), 2085 NULL, 0, &tcp_debug, sizeof(tcp_debug), 2086 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DEBUG, 2087 CTL_EOL); 2088 sysctl_createv(clog, 0, NULL, NULL, 2089 CTLFLAG_PERMANENT, 2090 CTLTYPE_INT, "debx", 2091 SYSCTL_DESCR("Number of TCP debug sockets messages"), 2092 NULL, 0, &tcp_debx, sizeof(tcp_debx), 2093 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DEBX, 2094 CTL_EOL); 2095 #endif 2096 sysctl_createv(clog, 0, NULL, NULL, 2097 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2098 CTLTYPE_STRUCT, "drop", 2099 SYSCTL_DESCR("TCP drop connection"), 2100 sysctl_net_inet_tcp_drop, 0, NULL, 0, 2101 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DROP, CTL_EOL); 2102 sysctl_createv(clog, 0, NULL, NULL, 2103 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2104 CTLTYPE_INT, "iss_hash", 2105 SYSCTL_DESCR("Enable RFC 1948 ISS by cryptographic " 2106 "hash computation"), 2107 NULL, 0, &tcp_do_rfc1948, sizeof(tcp_do_rfc1948), 2108 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, 2109 CTL_EOL); 2110 2111 /* ABC subtree */ 2112 2113 sysctl_createv(clog, 0, NULL, &abc_node, 2114 CTLFLAG_PERMANENT, CTLTYPE_NODE, "abc", 2115 SYSCTL_DESCR("RFC3465 Appropriate Byte Counting (ABC)"), 2116 NULL, 0, NULL, 0, 2117 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2118 sysctl_createv(clog, 0, &abc_node, NULL, 2119 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2120 CTLTYPE_INT, "enable", 2121 SYSCTL_DESCR("Enable RFC3465 Appropriate Byte Counting"), 2122 NULL, 0, &tcp_do_abc, 0, CTL_CREATE, CTL_EOL); 2123 sysctl_createv(clog, 0, &abc_node, NULL, 2124 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2125 CTLTYPE_INT, "aggressive", 2126 SYSCTL_DESCR("1: L=2*SMSS 0: L=1*SMSS"), 2127 NULL, 0, &tcp_abc_aggressive, 0, CTL_CREATE, CTL_EOL); 2128 2129 /* MSL tuning subtree */ 2130 2131 sysctl_createv(clog, 0, NULL, &mslt_node, 2132 CTLFLAG_PERMANENT, CTLTYPE_NODE, "mslt", 2133 SYSCTL_DESCR("MSL Tuning for TIME_WAIT truncation"), 2134 NULL, 0, NULL, 0, 2135 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2136 sysctl_createv(clog, 0, &mslt_node, NULL, 2137 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2138 CTLTYPE_INT, "enable", 2139 SYSCTL_DESCR("Enable TIME_WAIT truncation"), 2140 NULL, 0, &tcp_msl_enable, 0, CTL_CREATE, CTL_EOL); 2141 sysctl_createv(clog, 0, &mslt_node, NULL, 2142 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2143 CTLTYPE_INT, "loopback", 2144 SYSCTL_DESCR("MSL value to use for loopback connections"), 2145 NULL, 0, &tcp_msl_loop, 0, CTL_CREATE, CTL_EOL); 2146 sysctl_createv(clog, 0, &mslt_node, NULL, 2147 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2148 CTLTYPE_INT, "local", 2149 SYSCTL_DESCR("MSL value to use for local connections"), 2150 NULL, 0, &tcp_msl_local, 0, CTL_CREATE, CTL_EOL); 2151 sysctl_createv(clog, 0, &mslt_node, NULL, 2152 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2153 CTLTYPE_INT, "remote", 2154 SYSCTL_DESCR("MSL value to use for remote connections"), 2155 NULL, 0, &tcp_msl_remote, 0, CTL_CREATE, CTL_EOL); 2156 sysctl_createv(clog, 0, &mslt_node, NULL, 2157 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2158 CTLTYPE_INT, "remote_threshold", 2159 SYSCTL_DESCR("RTT estimate value to promote local to remote"), 2160 NULL, 0, &tcp_msl_remote_threshold, 0, CTL_CREATE, CTL_EOL); 2161 2162 /* vestigial TIME_WAIT tuning subtree */ 2163 2164 sysctl_createv(clog, 0, NULL, &vtw_node, 2165 CTLFLAG_PERMANENT, CTLTYPE_NODE, "vtw", 2166 SYSCTL_DESCR("Tuning for Vestigial TIME_WAIT"), 2167 NULL, 0, NULL, 0, 2168 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2169 sysctl_createv(clog, 0, &vtw_node, NULL, 2170 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2171 CTLTYPE_INT, "enable", 2172 SYSCTL_DESCR("Enable Vestigial TIME_WAIT"), 2173 sysctl_tcp_vtw_enable, 0, 2174 (pf == AF_INET) ? &tcp4_vtw_enable : &tcp6_vtw_enable, 2175 0, CTL_CREATE, CTL_EOL); 2176 sysctl_createv(clog, 0, &vtw_node, NULL, 2177 CTLFLAG_PERMANENT|CTLFLAG_READONLY, 2178 CTLTYPE_INT, "entries", 2179 SYSCTL_DESCR("Maximum number of vestigial TIME_WAIT entries"), 2180 NULL, 0, &tcp_vtw_entries, 0, CTL_CREATE, CTL_EOL); 2181 } 2182 2183 void 2184 tcp_usrreq_init(void) 2185 { 2186 2187 #ifdef INET 2188 sysctl_net_inet_tcp_setup2(NULL, PF_INET, "inet", "tcp"); 2189 #endif 2190 #ifdef INET6 2191 sysctl_net_inet_tcp_setup2(NULL, PF_INET6, "inet6", "tcp6"); 2192 #endif 2193 } 2194 2195 PR_WRAP_USRREQS(tcp) 2196 #define tcp_attach tcp_attach_wrapper 2197 #define tcp_detach tcp_detach_wrapper 2198 #define tcp_ioctl tcp_ioctl_wrapper 2199 #define tcp_usrreq tcp_usrreq_wrapper 2200 2201 const struct pr_usrreqs tcp_usrreqs = { 2202 .pr_attach = tcp_attach, 2203 .pr_detach = tcp_detach, 2204 .pr_ioctl = tcp_ioctl, 2205 .pr_generic = tcp_usrreq, 2206 }; 2207