1 /* $NetBSD: tcp_usrreq.c,v 1.205 2015/04/03 20:01:07 rtr Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1997, 1998, 2005, 2006 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to The NetBSD Foundation 37 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation 38 * Facility, NASA Ames Research Center. 39 * This code is derived from software contributed to The NetBSD Foundation 40 * by Charles M. Hannum. 41 * This code is derived from software contributed to The NetBSD Foundation 42 * by Rui Paulo. 43 * 44 * Redistribution and use in source and binary forms, with or without 45 * modification, are permitted provided that the following conditions 46 * are met: 47 * 1. Redistributions of source code must retain the above copyright 48 * notice, this list of conditions and the following disclaimer. 49 * 2. Redistributions in binary form must reproduce the above copyright 50 * notice, this list of conditions and the following disclaimer in the 51 * documentation and/or other materials provided with the distribution. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 54 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 56 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 57 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 58 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 59 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 60 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 61 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 62 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 63 * POSSIBILITY OF SUCH DAMAGE. 64 */ 65 66 /* 67 * Copyright (c) 1982, 1986, 1988, 1993, 1995 68 * The Regents of the University of California. All rights reserved. 69 * 70 * Redistribution and use in source and binary forms, with or without 71 * modification, are permitted provided that the following conditions 72 * are met: 73 * 1. Redistributions of source code must retain the above copyright 74 * notice, this list of conditions and the following disclaimer. 75 * 2. Redistributions in binary form must reproduce the above copyright 76 * notice, this list of conditions and the following disclaimer in the 77 * documentation and/or other materials provided with the distribution. 78 * 3. Neither the name of the University nor the names of its contributors 79 * may be used to endorse or promote products derived from this software 80 * without specific prior written permission. 81 * 82 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 83 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 84 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 85 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 86 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 87 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 88 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 89 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 90 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 91 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 92 * SUCH DAMAGE. 93 * 94 * @(#)tcp_usrreq.c 8.5 (Berkeley) 6/21/95 95 */ 96 97 /* 98 * TCP protocol interface to socket abstraction. 99 */ 100 101 #include <sys/cdefs.h> 102 __KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.205 2015/04/03 20:01:07 rtr Exp $"); 103 104 #include "opt_inet.h" 105 #include "opt_tcp_debug.h" 106 #include "opt_mbuftrace.h" 107 108 #include <sys/param.h> 109 #include <sys/systm.h> 110 #include <sys/kernel.h> 111 #include <sys/mbuf.h> 112 #include <sys/socket.h> 113 #include <sys/socketvar.h> 114 #include <sys/protosw.h> 115 #include <sys/errno.h> 116 #include <sys/stat.h> 117 #include <sys/proc.h> 118 #include <sys/domain.h> 119 #include <sys/sysctl.h> 120 #include <sys/kauth.h> 121 #include <sys/kernel.h> 122 #include <sys/uidinfo.h> 123 124 #include <net/if.h> 125 #include <net/route.h> 126 127 #include <netinet/in.h> 128 #include <netinet/in_systm.h> 129 #include <netinet/in_var.h> 130 #include <netinet/ip.h> 131 #include <netinet/in_pcb.h> 132 #include <netinet/ip_var.h> 133 #include <netinet/in_offload.h> 134 135 #ifdef INET6 136 #ifndef INET 137 #include <netinet/in.h> 138 #endif 139 #include <netinet/ip6.h> 140 #include <netinet6/in6_pcb.h> 141 #include <netinet6/ip6_var.h> 142 #include <netinet6/scope6_var.h> 143 #endif 144 145 #include <netinet/tcp.h> 146 #include <netinet/tcp_fsm.h> 147 #include <netinet/tcp_seq.h> 148 #include <netinet/tcp_timer.h> 149 #include <netinet/tcp_var.h> 150 #include <netinet/tcp_private.h> 151 #include <netinet/tcp_congctl.h> 152 #include <netinet/tcpip.h> 153 #include <netinet/tcp_debug.h> 154 #include <netinet/tcp_vtw.h> 155 156 #include "opt_tcp_space.h" 157 158 static int 159 tcp_debug_capture(struct tcpcb *tp, int req) 160 { 161 #ifdef KPROF 162 tcp_acounts[tp->t_state][req]++; 163 #endif 164 #ifdef TCP_DEBUG 165 return tp->t_state; 166 #endif 167 return 0; 168 } 169 170 static inline void 171 tcp_debug_trace(struct socket *so, struct tcpcb *tp, int ostate, int req) 172 { 173 #ifdef TCP_DEBUG 174 if (tp && (so->so_options & SO_DEBUG)) 175 tcp_trace(TA_USER, ostate, tp, NULL, req); 176 #endif 177 } 178 179 static int 180 tcp_getpcb(struct socket *so, struct inpcb **inp, 181 struct in6pcb **in6p, struct tcpcb **tp) 182 { 183 184 KASSERT(solocked(so)); 185 186 /* 187 * When a TCP is attached to a socket, then there will be 188 * a (struct inpcb) pointed at by the socket, and this 189 * structure will point at a subsidary (struct tcpcb). 190 */ 191 switch (so->so_proto->pr_domain->dom_family) { 192 #ifdef INET 193 case PF_INET: 194 *inp = sotoinpcb(so); 195 if (*inp == NULL) 196 return EINVAL; 197 *tp = intotcpcb(*inp); 198 break; 199 #endif 200 #ifdef INET6 201 case PF_INET6: 202 *in6p = sotoin6pcb(so); 203 if (*in6p == NULL) 204 return EINVAL; 205 *tp = in6totcpcb(*in6p); 206 break; 207 #endif 208 default: 209 return EAFNOSUPPORT; 210 } 211 212 KASSERT(tp != NULL); 213 214 return 0; 215 } 216 217 /* 218 * Process a TCP user request for TCP tb. If this is a send request 219 * then m is the mbuf chain of send data. If this is a timer expiration 220 * (called from the software clock routine), then timertype tells which timer. 221 */ 222 static int 223 tcp_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 224 struct mbuf *control, struct lwp *l) 225 { 226 KASSERT(req != PRU_ATTACH); 227 KASSERT(req != PRU_DETACH); 228 KASSERT(req != PRU_ACCEPT); 229 KASSERT(req != PRU_BIND); 230 KASSERT(req != PRU_LISTEN); 231 KASSERT(req != PRU_CONNECT); 232 KASSERT(req != PRU_CONNECT2); 233 KASSERT(req != PRU_DISCONNECT); 234 KASSERT(req != PRU_SHUTDOWN); 235 KASSERT(req != PRU_ABORT); 236 KASSERT(req != PRU_CONTROL); 237 KASSERT(req != PRU_SENSE); 238 KASSERT(req != PRU_PEERADDR); 239 KASSERT(req != PRU_SOCKADDR); 240 KASSERT(req != PRU_RCVD); 241 KASSERT(req != PRU_RCVOOB); 242 KASSERT(req != PRU_SEND); 243 KASSERT(req != PRU_SENDOOB); 244 KASSERT(req != PRU_PURGEIF); 245 246 KASSERT(solocked(so)); 247 248 panic("tcp_usrreq"); 249 250 return 0; 251 } 252 253 static void 254 change_keepalive(struct socket *so, struct tcpcb *tp) 255 { 256 tp->t_maxidle = tp->t_keepcnt * tp->t_keepintvl; 257 TCP_TIMER_DISARM(tp, TCPT_KEEP); 258 TCP_TIMER_DISARM(tp, TCPT_2MSL); 259 260 if (tp->t_state == TCPS_SYN_RECEIVED || 261 tp->t_state == TCPS_SYN_SENT) { 262 TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepinit); 263 } else if (so->so_options & SO_KEEPALIVE && 264 tp->t_state <= TCPS_CLOSE_WAIT) { 265 TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepintvl); 266 } else { 267 TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepidle); 268 } 269 270 if ((tp->t_state == TCPS_FIN_WAIT_2) && (tp->t_maxidle > 0)) 271 TCP_TIMER_ARM(tp, TCPT_2MSL, tp->t_maxidle); 272 } 273 274 /* 275 * Export TCP internal state information via a struct tcp_info, based on the 276 * Linux 2.6 API. Not ABI compatible as our constants are mapped differently 277 * (TCP state machine, etc). We export all information using FreeBSD-native 278 * constants -- for example, the numeric values for tcpi_state will differ 279 * from Linux. 280 */ 281 static void 282 tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti) 283 { 284 285 bzero(ti, sizeof(*ti)); 286 287 ti->tcpi_state = tp->t_state; 288 if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP)) 289 ti->tcpi_options |= TCPI_OPT_TIMESTAMPS; 290 if (tp->t_flags & TF_SACK_PERMIT) 291 ti->tcpi_options |= TCPI_OPT_SACK; 292 if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) { 293 ti->tcpi_options |= TCPI_OPT_WSCALE; 294 ti->tcpi_snd_wscale = tp->snd_scale; 295 ti->tcpi_rcv_wscale = tp->rcv_scale; 296 } 297 if (tp->t_flags & TF_ECN_PERMIT) { 298 ti->tcpi_options |= TCPI_OPT_ECN; 299 } 300 301 ti->tcpi_rto = tp->t_rxtcur * tick; 302 ti->tcpi_last_data_recv = (long)(hardclock_ticks - 303 (int)tp->t_rcvtime) * tick; 304 ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT; 305 ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT; 306 307 ti->tcpi_snd_ssthresh = tp->snd_ssthresh; 308 /* Linux API wants these in # of segments, apparently */ 309 ti->tcpi_snd_cwnd = tp->snd_cwnd / tp->t_segsz; 310 ti->tcpi_snd_wnd = tp->snd_wnd / tp->t_segsz; 311 312 /* 313 * FreeBSD-specific extension fields for tcp_info. 314 */ 315 ti->tcpi_rcv_space = tp->rcv_wnd; 316 ti->tcpi_rcv_nxt = tp->rcv_nxt; 317 ti->tcpi_snd_bwnd = 0; /* Unused, kept for compat. */ 318 ti->tcpi_snd_nxt = tp->snd_nxt; 319 ti->tcpi_snd_mss = tp->t_segsz; 320 ti->tcpi_rcv_mss = tp->t_segsz; 321 #ifdef TF_TOE 322 if (tp->t_flags & TF_TOE) 323 ti->tcpi_options |= TCPI_OPT_TOE; 324 #endif 325 /* From the redundant department of redundancies... */ 326 ti->__tcpi_retransmits = ti->__tcpi_retrans = 327 ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack; 328 329 ti->tcpi_rcv_ooopack = tp->t_rcvoopack; 330 ti->tcpi_snd_zerowin = tp->t_sndzerowin; 331 } 332 333 int 334 tcp_ctloutput(int op, struct socket *so, struct sockopt *sopt) 335 { 336 int error = 0, s; 337 struct inpcb *inp; 338 #ifdef INET6 339 struct in6pcb *in6p; 340 #endif 341 struct tcpcb *tp; 342 struct tcp_info ti; 343 u_int ui; 344 int family; /* family of the socket */ 345 int level, optname, optval; 346 347 level = sopt->sopt_level; 348 optname = sopt->sopt_name; 349 350 family = so->so_proto->pr_domain->dom_family; 351 352 s = splsoftnet(); 353 switch (family) { 354 #ifdef INET 355 case PF_INET: 356 inp = sotoinpcb(so); 357 #ifdef INET6 358 in6p = NULL; 359 #endif 360 break; 361 #endif 362 #ifdef INET6 363 case PF_INET6: 364 inp = NULL; 365 in6p = sotoin6pcb(so); 366 break; 367 #endif 368 default: 369 splx(s); 370 panic("%s: af %d", __func__, family); 371 } 372 #ifndef INET6 373 if (inp == NULL) 374 #else 375 if (inp == NULL && in6p == NULL) 376 #endif 377 { 378 splx(s); 379 return (ECONNRESET); 380 } 381 if (level != IPPROTO_TCP) { 382 switch (family) { 383 #ifdef INET 384 case PF_INET: 385 error = ip_ctloutput(op, so, sopt); 386 break; 387 #endif 388 #ifdef INET6 389 case PF_INET6: 390 error = ip6_ctloutput(op, so, sopt); 391 break; 392 #endif 393 } 394 splx(s); 395 return (error); 396 } 397 if (inp) 398 tp = intotcpcb(inp); 399 #ifdef INET6 400 else if (in6p) 401 tp = in6totcpcb(in6p); 402 #endif 403 else 404 tp = NULL; 405 406 switch (op) { 407 case PRCO_SETOPT: 408 switch (optname) { 409 #ifdef TCP_SIGNATURE 410 case TCP_MD5SIG: 411 error = sockopt_getint(sopt, &optval); 412 if (error) 413 break; 414 if (optval > 0) 415 tp->t_flags |= TF_SIGNATURE; 416 else 417 tp->t_flags &= ~TF_SIGNATURE; 418 break; 419 #endif /* TCP_SIGNATURE */ 420 421 case TCP_NODELAY: 422 error = sockopt_getint(sopt, &optval); 423 if (error) 424 break; 425 if (optval) 426 tp->t_flags |= TF_NODELAY; 427 else 428 tp->t_flags &= ~TF_NODELAY; 429 break; 430 431 case TCP_MAXSEG: 432 error = sockopt_getint(sopt, &optval); 433 if (error) 434 break; 435 if (optval > 0 && optval <= tp->t_peermss) 436 tp->t_peermss = optval; /* limit on send size */ 437 else 438 error = EINVAL; 439 break; 440 #ifdef notyet 441 case TCP_CONGCTL: 442 /* XXX string overflow XXX */ 443 error = tcp_congctl_select(tp, sopt->sopt_data); 444 break; 445 #endif 446 447 case TCP_KEEPIDLE: 448 error = sockopt_get(sopt, &ui, sizeof(ui)); 449 if (error) 450 break; 451 if (ui > 0) { 452 tp->t_keepidle = ui; 453 change_keepalive(so, tp); 454 } else 455 error = EINVAL; 456 break; 457 458 case TCP_KEEPINTVL: 459 error = sockopt_get(sopt, &ui, sizeof(ui)); 460 if (error) 461 break; 462 if (ui > 0) { 463 tp->t_keepintvl = ui; 464 change_keepalive(so, tp); 465 } else 466 error = EINVAL; 467 break; 468 469 case TCP_KEEPCNT: 470 error = sockopt_get(sopt, &ui, sizeof(ui)); 471 if (error) 472 break; 473 if (ui > 0) { 474 tp->t_keepcnt = ui; 475 change_keepalive(so, tp); 476 } else 477 error = EINVAL; 478 break; 479 480 case TCP_KEEPINIT: 481 error = sockopt_get(sopt, &ui, sizeof(ui)); 482 if (error) 483 break; 484 if (ui > 0) { 485 tp->t_keepinit = ui; 486 change_keepalive(so, tp); 487 } else 488 error = EINVAL; 489 break; 490 491 default: 492 error = ENOPROTOOPT; 493 break; 494 } 495 break; 496 497 case PRCO_GETOPT: 498 switch (optname) { 499 #ifdef TCP_SIGNATURE 500 case TCP_MD5SIG: 501 optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0; 502 error = sockopt_set(sopt, &optval, sizeof(optval)); 503 break; 504 #endif 505 case TCP_NODELAY: 506 optval = tp->t_flags & TF_NODELAY; 507 error = sockopt_set(sopt, &optval, sizeof(optval)); 508 break; 509 case TCP_MAXSEG: 510 optval = tp->t_peermss; 511 error = sockopt_set(sopt, &optval, sizeof(optval)); 512 break; 513 case TCP_INFO: 514 tcp_fill_info(tp, &ti); 515 error = sockopt_set(sopt, &ti, sizeof ti); 516 break; 517 #ifdef notyet 518 case TCP_CONGCTL: 519 break; 520 #endif 521 default: 522 error = ENOPROTOOPT; 523 break; 524 } 525 break; 526 } 527 splx(s); 528 return (error); 529 } 530 531 #ifndef TCP_SENDSPACE 532 #define TCP_SENDSPACE 1024*32 533 #endif 534 int tcp_sendspace = TCP_SENDSPACE; 535 #ifndef TCP_RECVSPACE 536 #define TCP_RECVSPACE 1024*32 537 #endif 538 int tcp_recvspace = TCP_RECVSPACE; 539 540 /* 541 * tcp_attach: attach TCP protocol to socket, allocating internet protocol 542 * control block, TCP control block, buffer space and entering LISTEN state 543 * if to accept connections. 544 */ 545 static int 546 tcp_attach(struct socket *so, int proto) 547 { 548 struct tcpcb *tp; 549 struct inpcb *inp; 550 #ifdef INET6 551 struct in6pcb *in6p; 552 #endif 553 int s, error, family; 554 555 /* Assign the lock (must happen even if we will error out). */ 556 s = splsoftnet(); 557 sosetlock(so); 558 KASSERT(solocked(so)); 559 560 family = so->so_proto->pr_domain->dom_family; 561 switch (family) { 562 #ifdef INET 563 case PF_INET: 564 inp = sotoinpcb(so); 565 #ifdef INET6 566 in6p = NULL; 567 #endif 568 break; 569 #endif 570 #ifdef INET6 571 case PF_INET6: 572 inp = NULL; 573 in6p = sotoin6pcb(so); 574 break; 575 #endif 576 default: 577 error = EAFNOSUPPORT; 578 goto out; 579 } 580 581 KASSERT(inp == NULL); 582 #ifdef INET6 583 KASSERT(in6p == NULL); 584 #endif 585 586 #ifdef MBUFTRACE 587 so->so_mowner = &tcp_sock_mowner; 588 so->so_rcv.sb_mowner = &tcp_sock_rx_mowner; 589 so->so_snd.sb_mowner = &tcp_sock_tx_mowner; 590 #endif 591 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 592 error = soreserve(so, tcp_sendspace, tcp_recvspace); 593 if (error) 594 goto out; 595 } 596 597 so->so_rcv.sb_flags |= SB_AUTOSIZE; 598 so->so_snd.sb_flags |= SB_AUTOSIZE; 599 600 switch (family) { 601 #ifdef INET 602 case PF_INET: 603 error = in_pcballoc(so, &tcbtable); 604 if (error) 605 goto out; 606 inp = sotoinpcb(so); 607 #ifdef INET6 608 in6p = NULL; 609 #endif 610 break; 611 #endif 612 #ifdef INET6 613 case PF_INET6: 614 error = in6_pcballoc(so, &tcbtable); 615 if (error) 616 goto out; 617 inp = NULL; 618 in6p = sotoin6pcb(so); 619 break; 620 #endif 621 default: 622 error = EAFNOSUPPORT; 623 goto out; 624 } 625 if (inp) 626 tp = tcp_newtcpcb(family, (void *)inp); 627 #ifdef INET6 628 else if (in6p) 629 tp = tcp_newtcpcb(family, (void *)in6p); 630 #endif 631 else 632 tp = NULL; 633 634 if (tp == NULL) { 635 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 636 637 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 638 #ifdef INET 639 if (inp) 640 in_pcbdetach(inp); 641 #endif 642 #ifdef INET6 643 if (in6p) 644 in6_pcbdetach(in6p); 645 #endif 646 so->so_state |= nofd; 647 error = ENOBUFS; 648 goto out; 649 } 650 tp->t_state = TCPS_CLOSED; 651 if ((so->so_options & SO_LINGER) && so->so_linger == 0) { 652 so->so_linger = TCP_LINGERTIME; 653 } 654 out: 655 KASSERT(solocked(so)); 656 splx(s); 657 return error; 658 } 659 660 static void 661 tcp_detach(struct socket *so) 662 { 663 struct inpcb *inp = NULL; 664 struct in6pcb *in6p = NULL; 665 struct tcpcb *tp = NULL; 666 int s; 667 668 if (tcp_getpcb(so, &inp, &in6p, &tp) != 0) 669 return; 670 671 s = splsoftnet(); 672 (void)tcp_disconnect1(tp); 673 splx(s); 674 } 675 676 static int 677 tcp_accept(struct socket *so, struct mbuf *nam) 678 { 679 struct inpcb *inp = NULL; 680 struct in6pcb *in6p = NULL; 681 struct tcpcb *tp = NULL; 682 int ostate = 0; 683 int error = 0; 684 int s; 685 686 if ((error = tcp_getpcb(so, &inp, &in6p, &tp)) != 0) 687 return error; 688 689 ostate = tcp_debug_capture(tp, PRU_ACCEPT); 690 691 /* 692 * Accept a connection. Essentially all the work is 693 * done at higher levels; just return the address 694 * of the peer, storing through addr. 695 */ 696 s = splsoftnet(); 697 #ifdef INET 698 if (inp) { 699 in_setpeeraddr(inp, nam); 700 } 701 #endif 702 #ifdef INET6 703 if (in6p) { 704 in6_setpeeraddr(in6p, nam); 705 } 706 #endif 707 tcp_debug_trace(so, tp, ostate, PRU_ACCEPT); 708 splx(s); 709 710 return 0; 711 } 712 713 static int 714 tcp_bind(struct socket *so, struct sockaddr *nam, struct lwp *l) 715 { 716 struct inpcb *inp = NULL; 717 struct in6pcb *in6p = NULL; 718 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 719 #ifdef INET6 720 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; 721 #endif /* INET6 */ 722 struct tcpcb *tp = NULL; 723 int s; 724 int error = 0; 725 int ostate = 0; 726 727 if ((error = tcp_getpcb(so, &inp, &in6p, &tp)) != 0) 728 return error; 729 730 ostate = tcp_debug_capture(tp, PRU_BIND); 731 732 /* 733 * Give the socket an address. 734 */ 735 s = splsoftnet(); 736 switch (so->so_proto->pr_domain->dom_family) { 737 #ifdef INET 738 case PF_INET: 739 error = in_pcbbind(inp, sin, l); 740 break; 741 #endif 742 #ifdef INET6 743 case PF_INET6: 744 error = in6_pcbbind(in6p, sin6, l); 745 if (!error) { 746 /* mapped addr case */ 747 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr)) 748 tp->t_family = AF_INET; 749 else 750 tp->t_family = AF_INET6; 751 } 752 break; 753 #endif 754 } 755 tcp_debug_trace(so, tp, ostate, PRU_BIND); 756 splx(s); 757 758 return error; 759 } 760 761 static int 762 tcp_listen(struct socket *so, struct lwp *l) 763 { 764 struct inpcb *inp = NULL; 765 struct in6pcb *in6p = NULL; 766 struct tcpcb *tp = NULL; 767 int error = 0; 768 int ostate = 0; 769 int s; 770 771 if ((error = tcp_getpcb(so, &inp, &in6p, &tp)) != 0) 772 return error; 773 774 ostate = tcp_debug_capture(tp, PRU_LISTEN); 775 776 /* 777 * Prepare to accept connections. 778 */ 779 s = splsoftnet(); 780 #ifdef INET 781 if (inp && inp->inp_lport == 0) { 782 error = in_pcbbind(inp, NULL, l); 783 if (error) 784 goto release; 785 } 786 #endif 787 #ifdef INET6 788 if (in6p && in6p->in6p_lport == 0) { 789 error = in6_pcbbind(in6p, NULL, l); 790 if (error) 791 goto release; 792 } 793 #endif 794 tp->t_state = TCPS_LISTEN; 795 796 release: 797 tcp_debug_trace(so, tp, ostate, PRU_LISTEN); 798 splx(s); 799 800 return error; 801 } 802 803 static int 804 tcp_connect(struct socket *so, struct mbuf *nam, struct lwp *l) 805 { 806 struct inpcb *inp = NULL; 807 struct in6pcb *in6p = NULL; 808 struct tcpcb *tp = NULL; 809 int s; 810 int error = 0; 811 int ostate = 0; 812 813 if ((error = tcp_getpcb(so, &inp, &in6p, &tp)) != 0) 814 return error; 815 816 ostate = tcp_debug_capture(tp, PRU_CONNECT); 817 818 /* 819 * Initiate connection to peer. 820 * Create a template for use in transmissions on this connection. 821 * Enter SYN_SENT state, and mark socket as connecting. 822 * Start keep-alive timer, and seed output sequence space. 823 * Send initial segment on connection. 824 */ 825 s = splsoftnet(); 826 #ifdef INET 827 if (inp) { 828 if (inp->inp_lport == 0) { 829 error = in_pcbbind(inp, NULL, l); 830 if (error) 831 goto release; 832 } 833 error = in_pcbconnect(inp, nam, l); 834 } 835 #endif 836 #ifdef INET6 837 if (in6p) { 838 if (in6p->in6p_lport == 0) { 839 error = in6_pcbbind(in6p, NULL, l); 840 if (error) 841 goto release; 842 } 843 error = in6_pcbconnect(in6p, nam, l); 844 if (!error) { 845 /* mapped addr case */ 846 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr)) 847 tp->t_family = AF_INET; 848 else 849 tp->t_family = AF_INET6; 850 } 851 } 852 #endif 853 if (error) 854 goto release; 855 tp->t_template = tcp_template(tp); 856 if (tp->t_template == 0) { 857 #ifdef INET 858 if (inp) 859 in_pcbdisconnect(inp); 860 #endif 861 #ifdef INET6 862 if (in6p) 863 in6_pcbdisconnect(in6p); 864 #endif 865 error = ENOBUFS; 866 goto release; 867 } 868 /* 869 * Compute window scaling to request. 870 * XXX: This should be moved to tcp_output(). 871 */ 872 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 873 (TCP_MAXWIN << tp->request_r_scale) < sb_max) 874 tp->request_r_scale++; 875 soisconnecting(so); 876 TCP_STATINC(TCP_STAT_CONNATTEMPT); 877 tp->t_state = TCPS_SYN_SENT; 878 TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepinit); 879 tp->iss = tcp_new_iss(tp, 0); 880 tcp_sendseqinit(tp); 881 error = tcp_output(tp); 882 883 release: 884 tcp_debug_trace(so, tp, ostate, PRU_CONNECT); 885 splx(s); 886 887 return error; 888 } 889 890 static int 891 tcp_connect2(struct socket *so, struct socket *so2) 892 { 893 struct inpcb *inp = NULL; 894 struct in6pcb *in6p = NULL; 895 struct tcpcb *tp = NULL; 896 int error = 0; 897 int ostate = 0; 898 899 KASSERT(solocked(so)); 900 901 if ((error = tcp_getpcb(so, &inp, &in6p, &tp)) != 0) 902 return error; 903 904 ostate = tcp_debug_capture(tp, PRU_CONNECT2); 905 906 tcp_debug_trace(so, tp, ostate, PRU_CONNECT2); 907 908 return EOPNOTSUPP; 909 } 910 911 static int 912 tcp_disconnect(struct socket *so) 913 { 914 struct inpcb *inp = NULL; 915 struct in6pcb *in6p = NULL; 916 struct tcpcb *tp = NULL; 917 int error = 0; 918 int ostate = 0; 919 int s; 920 921 if ((error = tcp_getpcb(so, &inp, &in6p, &tp)) != 0) 922 return error; 923 924 ostate = tcp_debug_capture(tp, PRU_DISCONNECT); 925 926 /* 927 * Initiate disconnect from peer. 928 * If connection never passed embryonic stage, just drop; 929 * else if don't need to let data drain, then can just drop anyways, 930 * else have to begin TCP shutdown process: mark socket disconnecting, 931 * drain unread data, state switch to reflect user close, and 932 * send segment (e.g. FIN) to peer. Socket will be really disconnected 933 * when peer sends FIN and acks ours. 934 * 935 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 936 */ 937 s = splsoftnet(); 938 tp = tcp_disconnect1(tp); 939 tcp_debug_trace(so, tp, ostate, PRU_DISCONNECT); 940 splx(s); 941 942 return error; 943 } 944 945 static int 946 tcp_shutdown(struct socket *so) 947 { 948 struct inpcb *inp = NULL; 949 struct in6pcb *in6p = NULL; 950 struct tcpcb *tp = NULL; 951 int error = 0; 952 int ostate = 0; 953 int s; 954 955 if ((error = tcp_getpcb(so, &inp, &in6p, &tp)) != 0) 956 return error; 957 958 ostate = tcp_debug_capture(tp, PRU_SHUTDOWN); 959 /* 960 * Mark the connection as being incapable of further output. 961 */ 962 s = splsoftnet(); 963 socantsendmore(so); 964 tp = tcp_usrclosed(tp); 965 if (tp) 966 error = tcp_output(tp); 967 tcp_debug_trace(so, tp, ostate, PRU_SHUTDOWN); 968 splx(s); 969 970 return error; 971 } 972 973 static int 974 tcp_abort(struct socket *so) 975 { 976 struct inpcb *inp = NULL; 977 struct in6pcb *in6p = NULL; 978 struct tcpcb *tp = NULL; 979 int error = 0; 980 int ostate = 0; 981 int s; 982 983 if ((error = tcp_getpcb(so, &inp, &in6p, &tp)) != 0) 984 return error; 985 986 ostate = tcp_debug_capture(tp, PRU_ABORT); 987 988 /* 989 * Abort the TCP. 990 */ 991 s = splsoftnet(); 992 tp = tcp_drop(tp, ECONNABORTED); 993 tcp_debug_trace(so, tp, ostate, PRU_ABORT); 994 splx(s); 995 996 return error; 997 } 998 999 static int 1000 tcp_ioctl(struct socket *so, u_long cmd, void *nam, struct ifnet *ifp) 1001 { 1002 switch (so->so_proto->pr_domain->dom_family) { 1003 #ifdef INET 1004 case PF_INET: 1005 return in_control(so, cmd, nam, ifp); 1006 #endif 1007 #ifdef INET6 1008 case PF_INET6: 1009 return in6_control(so, cmd, nam, ifp); 1010 #endif 1011 default: 1012 return EAFNOSUPPORT; 1013 } 1014 } 1015 1016 static int 1017 tcp_stat(struct socket *so, struct stat *ub) 1018 { 1019 KASSERT(solocked(so)); 1020 1021 /* stat: don't bother with a blocksize. */ 1022 return 0; 1023 } 1024 1025 static int 1026 tcp_peeraddr(struct socket *so, struct mbuf *nam) 1027 { 1028 struct inpcb *inp = NULL; 1029 struct in6pcb *in6p = NULL; 1030 struct tcpcb *tp = NULL; 1031 int ostate = 0; 1032 int error = 0; 1033 int s; 1034 1035 if ((error = tcp_getpcb(so, &inp, &in6p, &tp)) != 0) 1036 return error; 1037 1038 ostate = tcp_debug_capture(tp, PRU_PEERADDR); 1039 1040 s = splsoftnet(); 1041 #ifdef INET 1042 if (inp) 1043 in_setpeeraddr(inp, nam); 1044 #endif 1045 #ifdef INET6 1046 if (in6p) 1047 in6_setpeeraddr(in6p, nam); 1048 #endif 1049 tcp_debug_trace(so, tp, ostate, PRU_PEERADDR); 1050 splx(s); 1051 1052 return 0; 1053 } 1054 1055 static int 1056 tcp_sockaddr(struct socket *so, struct mbuf *nam) 1057 { 1058 struct inpcb *inp = NULL; 1059 struct in6pcb *in6p = NULL; 1060 struct tcpcb *tp = NULL; 1061 int ostate = 0; 1062 int error = 0; 1063 int s; 1064 1065 if ((error = tcp_getpcb(so, &inp, &in6p, &tp)) != 0) 1066 return error; 1067 1068 ostate = tcp_debug_capture(tp, PRU_SOCKADDR); 1069 1070 s = splsoftnet(); 1071 #ifdef INET 1072 if (inp) 1073 in_setsockaddr(inp, nam); 1074 #endif 1075 #ifdef INET6 1076 if (in6p) 1077 in6_setsockaddr(in6p, nam); 1078 #endif 1079 tcp_debug_trace(so, tp, ostate, PRU_SOCKADDR); 1080 splx(s); 1081 1082 return 0; 1083 } 1084 1085 static int 1086 tcp_rcvd(struct socket *so, int flags, struct lwp *l) 1087 { 1088 struct inpcb *inp = NULL; 1089 struct in6pcb *in6p = NULL; 1090 struct tcpcb *tp = NULL; 1091 int ostate = 0; 1092 int error = 0; 1093 int s; 1094 1095 if ((error = tcp_getpcb(so, &inp, &in6p, &tp)) != 0) 1096 return error; 1097 1098 ostate = tcp_debug_capture(tp, PRU_RCVD); 1099 1100 /* 1101 * After a receive, possibly send window update to peer. 1102 * 1103 * soreceive() calls this function when a user receives 1104 * ancillary data on a listening socket. We don't call 1105 * tcp_output in such a case, since there is no header 1106 * template for a listening socket and hence the kernel 1107 * will panic. 1108 */ 1109 s = splsoftnet(); 1110 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0) 1111 (void) tcp_output(tp); 1112 splx(s); 1113 1114 tcp_debug_trace(so, tp, ostate, PRU_RCVD); 1115 1116 return 0; 1117 } 1118 1119 static int 1120 tcp_recvoob(struct socket *so, struct mbuf *m, int flags) 1121 { 1122 struct inpcb *inp = NULL; 1123 struct in6pcb *in6p = NULL; 1124 struct tcpcb *tp = NULL; 1125 int ostate = 0; 1126 int error = 0; 1127 int s; 1128 1129 if ((error = tcp_getpcb(so, &inp, &in6p, &tp)) != 0) 1130 return error; 1131 1132 ostate = tcp_debug_capture(tp, PRU_RCVOOB); 1133 1134 s = splsoftnet(); 1135 if ((so->so_oobmark == 0 && 1136 (so->so_state & SS_RCVATMARK) == 0) || 1137 so->so_options & SO_OOBINLINE || 1138 tp->t_oobflags & TCPOOB_HADDATA) { 1139 splx(s); 1140 return EINVAL; 1141 } 1142 1143 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 1144 splx(s); 1145 return EWOULDBLOCK; 1146 } 1147 1148 m->m_len = 1; 1149 *mtod(m, char *) = tp->t_iobc; 1150 if ((flags & MSG_PEEK) == 0) 1151 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 1152 1153 tcp_debug_trace(so, tp, ostate, PRU_RCVOOB); 1154 splx(s); 1155 1156 return 0; 1157 } 1158 1159 static int 1160 tcp_send(struct socket *so, struct mbuf *m, struct mbuf *nam, 1161 struct mbuf *control, struct lwp *l) 1162 { 1163 struct inpcb *inp = NULL; 1164 struct in6pcb *in6p = NULL; 1165 struct tcpcb *tp = NULL; 1166 int ostate = 0; 1167 int error = 0; 1168 int s; 1169 1170 if ((error = tcp_getpcb(so, &inp, &in6p, &tp)) != 0) 1171 return error; 1172 1173 ostate = tcp_debug_capture(tp, PRU_SEND); 1174 1175 /* 1176 * Do a send by putting data in output queue and updating urgent 1177 * marker if URG set. Possibly send more data. 1178 */ 1179 s = splsoftnet(); 1180 if (control && control->m_len) { 1181 m_freem(control); 1182 m_freem(m); 1183 tcp_debug_trace(so, tp, ostate, PRU_SEND); 1184 splx(s); 1185 return EINVAL; 1186 } 1187 1188 sbappendstream(&so->so_snd, m); 1189 error = tcp_output(tp); 1190 tcp_debug_trace(so, tp, ostate, PRU_SEND); 1191 splx(s); 1192 1193 return error; 1194 } 1195 1196 static int 1197 tcp_sendoob(struct socket *so, struct mbuf *m, struct mbuf *control) 1198 { 1199 struct inpcb *inp = NULL; 1200 struct in6pcb *in6p = NULL; 1201 struct tcpcb *tp = NULL; 1202 int ostate = 0; 1203 int error = 0; 1204 int s; 1205 1206 if ((error = tcp_getpcb(so, &inp, &in6p, &tp)) != 0) 1207 return error; 1208 1209 ostate = tcp_debug_capture(tp, PRU_SENDOOB); 1210 1211 s = splsoftnet(); 1212 if (sbspace(&so->so_snd) < -512) { 1213 m_freem(m); 1214 splx(s); 1215 return ENOBUFS; 1216 } 1217 /* 1218 * According to RFC961 (Assigned Protocols), 1219 * the urgent pointer points to the last octet 1220 * of urgent data. We continue, however, 1221 * to consider it to indicate the first octet 1222 * of data past the urgent section. 1223 * Otherwise, snd_up should be one lower. 1224 */ 1225 sbappendstream(&so->so_snd, m); 1226 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 1227 tp->t_force = 1; 1228 error = tcp_output(tp); 1229 tp->t_force = 0; 1230 tcp_debug_trace(so, tp, ostate, PRU_SENDOOB); 1231 splx(s); 1232 1233 return error; 1234 } 1235 1236 static int 1237 tcp_purgeif(struct socket *so, struct ifnet *ifp) 1238 { 1239 int s; 1240 1241 s = splsoftnet(); 1242 mutex_enter(softnet_lock); 1243 switch (so->so_proto->pr_domain->dom_family) { 1244 #ifdef INET 1245 case PF_INET: 1246 in_pcbpurgeif0(&tcbtable, ifp); 1247 in_purgeif(ifp); 1248 in_pcbpurgeif(&tcbtable, ifp); 1249 break; 1250 #endif 1251 #ifdef INET6 1252 case PF_INET6: 1253 in6_pcbpurgeif0(&tcbtable, ifp); 1254 in6_purgeif(ifp); 1255 in6_pcbpurgeif(&tcbtable, ifp); 1256 break; 1257 #endif 1258 default: 1259 mutex_exit(softnet_lock); 1260 splx(s); 1261 return EAFNOSUPPORT; 1262 } 1263 mutex_exit(softnet_lock); 1264 splx(s); 1265 1266 return 0; 1267 } 1268 1269 /* 1270 * Initiate (or continue) disconnect. 1271 * If embryonic state, just send reset (once). 1272 * If in ``let data drain'' option and linger null, just drop. 1273 * Otherwise (hard), mark socket disconnecting and drop 1274 * current input data; switch states based on user close, and 1275 * send segment to peer (with FIN). 1276 */ 1277 struct tcpcb * 1278 tcp_disconnect1(struct tcpcb *tp) 1279 { 1280 struct socket *so; 1281 1282 if (tp->t_inpcb) 1283 so = tp->t_inpcb->inp_socket; 1284 #ifdef INET6 1285 else if (tp->t_in6pcb) 1286 so = tp->t_in6pcb->in6p_socket; 1287 #endif 1288 else 1289 so = NULL; 1290 1291 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 1292 tp = tcp_close(tp); 1293 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 1294 tp = tcp_drop(tp, 0); 1295 else { 1296 soisdisconnecting(so); 1297 sbflush(&so->so_rcv); 1298 tp = tcp_usrclosed(tp); 1299 if (tp) 1300 (void) tcp_output(tp); 1301 } 1302 return (tp); 1303 } 1304 1305 /* 1306 * User issued close, and wish to trail through shutdown states: 1307 * if never received SYN, just forget it. If got a SYN from peer, 1308 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 1309 * If already got a FIN from peer, then almost done; go to LAST_ACK 1310 * state. In all other cases, have already sent FIN to peer (e.g. 1311 * after PRU_SHUTDOWN), and just have to play tedious game waiting 1312 * for peer to send FIN or not respond to keep-alives, etc. 1313 * We can let the user exit from the close as soon as the FIN is acked. 1314 */ 1315 struct tcpcb * 1316 tcp_usrclosed(struct tcpcb *tp) 1317 { 1318 1319 switch (tp->t_state) { 1320 1321 case TCPS_CLOSED: 1322 case TCPS_LISTEN: 1323 case TCPS_SYN_SENT: 1324 tp->t_state = TCPS_CLOSED; 1325 tp = tcp_close(tp); 1326 break; 1327 1328 case TCPS_SYN_RECEIVED: 1329 case TCPS_ESTABLISHED: 1330 tp->t_state = TCPS_FIN_WAIT_1; 1331 break; 1332 1333 case TCPS_CLOSE_WAIT: 1334 tp->t_state = TCPS_LAST_ACK; 1335 break; 1336 } 1337 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 1338 struct socket *so; 1339 if (tp->t_inpcb) 1340 so = tp->t_inpcb->inp_socket; 1341 #ifdef INET6 1342 else if (tp->t_in6pcb) 1343 so = tp->t_in6pcb->in6p_socket; 1344 #endif 1345 else 1346 so = NULL; 1347 if (so) 1348 soisdisconnected(so); 1349 /* 1350 * If we are in FIN_WAIT_2, we arrived here because the 1351 * application did a shutdown of the send side. Like the 1352 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after 1353 * a full close, we start a timer to make sure sockets are 1354 * not left in FIN_WAIT_2 forever. 1355 */ 1356 if ((tp->t_state == TCPS_FIN_WAIT_2) && (tp->t_maxidle > 0)) 1357 TCP_TIMER_ARM(tp, TCPT_2MSL, tp->t_maxidle); 1358 else if (tp->t_state == TCPS_TIME_WAIT 1359 && ((tp->t_inpcb 1360 && (tcp4_vtw_enable & 1) 1361 && vtw_add(AF_INET, tp)) 1362 || 1363 (tp->t_in6pcb 1364 && (tcp6_vtw_enable & 1) 1365 && vtw_add(AF_INET6, tp)))) { 1366 tp = 0; 1367 } 1368 } 1369 return (tp); 1370 } 1371 1372 /* 1373 * sysctl helper routine for net.inet.ip.mssdflt. it can't be less 1374 * than 32. 1375 */ 1376 static int 1377 sysctl_net_inet_tcp_mssdflt(SYSCTLFN_ARGS) 1378 { 1379 int error, mssdflt; 1380 struct sysctlnode node; 1381 1382 mssdflt = tcp_mssdflt; 1383 node = *rnode; 1384 node.sysctl_data = &mssdflt; 1385 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1386 if (error || newp == NULL) 1387 return (error); 1388 1389 if (mssdflt < 32) 1390 return (EINVAL); 1391 tcp_mssdflt = mssdflt; 1392 1393 mutex_enter(softnet_lock); 1394 tcp_tcpcb_template(); 1395 mutex_exit(softnet_lock); 1396 1397 return (0); 1398 } 1399 1400 /* 1401 * sysctl helper for TCP CB template update 1402 */ 1403 static int 1404 sysctl_update_tcpcb_template(SYSCTLFN_ARGS) 1405 { 1406 int t, error; 1407 struct sysctlnode node; 1408 1409 /* follow procedures in sysctl(9) manpage */ 1410 t = *(int *)rnode->sysctl_data; 1411 node = *rnode; 1412 node.sysctl_data = &t; 1413 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1414 if (error || newp == NULL) 1415 return error; 1416 1417 if (t < 0) 1418 return EINVAL; 1419 1420 *(int *)rnode->sysctl_data = t; 1421 1422 mutex_enter(softnet_lock); 1423 tcp_tcpcb_template(); 1424 mutex_exit(softnet_lock); 1425 1426 return 0; 1427 } 1428 1429 /* 1430 * sysctl helper routine for setting port related values under 1431 * net.inet.ip and net.inet6.ip6. does basic range checking and does 1432 * additional checks for each type. this code has placed in 1433 * tcp_input.c since INET and INET6 both use the same tcp code. 1434 * 1435 * this helper is not static so that both inet and inet6 can use it. 1436 */ 1437 int 1438 sysctl_net_inet_ip_ports(SYSCTLFN_ARGS) 1439 { 1440 int error, tmp; 1441 int apmin, apmax; 1442 #ifndef IPNOPRIVPORTS 1443 int lpmin, lpmax; 1444 #endif /* IPNOPRIVPORTS */ 1445 struct sysctlnode node; 1446 1447 if (namelen != 0) 1448 return (EINVAL); 1449 1450 switch (name[-3]) { 1451 #ifdef INET 1452 case PF_INET: 1453 apmin = anonportmin; 1454 apmax = anonportmax; 1455 #ifndef IPNOPRIVPORTS 1456 lpmin = lowportmin; 1457 lpmax = lowportmax; 1458 #endif /* IPNOPRIVPORTS */ 1459 break; 1460 #endif /* INET */ 1461 #ifdef INET6 1462 case PF_INET6: 1463 apmin = ip6_anonportmin; 1464 apmax = ip6_anonportmax; 1465 #ifndef IPNOPRIVPORTS 1466 lpmin = ip6_lowportmin; 1467 lpmax = ip6_lowportmax; 1468 #endif /* IPNOPRIVPORTS */ 1469 break; 1470 #endif /* INET6 */ 1471 default: 1472 return (EINVAL); 1473 } 1474 1475 /* 1476 * insert temporary copy into node, perform lookup on 1477 * temporary, then restore pointer 1478 */ 1479 node = *rnode; 1480 tmp = *(int*)rnode->sysctl_data; 1481 node.sysctl_data = &tmp; 1482 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1483 if (error || newp == NULL) 1484 return (error); 1485 1486 /* 1487 * simple port range check 1488 */ 1489 if (tmp < 0 || tmp > 65535) 1490 return (EINVAL); 1491 1492 /* 1493 * per-node range checks 1494 */ 1495 switch (rnode->sysctl_num) { 1496 case IPCTL_ANONPORTMIN: 1497 case IPV6CTL_ANONPORTMIN: 1498 if (tmp >= apmax) 1499 return (EINVAL); 1500 #ifndef IPNOPRIVPORTS 1501 if (tmp < IPPORT_RESERVED) 1502 return (EINVAL); 1503 #endif /* IPNOPRIVPORTS */ 1504 break; 1505 1506 case IPCTL_ANONPORTMAX: 1507 case IPV6CTL_ANONPORTMAX: 1508 if (apmin >= tmp) 1509 return (EINVAL); 1510 #ifndef IPNOPRIVPORTS 1511 if (tmp < IPPORT_RESERVED) 1512 return (EINVAL); 1513 #endif /* IPNOPRIVPORTS */ 1514 break; 1515 1516 #ifndef IPNOPRIVPORTS 1517 case IPCTL_LOWPORTMIN: 1518 case IPV6CTL_LOWPORTMIN: 1519 if (tmp >= lpmax || 1520 tmp > IPPORT_RESERVEDMAX || 1521 tmp < IPPORT_RESERVEDMIN) 1522 return (EINVAL); 1523 break; 1524 1525 case IPCTL_LOWPORTMAX: 1526 case IPV6CTL_LOWPORTMAX: 1527 if (lpmin >= tmp || 1528 tmp > IPPORT_RESERVEDMAX || 1529 tmp < IPPORT_RESERVEDMIN) 1530 return (EINVAL); 1531 break; 1532 #endif /* IPNOPRIVPORTS */ 1533 1534 default: 1535 return (EINVAL); 1536 } 1537 1538 *(int*)rnode->sysctl_data = tmp; 1539 1540 return (0); 1541 } 1542 1543 static inline int 1544 copyout_uid(struct socket *sockp, void *oldp, size_t *oldlenp) 1545 { 1546 if (oldp) { 1547 size_t sz; 1548 uid_t uid; 1549 int error; 1550 1551 if (sockp->so_cred == NULL) 1552 return EPERM; 1553 1554 uid = kauth_cred_geteuid(sockp->so_cred); 1555 sz = MIN(sizeof(uid), *oldlenp); 1556 if ((error = copyout(&uid, oldp, sz)) != 0) 1557 return error; 1558 } 1559 *oldlenp = sizeof(uid_t); 1560 return 0; 1561 } 1562 1563 static inline int 1564 inet4_ident_core(struct in_addr raddr, u_int rport, 1565 struct in_addr laddr, u_int lport, 1566 void *oldp, size_t *oldlenp, 1567 struct lwp *l, int dodrop) 1568 { 1569 struct inpcb *inp; 1570 struct socket *sockp; 1571 1572 inp = in_pcblookup_connect(&tcbtable, raddr, rport, laddr, lport, 0); 1573 1574 if (inp == NULL || (sockp = inp->inp_socket) == NULL) 1575 return ESRCH; 1576 1577 if (dodrop) { 1578 struct tcpcb *tp; 1579 int error; 1580 1581 if (inp == NULL || (tp = intotcpcb(inp)) == NULL || 1582 (inp->inp_socket->so_options & SO_ACCEPTCONN) != 0) 1583 return ESRCH; 1584 1585 error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_SOCKET, 1586 KAUTH_REQ_NETWORK_SOCKET_DROP, inp->inp_socket, tp, NULL); 1587 if (error) 1588 return (error); 1589 1590 (void)tcp_drop(tp, ECONNABORTED); 1591 return 0; 1592 } 1593 else 1594 return copyout_uid(sockp, oldp, oldlenp); 1595 } 1596 1597 #ifdef INET6 1598 static inline int 1599 inet6_ident_core(struct in6_addr *raddr, u_int rport, 1600 struct in6_addr *laddr, u_int lport, 1601 void *oldp, size_t *oldlenp, 1602 struct lwp *l, int dodrop) 1603 { 1604 struct in6pcb *in6p; 1605 struct socket *sockp; 1606 1607 in6p = in6_pcblookup_connect(&tcbtable, raddr, rport, laddr, lport, 0, 0); 1608 1609 if (in6p == NULL || (sockp = in6p->in6p_socket) == NULL) 1610 return ESRCH; 1611 1612 if (dodrop) { 1613 struct tcpcb *tp; 1614 int error; 1615 1616 if (in6p == NULL || (tp = in6totcpcb(in6p)) == NULL || 1617 (in6p->in6p_socket->so_options & SO_ACCEPTCONN) != 0) 1618 return ESRCH; 1619 1620 error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_SOCKET, 1621 KAUTH_REQ_NETWORK_SOCKET_DROP, in6p->in6p_socket, tp, NULL); 1622 if (error) 1623 return (error); 1624 1625 (void)tcp_drop(tp, ECONNABORTED); 1626 return 0; 1627 } 1628 else 1629 return copyout_uid(sockp, oldp, oldlenp); 1630 } 1631 #endif 1632 1633 /* 1634 * sysctl helper routine for the net.inet.tcp.drop and 1635 * net.inet6.tcp6.drop nodes. 1636 */ 1637 #define sysctl_net_inet_tcp_drop sysctl_net_inet_tcp_ident 1638 1639 /* 1640 * sysctl helper routine for the net.inet.tcp.ident and 1641 * net.inet6.tcp6.ident nodes. contains backwards compat code for the 1642 * old way of looking up the ident information for ipv4 which involves 1643 * stuffing the port/addr pairs into the mib lookup. 1644 */ 1645 static int 1646 sysctl_net_inet_tcp_ident(SYSCTLFN_ARGS) 1647 { 1648 #ifdef INET 1649 struct sockaddr_in *si4[2]; 1650 #endif /* INET */ 1651 #ifdef INET6 1652 struct sockaddr_in6 *si6[2]; 1653 #endif /* INET6 */ 1654 struct sockaddr_storage sa[2]; 1655 int error, pf, dodrop; 1656 1657 dodrop = name[-1] == TCPCTL_DROP; 1658 if (dodrop) { 1659 if (oldp != NULL || *oldlenp != 0) 1660 return EINVAL; 1661 if (newp == NULL) 1662 return EPERM; 1663 if (newlen < sizeof(sa)) 1664 return ENOMEM; 1665 } 1666 if (namelen != 4 && namelen != 0) 1667 return EINVAL; 1668 if (name[-2] != IPPROTO_TCP) 1669 return EINVAL; 1670 pf = name[-3]; 1671 1672 /* old style lookup, ipv4 only */ 1673 if (namelen == 4) { 1674 #ifdef INET 1675 struct in_addr laddr, raddr; 1676 u_int lport, rport; 1677 1678 if (pf != PF_INET) 1679 return EPROTONOSUPPORT; 1680 raddr.s_addr = (uint32_t)name[0]; 1681 rport = (u_int)name[1]; 1682 laddr.s_addr = (uint32_t)name[2]; 1683 lport = (u_int)name[3]; 1684 1685 mutex_enter(softnet_lock); 1686 error = inet4_ident_core(raddr, rport, laddr, lport, 1687 oldp, oldlenp, l, dodrop); 1688 mutex_exit(softnet_lock); 1689 return error; 1690 #else /* INET */ 1691 return EINVAL; 1692 #endif /* INET */ 1693 } 1694 1695 if (newp == NULL || newlen != sizeof(sa)) 1696 return EINVAL; 1697 error = copyin(newp, &sa, newlen); 1698 if (error) 1699 return error; 1700 1701 /* 1702 * requested families must match 1703 */ 1704 if (pf != sa[0].ss_family || sa[0].ss_family != sa[1].ss_family) 1705 return EINVAL; 1706 1707 switch (pf) { 1708 #ifdef INET6 1709 case PF_INET6: 1710 si6[0] = (struct sockaddr_in6*)&sa[0]; 1711 si6[1] = (struct sockaddr_in6*)&sa[1]; 1712 if (si6[0]->sin6_len != sizeof(*si6[0]) || 1713 si6[1]->sin6_len != sizeof(*si6[1])) 1714 return EINVAL; 1715 1716 if (!IN6_IS_ADDR_V4MAPPED(&si6[0]->sin6_addr) && 1717 !IN6_IS_ADDR_V4MAPPED(&si6[1]->sin6_addr)) { 1718 error = sa6_embedscope(si6[0], ip6_use_defzone); 1719 if (error) 1720 return error; 1721 error = sa6_embedscope(si6[1], ip6_use_defzone); 1722 if (error) 1723 return error; 1724 1725 mutex_enter(softnet_lock); 1726 error = inet6_ident_core(&si6[0]->sin6_addr, 1727 si6[0]->sin6_port, &si6[1]->sin6_addr, 1728 si6[1]->sin6_port, oldp, oldlenp, l, dodrop); 1729 mutex_exit(softnet_lock); 1730 return error; 1731 } 1732 1733 if (IN6_IS_ADDR_V4MAPPED(&si6[0]->sin6_addr) != 1734 IN6_IS_ADDR_V4MAPPED(&si6[1]->sin6_addr)) 1735 return EINVAL; 1736 1737 in6_sin6_2_sin_in_sock((struct sockaddr *)&sa[0]); 1738 in6_sin6_2_sin_in_sock((struct sockaddr *)&sa[1]); 1739 /*FALLTHROUGH*/ 1740 #endif /* INET6 */ 1741 #ifdef INET 1742 case PF_INET: 1743 si4[0] = (struct sockaddr_in*)&sa[0]; 1744 si4[1] = (struct sockaddr_in*)&sa[1]; 1745 if (si4[0]->sin_len != sizeof(*si4[0]) || 1746 si4[0]->sin_len != sizeof(*si4[1])) 1747 return EINVAL; 1748 1749 mutex_enter(softnet_lock); 1750 error = inet4_ident_core(si4[0]->sin_addr, si4[0]->sin_port, 1751 si4[1]->sin_addr, si4[1]->sin_port, 1752 oldp, oldlenp, l, dodrop); 1753 mutex_exit(softnet_lock); 1754 return error; 1755 #endif /* INET */ 1756 default: 1757 return EPROTONOSUPPORT; 1758 } 1759 } 1760 1761 /* 1762 * sysctl helper for the inet and inet6 pcblists. handles tcp/udp and 1763 * inet/inet6, as well as raw pcbs for each. specifically not 1764 * declared static so that raw sockets and udp/udp6 can use it as 1765 * well. 1766 */ 1767 int 1768 sysctl_inpcblist(SYSCTLFN_ARGS) 1769 { 1770 #ifdef INET 1771 struct sockaddr_in *in; 1772 const struct inpcb *inp; 1773 #endif 1774 #ifdef INET6 1775 struct sockaddr_in6 *in6; 1776 const struct in6pcb *in6p; 1777 #endif 1778 struct inpcbtable *pcbtbl = __UNCONST(rnode->sysctl_data); 1779 const struct inpcb_hdr *inph; 1780 struct tcpcb *tp; 1781 struct kinfo_pcb pcb; 1782 char *dp; 1783 size_t len, needed, elem_size, out_size; 1784 int error, elem_count, pf, proto, pf2; 1785 1786 if (namelen != 4) 1787 return (EINVAL); 1788 1789 if (oldp != NULL) { 1790 len = *oldlenp; 1791 elem_size = name[2]; 1792 elem_count = name[3]; 1793 if (elem_size != sizeof(pcb)) 1794 return EINVAL; 1795 } else { 1796 len = 0; 1797 elem_count = INT_MAX; 1798 elem_size = sizeof(pcb); 1799 } 1800 error = 0; 1801 dp = oldp; 1802 out_size = elem_size; 1803 needed = 0; 1804 1805 if (namelen == 1 && name[0] == CTL_QUERY) 1806 return (sysctl_query(SYSCTLFN_CALL(rnode))); 1807 1808 if (name - oname != 4) 1809 return (EINVAL); 1810 1811 pf = oname[1]; 1812 proto = oname[2]; 1813 pf2 = (oldp != NULL) ? pf : 0; 1814 1815 mutex_enter(softnet_lock); 1816 1817 TAILQ_FOREACH(inph, &pcbtbl->inpt_queue, inph_queue) { 1818 #ifdef INET 1819 inp = (const struct inpcb *)inph; 1820 #endif 1821 #ifdef INET6 1822 in6p = (const struct in6pcb *)inph; 1823 #endif 1824 1825 if (inph->inph_af != pf) 1826 continue; 1827 1828 if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_SOCKET, 1829 KAUTH_REQ_NETWORK_SOCKET_CANSEE, inph->inph_socket, NULL, 1830 NULL) != 0) 1831 continue; 1832 1833 memset(&pcb, 0, sizeof(pcb)); 1834 1835 pcb.ki_family = pf; 1836 pcb.ki_type = proto; 1837 1838 switch (pf2) { 1839 case 0: 1840 /* just probing for size */ 1841 break; 1842 #ifdef INET 1843 case PF_INET: 1844 pcb.ki_family = inp->inp_socket->so_proto-> 1845 pr_domain->dom_family; 1846 pcb.ki_type = inp->inp_socket->so_proto-> 1847 pr_type; 1848 pcb.ki_protocol = inp->inp_socket->so_proto-> 1849 pr_protocol; 1850 pcb.ki_pflags = inp->inp_flags; 1851 1852 pcb.ki_sostate = inp->inp_socket->so_state; 1853 pcb.ki_prstate = inp->inp_state; 1854 if (proto == IPPROTO_TCP) { 1855 tp = intotcpcb(inp); 1856 pcb.ki_tstate = tp->t_state; 1857 pcb.ki_tflags = tp->t_flags; 1858 } 1859 1860 pcb.ki_pcbaddr = PTRTOUINT64(inp); 1861 pcb.ki_ppcbaddr = PTRTOUINT64(inp->inp_ppcb); 1862 pcb.ki_sockaddr = PTRTOUINT64(inp->inp_socket); 1863 1864 pcb.ki_rcvq = inp->inp_socket->so_rcv.sb_cc; 1865 pcb.ki_sndq = inp->inp_socket->so_snd.sb_cc; 1866 1867 in = satosin(&pcb.ki_src); 1868 in->sin_len = sizeof(*in); 1869 in->sin_family = pf; 1870 in->sin_port = inp->inp_lport; 1871 in->sin_addr = inp->inp_laddr; 1872 if (pcb.ki_prstate >= INP_CONNECTED) { 1873 in = satosin(&pcb.ki_dst); 1874 in->sin_len = sizeof(*in); 1875 in->sin_family = pf; 1876 in->sin_port = inp->inp_fport; 1877 in->sin_addr = inp->inp_faddr; 1878 } 1879 break; 1880 #endif 1881 #ifdef INET6 1882 case PF_INET6: 1883 pcb.ki_family = in6p->in6p_socket->so_proto-> 1884 pr_domain->dom_family; 1885 pcb.ki_type = in6p->in6p_socket->so_proto->pr_type; 1886 pcb.ki_protocol = in6p->in6p_socket->so_proto-> 1887 pr_protocol; 1888 pcb.ki_pflags = in6p->in6p_flags; 1889 1890 pcb.ki_sostate = in6p->in6p_socket->so_state; 1891 pcb.ki_prstate = in6p->in6p_state; 1892 if (proto == IPPROTO_TCP) { 1893 tp = in6totcpcb(in6p); 1894 pcb.ki_tstate = tp->t_state; 1895 pcb.ki_tflags = tp->t_flags; 1896 } 1897 1898 pcb.ki_pcbaddr = PTRTOUINT64(in6p); 1899 pcb.ki_ppcbaddr = PTRTOUINT64(in6p->in6p_ppcb); 1900 pcb.ki_sockaddr = PTRTOUINT64(in6p->in6p_socket); 1901 1902 pcb.ki_rcvq = in6p->in6p_socket->so_rcv.sb_cc; 1903 pcb.ki_sndq = in6p->in6p_socket->so_snd.sb_cc; 1904 1905 in6 = satosin6(&pcb.ki_src); 1906 in6->sin6_len = sizeof(*in6); 1907 in6->sin6_family = pf; 1908 in6->sin6_port = in6p->in6p_lport; 1909 in6->sin6_flowinfo = in6p->in6p_flowinfo; 1910 in6->sin6_addr = in6p->in6p_laddr; 1911 in6->sin6_scope_id = 0; /* XXX? */ 1912 1913 if (pcb.ki_prstate >= IN6P_CONNECTED) { 1914 in6 = satosin6(&pcb.ki_dst); 1915 in6->sin6_len = sizeof(*in6); 1916 in6->sin6_family = pf; 1917 in6->sin6_port = in6p->in6p_fport; 1918 in6->sin6_flowinfo = in6p->in6p_flowinfo; 1919 in6->sin6_addr = in6p->in6p_faddr; 1920 in6->sin6_scope_id = 0; /* XXX? */ 1921 } 1922 break; 1923 #endif 1924 } 1925 1926 if (len >= elem_size && elem_count > 0) { 1927 error = copyout(&pcb, dp, out_size); 1928 if (error) { 1929 mutex_exit(softnet_lock); 1930 return (error); 1931 } 1932 dp += elem_size; 1933 len -= elem_size; 1934 } 1935 needed += elem_size; 1936 if (elem_count > 0 && elem_count != INT_MAX) 1937 elem_count--; 1938 } 1939 1940 *oldlenp = needed; 1941 if (oldp == NULL) 1942 *oldlenp += PCB_SLOP * sizeof(struct kinfo_pcb); 1943 1944 mutex_exit(softnet_lock); 1945 1946 return (error); 1947 } 1948 1949 static int 1950 sysctl_tcp_congctl(SYSCTLFN_ARGS) 1951 { 1952 struct sysctlnode node; 1953 int error; 1954 char newname[TCPCC_MAXLEN]; 1955 1956 strlcpy(newname, tcp_congctl_global_name, sizeof(newname) - 1); 1957 1958 node = *rnode; 1959 node.sysctl_data = newname; 1960 node.sysctl_size = sizeof(newname); 1961 1962 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1963 1964 if (error || 1965 newp == NULL || 1966 strncmp(newname, tcp_congctl_global_name, sizeof(newname)) == 0) 1967 return error; 1968 1969 mutex_enter(softnet_lock); 1970 error = tcp_congctl_select(NULL, newname); 1971 mutex_exit(softnet_lock); 1972 1973 return error; 1974 } 1975 1976 static int 1977 sysctl_tcp_init_win(SYSCTLFN_ARGS) 1978 { 1979 int error; 1980 u_int iw; 1981 struct sysctlnode node; 1982 1983 iw = *(u_int *)rnode->sysctl_data; 1984 node = *rnode; 1985 node.sysctl_data = &iw; 1986 node.sysctl_size = sizeof(iw); 1987 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1988 if (error || newp == NULL) 1989 return error; 1990 1991 if (iw >= __arraycount(tcp_init_win_max)) 1992 return EINVAL; 1993 *(u_int *)rnode->sysctl_data = iw; 1994 return 0; 1995 } 1996 1997 static int 1998 sysctl_tcp_keep(SYSCTLFN_ARGS) 1999 { 2000 int error; 2001 u_int tmp; 2002 struct sysctlnode node; 2003 2004 node = *rnode; 2005 tmp = *(u_int *)rnode->sysctl_data; 2006 node.sysctl_data = &tmp; 2007 2008 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 2009 if (error || newp == NULL) 2010 return error; 2011 2012 mutex_enter(softnet_lock); 2013 2014 *(u_int *)rnode->sysctl_data = tmp; 2015 tcp_tcpcb_template(); /* update the template */ 2016 2017 mutex_exit(softnet_lock); 2018 return 0; 2019 } 2020 2021 static int 2022 sysctl_net_inet_tcp_stats(SYSCTLFN_ARGS) 2023 { 2024 2025 return (NETSTAT_SYSCTL(tcpstat_percpu, TCP_NSTATS)); 2026 } 2027 2028 /* 2029 * this (second stage) setup routine is a replacement for tcp_sysctl() 2030 * (which is currently used for ipv4 and ipv6) 2031 */ 2032 static void 2033 sysctl_net_inet_tcp_setup2(struct sysctllog **clog, int pf, const char *pfname, 2034 const char *tcpname) 2035 { 2036 const struct sysctlnode *sack_node; 2037 const struct sysctlnode *abc_node; 2038 const struct sysctlnode *ecn_node; 2039 const struct sysctlnode *congctl_node; 2040 const struct sysctlnode *mslt_node; 2041 const struct sysctlnode *vtw_node; 2042 #ifdef TCP_DEBUG 2043 extern struct tcp_debug tcp_debug[TCP_NDEBUG]; 2044 extern int tcp_debx; 2045 #endif 2046 2047 sysctl_createv(clog, 0, NULL, NULL, 2048 CTLFLAG_PERMANENT, 2049 CTLTYPE_NODE, pfname, NULL, 2050 NULL, 0, NULL, 0, 2051 CTL_NET, pf, CTL_EOL); 2052 sysctl_createv(clog, 0, NULL, NULL, 2053 CTLFLAG_PERMANENT, 2054 CTLTYPE_NODE, tcpname, 2055 SYSCTL_DESCR("TCP related settings"), 2056 NULL, 0, NULL, 0, 2057 CTL_NET, pf, IPPROTO_TCP, CTL_EOL); 2058 2059 sysctl_createv(clog, 0, NULL, NULL, 2060 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2061 CTLTYPE_INT, "rfc1323", 2062 SYSCTL_DESCR("Enable RFC1323 TCP extensions"), 2063 sysctl_update_tcpcb_template, 0, &tcp_do_rfc1323, 0, 2064 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RFC1323, CTL_EOL); 2065 sysctl_createv(clog, 0, NULL, NULL, 2066 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2067 CTLTYPE_INT, "sendspace", 2068 SYSCTL_DESCR("Default TCP send buffer size"), 2069 NULL, 0, &tcp_sendspace, 0, 2070 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SENDSPACE, CTL_EOL); 2071 sysctl_createv(clog, 0, NULL, NULL, 2072 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2073 CTLTYPE_INT, "recvspace", 2074 SYSCTL_DESCR("Default TCP receive buffer size"), 2075 NULL, 0, &tcp_recvspace, 0, 2076 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RECVSPACE, CTL_EOL); 2077 sysctl_createv(clog, 0, NULL, NULL, 2078 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2079 CTLTYPE_INT, "mssdflt", 2080 SYSCTL_DESCR("Default maximum segment size"), 2081 sysctl_net_inet_tcp_mssdflt, 0, &tcp_mssdflt, 0, 2082 CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSSDFLT, CTL_EOL); 2083 sysctl_createv(clog, 0, NULL, NULL, 2084 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2085 CTLTYPE_INT, "minmss", 2086 SYSCTL_DESCR("Lower limit for TCP maximum segment size"), 2087 NULL, 0, &tcp_minmss, 0, 2088 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2089 sysctl_createv(clog, 0, NULL, NULL, 2090 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2091 CTLTYPE_INT, "msl", 2092 SYSCTL_DESCR("Maximum Segment Life"), 2093 NULL, 0, &tcp_msl, 0, 2094 CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSL, CTL_EOL); 2095 sysctl_createv(clog, 0, NULL, NULL, 2096 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2097 CTLTYPE_INT, "syn_cache_limit", 2098 SYSCTL_DESCR("Maximum number of entries in the TCP " 2099 "compressed state engine"), 2100 NULL, 0, &tcp_syn_cache_limit, 0, 2101 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_CACHE_LIMIT, 2102 CTL_EOL); 2103 sysctl_createv(clog, 0, NULL, NULL, 2104 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2105 CTLTYPE_INT, "syn_bucket_limit", 2106 SYSCTL_DESCR("Maximum number of entries per hash " 2107 "bucket in the TCP compressed state " 2108 "engine"), 2109 NULL, 0, &tcp_syn_bucket_limit, 0, 2110 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_BUCKET_LIMIT, 2111 CTL_EOL); 2112 #if 0 /* obsoleted */ 2113 sysctl_createv(clog, 0, NULL, NULL, 2114 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2115 CTLTYPE_INT, "syn_cache_interval", 2116 SYSCTL_DESCR("TCP compressed state engine's timer interval"), 2117 NULL, 0, &tcp_syn_cache_interval, 0, 2118 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_CACHE_INTER, 2119 CTL_EOL); 2120 #endif 2121 sysctl_createv(clog, 0, NULL, NULL, 2122 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2123 CTLTYPE_INT, "init_win", 2124 SYSCTL_DESCR("Initial TCP congestion window"), 2125 sysctl_tcp_init_win, 0, &tcp_init_win, 0, 2126 CTL_NET, pf, IPPROTO_TCP, TCPCTL_INIT_WIN, CTL_EOL); 2127 sysctl_createv(clog, 0, NULL, NULL, 2128 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2129 CTLTYPE_INT, "mss_ifmtu", 2130 SYSCTL_DESCR("Use interface MTU for calculating MSS"), 2131 NULL, 0, &tcp_mss_ifmtu, 0, 2132 CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSS_IFMTU, CTL_EOL); 2133 sysctl_createv(clog, 0, NULL, &sack_node, 2134 CTLFLAG_PERMANENT, 2135 CTLTYPE_NODE, "sack", 2136 SYSCTL_DESCR("RFC2018 Selective ACKnowledgement tunables"), 2137 NULL, 0, NULL, 0, 2138 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_EOL); 2139 2140 /* Congctl subtree */ 2141 sysctl_createv(clog, 0, NULL, &congctl_node, 2142 CTLFLAG_PERMANENT, 2143 CTLTYPE_NODE, "congctl", 2144 SYSCTL_DESCR("TCP Congestion Control"), 2145 NULL, 0, NULL, 0, 2146 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2147 sysctl_createv(clog, 0, &congctl_node, NULL, 2148 CTLFLAG_PERMANENT, 2149 CTLTYPE_STRING, "available", 2150 SYSCTL_DESCR("Available Congestion Control Mechanisms"), 2151 NULL, 0, tcp_congctl_avail, 0, CTL_CREATE, CTL_EOL); 2152 sysctl_createv(clog, 0, &congctl_node, NULL, 2153 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2154 CTLTYPE_STRING, "selected", 2155 SYSCTL_DESCR("Selected Congestion Control Mechanism"), 2156 sysctl_tcp_congctl, 0, NULL, TCPCC_MAXLEN, 2157 CTL_CREATE, CTL_EOL); 2158 2159 sysctl_createv(clog, 0, NULL, NULL, 2160 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2161 CTLTYPE_INT, "win_scale", 2162 SYSCTL_DESCR("Use RFC1323 window scale options"), 2163 sysctl_update_tcpcb_template, 0, &tcp_do_win_scale, 0, 2164 CTL_NET, pf, IPPROTO_TCP, TCPCTL_WSCALE, CTL_EOL); 2165 sysctl_createv(clog, 0, NULL, NULL, 2166 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2167 CTLTYPE_INT, "timestamps", 2168 SYSCTL_DESCR("Use RFC1323 time stamp options"), 2169 sysctl_update_tcpcb_template, 0, &tcp_do_timestamps, 0, 2170 CTL_NET, pf, IPPROTO_TCP, TCPCTL_TSTAMP, CTL_EOL); 2171 sysctl_createv(clog, 0, NULL, NULL, 2172 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2173 CTLTYPE_INT, "compat_42", 2174 SYSCTL_DESCR("Enable workarounds for 4.2BSD TCP bugs"), 2175 NULL, 0, &tcp_compat_42, 0, 2176 CTL_NET, pf, IPPROTO_TCP, TCPCTL_COMPAT_42, CTL_EOL); 2177 sysctl_createv(clog, 0, NULL, NULL, 2178 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2179 CTLTYPE_INT, "cwm", 2180 SYSCTL_DESCR("Hughes/Touch/Heidemann Congestion Window " 2181 "Monitoring"), 2182 NULL, 0, &tcp_cwm, 0, 2183 CTL_NET, pf, IPPROTO_TCP, TCPCTL_CWM, CTL_EOL); 2184 sysctl_createv(clog, 0, NULL, NULL, 2185 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2186 CTLTYPE_INT, "cwm_burstsize", 2187 SYSCTL_DESCR("Congestion Window Monitoring allowed " 2188 "burst count in packets"), 2189 NULL, 0, &tcp_cwm_burstsize, 0, 2190 CTL_NET, pf, IPPROTO_TCP, TCPCTL_CWM_BURSTSIZE, 2191 CTL_EOL); 2192 sysctl_createv(clog, 0, NULL, NULL, 2193 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2194 CTLTYPE_INT, "ack_on_push", 2195 SYSCTL_DESCR("Immediately return ACK when PSH is " 2196 "received"), 2197 NULL, 0, &tcp_ack_on_push, 0, 2198 CTL_NET, pf, IPPROTO_TCP, TCPCTL_ACK_ON_PUSH, CTL_EOL); 2199 sysctl_createv(clog, 0, NULL, NULL, 2200 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2201 CTLTYPE_INT, "keepidle", 2202 SYSCTL_DESCR("Allowed connection idle ticks before a " 2203 "keepalive probe is sent"), 2204 sysctl_tcp_keep, 0, &tcp_keepidle, 0, 2205 CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPIDLE, CTL_EOL); 2206 sysctl_createv(clog, 0, NULL, NULL, 2207 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2208 CTLTYPE_INT, "keepintvl", 2209 SYSCTL_DESCR("Ticks before next keepalive probe is sent"), 2210 sysctl_tcp_keep, 0, &tcp_keepintvl, 0, 2211 CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPINTVL, CTL_EOL); 2212 sysctl_createv(clog, 0, NULL, NULL, 2213 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2214 CTLTYPE_INT, "keepcnt", 2215 SYSCTL_DESCR("Number of keepalive probes to send"), 2216 sysctl_tcp_keep, 0, &tcp_keepcnt, 0, 2217 CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPCNT, CTL_EOL); 2218 sysctl_createv(clog, 0, NULL, NULL, 2219 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 2220 CTLTYPE_INT, "slowhz", 2221 SYSCTL_DESCR("Keepalive ticks per second"), 2222 NULL, PR_SLOWHZ, NULL, 0, 2223 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SLOWHZ, CTL_EOL); 2224 sysctl_createv(clog, 0, NULL, NULL, 2225 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2226 CTLTYPE_INT, "log_refused", 2227 SYSCTL_DESCR("Log refused TCP connections"), 2228 NULL, 0, &tcp_log_refused, 0, 2229 CTL_NET, pf, IPPROTO_TCP, TCPCTL_LOG_REFUSED, CTL_EOL); 2230 #if 0 /* obsoleted */ 2231 sysctl_createv(clog, 0, NULL, NULL, 2232 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2233 CTLTYPE_INT, "rstratelimit", NULL, 2234 NULL, 0, &tcp_rst_ratelim, 0, 2235 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RSTRATELIMIT, CTL_EOL); 2236 #endif 2237 sysctl_createv(clog, 0, NULL, NULL, 2238 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2239 CTLTYPE_INT, "rstppslimit", 2240 SYSCTL_DESCR("Maximum number of RST packets to send " 2241 "per second"), 2242 NULL, 0, &tcp_rst_ppslim, 0, 2243 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RSTPPSLIMIT, CTL_EOL); 2244 sysctl_createv(clog, 0, NULL, NULL, 2245 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2246 CTLTYPE_INT, "delack_ticks", 2247 SYSCTL_DESCR("Number of ticks to delay sending an ACK"), 2248 NULL, 0, &tcp_delack_ticks, 0, 2249 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DELACK_TICKS, CTL_EOL); 2250 sysctl_createv(clog, 0, NULL, NULL, 2251 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2252 CTLTYPE_INT, "init_win_local", 2253 SYSCTL_DESCR("Initial TCP window size (in segments)"), 2254 sysctl_tcp_init_win, 0, &tcp_init_win_local, 0, 2255 CTL_NET, pf, IPPROTO_TCP, TCPCTL_INIT_WIN_LOCAL, 2256 CTL_EOL); 2257 sysctl_createv(clog, 0, NULL, NULL, 2258 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2259 CTLTYPE_STRUCT, "ident", 2260 SYSCTL_DESCR("RFC1413 Identification Protocol lookups"), 2261 sysctl_net_inet_tcp_ident, 0, NULL, sizeof(uid_t), 2262 CTL_NET, pf, IPPROTO_TCP, TCPCTL_IDENT, CTL_EOL); 2263 sysctl_createv(clog, 0, NULL, NULL, 2264 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2265 CTLTYPE_INT, "do_loopback_cksum", 2266 SYSCTL_DESCR("Perform TCP checksum on loopback"), 2267 NULL, 0, &tcp_do_loopback_cksum, 0, 2268 CTL_NET, pf, IPPROTO_TCP, TCPCTL_LOOPBACKCKSUM, 2269 CTL_EOL); 2270 sysctl_createv(clog, 0, NULL, NULL, 2271 CTLFLAG_PERMANENT, 2272 CTLTYPE_STRUCT, "pcblist", 2273 SYSCTL_DESCR("TCP protocol control block list"), 2274 sysctl_inpcblist, 0, &tcbtable, 0, 2275 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, 2276 CTL_EOL); 2277 sysctl_createv(clog, 0, NULL, NULL, 2278 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2279 CTLTYPE_INT, "keepinit", 2280 SYSCTL_DESCR("Ticks before initial tcp connection times out"), 2281 sysctl_tcp_keep, 0, &tcp_keepinit, 0, 2282 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2283 2284 /* TCP socket buffers auto-sizing nodes */ 2285 sysctl_createv(clog, 0, NULL, NULL, 2286 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2287 CTLTYPE_INT, "recvbuf_auto", 2288 SYSCTL_DESCR("Enable automatic receive " 2289 "buffer sizing (experimental)"), 2290 NULL, 0, &tcp_do_autorcvbuf, 0, 2291 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2292 sysctl_createv(clog, 0, NULL, NULL, 2293 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2294 CTLTYPE_INT, "recvbuf_inc", 2295 SYSCTL_DESCR("Incrementor step size of " 2296 "automatic receive buffer"), 2297 NULL, 0, &tcp_autorcvbuf_inc, 0, 2298 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2299 sysctl_createv(clog, 0, NULL, NULL, 2300 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2301 CTLTYPE_INT, "recvbuf_max", 2302 SYSCTL_DESCR("Max size of automatic receive buffer"), 2303 NULL, 0, &tcp_autorcvbuf_max, 0, 2304 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2305 2306 sysctl_createv(clog, 0, NULL, NULL, 2307 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2308 CTLTYPE_INT, "sendbuf_auto", 2309 SYSCTL_DESCR("Enable automatic send " 2310 "buffer sizing (experimental)"), 2311 NULL, 0, &tcp_do_autosndbuf, 0, 2312 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2313 sysctl_createv(clog, 0, NULL, NULL, 2314 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2315 CTLTYPE_INT, "sendbuf_inc", 2316 SYSCTL_DESCR("Incrementor step size of " 2317 "automatic send buffer"), 2318 NULL, 0, &tcp_autosndbuf_inc, 0, 2319 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2320 sysctl_createv(clog, 0, NULL, NULL, 2321 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2322 CTLTYPE_INT, "sendbuf_max", 2323 SYSCTL_DESCR("Max size of automatic send buffer"), 2324 NULL, 0, &tcp_autosndbuf_max, 0, 2325 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2326 2327 /* ECN subtree */ 2328 sysctl_createv(clog, 0, NULL, &ecn_node, 2329 CTLFLAG_PERMANENT, 2330 CTLTYPE_NODE, "ecn", 2331 SYSCTL_DESCR("RFC3168 Explicit Congestion Notification"), 2332 NULL, 0, NULL, 0, 2333 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2334 sysctl_createv(clog, 0, &ecn_node, NULL, 2335 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2336 CTLTYPE_INT, "enable", 2337 SYSCTL_DESCR("Enable TCP Explicit Congestion " 2338 "Notification"), 2339 NULL, 0, &tcp_do_ecn, 0, CTL_CREATE, CTL_EOL); 2340 sysctl_createv(clog, 0, &ecn_node, NULL, 2341 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2342 CTLTYPE_INT, "maxretries", 2343 SYSCTL_DESCR("Number of times to retry ECN setup " 2344 "before disabling ECN on the connection"), 2345 NULL, 0, &tcp_ecn_maxretries, 0, CTL_CREATE, CTL_EOL); 2346 2347 /* SACK gets its own little subtree. */ 2348 sysctl_createv(clog, 0, NULL, &sack_node, 2349 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2350 CTLTYPE_INT, "enable", 2351 SYSCTL_DESCR("Enable RFC2018 Selective ACKnowledgement"), 2352 NULL, 0, &tcp_do_sack, 0, 2353 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 2354 sysctl_createv(clog, 0, NULL, &sack_node, 2355 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2356 CTLTYPE_INT, "maxholes", 2357 SYSCTL_DESCR("Maximum number of TCP SACK holes allowed per connection"), 2358 NULL, 0, &tcp_sack_tp_maxholes, 0, 2359 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 2360 sysctl_createv(clog, 0, NULL, &sack_node, 2361 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2362 CTLTYPE_INT, "globalmaxholes", 2363 SYSCTL_DESCR("Global maximum number of TCP SACK holes"), 2364 NULL, 0, &tcp_sack_globalmaxholes, 0, 2365 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 2366 sysctl_createv(clog, 0, NULL, &sack_node, 2367 CTLFLAG_PERMANENT, 2368 CTLTYPE_INT, "globalholes", 2369 SYSCTL_DESCR("Global number of TCP SACK holes"), 2370 NULL, 0, &tcp_sack_globalholes, 0, 2371 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 2372 2373 sysctl_createv(clog, 0, NULL, NULL, 2374 CTLFLAG_PERMANENT, 2375 CTLTYPE_STRUCT, "stats", 2376 SYSCTL_DESCR("TCP statistics"), 2377 sysctl_net_inet_tcp_stats, 0, NULL, 0, 2378 CTL_NET, pf, IPPROTO_TCP, TCPCTL_STATS, 2379 CTL_EOL); 2380 sysctl_createv(clog, 0, NULL, NULL, 2381 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2382 CTLTYPE_INT, "local_by_rtt", 2383 SYSCTL_DESCR("Use RTT estimator to decide which hosts " 2384 "are local"), 2385 NULL, 0, &tcp_rttlocal, 0, 2386 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2387 #ifdef TCP_DEBUG 2388 sysctl_createv(clog, 0, NULL, NULL, 2389 CTLFLAG_PERMANENT, 2390 CTLTYPE_STRUCT, "debug", 2391 SYSCTL_DESCR("TCP sockets debug information"), 2392 NULL, 0, &tcp_debug, sizeof(tcp_debug), 2393 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DEBUG, 2394 CTL_EOL); 2395 sysctl_createv(clog, 0, NULL, NULL, 2396 CTLFLAG_PERMANENT, 2397 CTLTYPE_INT, "debx", 2398 SYSCTL_DESCR("Number of TCP debug sockets messages"), 2399 NULL, 0, &tcp_debx, sizeof(tcp_debx), 2400 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DEBX, 2401 CTL_EOL); 2402 #endif 2403 sysctl_createv(clog, 0, NULL, NULL, 2404 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2405 CTLTYPE_STRUCT, "drop", 2406 SYSCTL_DESCR("TCP drop connection"), 2407 sysctl_net_inet_tcp_drop, 0, NULL, 0, 2408 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DROP, CTL_EOL); 2409 sysctl_createv(clog, 0, NULL, NULL, 2410 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2411 CTLTYPE_INT, "iss_hash", 2412 SYSCTL_DESCR("Enable RFC 1948 ISS by cryptographic " 2413 "hash computation"), 2414 NULL, 0, &tcp_do_rfc1948, sizeof(tcp_do_rfc1948), 2415 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, 2416 CTL_EOL); 2417 2418 /* ABC subtree */ 2419 2420 sysctl_createv(clog, 0, NULL, &abc_node, 2421 CTLFLAG_PERMANENT, CTLTYPE_NODE, "abc", 2422 SYSCTL_DESCR("RFC3465 Appropriate Byte Counting (ABC)"), 2423 NULL, 0, NULL, 0, 2424 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2425 sysctl_createv(clog, 0, &abc_node, NULL, 2426 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2427 CTLTYPE_INT, "enable", 2428 SYSCTL_DESCR("Enable RFC3465 Appropriate Byte Counting"), 2429 NULL, 0, &tcp_do_abc, 0, CTL_CREATE, CTL_EOL); 2430 sysctl_createv(clog, 0, &abc_node, NULL, 2431 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2432 CTLTYPE_INT, "aggressive", 2433 SYSCTL_DESCR("1: L=2*SMSS 0: L=1*SMSS"), 2434 NULL, 0, &tcp_abc_aggressive, 0, CTL_CREATE, CTL_EOL); 2435 2436 /* MSL tuning subtree */ 2437 2438 sysctl_createv(clog, 0, NULL, &mslt_node, 2439 CTLFLAG_PERMANENT, CTLTYPE_NODE, "mslt", 2440 SYSCTL_DESCR("MSL Tuning for TIME_WAIT truncation"), 2441 NULL, 0, NULL, 0, 2442 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2443 sysctl_createv(clog, 0, &mslt_node, NULL, 2444 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2445 CTLTYPE_INT, "enable", 2446 SYSCTL_DESCR("Enable TIME_WAIT truncation"), 2447 NULL, 0, &tcp_msl_enable, 0, CTL_CREATE, CTL_EOL); 2448 sysctl_createv(clog, 0, &mslt_node, NULL, 2449 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2450 CTLTYPE_INT, "loopback", 2451 SYSCTL_DESCR("MSL value to use for loopback connections"), 2452 NULL, 0, &tcp_msl_loop, 0, CTL_CREATE, CTL_EOL); 2453 sysctl_createv(clog, 0, &mslt_node, NULL, 2454 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2455 CTLTYPE_INT, "local", 2456 SYSCTL_DESCR("MSL value to use for local connections"), 2457 NULL, 0, &tcp_msl_local, 0, CTL_CREATE, CTL_EOL); 2458 sysctl_createv(clog, 0, &mslt_node, NULL, 2459 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2460 CTLTYPE_INT, "remote", 2461 SYSCTL_DESCR("MSL value to use for remote connections"), 2462 NULL, 0, &tcp_msl_remote, 0, CTL_CREATE, CTL_EOL); 2463 sysctl_createv(clog, 0, &mslt_node, NULL, 2464 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2465 CTLTYPE_INT, "remote_threshold", 2466 SYSCTL_DESCR("RTT estimate value to promote local to remote"), 2467 NULL, 0, &tcp_msl_remote_threshold, 0, CTL_CREATE, CTL_EOL); 2468 2469 /* vestigial TIME_WAIT tuning subtree */ 2470 2471 sysctl_createv(clog, 0, NULL, &vtw_node, 2472 CTLFLAG_PERMANENT, CTLTYPE_NODE, "vtw", 2473 SYSCTL_DESCR("Tuning for Vestigial TIME_WAIT"), 2474 NULL, 0, NULL, 0, 2475 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2476 sysctl_createv(clog, 0, &vtw_node, NULL, 2477 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2478 CTLTYPE_INT, "enable", 2479 SYSCTL_DESCR("Enable Vestigial TIME_WAIT"), 2480 sysctl_tcp_vtw_enable, 0, 2481 (pf == AF_INET) ? &tcp4_vtw_enable : &tcp6_vtw_enable, 2482 0, CTL_CREATE, CTL_EOL); 2483 sysctl_createv(clog, 0, &vtw_node, NULL, 2484 CTLFLAG_PERMANENT|CTLFLAG_READONLY, 2485 CTLTYPE_INT, "entries", 2486 SYSCTL_DESCR("Maximum number of vestigial TIME_WAIT entries"), 2487 NULL, 0, &tcp_vtw_entries, 0, CTL_CREATE, CTL_EOL); 2488 } 2489 2490 void 2491 tcp_usrreq_init(void) 2492 { 2493 2494 #ifdef INET 2495 sysctl_net_inet_tcp_setup2(NULL, PF_INET, "inet", "tcp"); 2496 #endif 2497 #ifdef INET6 2498 sysctl_net_inet_tcp_setup2(NULL, PF_INET6, "inet6", "tcp6"); 2499 #endif 2500 } 2501 2502 PR_WRAP_USRREQS(tcp) 2503 #define tcp_attach tcp_attach_wrapper 2504 #define tcp_detach tcp_detach_wrapper 2505 #define tcp_accept tcp_accept_wrapper 2506 #define tcp_bind tcp_bind_wrapper 2507 #define tcp_listen tcp_listen_wrapper 2508 #define tcp_connect tcp_connect_wrapper 2509 #define tcp_connect2 tcp_connect2_wrapper 2510 #define tcp_disconnect tcp_disconnect_wrapper 2511 #define tcp_shutdown tcp_shutdown_wrapper 2512 #define tcp_abort tcp_abort_wrapper 2513 #define tcp_ioctl tcp_ioctl_wrapper 2514 #define tcp_stat tcp_stat_wrapper 2515 #define tcp_peeraddr tcp_peeraddr_wrapper 2516 #define tcp_sockaddr tcp_sockaddr_wrapper 2517 #define tcp_rcvd tcp_rcvd_wrapper 2518 #define tcp_recvoob tcp_recvoob_wrapper 2519 #define tcp_send tcp_send_wrapper 2520 #define tcp_sendoob tcp_sendoob_wrapper 2521 #define tcp_purgeif tcp_purgeif_wrapper 2522 #define tcp_usrreq tcp_usrreq_wrapper 2523 2524 const struct pr_usrreqs tcp_usrreqs = { 2525 .pr_attach = tcp_attach, 2526 .pr_detach = tcp_detach, 2527 .pr_accept = tcp_accept, 2528 .pr_bind = tcp_bind, 2529 .pr_listen = tcp_listen, 2530 .pr_connect = tcp_connect, 2531 .pr_connect2 = tcp_connect2, 2532 .pr_disconnect = tcp_disconnect, 2533 .pr_shutdown = tcp_shutdown, 2534 .pr_abort = tcp_abort, 2535 .pr_ioctl = tcp_ioctl, 2536 .pr_stat = tcp_stat, 2537 .pr_peeraddr = tcp_peeraddr, 2538 .pr_sockaddr = tcp_sockaddr, 2539 .pr_rcvd = tcp_rcvd, 2540 .pr_recvoob = tcp_recvoob, 2541 .pr_send = tcp_send, 2542 .pr_sendoob = tcp_sendoob, 2543 .pr_purgeif = tcp_purgeif, 2544 .pr_generic = tcp_usrreq, 2545 }; 2546