1 /* $NetBSD: tcp_subr.c,v 1.271 2017/07/29 05:08:48 maxv Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1997, 1998, 2000, 2001, 2008 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to The NetBSD Foundation 37 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation 38 * Facility, NASA Ames Research Center. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 50 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 51 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 52 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 53 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 54 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 55 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 56 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 57 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 58 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 59 * POSSIBILITY OF SUCH DAMAGE. 60 */ 61 62 /* 63 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 64 * The Regents of the University of California. All rights reserved. 65 * 66 * Redistribution and use in source and binary forms, with or without 67 * modification, are permitted provided that the following conditions 68 * are met: 69 * 1. Redistributions of source code must retain the above copyright 70 * notice, this list of conditions and the following disclaimer. 71 * 2. Redistributions in binary form must reproduce the above copyright 72 * notice, this list of conditions and the following disclaimer in the 73 * documentation and/or other materials provided with the distribution. 74 * 3. Neither the name of the University nor the names of its contributors 75 * may be used to endorse or promote products derived from this software 76 * without specific prior written permission. 77 * 78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 88 * SUCH DAMAGE. 89 * 90 * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95 91 */ 92 93 #include <sys/cdefs.h> 94 __KERNEL_RCSID(0, "$NetBSD: tcp_subr.c,v 1.271 2017/07/29 05:08:48 maxv Exp $"); 95 96 #ifdef _KERNEL_OPT 97 #include "opt_inet.h" 98 #include "opt_ipsec.h" 99 #include "opt_inet_csum.h" 100 #include "opt_mbuftrace.h" 101 #endif 102 103 #include <sys/param.h> 104 #include <sys/atomic.h> 105 #include <sys/proc.h> 106 #include <sys/systm.h> 107 #include <sys/mbuf.h> 108 #include <sys/once.h> 109 #include <sys/socket.h> 110 #include <sys/socketvar.h> 111 #include <sys/protosw.h> 112 #include <sys/errno.h> 113 #include <sys/kernel.h> 114 #include <sys/pool.h> 115 #include <sys/md5.h> 116 #include <sys/cprng.h> 117 118 #include <net/route.h> 119 #include <net/if.h> 120 121 #include <netinet/in.h> 122 #include <netinet/in_systm.h> 123 #include <netinet/ip.h> 124 #include <netinet/in_pcb.h> 125 #include <netinet/ip_var.h> 126 #include <netinet/ip_icmp.h> 127 128 #ifdef INET6 129 #ifndef INET 130 #include <netinet/in.h> 131 #endif 132 #include <netinet/ip6.h> 133 #include <netinet6/in6_pcb.h> 134 #include <netinet6/ip6_var.h> 135 #include <netinet6/in6_var.h> 136 #include <netinet6/ip6protosw.h> 137 #include <netinet/icmp6.h> 138 #include <netinet6/nd6.h> 139 #endif 140 141 #include <netinet/tcp.h> 142 #include <netinet/tcp_fsm.h> 143 #include <netinet/tcp_seq.h> 144 #include <netinet/tcp_timer.h> 145 #include <netinet/tcp_var.h> 146 #include <netinet/tcp_vtw.h> 147 #include <netinet/tcp_private.h> 148 #include <netinet/tcp_congctl.h> 149 #include <netinet/tcpip.h> 150 151 #ifdef IPSEC 152 #include <netipsec/ipsec.h> 153 #include <netipsec/xform.h> 154 #ifdef INET6 155 #include <netipsec/ipsec6.h> 156 #endif 157 #include <netipsec/key.h> 158 #endif /* IPSEC*/ 159 160 161 struct inpcbtable tcbtable; /* head of queue of active tcpcb's */ 162 u_int32_t tcp_now; /* slow ticks, for RFC 1323 timestamps */ 163 164 percpu_t *tcpstat_percpu; 165 166 /* patchable/settable parameters for tcp */ 167 int tcp_mssdflt = TCP_MSS; 168 int tcp_minmss = TCP_MINMSS; 169 int tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ; 170 int tcp_do_rfc1323 = 1; /* window scaling / timestamps (obsolete) */ 171 int tcp_do_rfc1948 = 0; /* ISS by cryptographic hash */ 172 int tcp_do_sack = 1; /* selective acknowledgement */ 173 int tcp_do_win_scale = 1; /* RFC1323 window scaling */ 174 int tcp_do_timestamps = 1; /* RFC1323 timestamps */ 175 int tcp_ack_on_push = 0; /* set to enable immediate ACK-on-PUSH */ 176 int tcp_do_ecn = 0; /* Explicit Congestion Notification */ 177 #ifndef TCP_INIT_WIN 178 #define TCP_INIT_WIN 4 /* initial slow start window */ 179 #endif 180 #ifndef TCP_INIT_WIN_LOCAL 181 #define TCP_INIT_WIN_LOCAL 4 /* initial slow start window for local nets */ 182 #endif 183 /* 184 * Up to 5 we scale linearly, to reach 3 * 1460; then (iw) * 1460. 185 * This is to simulate current behavior for iw == 4 186 */ 187 int tcp_init_win_max[] = { 188 1 * 1460, 189 1 * 1460, 190 2 * 1460, 191 2 * 1460, 192 3 * 1460, 193 5 * 1460, 194 6 * 1460, 195 7 * 1460, 196 8 * 1460, 197 9 * 1460, 198 10 * 1460 199 }; 200 int tcp_init_win = TCP_INIT_WIN; 201 int tcp_init_win_local = TCP_INIT_WIN_LOCAL; 202 int tcp_mss_ifmtu = 0; 203 int tcp_rst_ppslim = 100; /* 100pps */ 204 int tcp_ackdrop_ppslim = 100; /* 100pps */ 205 int tcp_do_loopback_cksum = 0; 206 int tcp_do_abc = 1; /* RFC3465 Appropriate byte counting. */ 207 int tcp_abc_aggressive = 1; /* 1: L=2*SMSS 0: L=1*SMSS */ 208 int tcp_sack_tp_maxholes = 32; 209 int tcp_sack_globalmaxholes = 1024; 210 int tcp_sack_globalholes = 0; 211 int tcp_ecn_maxretries = 1; 212 int tcp_msl_enable = 1; /* enable TIME_WAIT truncation */ 213 int tcp_msl_loop = PR_SLOWHZ; /* MSL for loopback */ 214 int tcp_msl_local = 5 * PR_SLOWHZ; /* MSL for 'local' */ 215 int tcp_msl_remote = TCPTV_MSL; /* MSL otherwise */ 216 int tcp_msl_remote_threshold = TCPTV_SRTTDFLT; /* RTT threshold */ 217 int tcp_rttlocal = 0; /* Use RTT to decide who's 'local' */ 218 219 int tcp4_vtw_enable = 0; /* 1 to enable */ 220 int tcp6_vtw_enable = 0; /* 1 to enable */ 221 int tcp_vtw_was_enabled = 0; 222 int tcp_vtw_entries = 1 << 4; /* 16 vestigial TIME_WAIT entries */ 223 224 /* tcb hash */ 225 #ifndef TCBHASHSIZE 226 #define TCBHASHSIZE 128 227 #endif 228 int tcbhashsize = TCBHASHSIZE; 229 230 /* syn hash parameters */ 231 #define TCP_SYN_HASH_SIZE 293 232 #define TCP_SYN_BUCKET_SIZE 35 233 int tcp_syn_cache_size = TCP_SYN_HASH_SIZE; 234 int tcp_syn_cache_limit = TCP_SYN_HASH_SIZE*TCP_SYN_BUCKET_SIZE; 235 int tcp_syn_bucket_limit = 3*TCP_SYN_BUCKET_SIZE; 236 struct syn_cache_head tcp_syn_cache[TCP_SYN_HASH_SIZE]; 237 238 int tcp_freeq(struct tcpcb *); 239 static int tcp_iss_secret_init(void); 240 241 #ifdef INET 242 static void tcp_mtudisc_callback(struct in_addr); 243 #endif 244 245 #ifdef INET6 246 void tcp6_mtudisc(struct in6pcb *, int); 247 #endif 248 249 static struct pool tcpcb_pool; 250 251 static int tcp_drainwanted; 252 253 #ifdef TCP_CSUM_COUNTERS 254 #include <sys/device.h> 255 256 #if defined(INET) 257 struct evcnt tcp_hwcsum_bad = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 258 NULL, "tcp", "hwcsum bad"); 259 struct evcnt tcp_hwcsum_ok = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 260 NULL, "tcp", "hwcsum ok"); 261 struct evcnt tcp_hwcsum_data = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 262 NULL, "tcp", "hwcsum data"); 263 struct evcnt tcp_swcsum = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 264 NULL, "tcp", "swcsum"); 265 266 EVCNT_ATTACH_STATIC(tcp_hwcsum_bad); 267 EVCNT_ATTACH_STATIC(tcp_hwcsum_ok); 268 EVCNT_ATTACH_STATIC(tcp_hwcsum_data); 269 EVCNT_ATTACH_STATIC(tcp_swcsum); 270 #endif /* defined(INET) */ 271 272 #if defined(INET6) 273 struct evcnt tcp6_hwcsum_bad = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 274 NULL, "tcp6", "hwcsum bad"); 275 struct evcnt tcp6_hwcsum_ok = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 276 NULL, "tcp6", "hwcsum ok"); 277 struct evcnt tcp6_hwcsum_data = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 278 NULL, "tcp6", "hwcsum data"); 279 struct evcnt tcp6_swcsum = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 280 NULL, "tcp6", "swcsum"); 281 282 EVCNT_ATTACH_STATIC(tcp6_hwcsum_bad); 283 EVCNT_ATTACH_STATIC(tcp6_hwcsum_ok); 284 EVCNT_ATTACH_STATIC(tcp6_hwcsum_data); 285 EVCNT_ATTACH_STATIC(tcp6_swcsum); 286 #endif /* defined(INET6) */ 287 #endif /* TCP_CSUM_COUNTERS */ 288 289 290 #ifdef TCP_OUTPUT_COUNTERS 291 #include <sys/device.h> 292 293 struct evcnt tcp_output_bigheader = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 294 NULL, "tcp", "output big header"); 295 struct evcnt tcp_output_predict_hit = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 296 NULL, "tcp", "output predict hit"); 297 struct evcnt tcp_output_predict_miss = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 298 NULL, "tcp", "output predict miss"); 299 struct evcnt tcp_output_copysmall = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 300 NULL, "tcp", "output copy small"); 301 struct evcnt tcp_output_copybig = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 302 NULL, "tcp", "output copy big"); 303 struct evcnt tcp_output_refbig = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 304 NULL, "tcp", "output reference big"); 305 306 EVCNT_ATTACH_STATIC(tcp_output_bigheader); 307 EVCNT_ATTACH_STATIC(tcp_output_predict_hit); 308 EVCNT_ATTACH_STATIC(tcp_output_predict_miss); 309 EVCNT_ATTACH_STATIC(tcp_output_copysmall); 310 EVCNT_ATTACH_STATIC(tcp_output_copybig); 311 EVCNT_ATTACH_STATIC(tcp_output_refbig); 312 313 #endif /* TCP_OUTPUT_COUNTERS */ 314 315 #ifdef TCP_REASS_COUNTERS 316 #include <sys/device.h> 317 318 struct evcnt tcp_reass_ = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 319 NULL, "tcp_reass", "calls"); 320 struct evcnt tcp_reass_empty = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 321 &tcp_reass_, "tcp_reass", "insert into empty queue"); 322 struct evcnt tcp_reass_iteration[8] = { 323 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", ">7 iterations"), 324 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", "1 iteration"), 325 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", "2 iterations"), 326 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", "3 iterations"), 327 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", "4 iterations"), 328 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", "5 iterations"), 329 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", "6 iterations"), 330 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", "7 iterations"), 331 }; 332 struct evcnt tcp_reass_prependfirst = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 333 &tcp_reass_, "tcp_reass", "prepend to first"); 334 struct evcnt tcp_reass_prepend = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 335 &tcp_reass_, "tcp_reass", "prepend"); 336 struct evcnt tcp_reass_insert = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 337 &tcp_reass_, "tcp_reass", "insert"); 338 struct evcnt tcp_reass_inserttail = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 339 &tcp_reass_, "tcp_reass", "insert at tail"); 340 struct evcnt tcp_reass_append = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 341 &tcp_reass_, "tcp_reass", "append"); 342 struct evcnt tcp_reass_appendtail = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 343 &tcp_reass_, "tcp_reass", "append to tail fragment"); 344 struct evcnt tcp_reass_overlaptail = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 345 &tcp_reass_, "tcp_reass", "overlap at end"); 346 struct evcnt tcp_reass_overlapfront = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 347 &tcp_reass_, "tcp_reass", "overlap at start"); 348 struct evcnt tcp_reass_segdup = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 349 &tcp_reass_, "tcp_reass", "duplicate segment"); 350 struct evcnt tcp_reass_fragdup = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 351 &tcp_reass_, "tcp_reass", "duplicate fragment"); 352 353 EVCNT_ATTACH_STATIC(tcp_reass_); 354 EVCNT_ATTACH_STATIC(tcp_reass_empty); 355 EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 0); 356 EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 1); 357 EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 2); 358 EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 3); 359 EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 4); 360 EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 5); 361 EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 6); 362 EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 7); 363 EVCNT_ATTACH_STATIC(tcp_reass_prependfirst); 364 EVCNT_ATTACH_STATIC(tcp_reass_prepend); 365 EVCNT_ATTACH_STATIC(tcp_reass_insert); 366 EVCNT_ATTACH_STATIC(tcp_reass_inserttail); 367 EVCNT_ATTACH_STATIC(tcp_reass_append); 368 EVCNT_ATTACH_STATIC(tcp_reass_appendtail); 369 EVCNT_ATTACH_STATIC(tcp_reass_overlaptail); 370 EVCNT_ATTACH_STATIC(tcp_reass_overlapfront); 371 EVCNT_ATTACH_STATIC(tcp_reass_segdup); 372 EVCNT_ATTACH_STATIC(tcp_reass_fragdup); 373 374 #endif /* TCP_REASS_COUNTERS */ 375 376 #ifdef MBUFTRACE 377 struct mowner tcp_mowner = MOWNER_INIT("tcp", ""); 378 struct mowner tcp_rx_mowner = MOWNER_INIT("tcp", "rx"); 379 struct mowner tcp_tx_mowner = MOWNER_INIT("tcp", "tx"); 380 struct mowner tcp_sock_mowner = MOWNER_INIT("tcp", "sock"); 381 struct mowner tcp_sock_rx_mowner = MOWNER_INIT("tcp", "sock rx"); 382 struct mowner tcp_sock_tx_mowner = MOWNER_INIT("tcp", "sock tx"); 383 #endif 384 385 callout_t tcp_slowtimo_ch; 386 387 static int 388 do_tcpinit(void) 389 { 390 391 in_pcbinit(&tcbtable, tcbhashsize, tcbhashsize); 392 pool_init(&tcpcb_pool, sizeof(struct tcpcb), 0, 0, 0, "tcpcbpl", 393 NULL, IPL_SOFTNET); 394 395 tcp_usrreq_init(); 396 397 /* Initialize timer state. */ 398 tcp_timer_init(); 399 400 /* Initialize the compressed state engine. */ 401 syn_cache_init(); 402 403 /* Initialize the congestion control algorithms. */ 404 tcp_congctl_init(); 405 406 /* Initialize the TCPCB template. */ 407 tcp_tcpcb_template(); 408 409 /* Initialize reassembly queue */ 410 tcpipqent_init(); 411 412 /* SACK */ 413 tcp_sack_init(); 414 415 MOWNER_ATTACH(&tcp_tx_mowner); 416 MOWNER_ATTACH(&tcp_rx_mowner); 417 MOWNER_ATTACH(&tcp_reass_mowner); 418 MOWNER_ATTACH(&tcp_sock_mowner); 419 MOWNER_ATTACH(&tcp_sock_tx_mowner); 420 MOWNER_ATTACH(&tcp_sock_rx_mowner); 421 MOWNER_ATTACH(&tcp_mowner); 422 423 tcpstat_percpu = percpu_alloc(sizeof(uint64_t) * TCP_NSTATS); 424 425 vtw_earlyinit(); 426 427 callout_init(&tcp_slowtimo_ch, CALLOUT_MPSAFE); 428 callout_reset(&tcp_slowtimo_ch, 1, tcp_slowtimo, NULL); 429 430 return 0; 431 } 432 433 void 434 tcp_init_common(unsigned basehlen) 435 { 436 static ONCE_DECL(dotcpinit); 437 unsigned hlen = basehlen + sizeof(struct tcphdr); 438 unsigned oldhlen; 439 440 if (max_linkhdr + hlen > MHLEN) 441 panic("tcp_init"); 442 while ((oldhlen = max_protohdr) < hlen) 443 atomic_cas_uint(&max_protohdr, oldhlen, hlen); 444 445 RUN_ONCE(&dotcpinit, do_tcpinit); 446 } 447 448 /* 449 * Tcp initialization 450 */ 451 void 452 tcp_init(void) 453 { 454 455 icmp_mtudisc_callback_register(tcp_mtudisc_callback); 456 457 tcp_init_common(sizeof(struct ip)); 458 } 459 460 /* 461 * Create template to be used to send tcp packets on a connection. 462 * Call after host entry created, allocates an mbuf and fills 463 * in a skeletal tcp/ip header, minimizing the amount of work 464 * necessary when the connection is used. 465 */ 466 struct mbuf * 467 tcp_template(struct tcpcb *tp) 468 { 469 struct inpcb *inp = tp->t_inpcb; 470 #ifdef INET6 471 struct in6pcb *in6p = tp->t_in6pcb; 472 #endif 473 struct tcphdr *n; 474 struct mbuf *m; 475 int hlen; 476 477 switch (tp->t_family) { 478 case AF_INET: 479 hlen = sizeof(struct ip); 480 if (inp) 481 break; 482 #ifdef INET6 483 if (in6p) { 484 /* mapped addr case */ 485 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr) 486 && IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr)) 487 break; 488 } 489 #endif 490 return NULL; /*EINVAL*/ 491 #ifdef INET6 492 case AF_INET6: 493 hlen = sizeof(struct ip6_hdr); 494 if (in6p) { 495 /* more sainty check? */ 496 break; 497 } 498 return NULL; /*EINVAL*/ 499 #endif 500 default: 501 hlen = 0; /*pacify gcc*/ 502 return NULL; /*EAFNOSUPPORT*/ 503 } 504 #ifdef DIAGNOSTIC 505 if (hlen + sizeof(struct tcphdr) > MCLBYTES) 506 panic("mclbytes too small for t_template"); 507 #endif 508 m = tp->t_template; 509 if (m && m->m_len == hlen + sizeof(struct tcphdr)) 510 ; 511 else { 512 if (m) 513 m_freem(m); 514 m = tp->t_template = NULL; 515 MGETHDR(m, M_DONTWAIT, MT_HEADER); 516 if (m && hlen + sizeof(struct tcphdr) > MHLEN) { 517 MCLGET(m, M_DONTWAIT); 518 if ((m->m_flags & M_EXT) == 0) { 519 m_free(m); 520 m = NULL; 521 } 522 } 523 if (m == NULL) 524 return NULL; 525 MCLAIM(m, &tcp_mowner); 526 m->m_pkthdr.len = m->m_len = hlen + sizeof(struct tcphdr); 527 } 528 529 memset(mtod(m, void *), 0, m->m_len); 530 531 n = (struct tcphdr *)(mtod(m, char *) + hlen); 532 533 switch (tp->t_family) { 534 case AF_INET: 535 { 536 struct ipovly *ipov; 537 mtod(m, struct ip *)->ip_v = 4; 538 mtod(m, struct ip *)->ip_hl = hlen >> 2; 539 ipov = mtod(m, struct ipovly *); 540 ipov->ih_pr = IPPROTO_TCP; 541 ipov->ih_len = htons(sizeof(struct tcphdr)); 542 if (inp) { 543 ipov->ih_src = inp->inp_laddr; 544 ipov->ih_dst = inp->inp_faddr; 545 } 546 #ifdef INET6 547 else if (in6p) { 548 /* mapped addr case */ 549 bcopy(&in6p->in6p_laddr.s6_addr32[3], &ipov->ih_src, 550 sizeof(ipov->ih_src)); 551 bcopy(&in6p->in6p_faddr.s6_addr32[3], &ipov->ih_dst, 552 sizeof(ipov->ih_dst)); 553 } 554 #endif 555 /* 556 * Compute the pseudo-header portion of the checksum 557 * now. We incrementally add in the TCP option and 558 * payload lengths later, and then compute the TCP 559 * checksum right before the packet is sent off onto 560 * the wire. 561 */ 562 n->th_sum = in_cksum_phdr(ipov->ih_src.s_addr, 563 ipov->ih_dst.s_addr, 564 htons(sizeof(struct tcphdr) + IPPROTO_TCP)); 565 break; 566 } 567 #ifdef INET6 568 case AF_INET6: 569 { 570 struct ip6_hdr *ip6; 571 mtod(m, struct ip *)->ip_v = 6; 572 ip6 = mtod(m, struct ip6_hdr *); 573 ip6->ip6_nxt = IPPROTO_TCP; 574 ip6->ip6_plen = htons(sizeof(struct tcphdr)); 575 ip6->ip6_src = in6p->in6p_laddr; 576 ip6->ip6_dst = in6p->in6p_faddr; 577 ip6->ip6_flow = in6p->in6p_flowinfo & IPV6_FLOWINFO_MASK; 578 if (ip6_auto_flowlabel) { 579 ip6->ip6_flow &= ~IPV6_FLOWLABEL_MASK; 580 ip6->ip6_flow |= 581 (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK); 582 } 583 ip6->ip6_vfc &= ~IPV6_VERSION_MASK; 584 ip6->ip6_vfc |= IPV6_VERSION; 585 586 /* 587 * Compute the pseudo-header portion of the checksum 588 * now. We incrementally add in the TCP option and 589 * payload lengths later, and then compute the TCP 590 * checksum right before the packet is sent off onto 591 * the wire. 592 */ 593 n->th_sum = in6_cksum_phdr(&in6p->in6p_laddr, 594 &in6p->in6p_faddr, htonl(sizeof(struct tcphdr)), 595 htonl(IPPROTO_TCP)); 596 break; 597 } 598 #endif 599 } 600 if (inp) { 601 n->th_sport = inp->inp_lport; 602 n->th_dport = inp->inp_fport; 603 } 604 #ifdef INET6 605 else if (in6p) { 606 n->th_sport = in6p->in6p_lport; 607 n->th_dport = in6p->in6p_fport; 608 } 609 #endif 610 n->th_seq = 0; 611 n->th_ack = 0; 612 n->th_x2 = 0; 613 n->th_off = 5; 614 n->th_flags = 0; 615 n->th_win = 0; 616 n->th_urp = 0; 617 return (m); 618 } 619 620 /* 621 * Send a single message to the TCP at address specified by 622 * the given TCP/IP header. If m == 0, then we make a copy 623 * of the tcpiphdr at ti and send directly to the addressed host. 624 * This is used to force keep alive messages out using the TCP 625 * template for a connection tp->t_template. If flags are given 626 * then we send a message back to the TCP which originated the 627 * segment ti, and discard the mbuf containing it and any other 628 * attached mbufs. 629 * 630 * In any case the ack and sequence number of the transmitted 631 * segment are as specified by the parameters. 632 */ 633 int 634 tcp_respond(struct tcpcb *tp, struct mbuf *mtemplate, struct mbuf *m, 635 struct tcphdr *th0, tcp_seq ack, tcp_seq seq, int flags) 636 { 637 struct route *ro; 638 int error, tlen, win = 0; 639 int hlen; 640 struct ip *ip; 641 #ifdef INET6 642 struct ip6_hdr *ip6; 643 #endif 644 int family; /* family on packet, not inpcb/in6pcb! */ 645 struct tcphdr *th; 646 647 if (tp != NULL && (flags & TH_RST) == 0) { 648 #ifdef DIAGNOSTIC 649 if (tp->t_inpcb && tp->t_in6pcb) 650 panic("tcp_respond: both t_inpcb and t_in6pcb are set"); 651 #endif 652 #ifdef INET 653 if (tp->t_inpcb) 654 win = sbspace(&tp->t_inpcb->inp_socket->so_rcv); 655 #endif 656 #ifdef INET6 657 if (tp->t_in6pcb) 658 win = sbspace(&tp->t_in6pcb->in6p_socket->so_rcv); 659 #endif 660 } 661 662 th = NULL; /* Quell uninitialized warning */ 663 ip = NULL; 664 #ifdef INET6 665 ip6 = NULL; 666 #endif 667 if (m == 0) { 668 if (!mtemplate) 669 return EINVAL; 670 671 /* get family information from template */ 672 switch (mtod(mtemplate, struct ip *)->ip_v) { 673 case 4: 674 family = AF_INET; 675 hlen = sizeof(struct ip); 676 break; 677 #ifdef INET6 678 case 6: 679 family = AF_INET6; 680 hlen = sizeof(struct ip6_hdr); 681 break; 682 #endif 683 default: 684 return EAFNOSUPPORT; 685 } 686 687 MGETHDR(m, M_DONTWAIT, MT_HEADER); 688 if (m) { 689 MCLAIM(m, &tcp_tx_mowner); 690 MCLGET(m, M_DONTWAIT); 691 if ((m->m_flags & M_EXT) == 0) { 692 m_free(m); 693 m = NULL; 694 } 695 } 696 if (m == NULL) 697 return (ENOBUFS); 698 699 tlen = 0; 700 701 m->m_data += max_linkhdr; 702 bcopy(mtod(mtemplate, void *), mtod(m, void *), 703 mtemplate->m_len); 704 switch (family) { 705 case AF_INET: 706 ip = mtod(m, struct ip *); 707 th = (struct tcphdr *)(ip + 1); 708 break; 709 #ifdef INET6 710 case AF_INET6: 711 ip6 = mtod(m, struct ip6_hdr *); 712 th = (struct tcphdr *)(ip6 + 1); 713 break; 714 #endif 715 #if 0 716 default: 717 /* noone will visit here */ 718 m_freem(m); 719 return EAFNOSUPPORT; 720 #endif 721 } 722 flags = TH_ACK; 723 } else { 724 725 if ((m->m_flags & M_PKTHDR) == 0) { 726 #if 0 727 printf("non PKTHDR to tcp_respond\n"); 728 #endif 729 m_freem(m); 730 return EINVAL; 731 } 732 #ifdef DIAGNOSTIC 733 if (!th0) 734 panic("th0 == NULL in tcp_respond"); 735 #endif 736 737 /* get family information from m */ 738 switch (mtod(m, struct ip *)->ip_v) { 739 case 4: 740 family = AF_INET; 741 hlen = sizeof(struct ip); 742 ip = mtod(m, struct ip *); 743 break; 744 #ifdef INET6 745 case 6: 746 family = AF_INET6; 747 hlen = sizeof(struct ip6_hdr); 748 ip6 = mtod(m, struct ip6_hdr *); 749 break; 750 #endif 751 default: 752 m_freem(m); 753 return EAFNOSUPPORT; 754 } 755 /* clear h/w csum flags inherited from rx packet */ 756 m->m_pkthdr.csum_flags = 0; 757 758 if ((flags & TH_SYN) == 0 || sizeof(*th0) > (th0->th_off << 2)) 759 tlen = sizeof(*th0); 760 else 761 tlen = th0->th_off << 2; 762 763 if (m->m_len > hlen + tlen && (m->m_flags & M_EXT) == 0 && 764 mtod(m, char *) + hlen == (char *)th0) { 765 m->m_len = hlen + tlen; 766 m_freem(m->m_next); 767 m->m_next = NULL; 768 } else { 769 struct mbuf *n; 770 771 #ifdef DIAGNOSTIC 772 if (max_linkhdr + hlen + tlen > MCLBYTES) { 773 m_freem(m); 774 return EMSGSIZE; 775 } 776 #endif 777 MGETHDR(n, M_DONTWAIT, MT_HEADER); 778 if (n && max_linkhdr + hlen + tlen > MHLEN) { 779 MCLGET(n, M_DONTWAIT); 780 if ((n->m_flags & M_EXT) == 0) { 781 m_freem(n); 782 n = NULL; 783 } 784 } 785 if (!n) { 786 m_freem(m); 787 return ENOBUFS; 788 } 789 790 MCLAIM(n, &tcp_tx_mowner); 791 n->m_data += max_linkhdr; 792 n->m_len = hlen + tlen; 793 m_copyback(n, 0, hlen, mtod(m, void *)); 794 m_copyback(n, hlen, tlen, (void *)th0); 795 796 m_freem(m); 797 m = n; 798 n = NULL; 799 } 800 801 #define xchg(a,b,type) { type t; t=a; a=b; b=t; } 802 switch (family) { 803 case AF_INET: 804 ip = mtod(m, struct ip *); 805 th = (struct tcphdr *)(ip + 1); 806 ip->ip_p = IPPROTO_TCP; 807 xchg(ip->ip_dst, ip->ip_src, struct in_addr); 808 ip->ip_p = IPPROTO_TCP; 809 break; 810 #ifdef INET6 811 case AF_INET6: 812 ip6 = mtod(m, struct ip6_hdr *); 813 th = (struct tcphdr *)(ip6 + 1); 814 ip6->ip6_nxt = IPPROTO_TCP; 815 xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr); 816 ip6->ip6_nxt = IPPROTO_TCP; 817 break; 818 #endif 819 #if 0 820 default: 821 /* noone will visit here */ 822 m_freem(m); 823 return EAFNOSUPPORT; 824 #endif 825 } 826 xchg(th->th_dport, th->th_sport, u_int16_t); 827 #undef xchg 828 tlen = 0; /*be friendly with the following code*/ 829 } 830 th->th_seq = htonl(seq); 831 th->th_ack = htonl(ack); 832 th->th_x2 = 0; 833 if ((flags & TH_SYN) == 0) { 834 if (tp) 835 win >>= tp->rcv_scale; 836 if (win > TCP_MAXWIN) 837 win = TCP_MAXWIN; 838 th->th_win = htons((u_int16_t)win); 839 th->th_off = sizeof (struct tcphdr) >> 2; 840 tlen += sizeof(*th); 841 } else 842 tlen += th->th_off << 2; 843 m->m_len = hlen + tlen; 844 m->m_pkthdr.len = hlen + tlen; 845 m_reset_rcvif(m); 846 th->th_flags = flags; 847 th->th_urp = 0; 848 849 switch (family) { 850 #ifdef INET 851 case AF_INET: 852 { 853 struct ipovly *ipov = (struct ipovly *)ip; 854 memset(ipov->ih_x1, 0, sizeof ipov->ih_x1); 855 ipov->ih_len = htons((u_int16_t)tlen); 856 857 th->th_sum = 0; 858 th->th_sum = in_cksum(m, hlen + tlen); 859 ip->ip_len = htons(hlen + tlen); 860 ip->ip_ttl = ip_defttl; 861 break; 862 } 863 #endif 864 #ifdef INET6 865 case AF_INET6: 866 { 867 th->th_sum = 0; 868 th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr), 869 tlen); 870 ip6->ip6_plen = htons(tlen); 871 if (tp && tp->t_in6pcb) 872 ip6->ip6_hlim = in6_selecthlim_rt(tp->t_in6pcb); 873 else 874 ip6->ip6_hlim = ip6_defhlim; 875 ip6->ip6_flow &= ~IPV6_FLOWINFO_MASK; 876 if (ip6_auto_flowlabel) { 877 ip6->ip6_flow |= 878 (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK); 879 } 880 break; 881 } 882 #endif 883 } 884 885 if (tp != NULL && tp->t_inpcb != NULL) { 886 ro = &tp->t_inpcb->inp_route; 887 #ifdef DIAGNOSTIC 888 if (family != AF_INET) 889 panic("tcp_respond: address family mismatch"); 890 if (!in_hosteq(ip->ip_dst, tp->t_inpcb->inp_faddr)) { 891 panic("tcp_respond: ip_dst %x != inp_faddr %x", 892 ntohl(ip->ip_dst.s_addr), 893 ntohl(tp->t_inpcb->inp_faddr.s_addr)); 894 } 895 #endif 896 } 897 #ifdef INET6 898 else if (tp != NULL && tp->t_in6pcb != NULL) { 899 ro = (struct route *)&tp->t_in6pcb->in6p_route; 900 #ifdef DIAGNOSTIC 901 if (family == AF_INET) { 902 if (!IN6_IS_ADDR_V4MAPPED(&tp->t_in6pcb->in6p_faddr)) 903 panic("tcp_respond: not mapped addr"); 904 if (memcmp(&ip->ip_dst, 905 &tp->t_in6pcb->in6p_faddr.s6_addr32[3], 906 sizeof(ip->ip_dst)) != 0) { 907 panic("tcp_respond: ip_dst != in6p_faddr"); 908 } 909 } else if (family == AF_INET6) { 910 if (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, 911 &tp->t_in6pcb->in6p_faddr)) 912 panic("tcp_respond: ip6_dst != in6p_faddr"); 913 } else 914 panic("tcp_respond: address family mismatch"); 915 #endif 916 } 917 #endif 918 else 919 ro = NULL; 920 921 switch (family) { 922 #ifdef INET 923 case AF_INET: 924 error = ip_output(m, NULL, ro, 925 (tp && tp->t_mtudisc ? IP_MTUDISC : 0), NULL, 926 tp ? tp->t_inpcb : NULL); 927 break; 928 #endif 929 #ifdef INET6 930 case AF_INET6: 931 error = ip6_output(m, NULL, ro, 0, NULL, 932 tp ? tp->t_in6pcb : NULL, NULL); 933 break; 934 #endif 935 default: 936 error = EAFNOSUPPORT; 937 break; 938 } 939 940 return (error); 941 } 942 943 /* 944 * Template TCPCB. Rather than zeroing a new TCPCB and initializing 945 * a bunch of members individually, we maintain this template for the 946 * static and mostly-static components of the TCPCB, and copy it into 947 * the new TCPCB instead. 948 */ 949 static struct tcpcb tcpcb_template = { 950 .t_srtt = TCPTV_SRTTBASE, 951 .t_rttmin = TCPTV_MIN, 952 953 .snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT, 954 .snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT, 955 .snd_numholes = 0, 956 .snd_cubic_wmax = 0, 957 .snd_cubic_wmax_last = 0, 958 .snd_cubic_ctime = 0, 959 960 .t_partialacks = -1, 961 .t_bytes_acked = 0, 962 .t_sndrexmitpack = 0, 963 .t_rcvoopack = 0, 964 .t_sndzerowin = 0, 965 }; 966 967 /* 968 * Updates the TCPCB template whenever a parameter that would affect 969 * the template is changed. 970 */ 971 void 972 tcp_tcpcb_template(void) 973 { 974 struct tcpcb *tp = &tcpcb_template; 975 int flags; 976 977 tp->t_peermss = tcp_mssdflt; 978 tp->t_ourmss = tcp_mssdflt; 979 tp->t_segsz = tcp_mssdflt; 980 981 flags = 0; 982 if (tcp_do_rfc1323 && tcp_do_win_scale) 983 flags |= TF_REQ_SCALE; 984 if (tcp_do_rfc1323 && tcp_do_timestamps) 985 flags |= TF_REQ_TSTMP; 986 tp->t_flags = flags; 987 988 /* 989 * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no 990 * rtt estimate. Set rttvar so that srtt + 2 * rttvar gives 991 * reasonable initial retransmit time. 992 */ 993 tp->t_rttvar = tcp_rttdflt * PR_SLOWHZ << (TCP_RTTVAR_SHIFT + 2 - 1); 994 TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), 995 TCPTV_MIN, TCPTV_REXMTMAX); 996 997 /* Keep Alive */ 998 tp->t_keepinit = tcp_keepinit; 999 tp->t_keepidle = tcp_keepidle; 1000 tp->t_keepintvl = tcp_keepintvl; 1001 tp->t_keepcnt = tcp_keepcnt; 1002 tp->t_maxidle = tp->t_keepcnt * tp->t_keepintvl; 1003 1004 /* MSL */ 1005 tp->t_msl = TCPTV_MSL; 1006 } 1007 1008 /* 1009 * Create a new TCP control block, making an 1010 * empty reassembly queue and hooking it to the argument 1011 * protocol control block. 1012 */ 1013 /* family selects inpcb, or in6pcb */ 1014 struct tcpcb * 1015 tcp_newtcpcb(int family, void *aux) 1016 { 1017 struct tcpcb *tp; 1018 int i; 1019 1020 /* XXX Consider using a pool_cache for speed. */ 1021 tp = pool_get(&tcpcb_pool, PR_NOWAIT); /* splsoftnet via tcp_usrreq */ 1022 if (tp == NULL) 1023 return (NULL); 1024 memcpy(tp, &tcpcb_template, sizeof(*tp)); 1025 TAILQ_INIT(&tp->segq); 1026 TAILQ_INIT(&tp->timeq); 1027 tp->t_family = family; /* may be overridden later on */ 1028 TAILQ_INIT(&tp->snd_holes); 1029 LIST_INIT(&tp->t_sc); /* XXX can template this */ 1030 1031 /* Don't sweat this loop; hopefully the compiler will unroll it. */ 1032 for (i = 0; i < TCPT_NTIMERS; i++) { 1033 callout_init(&tp->t_timer[i], CALLOUT_MPSAFE); 1034 TCP_TIMER_INIT(tp, i); 1035 } 1036 callout_init(&tp->t_delack_ch, CALLOUT_MPSAFE); 1037 1038 switch (family) { 1039 case AF_INET: 1040 { 1041 struct inpcb *inp = (struct inpcb *)aux; 1042 1043 inp->inp_ip.ip_ttl = ip_defttl; 1044 inp->inp_ppcb = (void *)tp; 1045 1046 tp->t_inpcb = inp; 1047 tp->t_mtudisc = ip_mtudisc; 1048 break; 1049 } 1050 #ifdef INET6 1051 case AF_INET6: 1052 { 1053 struct in6pcb *in6p = (struct in6pcb *)aux; 1054 1055 in6p->in6p_ip6.ip6_hlim = in6_selecthlim_rt(in6p); 1056 in6p->in6p_ppcb = (void *)tp; 1057 1058 tp->t_in6pcb = in6p; 1059 /* for IPv6, always try to run path MTU discovery */ 1060 tp->t_mtudisc = 1; 1061 break; 1062 } 1063 #endif /* INET6 */ 1064 default: 1065 for (i = 0; i < TCPT_NTIMERS; i++) 1066 callout_destroy(&tp->t_timer[i]); 1067 callout_destroy(&tp->t_delack_ch); 1068 pool_put(&tcpcb_pool, tp); /* splsoftnet via tcp_usrreq */ 1069 return (NULL); 1070 } 1071 1072 /* 1073 * Initialize our timebase. When we send timestamps, we take 1074 * the delta from tcp_now -- this means each connection always 1075 * gets a timebase of 1, which makes it, among other things, 1076 * more difficult to determine how long a system has been up, 1077 * and thus how many TCP sequence increments have occurred. 1078 * 1079 * We start with 1, because 0 doesn't work with linux, which 1080 * considers timestamp 0 in a SYN packet as a bug and disables 1081 * timestamps. 1082 */ 1083 tp->ts_timebase = tcp_now - 1; 1084 1085 tcp_congctl_select(tp, tcp_congctl_global_name); 1086 1087 return (tp); 1088 } 1089 1090 /* 1091 * Drop a TCP connection, reporting 1092 * the specified error. If connection is synchronized, 1093 * then send a RST to peer. 1094 */ 1095 struct tcpcb * 1096 tcp_drop(struct tcpcb *tp, int errno) 1097 { 1098 struct socket *so = NULL; 1099 1100 #ifdef DIAGNOSTIC 1101 if (tp->t_inpcb && tp->t_in6pcb) 1102 panic("tcp_drop: both t_inpcb and t_in6pcb are set"); 1103 #endif 1104 #ifdef INET 1105 if (tp->t_inpcb) 1106 so = tp->t_inpcb->inp_socket; 1107 #endif 1108 #ifdef INET6 1109 if (tp->t_in6pcb) 1110 so = tp->t_in6pcb->in6p_socket; 1111 #endif 1112 if (!so) 1113 return NULL; 1114 1115 if (TCPS_HAVERCVDSYN(tp->t_state)) { 1116 tp->t_state = TCPS_CLOSED; 1117 (void) tcp_output(tp); 1118 TCP_STATINC(TCP_STAT_DROPS); 1119 } else 1120 TCP_STATINC(TCP_STAT_CONNDROPS); 1121 if (errno == ETIMEDOUT && tp->t_softerror) 1122 errno = tp->t_softerror; 1123 so->so_error = errno; 1124 return (tcp_close(tp)); 1125 } 1126 1127 /* 1128 * Close a TCP control block: 1129 * discard all space held by the tcp 1130 * discard internet protocol block 1131 * wake up any sleepers 1132 */ 1133 struct tcpcb * 1134 tcp_close(struct tcpcb *tp) 1135 { 1136 struct inpcb *inp; 1137 #ifdef INET6 1138 struct in6pcb *in6p; 1139 #endif 1140 struct socket *so; 1141 #ifdef RTV_RTT 1142 struct rtentry *rt = NULL; 1143 #endif 1144 struct route *ro; 1145 int j; 1146 1147 inp = tp->t_inpcb; 1148 #ifdef INET6 1149 in6p = tp->t_in6pcb; 1150 #endif 1151 so = NULL; 1152 ro = NULL; 1153 if (inp) { 1154 so = inp->inp_socket; 1155 ro = &inp->inp_route; 1156 } 1157 #ifdef INET6 1158 else if (in6p) { 1159 so = in6p->in6p_socket; 1160 ro = (struct route *)&in6p->in6p_route; 1161 } 1162 #endif 1163 1164 #ifdef RTV_RTT 1165 /* 1166 * If we sent enough data to get some meaningful characteristics, 1167 * save them in the routing entry. 'Enough' is arbitrarily 1168 * defined as the sendpipesize (default 4K) * 16. This would 1169 * give us 16 rtt samples assuming we only get one sample per 1170 * window (the usual case on a long haul net). 16 samples is 1171 * enough for the srtt filter to converge to within 5% of the correct 1172 * value; fewer samples and we could save a very bogus rtt. 1173 * 1174 * Don't update the default route's characteristics and don't 1175 * update anything that the user "locked". 1176 */ 1177 if (SEQ_LT(tp->iss + so->so_snd.sb_hiwat * 16, tp->snd_max) && 1178 ro && (rt = rtcache_validate(ro)) != NULL && 1179 !in_nullhost(satocsin(rt_getkey(rt))->sin_addr)) { 1180 u_long i = 0; 1181 1182 if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) { 1183 i = tp->t_srtt * 1184 ((RTM_RTTUNIT / PR_SLOWHZ) >> (TCP_RTT_SHIFT + 2)); 1185 if (rt->rt_rmx.rmx_rtt && i) 1186 /* 1187 * filter this update to half the old & half 1188 * the new values, converting scale. 1189 * See route.h and tcp_var.h for a 1190 * description of the scaling constants. 1191 */ 1192 rt->rt_rmx.rmx_rtt = 1193 (rt->rt_rmx.rmx_rtt + i) / 2; 1194 else 1195 rt->rt_rmx.rmx_rtt = i; 1196 } 1197 if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) { 1198 i = tp->t_rttvar * 1199 ((RTM_RTTUNIT / PR_SLOWHZ) >> (TCP_RTTVAR_SHIFT + 2)); 1200 if (rt->rt_rmx.rmx_rttvar && i) 1201 rt->rt_rmx.rmx_rttvar = 1202 (rt->rt_rmx.rmx_rttvar + i) / 2; 1203 else 1204 rt->rt_rmx.rmx_rttvar = i; 1205 } 1206 /* 1207 * update the pipelimit (ssthresh) if it has been updated 1208 * already or if a pipesize was specified & the threshhold 1209 * got below half the pipesize. I.e., wait for bad news 1210 * before we start updating, then update on both good 1211 * and bad news. 1212 */ 1213 if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 && 1214 (i = tp->snd_ssthresh) && rt->rt_rmx.rmx_ssthresh) || 1215 i < (rt->rt_rmx.rmx_sendpipe / 2)) { 1216 /* 1217 * convert the limit from user data bytes to 1218 * packets then to packet data bytes. 1219 */ 1220 i = (i + tp->t_segsz / 2) / tp->t_segsz; 1221 if (i < 2) 1222 i = 2; 1223 i *= (u_long)(tp->t_segsz + sizeof (struct tcpiphdr)); 1224 if (rt->rt_rmx.rmx_ssthresh) 1225 rt->rt_rmx.rmx_ssthresh = 1226 (rt->rt_rmx.rmx_ssthresh + i) / 2; 1227 else 1228 rt->rt_rmx.rmx_ssthresh = i; 1229 } 1230 } 1231 rtcache_unref(rt, ro); 1232 #endif /* RTV_RTT */ 1233 /* free the reassembly queue, if any */ 1234 TCP_REASS_LOCK(tp); 1235 (void) tcp_freeq(tp); 1236 TCP_REASS_UNLOCK(tp); 1237 1238 /* free the SACK holes list. */ 1239 tcp_free_sackholes(tp); 1240 tcp_congctl_release(tp); 1241 syn_cache_cleanup(tp); 1242 1243 if (tp->t_template) { 1244 m_free(tp->t_template); 1245 tp->t_template = NULL; 1246 } 1247 1248 /* 1249 * Detaching the pcb will unlock the socket/tcpcb, and stopping 1250 * the timers can also drop the lock. We need to prevent access 1251 * to the tcpcb as it's half torn down. Flag the pcb as dead 1252 * (prevents access by timers) and only then detach it. 1253 */ 1254 tp->t_flags |= TF_DEAD; 1255 if (inp) { 1256 inp->inp_ppcb = 0; 1257 soisdisconnected(so); 1258 in_pcbdetach(inp); 1259 } 1260 #ifdef INET6 1261 else if (in6p) { 1262 in6p->in6p_ppcb = 0; 1263 soisdisconnected(so); 1264 in6_pcbdetach(in6p); 1265 } 1266 #endif 1267 /* 1268 * pcb is no longer visble elsewhere, so we can safely release 1269 * the lock in callout_halt() if needed. 1270 */ 1271 TCP_STATINC(TCP_STAT_CLOSED); 1272 for (j = 0; j < TCPT_NTIMERS; j++) { 1273 callout_halt(&tp->t_timer[j], softnet_lock); 1274 callout_destroy(&tp->t_timer[j]); 1275 } 1276 callout_halt(&tp->t_delack_ch, softnet_lock); 1277 callout_destroy(&tp->t_delack_ch); 1278 pool_put(&tcpcb_pool, tp); 1279 1280 return NULL; 1281 } 1282 1283 int 1284 tcp_freeq(struct tcpcb *tp) 1285 { 1286 struct ipqent *qe; 1287 int rv = 0; 1288 #ifdef TCPREASS_DEBUG 1289 int i = 0; 1290 #endif 1291 1292 TCP_REASS_LOCK_CHECK(tp); 1293 1294 while ((qe = TAILQ_FIRST(&tp->segq)) != NULL) { 1295 #ifdef TCPREASS_DEBUG 1296 printf("tcp_freeq[%p,%d]: %u:%u(%u) 0x%02x\n", 1297 tp, i++, qe->ipqe_seq, qe->ipqe_seq + qe->ipqe_len, 1298 qe->ipqe_len, qe->ipqe_flags & (TH_SYN|TH_FIN|TH_RST)); 1299 #endif 1300 TAILQ_REMOVE(&tp->segq, qe, ipqe_q); 1301 TAILQ_REMOVE(&tp->timeq, qe, ipqe_timeq); 1302 m_freem(qe->ipqe_m); 1303 tcpipqent_free(qe); 1304 rv = 1; 1305 } 1306 tp->t_segqlen = 0; 1307 KASSERT(TAILQ_EMPTY(&tp->timeq)); 1308 return (rv); 1309 } 1310 1311 void 1312 tcp_fasttimo(void) 1313 { 1314 if (tcp_drainwanted) { 1315 tcp_drain(); 1316 tcp_drainwanted = 0; 1317 } 1318 } 1319 1320 void 1321 tcp_drainstub(void) 1322 { 1323 tcp_drainwanted = 1; 1324 } 1325 1326 /* 1327 * Protocol drain routine. Called when memory is in short supply. 1328 * Called from pr_fasttimo thus a callout context. 1329 */ 1330 void 1331 tcp_drain(void) 1332 { 1333 struct inpcb_hdr *inph; 1334 struct tcpcb *tp; 1335 1336 mutex_enter(softnet_lock); 1337 KERNEL_LOCK(1, NULL); 1338 1339 /* 1340 * Free the sequence queue of all TCP connections. 1341 */ 1342 TAILQ_FOREACH(inph, &tcbtable.inpt_queue, inph_queue) { 1343 switch (inph->inph_af) { 1344 case AF_INET: 1345 tp = intotcpcb((struct inpcb *)inph); 1346 break; 1347 #ifdef INET6 1348 case AF_INET6: 1349 tp = in6totcpcb((struct in6pcb *)inph); 1350 break; 1351 #endif 1352 default: 1353 tp = NULL; 1354 break; 1355 } 1356 if (tp != NULL) { 1357 /* 1358 * We may be called from a device's interrupt 1359 * context. If the tcpcb is already busy, 1360 * just bail out now. 1361 */ 1362 if (tcp_reass_lock_try(tp) == 0) 1363 continue; 1364 if (tcp_freeq(tp)) 1365 TCP_STATINC(TCP_STAT_CONNSDRAINED); 1366 TCP_REASS_UNLOCK(tp); 1367 } 1368 } 1369 1370 KERNEL_UNLOCK_ONE(NULL); 1371 mutex_exit(softnet_lock); 1372 } 1373 1374 /* 1375 * Notify a tcp user of an asynchronous error; 1376 * store error as soft error, but wake up user 1377 * (for now, won't do anything until can select for soft error). 1378 */ 1379 void 1380 tcp_notify(struct inpcb *inp, int error) 1381 { 1382 struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb; 1383 struct socket *so = inp->inp_socket; 1384 1385 /* 1386 * Ignore some errors if we are hooked up. 1387 * If connection hasn't completed, has retransmitted several times, 1388 * and receives a second error, give up now. This is better 1389 * than waiting a long time to establish a connection that 1390 * can never complete. 1391 */ 1392 if (tp->t_state == TCPS_ESTABLISHED && 1393 (error == EHOSTUNREACH || error == ENETUNREACH || 1394 error == EHOSTDOWN)) { 1395 return; 1396 } else if (TCPS_HAVEESTABLISHED(tp->t_state) == 0 && 1397 tp->t_rxtshift > 3 && tp->t_softerror) 1398 so->so_error = error; 1399 else 1400 tp->t_softerror = error; 1401 cv_broadcast(&so->so_cv); 1402 sorwakeup(so); 1403 sowwakeup(so); 1404 } 1405 1406 #ifdef INET6 1407 void 1408 tcp6_notify(struct in6pcb *in6p, int error) 1409 { 1410 struct tcpcb *tp = (struct tcpcb *)in6p->in6p_ppcb; 1411 struct socket *so = in6p->in6p_socket; 1412 1413 /* 1414 * Ignore some errors if we are hooked up. 1415 * If connection hasn't completed, has retransmitted several times, 1416 * and receives a second error, give up now. This is better 1417 * than waiting a long time to establish a connection that 1418 * can never complete. 1419 */ 1420 if (tp->t_state == TCPS_ESTABLISHED && 1421 (error == EHOSTUNREACH || error == ENETUNREACH || 1422 error == EHOSTDOWN)) { 1423 return; 1424 } else if (TCPS_HAVEESTABLISHED(tp->t_state) == 0 && 1425 tp->t_rxtshift > 3 && tp->t_softerror) 1426 so->so_error = error; 1427 else 1428 tp->t_softerror = error; 1429 cv_broadcast(&so->so_cv); 1430 sorwakeup(so); 1431 sowwakeup(so); 1432 } 1433 #endif 1434 1435 #ifdef INET6 1436 void * 1437 tcp6_ctlinput(int cmd, const struct sockaddr *sa, void *d) 1438 { 1439 struct tcphdr th; 1440 void (*notify)(struct in6pcb *, int) = tcp6_notify; 1441 int nmatch; 1442 struct ip6_hdr *ip6; 1443 const struct sockaddr_in6 *sa6_src = NULL; 1444 const struct sockaddr_in6 *sa6 = (const struct sockaddr_in6 *)sa; 1445 struct mbuf *m; 1446 int off; 1447 1448 if (sa->sa_family != AF_INET6 || 1449 sa->sa_len != sizeof(struct sockaddr_in6)) 1450 return NULL; 1451 if ((unsigned)cmd >= PRC_NCMDS) 1452 return NULL; 1453 else if (cmd == PRC_QUENCH) { 1454 /* 1455 * Don't honor ICMP Source Quench messages meant for 1456 * TCP connections. 1457 */ 1458 return NULL; 1459 } else if (PRC_IS_REDIRECT(cmd)) 1460 notify = in6_rtchange, d = NULL; 1461 else if (cmd == PRC_MSGSIZE) 1462 ; /* special code is present, see below */ 1463 else if (cmd == PRC_HOSTDEAD) 1464 d = NULL; 1465 else if (inet6ctlerrmap[cmd] == 0) 1466 return NULL; 1467 1468 /* if the parameter is from icmp6, decode it. */ 1469 if (d != NULL) { 1470 struct ip6ctlparam *ip6cp = (struct ip6ctlparam *)d; 1471 m = ip6cp->ip6c_m; 1472 ip6 = ip6cp->ip6c_ip6; 1473 off = ip6cp->ip6c_off; 1474 sa6_src = ip6cp->ip6c_src; 1475 } else { 1476 m = NULL; 1477 ip6 = NULL; 1478 sa6_src = &sa6_any; 1479 off = 0; 1480 } 1481 1482 if (ip6) { 1483 /* 1484 * XXX: We assume that when ip6 is non NULL, 1485 * M and OFF are valid. 1486 */ 1487 1488 /* check if we can safely examine src and dst ports */ 1489 if (m->m_pkthdr.len < off + sizeof(th)) { 1490 if (cmd == PRC_MSGSIZE) 1491 icmp6_mtudisc_update((struct ip6ctlparam *)d, 0); 1492 return NULL; 1493 } 1494 1495 memset(&th, 0, sizeof(th)); 1496 m_copydata(m, off, sizeof(th), (void *)&th); 1497 1498 if (cmd == PRC_MSGSIZE) { 1499 int valid = 0; 1500 1501 /* 1502 * Check to see if we have a valid TCP connection 1503 * corresponding to the address in the ICMPv6 message 1504 * payload. 1505 */ 1506 if (in6_pcblookup_connect(&tcbtable, &sa6->sin6_addr, 1507 th.th_dport, 1508 (const struct in6_addr *)&sa6_src->sin6_addr, 1509 th.th_sport, 0, 0)) 1510 valid++; 1511 1512 /* 1513 * Depending on the value of "valid" and routing table 1514 * size (mtudisc_{hi,lo}wat), we will: 1515 * - recalcurate the new MTU and create the 1516 * corresponding routing entry, or 1517 * - ignore the MTU change notification. 1518 */ 1519 icmp6_mtudisc_update((struct ip6ctlparam *)d, valid); 1520 1521 /* 1522 * no need to call in6_pcbnotify, it should have been 1523 * called via callback if necessary 1524 */ 1525 return NULL; 1526 } 1527 1528 nmatch = in6_pcbnotify(&tcbtable, sa, th.th_dport, 1529 (const struct sockaddr *)sa6_src, th.th_sport, cmd, NULL, notify); 1530 if (nmatch == 0 && syn_cache_count && 1531 (inet6ctlerrmap[cmd] == EHOSTUNREACH || 1532 inet6ctlerrmap[cmd] == ENETUNREACH || 1533 inet6ctlerrmap[cmd] == EHOSTDOWN)) 1534 syn_cache_unreach((const struct sockaddr *)sa6_src, 1535 sa, &th); 1536 } else { 1537 (void) in6_pcbnotify(&tcbtable, sa, 0, 1538 (const struct sockaddr *)sa6_src, 0, cmd, NULL, notify); 1539 } 1540 1541 return NULL; 1542 } 1543 #endif 1544 1545 #ifdef INET 1546 /* assumes that ip header and tcp header are contiguous on mbuf */ 1547 void * 1548 tcp_ctlinput(int cmd, const struct sockaddr *sa, void *v) 1549 { 1550 struct ip *ip = v; 1551 struct tcphdr *th; 1552 struct icmp *icp; 1553 extern const int inetctlerrmap[]; 1554 void (*notify)(struct inpcb *, int) = tcp_notify; 1555 int errno; 1556 int nmatch; 1557 struct tcpcb *tp; 1558 u_int mtu; 1559 tcp_seq seq; 1560 struct inpcb *inp; 1561 #ifdef INET6 1562 struct in6pcb *in6p; 1563 struct in6_addr src6, dst6; 1564 #endif 1565 1566 if (sa->sa_family != AF_INET || 1567 sa->sa_len != sizeof(struct sockaddr_in)) 1568 return NULL; 1569 if ((unsigned)cmd >= PRC_NCMDS) 1570 return NULL; 1571 errno = inetctlerrmap[cmd]; 1572 if (cmd == PRC_QUENCH) 1573 /* 1574 * Don't honor ICMP Source Quench messages meant for 1575 * TCP connections. 1576 */ 1577 return NULL; 1578 else if (PRC_IS_REDIRECT(cmd)) 1579 notify = in_rtchange, ip = 0; 1580 else if (cmd == PRC_MSGSIZE && ip && ip->ip_v == 4) { 1581 /* 1582 * Check to see if we have a valid TCP connection 1583 * corresponding to the address in the ICMP message 1584 * payload. 1585 * 1586 * Boundary check is made in icmp_input(), with ICMP_ADVLENMIN. 1587 */ 1588 th = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2)); 1589 #ifdef INET6 1590 in6_in_2_v4mapin6(&ip->ip_src, &src6); 1591 in6_in_2_v4mapin6(&ip->ip_dst, &dst6); 1592 #endif 1593 if ((inp = in_pcblookup_connect(&tcbtable, ip->ip_dst, 1594 th->th_dport, ip->ip_src, th->th_sport, 0)) != NULL) 1595 #ifdef INET6 1596 in6p = NULL; 1597 #else 1598 ; 1599 #endif 1600 #ifdef INET6 1601 else if ((in6p = in6_pcblookup_connect(&tcbtable, &dst6, 1602 th->th_dport, &src6, th->th_sport, 0, 0)) != NULL) 1603 ; 1604 #endif 1605 else 1606 return NULL; 1607 1608 /* 1609 * Now that we've validated that we are actually communicating 1610 * with the host indicated in the ICMP message, locate the 1611 * ICMP header, recalculate the new MTU, and create the 1612 * corresponding routing entry. 1613 */ 1614 icp = (struct icmp *)((char *)ip - 1615 offsetof(struct icmp, icmp_ip)); 1616 if (inp) { 1617 if ((tp = intotcpcb(inp)) == NULL) 1618 return NULL; 1619 } 1620 #ifdef INET6 1621 else if (in6p) { 1622 if ((tp = in6totcpcb(in6p)) == NULL) 1623 return NULL; 1624 } 1625 #endif 1626 else 1627 return NULL; 1628 seq = ntohl(th->th_seq); 1629 if (SEQ_LT(seq, tp->snd_una) || SEQ_GT(seq, tp->snd_max)) 1630 return NULL; 1631 /* 1632 * If the ICMP message advertises a Next-Hop MTU 1633 * equal or larger than the maximum packet size we have 1634 * ever sent, drop the message. 1635 */ 1636 mtu = (u_int)ntohs(icp->icmp_nextmtu); 1637 if (mtu >= tp->t_pmtud_mtu_sent) 1638 return NULL; 1639 if (mtu >= tcp_hdrsz(tp) + tp->t_pmtud_mss_acked) { 1640 /* 1641 * Calculate new MTU, and create corresponding 1642 * route (traditional PMTUD). 1643 */ 1644 tp->t_flags &= ~TF_PMTUD_PEND; 1645 icmp_mtudisc(icp, ip->ip_dst); 1646 } else { 1647 /* 1648 * Record the information got in the ICMP 1649 * message; act on it later. 1650 * If we had already recorded an ICMP message, 1651 * replace the old one only if the new message 1652 * refers to an older TCP segment 1653 */ 1654 if (tp->t_flags & TF_PMTUD_PEND) { 1655 if (SEQ_LT(tp->t_pmtud_th_seq, seq)) 1656 return NULL; 1657 } else 1658 tp->t_flags |= TF_PMTUD_PEND; 1659 tp->t_pmtud_th_seq = seq; 1660 tp->t_pmtud_nextmtu = icp->icmp_nextmtu; 1661 tp->t_pmtud_ip_len = icp->icmp_ip.ip_len; 1662 tp->t_pmtud_ip_hl = icp->icmp_ip.ip_hl; 1663 } 1664 return NULL; 1665 } else if (cmd == PRC_HOSTDEAD) 1666 ip = 0; 1667 else if (errno == 0) 1668 return NULL; 1669 if (ip && ip->ip_v == 4 && sa->sa_family == AF_INET) { 1670 th = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2)); 1671 nmatch = in_pcbnotify(&tcbtable, satocsin(sa)->sin_addr, 1672 th->th_dport, ip->ip_src, th->th_sport, errno, notify); 1673 if (nmatch == 0 && syn_cache_count && 1674 (inetctlerrmap[cmd] == EHOSTUNREACH || 1675 inetctlerrmap[cmd] == ENETUNREACH || 1676 inetctlerrmap[cmd] == EHOSTDOWN)) { 1677 struct sockaddr_in sin; 1678 memset(&sin, 0, sizeof(sin)); 1679 sin.sin_len = sizeof(sin); 1680 sin.sin_family = AF_INET; 1681 sin.sin_port = th->th_sport; 1682 sin.sin_addr = ip->ip_src; 1683 syn_cache_unreach((struct sockaddr *)&sin, sa, th); 1684 } 1685 1686 /* XXX mapped address case */ 1687 } else 1688 in_pcbnotifyall(&tcbtable, satocsin(sa)->sin_addr, errno, 1689 notify); 1690 return NULL; 1691 } 1692 1693 /* 1694 * When a source quench is received, we are being notified of congestion. 1695 * Close the congestion window down to the Loss Window (one segment). 1696 * We will gradually open it again as we proceed. 1697 */ 1698 void 1699 tcp_quench(struct inpcb *inp, int errno) 1700 { 1701 struct tcpcb *tp = intotcpcb(inp); 1702 1703 if (tp) { 1704 tp->snd_cwnd = tp->t_segsz; 1705 tp->t_bytes_acked = 0; 1706 } 1707 } 1708 #endif 1709 1710 #ifdef INET6 1711 void 1712 tcp6_quench(struct in6pcb *in6p, int errno) 1713 { 1714 struct tcpcb *tp = in6totcpcb(in6p); 1715 1716 if (tp) { 1717 tp->snd_cwnd = tp->t_segsz; 1718 tp->t_bytes_acked = 0; 1719 } 1720 } 1721 #endif 1722 1723 #ifdef INET 1724 /* 1725 * Path MTU Discovery handlers. 1726 */ 1727 void 1728 tcp_mtudisc_callback(struct in_addr faddr) 1729 { 1730 #ifdef INET6 1731 struct in6_addr in6; 1732 #endif 1733 1734 in_pcbnotifyall(&tcbtable, faddr, EMSGSIZE, tcp_mtudisc); 1735 #ifdef INET6 1736 in6_in_2_v4mapin6(&faddr, &in6); 1737 tcp6_mtudisc_callback(&in6); 1738 #endif 1739 } 1740 1741 /* 1742 * On receipt of path MTU corrections, flush old route and replace it 1743 * with the new one. Retransmit all unacknowledged packets, to ensure 1744 * that all packets will be received. 1745 */ 1746 void 1747 tcp_mtudisc(struct inpcb *inp, int errno) 1748 { 1749 struct tcpcb *tp = intotcpcb(inp); 1750 struct rtentry *rt; 1751 1752 if (tp == NULL) 1753 return; 1754 1755 rt = in_pcbrtentry(inp); 1756 if (rt != NULL) { 1757 /* 1758 * If this was not a host route, remove and realloc. 1759 */ 1760 if ((rt->rt_flags & RTF_HOST) == 0) { 1761 in_pcbrtentry_unref(rt, inp); 1762 in_rtchange(inp, errno); 1763 if ((rt = in_pcbrtentry(inp)) == NULL) 1764 return; 1765 } 1766 1767 /* 1768 * Slow start out of the error condition. We 1769 * use the MTU because we know it's smaller 1770 * than the previously transmitted segment. 1771 * 1772 * Note: This is more conservative than the 1773 * suggestion in draft-floyd-incr-init-win-03. 1774 */ 1775 if (rt->rt_rmx.rmx_mtu != 0) 1776 tp->snd_cwnd = 1777 TCP_INITIAL_WINDOW(tcp_init_win, 1778 rt->rt_rmx.rmx_mtu); 1779 in_pcbrtentry_unref(rt, inp); 1780 } 1781 1782 /* 1783 * Resend unacknowledged packets. 1784 */ 1785 tp->snd_nxt = tp->sack_newdata = tp->snd_una; 1786 tcp_output(tp); 1787 } 1788 #endif /* INET */ 1789 1790 #ifdef INET6 1791 /* 1792 * Path MTU Discovery handlers. 1793 */ 1794 void 1795 tcp6_mtudisc_callback(struct in6_addr *faddr) 1796 { 1797 struct sockaddr_in6 sin6; 1798 1799 memset(&sin6, 0, sizeof(sin6)); 1800 sin6.sin6_family = AF_INET6; 1801 sin6.sin6_len = sizeof(struct sockaddr_in6); 1802 sin6.sin6_addr = *faddr; 1803 (void) in6_pcbnotify(&tcbtable, (struct sockaddr *)&sin6, 0, 1804 (const struct sockaddr *)&sa6_any, 0, PRC_MSGSIZE, NULL, tcp6_mtudisc); 1805 } 1806 1807 void 1808 tcp6_mtudisc(struct in6pcb *in6p, int errno) 1809 { 1810 struct tcpcb *tp = in6totcpcb(in6p); 1811 struct rtentry *rt; 1812 1813 if (tp == NULL) 1814 return; 1815 1816 rt = in6_pcbrtentry(in6p); 1817 if (rt != NULL) { 1818 /* 1819 * If this was not a host route, remove and realloc. 1820 */ 1821 if ((rt->rt_flags & RTF_HOST) == 0) { 1822 in6_pcbrtentry_unref(rt, in6p); 1823 in6_rtchange(in6p, errno); 1824 rt = in6_pcbrtentry(in6p); 1825 if (rt == NULL) 1826 return; 1827 } 1828 1829 /* 1830 * Slow start out of the error condition. We 1831 * use the MTU because we know it's smaller 1832 * than the previously transmitted segment. 1833 * 1834 * Note: This is more conservative than the 1835 * suggestion in draft-floyd-incr-init-win-03. 1836 */ 1837 if (rt->rt_rmx.rmx_mtu != 0) { 1838 tp->snd_cwnd = TCP_INITIAL_WINDOW(tcp_init_win, 1839 rt->rt_rmx.rmx_mtu); 1840 } 1841 in6_pcbrtentry_unref(rt, in6p); 1842 } 1843 1844 /* 1845 * Resend unacknowledged packets. 1846 */ 1847 tp->snd_nxt = tp->sack_newdata = tp->snd_una; 1848 tcp_output(tp); 1849 } 1850 #endif /* INET6 */ 1851 1852 /* 1853 * Compute the MSS to advertise to the peer. Called only during 1854 * the 3-way handshake. If we are the server (peer initiated 1855 * connection), we are called with a pointer to the interface 1856 * on which the SYN packet arrived. If we are the client (we 1857 * initiated connection), we are called with a pointer to the 1858 * interface out which this connection should go. 1859 * 1860 * NOTE: Do not subtract IP option/extension header size nor IPsec 1861 * header size from MSS advertisement. MSS option must hold the maximum 1862 * segment size we can accept, so it must always be: 1863 * max(if mtu) - ip header - tcp header 1864 */ 1865 u_long 1866 tcp_mss_to_advertise(const struct ifnet *ifp, int af) 1867 { 1868 extern u_long in_maxmtu; 1869 u_long mss = 0; 1870 u_long hdrsiz; 1871 1872 /* 1873 * In order to avoid defeating path MTU discovery on the peer, 1874 * we advertise the max MTU of all attached networks as our MSS, 1875 * per RFC 1191, section 3.1. 1876 * 1877 * We provide the option to advertise just the MTU of 1878 * the interface on which we hope this connection will 1879 * be receiving. If we are responding to a SYN, we 1880 * will have a pretty good idea about this, but when 1881 * initiating a connection there is a bit more doubt. 1882 * 1883 * We also need to ensure that loopback has a large enough 1884 * MSS, as the loopback MTU is never included in in_maxmtu. 1885 */ 1886 1887 if (ifp != NULL) 1888 switch (af) { 1889 case AF_INET: 1890 mss = ifp->if_mtu; 1891 break; 1892 #ifdef INET6 1893 case AF_INET6: 1894 mss = IN6_LINKMTU(ifp); 1895 break; 1896 #endif 1897 } 1898 1899 if (tcp_mss_ifmtu == 0) 1900 switch (af) { 1901 case AF_INET: 1902 mss = max(in_maxmtu, mss); 1903 break; 1904 #ifdef INET6 1905 case AF_INET6: 1906 mss = max(in6_maxmtu, mss); 1907 break; 1908 #endif 1909 } 1910 1911 switch (af) { 1912 case AF_INET: 1913 hdrsiz = sizeof(struct ip); 1914 break; 1915 #ifdef INET6 1916 case AF_INET6: 1917 hdrsiz = sizeof(struct ip6_hdr); 1918 break; 1919 #endif 1920 default: 1921 hdrsiz = 0; 1922 break; 1923 } 1924 hdrsiz += sizeof(struct tcphdr); 1925 if (mss > hdrsiz) 1926 mss -= hdrsiz; 1927 1928 mss = max(tcp_mssdflt, mss); 1929 return (mss); 1930 } 1931 1932 /* 1933 * Set connection variables based on the peer's advertised MSS. 1934 * We are passed the TCPCB for the actual connection. If we 1935 * are the server, we are called by the compressed state engine 1936 * when the 3-way handshake is complete. If we are the client, 1937 * we are called when we receive the SYN,ACK from the server. 1938 * 1939 * NOTE: Our advertised MSS value must be initialized in the TCPCB 1940 * before this routine is called! 1941 */ 1942 void 1943 tcp_mss_from_peer(struct tcpcb *tp, int offer) 1944 { 1945 struct socket *so; 1946 #if defined(RTV_SPIPE) || defined(RTV_SSTHRESH) 1947 struct rtentry *rt; 1948 #endif 1949 u_long bufsize; 1950 int mss; 1951 1952 #ifdef DIAGNOSTIC 1953 if (tp->t_inpcb && tp->t_in6pcb) 1954 panic("tcp_mss_from_peer: both t_inpcb and t_in6pcb are set"); 1955 #endif 1956 so = NULL; 1957 rt = NULL; 1958 #ifdef INET 1959 if (tp->t_inpcb) { 1960 so = tp->t_inpcb->inp_socket; 1961 #if defined(RTV_SPIPE) || defined(RTV_SSTHRESH) 1962 rt = in_pcbrtentry(tp->t_inpcb); 1963 #endif 1964 } 1965 #endif 1966 #ifdef INET6 1967 if (tp->t_in6pcb) { 1968 so = tp->t_in6pcb->in6p_socket; 1969 #if defined(RTV_SPIPE) || defined(RTV_SSTHRESH) 1970 rt = in6_pcbrtentry(tp->t_in6pcb); 1971 #endif 1972 } 1973 #endif 1974 1975 /* 1976 * As per RFC1122, use the default MSS value, unless they 1977 * sent us an offer. Do not accept offers less than 256 bytes. 1978 */ 1979 mss = tcp_mssdflt; 1980 if (offer) 1981 mss = offer; 1982 mss = max(mss, 256); /* sanity */ 1983 tp->t_peermss = mss; 1984 mss -= tcp_optlen(tp); 1985 #ifdef INET 1986 if (tp->t_inpcb) 1987 mss -= ip_optlen(tp->t_inpcb); 1988 #endif 1989 #ifdef INET6 1990 if (tp->t_in6pcb) 1991 mss -= ip6_optlen(tp->t_in6pcb); 1992 #endif 1993 1994 /* 1995 * If there's a pipesize, change the socket buffer to that size. 1996 * Make the socket buffer an integral number of MSS units. If 1997 * the MSS is larger than the socket buffer, artificially decrease 1998 * the MSS. 1999 */ 2000 #ifdef RTV_SPIPE 2001 if (rt != NULL && rt->rt_rmx.rmx_sendpipe != 0) 2002 bufsize = rt->rt_rmx.rmx_sendpipe; 2003 else 2004 #endif 2005 { 2006 KASSERT(so != NULL); 2007 bufsize = so->so_snd.sb_hiwat; 2008 } 2009 if (bufsize < mss) 2010 mss = bufsize; 2011 else { 2012 bufsize = roundup(bufsize, mss); 2013 if (bufsize > sb_max) 2014 bufsize = sb_max; 2015 (void) sbreserve(&so->so_snd, bufsize, so); 2016 } 2017 tp->t_segsz = mss; 2018 2019 #ifdef RTV_SSTHRESH 2020 if (rt != NULL && rt->rt_rmx.rmx_ssthresh) { 2021 /* 2022 * There's some sort of gateway or interface buffer 2023 * limit on the path. Use this to set the slow 2024 * start threshold, but set the threshold to no less 2025 * than 2 * MSS. 2026 */ 2027 tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh); 2028 } 2029 #endif 2030 #if defined(RTV_SPIPE) || defined(RTV_SSTHRESH) 2031 #ifdef INET 2032 if (tp->t_inpcb) 2033 in_pcbrtentry_unref(rt, tp->t_inpcb); 2034 #endif 2035 #ifdef INET6 2036 if (tp->t_in6pcb) 2037 in6_pcbrtentry_unref(rt, tp->t_in6pcb); 2038 #endif 2039 #endif 2040 } 2041 2042 /* 2043 * Processing necessary when a TCP connection is established. 2044 */ 2045 void 2046 tcp_established(struct tcpcb *tp) 2047 { 2048 struct socket *so; 2049 #ifdef RTV_RPIPE 2050 struct rtentry *rt; 2051 #endif 2052 u_long bufsize; 2053 2054 #ifdef DIAGNOSTIC 2055 if (tp->t_inpcb && tp->t_in6pcb) 2056 panic("tcp_established: both t_inpcb and t_in6pcb are set"); 2057 #endif 2058 so = NULL; 2059 rt = NULL; 2060 #ifdef INET 2061 /* This is a while() to reduce the dreadful stairstepping below */ 2062 while (tp->t_inpcb) { 2063 so = tp->t_inpcb->inp_socket; 2064 #if defined(RTV_RPIPE) 2065 rt = in_pcbrtentry(tp->t_inpcb); 2066 #endif 2067 if (__predict_true(tcp_msl_enable)) { 2068 if (tp->t_inpcb->inp_laddr.s_addr == INADDR_LOOPBACK) { 2069 tp->t_msl = tcp_msl_loop ? tcp_msl_loop : (TCPTV_MSL >> 2); 2070 break; 2071 } 2072 2073 if (__predict_false(tcp_rttlocal)) { 2074 /* This may be adjusted by tcp_input */ 2075 tp->t_msl = tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1); 2076 break; 2077 } 2078 if (in_localaddr(tp->t_inpcb->inp_faddr)) { 2079 tp->t_msl = tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1); 2080 break; 2081 } 2082 } 2083 tp->t_msl = tcp_msl_remote ? tcp_msl_remote : TCPTV_MSL; 2084 break; 2085 } 2086 #endif 2087 #ifdef INET6 2088 /* The !tp->t_inpcb lets the compiler know it can't be v4 *and* v6 */ 2089 while (!tp->t_inpcb && tp->t_in6pcb) { 2090 so = tp->t_in6pcb->in6p_socket; 2091 #if defined(RTV_RPIPE) 2092 rt = in6_pcbrtentry(tp->t_in6pcb); 2093 #endif 2094 if (__predict_true(tcp_msl_enable)) { 2095 extern const struct in6_addr in6addr_loopback; 2096 2097 if (IN6_ARE_ADDR_EQUAL(&tp->t_in6pcb->in6p_laddr, 2098 &in6addr_loopback)) { 2099 tp->t_msl = tcp_msl_loop ? tcp_msl_loop : (TCPTV_MSL >> 2); 2100 break; 2101 } 2102 2103 if (__predict_false(tcp_rttlocal)) { 2104 /* This may be adjusted by tcp_input */ 2105 tp->t_msl = tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1); 2106 break; 2107 } 2108 if (in6_localaddr(&tp->t_in6pcb->in6p_faddr)) { 2109 tp->t_msl = tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1); 2110 break; 2111 } 2112 } 2113 tp->t_msl = tcp_msl_remote ? tcp_msl_remote : TCPTV_MSL; 2114 break; 2115 } 2116 #endif 2117 2118 tp->t_state = TCPS_ESTABLISHED; 2119 TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepidle); 2120 2121 #ifdef RTV_RPIPE 2122 if (rt != NULL && rt->rt_rmx.rmx_recvpipe != 0) 2123 bufsize = rt->rt_rmx.rmx_recvpipe; 2124 else 2125 #endif 2126 { 2127 KASSERT(so != NULL); 2128 bufsize = so->so_rcv.sb_hiwat; 2129 } 2130 if (bufsize > tp->t_ourmss) { 2131 bufsize = roundup(bufsize, tp->t_ourmss); 2132 if (bufsize > sb_max) 2133 bufsize = sb_max; 2134 (void) sbreserve(&so->so_rcv, bufsize, so); 2135 } 2136 #ifdef RTV_RPIPE 2137 #ifdef INET 2138 if (tp->t_inpcb) 2139 in_pcbrtentry_unref(rt, tp->t_inpcb); 2140 #endif 2141 #ifdef INET6 2142 if (tp->t_in6pcb) 2143 in6_pcbrtentry_unref(rt, tp->t_in6pcb); 2144 #endif 2145 #endif 2146 } 2147 2148 /* 2149 * Check if there's an initial rtt or rttvar. Convert from the 2150 * route-table units to scaled multiples of the slow timeout timer. 2151 * Called only during the 3-way handshake. 2152 */ 2153 void 2154 tcp_rmx_rtt(struct tcpcb *tp) 2155 { 2156 #ifdef RTV_RTT 2157 struct rtentry *rt = NULL; 2158 int rtt; 2159 2160 #ifdef DIAGNOSTIC 2161 if (tp->t_inpcb && tp->t_in6pcb) 2162 panic("tcp_rmx_rtt: both t_inpcb and t_in6pcb are set"); 2163 #endif 2164 #ifdef INET 2165 if (tp->t_inpcb) 2166 rt = in_pcbrtentry(tp->t_inpcb); 2167 #endif 2168 #ifdef INET6 2169 if (tp->t_in6pcb) 2170 rt = in6_pcbrtentry(tp->t_in6pcb); 2171 #endif 2172 if (rt == NULL) 2173 return; 2174 2175 if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) { 2176 /* 2177 * XXX The lock bit for MTU indicates that the value 2178 * is also a minimum value; this is subject to time. 2179 */ 2180 if (rt->rt_rmx.rmx_locks & RTV_RTT) 2181 TCPT_RANGESET(tp->t_rttmin, 2182 rtt / (RTM_RTTUNIT / PR_SLOWHZ), 2183 TCPTV_MIN, TCPTV_REXMTMAX); 2184 tp->t_srtt = rtt / 2185 ((RTM_RTTUNIT / PR_SLOWHZ) >> (TCP_RTT_SHIFT + 2)); 2186 if (rt->rt_rmx.rmx_rttvar) { 2187 tp->t_rttvar = rt->rt_rmx.rmx_rttvar / 2188 ((RTM_RTTUNIT / PR_SLOWHZ) >> 2189 (TCP_RTTVAR_SHIFT + 2)); 2190 } else { 2191 /* Default variation is +- 1 rtt */ 2192 tp->t_rttvar = 2193 tp->t_srtt >> (TCP_RTT_SHIFT - TCP_RTTVAR_SHIFT); 2194 } 2195 TCPT_RANGESET(tp->t_rxtcur, 2196 ((tp->t_srtt >> 2) + tp->t_rttvar) >> (1 + 2), 2197 tp->t_rttmin, TCPTV_REXMTMAX); 2198 } 2199 #ifdef INET 2200 if (tp->t_inpcb) 2201 in_pcbrtentry_unref(rt, tp->t_inpcb); 2202 #endif 2203 #ifdef INET6 2204 if (tp->t_in6pcb) 2205 in6_pcbrtentry_unref(rt, tp->t_in6pcb); 2206 #endif 2207 #endif 2208 } 2209 2210 tcp_seq tcp_iss_seq = 0; /* tcp initial seq # */ 2211 2212 /* 2213 * Get a new sequence value given a tcp control block 2214 */ 2215 tcp_seq 2216 tcp_new_iss(struct tcpcb *tp, tcp_seq addin) 2217 { 2218 2219 #ifdef INET 2220 if (tp->t_inpcb != NULL) { 2221 return (tcp_new_iss1(&tp->t_inpcb->inp_laddr, 2222 &tp->t_inpcb->inp_faddr, tp->t_inpcb->inp_lport, 2223 tp->t_inpcb->inp_fport, sizeof(tp->t_inpcb->inp_laddr), 2224 addin)); 2225 } 2226 #endif 2227 #ifdef INET6 2228 if (tp->t_in6pcb != NULL) { 2229 return (tcp_new_iss1(&tp->t_in6pcb->in6p_laddr, 2230 &tp->t_in6pcb->in6p_faddr, tp->t_in6pcb->in6p_lport, 2231 tp->t_in6pcb->in6p_fport, sizeof(tp->t_in6pcb->in6p_laddr), 2232 addin)); 2233 } 2234 #endif 2235 /* Not possible. */ 2236 panic("tcp_new_iss"); 2237 } 2238 2239 static u_int8_t tcp_iss_secret[16]; /* 128 bits; should be plenty */ 2240 2241 /* 2242 * Initialize RFC 1948 ISS Secret 2243 */ 2244 static int 2245 tcp_iss_secret_init(void) 2246 { 2247 cprng_strong(kern_cprng, 2248 tcp_iss_secret, sizeof(tcp_iss_secret), 0); 2249 2250 return 0; 2251 } 2252 2253 /* 2254 * This routine actually generates a new TCP initial sequence number. 2255 */ 2256 tcp_seq 2257 tcp_new_iss1(void *laddr, void *faddr, u_int16_t lport, u_int16_t fport, 2258 size_t addrsz, tcp_seq addin) 2259 { 2260 tcp_seq tcp_iss; 2261 2262 if (tcp_do_rfc1948) { 2263 MD5_CTX ctx; 2264 u_int8_t hash[16]; /* XXX MD5 knowledge */ 2265 static ONCE_DECL(tcp_iss_secret_control); 2266 2267 /* 2268 * If we haven't been here before, initialize our cryptographic 2269 * hash secret. 2270 */ 2271 RUN_ONCE(&tcp_iss_secret_control, tcp_iss_secret_init); 2272 2273 /* 2274 * Compute the base value of the ISS. It is a hash 2275 * of (saddr, sport, daddr, dport, secret). 2276 */ 2277 MD5Init(&ctx); 2278 2279 MD5Update(&ctx, (u_char *) laddr, addrsz); 2280 MD5Update(&ctx, (u_char *) &lport, sizeof(lport)); 2281 2282 MD5Update(&ctx, (u_char *) faddr, addrsz); 2283 MD5Update(&ctx, (u_char *) &fport, sizeof(fport)); 2284 2285 MD5Update(&ctx, tcp_iss_secret, sizeof(tcp_iss_secret)); 2286 2287 MD5Final(hash, &ctx); 2288 2289 memcpy(&tcp_iss, hash, sizeof(tcp_iss)); 2290 2291 /* 2292 * Now increment our "timer", and add it in to 2293 * the computed value. 2294 * 2295 * XXX Use `addin'? 2296 * XXX TCP_ISSINCR too large to use? 2297 */ 2298 tcp_iss_seq += TCP_ISSINCR; 2299 #ifdef TCPISS_DEBUG 2300 printf("ISS hash 0x%08x, ", tcp_iss); 2301 #endif 2302 tcp_iss += tcp_iss_seq + addin; 2303 #ifdef TCPISS_DEBUG 2304 printf("new ISS 0x%08x\n", tcp_iss); 2305 #endif 2306 } else { 2307 /* 2308 * Randomize. 2309 */ 2310 tcp_iss = cprng_fast32(); 2311 2312 /* 2313 * If we were asked to add some amount to a known value, 2314 * we will take a random value obtained above, mask off 2315 * the upper bits, and add in the known value. We also 2316 * add in a constant to ensure that we are at least a 2317 * certain distance from the original value. 2318 * 2319 * This is used when an old connection is in timed wait 2320 * and we have a new one coming in, for instance. 2321 */ 2322 if (addin != 0) { 2323 #ifdef TCPISS_DEBUG 2324 printf("Random %08x, ", tcp_iss); 2325 #endif 2326 tcp_iss &= TCP_ISS_RANDOM_MASK; 2327 tcp_iss += addin + TCP_ISSINCR; 2328 #ifdef TCPISS_DEBUG 2329 printf("Old ISS %08x, ISS %08x\n", addin, tcp_iss); 2330 #endif 2331 } else { 2332 tcp_iss &= TCP_ISS_RANDOM_MASK; 2333 tcp_iss += tcp_iss_seq; 2334 tcp_iss_seq += TCP_ISSINCR; 2335 #ifdef TCPISS_DEBUG 2336 printf("ISS %08x\n", tcp_iss); 2337 #endif 2338 } 2339 } 2340 2341 return (tcp_iss); 2342 } 2343 2344 #if defined(IPSEC) 2345 /* compute ESP/AH header size for TCP, including outer IP header. */ 2346 size_t 2347 ipsec4_hdrsiz_tcp(struct tcpcb *tp) 2348 { 2349 struct inpcb *inp; 2350 size_t hdrsiz; 2351 2352 /* XXX mapped addr case (tp->t_in6pcb) */ 2353 if (!tp || !tp->t_template || !(inp = tp->t_inpcb)) 2354 return 0; 2355 switch (tp->t_family) { 2356 case AF_INET: 2357 /* XXX: should use currect direction. */ 2358 hdrsiz = ipsec4_hdrsiz(tp->t_template, IPSEC_DIR_OUTBOUND, inp); 2359 break; 2360 default: 2361 hdrsiz = 0; 2362 break; 2363 } 2364 2365 return hdrsiz; 2366 } 2367 2368 #ifdef INET6 2369 size_t 2370 ipsec6_hdrsiz_tcp(struct tcpcb *tp) 2371 { 2372 struct in6pcb *in6p; 2373 size_t hdrsiz; 2374 2375 if (!tp || !tp->t_template || !(in6p = tp->t_in6pcb)) 2376 return 0; 2377 switch (tp->t_family) { 2378 case AF_INET6: 2379 /* XXX: should use currect direction. */ 2380 hdrsiz = ipsec6_hdrsiz(tp->t_template, IPSEC_DIR_OUTBOUND, in6p); 2381 break; 2382 case AF_INET: 2383 /* mapped address case - tricky */ 2384 default: 2385 hdrsiz = 0; 2386 break; 2387 } 2388 2389 return hdrsiz; 2390 } 2391 #endif 2392 #endif /*IPSEC*/ 2393 2394 /* 2395 * Determine the length of the TCP options for this connection. 2396 * 2397 * XXX: What do we do for SACK, when we add that? Just reserve 2398 * all of the space? Otherwise we can't exactly be incrementing 2399 * cwnd by an amount that varies depending on the amount we last 2400 * had to SACK! 2401 */ 2402 2403 u_int 2404 tcp_optlen(struct tcpcb *tp) 2405 { 2406 u_int optlen; 2407 2408 optlen = 0; 2409 if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) == 2410 (TF_REQ_TSTMP | TF_RCVD_TSTMP)) 2411 optlen += TCPOLEN_TSTAMP_APPA; 2412 2413 #ifdef TCP_SIGNATURE 2414 if (tp->t_flags & TF_SIGNATURE) 2415 optlen += TCPOLEN_SIGLEN; 2416 #endif /* TCP_SIGNATURE */ 2417 2418 return optlen; 2419 } 2420 2421 u_int 2422 tcp_hdrsz(struct tcpcb *tp) 2423 { 2424 u_int hlen; 2425 2426 switch (tp->t_family) { 2427 #ifdef INET6 2428 case AF_INET6: 2429 hlen = sizeof(struct ip6_hdr); 2430 break; 2431 #endif 2432 case AF_INET: 2433 hlen = sizeof(struct ip); 2434 break; 2435 default: 2436 hlen = 0; 2437 break; 2438 } 2439 hlen += sizeof(struct tcphdr); 2440 2441 if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP && 2442 (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP) 2443 hlen += TCPOLEN_TSTAMP_APPA; 2444 #ifdef TCP_SIGNATURE 2445 if (tp->t_flags & TF_SIGNATURE) 2446 hlen += TCPOLEN_SIGLEN; 2447 #endif 2448 return hlen; 2449 } 2450 2451 void 2452 tcp_statinc(u_int stat) 2453 { 2454 2455 KASSERT(stat < TCP_NSTATS); 2456 TCP_STATINC(stat); 2457 } 2458 2459 void 2460 tcp_statadd(u_int stat, uint64_t val) 2461 { 2462 2463 KASSERT(stat < TCP_NSTATS); 2464 TCP_STATADD(stat, val); 2465 } 2466