1 /* $NetBSD: tcp_timer.c,v 1.55 2001/09/11 21:03:21 thorpej Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1997, 1998, 2001 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to The NetBSD Foundation 37 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation 38 * Facility, NASA Ames Research Center. 
39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. All advertising materials mentioning features or use of this software 49 * must display the following acknowledgement: 50 * This product includes software developed by the NetBSD 51 * Foundation, Inc. and its contributors. 52 * 4. Neither the name of The NetBSD Foundation nor the names of its 53 * contributors may be used to endorse or promote products derived 54 * from this software without specific prior written permission. 55 * 56 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 57 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 58 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 59 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 60 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 61 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 62 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 63 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 64 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 65 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 66 * POSSIBILITY OF SUCH DAMAGE. 67 */ 68 69 /* 70 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 71 * The Regents of the University of California. All rights reserved. 
72 * 73 * Redistribution and use in source and binary forms, with or without 74 * modification, are permitted provided that the following conditions 75 * are met: 76 * 1. Redistributions of source code must retain the above copyright 77 * notice, this list of conditions and the following disclaimer. 78 * 2. Redistributions in binary form must reproduce the above copyright 79 * notice, this list of conditions and the following disclaimer in the 80 * documentation and/or other materials provided with the distribution. 81 * 3. All advertising materials mentioning features or use of this software 82 * must display the following acknowledgement: 83 * This product includes software developed by the University of 84 * California, Berkeley and its contributors. 85 * 4. Neither the name of the University nor the names of its contributors 86 * may be used to endorse or promote products derived from this software 87 * without specific prior written permission. 88 * 89 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 90 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 91 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 92 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 93 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 94 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 95 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 96 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 97 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 98 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 99 * SUCH DAMAGE. 
*
 *	@(#)tcp_timer.c	8.2 (Berkeley) 5/24/95
 */

#include "opt_inet.h"
#include "opt_tcp_debug.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
#include <sys/errno.h>
#include <sys/kernel.h>

#include <net/if.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>

#ifdef INET6
#ifndef INET
#include <netinet/in.h>
#endif
#include <netinet/ip6.h>
#include <netinet6/in6_pcb.h>
#endif

#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
#ifdef TCP_DEBUG
#include <netinet/tcp_debug.h>
#endif

/*
 * Various tunable timer parameters.  These are initialized from
 * tcp_init() (via tcp_timer_init()), unless they are patched: a value
 * of zero means "not patched" and is replaced by the compiled-in
 * default, any non-zero value is left alone.
 */
int	tcp_keepidle = 0;
int	tcp_keepintvl = 0;
int	tcp_keepcnt = 0;		/* max idle probes */
int	tcp_maxpersistidle = 0;	/* max idle time in persist */
int	tcp_maxidle;			/* computed in tcp_slowtimo() */

/*
 * Time to delay the ACK.  This is initialized in tcp_init(), unless
 * it's patched (same zero-means-default convention as above).
 */
int	tcp_delack_ticks = 0;

/* Per-timer expiry handlers; each receives the owning tcpcb as `arg'. */
void	tcp_timer_rexmt(void *);
void	tcp_timer_persist(void *);
void	tcp_timer_keep(void *);
void	tcp_timer_2msl(void *);

/*
 * Table of timer expiry handlers, one slot per TCP timer.
 * NOTE(review): the entry order presumably has to match the numeric
 * values of TCPT_REXMT, TCPT_PERSIST, TCPT_KEEP and TCPT_2MSL --
 * confirm against <netinet/tcp_timer.h> before reordering.
 */
tcp_timer_func_t tcp_timer_funcs[TCPT_NTIMERS] = {
	tcp_timer_rexmt,
	tcp_timer_persist,
	tcp_timer_keep,
	tcp_timer_2msl,
};

/*
 * Timer state initialization, called from tcp_init().
174 */ 175 void 176 tcp_timer_init(void) 177 { 178 179 if (tcp_keepidle == 0) 180 tcp_keepidle = TCPTV_KEEP_IDLE; 181 182 if (tcp_keepintvl == 0) 183 tcp_keepintvl = TCPTV_KEEPINTVL; 184 185 if (tcp_keepcnt == 0) 186 tcp_keepcnt = TCPTV_KEEPCNT; 187 188 if (tcp_maxpersistidle == 0) 189 tcp_maxpersistidle = TCPTV_KEEP_IDLE; 190 191 if (tcp_delack_ticks == 0) 192 tcp_delack_ticks = TCP_DELACK_TICKS; 193 } 194 195 /* 196 * Callout to process delayed ACKs for a TCPCB. 197 */ 198 void 199 tcp_delack(void *arg) 200 { 201 struct tcpcb *tp = arg; 202 int s; 203 204 /* 205 * If tcp_output() wasn't able to transmit the ACK 206 * for whatever reason, it will restart the delayed 207 * ACK callout. 208 */ 209 210 s = splsoftnet(); 211 tp->t_flags |= TF_ACKNOW; 212 (void) tcp_output(tp); 213 splx(s); 214 } 215 216 /* 217 * Tcp protocol timeout routine called every 500 ms. 218 * Updates the timers in all active tcb's and 219 * causes finite state machine actions if timers expire. 220 */ 221 void 222 tcp_slowtimo() 223 { 224 int s; 225 226 s = splsoftnet(); 227 tcp_maxidle = tcp_keepcnt * tcp_keepintvl; 228 tcp_iss_seq += TCP_ISSINCR; /* increment iss */ 229 tcp_now++; /* for timestamps */ 230 splx(s); 231 } 232 233 /* 234 * Cancel all timers for TCP tp. 235 */ 236 void 237 tcp_canceltimers(tp) 238 struct tcpcb *tp; 239 { 240 int i; 241 242 for (i = 0; i < TCPT_NTIMERS; i++) 243 TCP_TIMER_DISARM(tp, i); 244 } 245 246 int tcp_backoff[TCP_MAXRXTSHIFT + 1] = 247 { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 }; 248 249 int tcp_totbackoff = 511; /* sum of tcp_backoff[] */ 250 251 /* 252 * TCP timer processing. 
253 */ 254 255 void 256 tcp_timer_rexmt(void *arg) 257 { 258 struct tcpcb *tp = arg; 259 uint32_t rto; 260 int s; 261 #ifdef TCP_DEBUG 262 struct socket *so; 263 short ostate; 264 #endif 265 266 s = splsoftnet(); 267 268 callout_deactivate(&tp->t_timer[TCPT_REXMT]); 269 270 #ifdef TCP_DEBUG 271 #ifdef INET 272 if (tp->t_inpcb) 273 so = tp->t_inpcb->inp_socket; 274 #endif 275 #ifdef INET6 276 if (tp->t_in6pcb) 277 so = tp->t_in6pcb->in6p_socket; 278 #endif 279 ostate = tp->t_state; 280 #endif /* TCP_DEBUG */ 281 282 /* 283 * Retransmission timer went off. Message has not 284 * been acked within retransmit interval. Back off 285 * to a longer retransmit interval and retransmit one segment. 286 */ 287 288 if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { 289 tp->t_rxtshift = TCP_MAXRXTSHIFT; 290 tcpstat.tcps_timeoutdrop++; 291 tp = tcp_drop(tp, tp->t_softerror ? 292 tp->t_softerror : ETIMEDOUT); 293 goto out; 294 } 295 tcpstat.tcps_rexmttimeo++; 296 rto = TCP_REXMTVAL(tp); 297 if (rto < tp->t_rttmin) 298 rto = tp->t_rttmin; 299 TCPT_RANGESET(tp->t_rxtcur, rto * tcp_backoff[tp->t_rxtshift], 300 tp->t_rttmin, TCPTV_REXMTMAX); 301 TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur); 302 #if 0 303 /* 304 * If we are losing and we are trying path MTU discovery, 305 * try turning it off. This will avoid black holes in 306 * the network which suppress or fail to send "packet 307 * too big" ICMP messages. We should ideally do 308 * lots more sophisticated searching to find the right 309 * value here... 310 */ 311 if (ip_mtudisc && tp->t_rxtshift > TCP_MAXRXTSHIFT / 6) { 312 struct rtentry *rt = NULL; 313 314 #ifdef INET 315 if (tp->t_inpcb) 316 rt = in_pcbrtentry(tp->t_inpcb); 317 #endif 318 #ifdef INET6 319 if (tp->t_in6pcb) 320 rt = in6_pcbrtentry(tp->t_in6pcb); 321 #endif 322 323 /* XXX: Black hole recovery code goes here */ 324 } 325 #endif /* 0 */ 326 /* 327 * If losing, let the lower level know and try for 328 * a better route. 
Also, if we backed off this far, 329 * our srtt estimate is probably bogus. Clobber it 330 * so we'll take the next rtt measurement as our srtt; 331 * move the current srtt into rttvar to keep the current 332 * retransmit times until then. 333 */ 334 if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { 335 #ifdef INET 336 if (tp->t_inpcb) 337 in_losing(tp->t_inpcb); 338 #endif 339 #ifdef INET6 340 if (tp->t_in6pcb) 341 in6_losing(tp->t_in6pcb); 342 #endif 343 tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); 344 tp->t_srtt = 0; 345 } 346 tp->snd_nxt = tp->snd_una; 347 /* 348 * If timing a segment in this window, stop the timer. 349 */ 350 tp->t_rtttime = 0; 351 /* 352 * Remember if we are retransmitting a SYN, because if 353 * we do, set the initial congestion window must be set 354 * to 1 segment. 355 */ 356 if (tp->t_state == TCPS_SYN_SENT) 357 tp->t_flags |= TF_SYN_REXMT; 358 /* 359 * Close the congestion window down to one segment 360 * (we'll open it by one segment for each ack we get). 361 * Since we probably have a window's worth of unacked 362 * data accumulated, this "slow start" keeps us from 363 * dumping all that data as back-to-back packets (which 364 * might overwhelm an intermediate gateway). 365 * 366 * There are two phases to the opening: Initially we 367 * open by one mss on each ack. This makes the window 368 * size increase exponentially with time. If the 369 * window is larger than the path can handle, this 370 * exponential growth results in dropped packet(s) 371 * almost immediately. To get more time between 372 * drops but still "push" the network to take advantage 373 * of improving conditions, we switch from exponential 374 * to linear window opening at some threshhold size. 375 * For a threshhold, we use half the current window 376 * size, truncated to a multiple of the mss. 377 * 378 * (the minimum cwnd that will give us exponential 379 * growth is 2 mss. We don't allow the threshhold 380 * to go below this.) 
381 */ 382 { 383 u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_segsz; 384 if (win < 2) 385 win = 2; 386 /* Loss Window MUST be one segment. */ 387 tp->snd_cwnd = tp->t_segsz; 388 tp->snd_ssthresh = win * tp->t_segsz; 389 tp->t_dupacks = 0; 390 } 391 (void) tcp_output(tp); 392 393 out: 394 #ifdef TCP_DEBUG 395 if (tp && so->so_options & SO_DEBUG) 396 tcp_trace(TA_USER, ostate, tp, NULL, 397 PRU_SLOWTIMO | (TCPT_REXMT << 8)); 398 #endif 399 splx(s); 400 } 401 402 void 403 tcp_timer_persist(void *arg) 404 { 405 struct tcpcb *tp = arg; 406 struct socket *so; 407 uint32_t rto; 408 int s; 409 #ifdef TCP_DEBUG 410 short ostate; 411 #endif 412 413 s = splsoftnet(); 414 415 callout_deactivate(&tp->t_timer[TCPT_PERSIST]); 416 417 #ifdef INET 418 if (tp->t_inpcb) 419 so = tp->t_inpcb->inp_socket; 420 #endif 421 #ifdef INET6 422 if (tp->t_in6pcb) 423 so = tp->t_in6pcb->in6p_socket; 424 #endif 425 426 #ifdef TCP_DEBUG 427 ostate = tp->t_state; 428 #endif 429 430 /* 431 * Persistance timer into zero window. 432 * Force a byte to be output, if possible. 433 */ 434 435 /* 436 * Hack: if the peer is dead/unreachable, we do not 437 * time out if the window is closed. After a full 438 * backoff, drop the connection if the idle time 439 * (no responses to probes) reaches the maximum 440 * backoff that we would use if retransmitting. 
441 */ 442 rto = TCP_REXMTVAL(tp); 443 if (rto < tp->t_rttmin) 444 rto = tp->t_rttmin; 445 if (tp->t_rxtshift == TCP_MAXRXTSHIFT && 446 ((tcp_now - tp->t_rcvtime) >= tcp_maxpersistidle || 447 (tcp_now - tp->t_rcvtime) >= rto * tcp_totbackoff)) { 448 tcpstat.tcps_persistdrops++; 449 tp = tcp_drop(tp, ETIMEDOUT); 450 goto out; 451 } 452 tcpstat.tcps_persisttimeo++; 453 tcp_setpersist(tp); 454 tp->t_force = 1; 455 (void) tcp_output(tp); 456 tp->t_force = 0; 457 458 out: 459 #ifdef TCP_DEBUG 460 if (tp && so->so_options & SO_DEBUG) 461 tcp_trace(TA_USER, ostate, tp, NULL, 462 PRU_SLOWTIMO | (TCPT_PERSIST << 8)); 463 #endif 464 splx(s); 465 } 466 467 void 468 tcp_timer_keep(void *arg) 469 { 470 struct tcpcb *tp = arg; 471 struct socket *so; 472 int s; 473 #ifdef TCP_DEBUG 474 short ostate; 475 #endif 476 477 s = splsoftnet(); 478 479 callout_deactivate(&tp->t_timer[TCPT_KEEP]); 480 481 #ifdef TCP_DEBUG 482 ostate = tp->t_state; 483 #endif /* TCP_DEBUG */ 484 485 /* 486 * Keep-alive timer went off; send something 487 * or drop connection if idle for too long. 488 */ 489 490 tcpstat.tcps_keeptimeo++; 491 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 492 goto dropit; 493 #ifdef INET 494 if (tp->t_inpcb) 495 so = tp->t_inpcb->inp_socket; 496 #endif 497 #ifdef INET6 498 if (tp->t_in6pcb) 499 so = tp->t_in6pcb->in6p_socket; 500 #endif 501 if (so->so_options & SO_KEEPALIVE && 502 tp->t_state <= TCPS_CLOSE_WAIT) { 503 if ((tcp_maxidle > 0) && 504 ((tcp_now - tp->t_rcvtime) >= 505 tcp_keepidle + tcp_maxidle)) 506 goto dropit; 507 /* 508 * Send a packet designed to force a response 509 * if the peer is up and reachable: 510 * either an ACK if the connection is still alive, 511 * or an RST if the peer has closed the connection 512 * due to timeout or reboot. 513 * Using sequence number tp->snd_una-1 514 * causes the transmitted zero-length segment 515 * to lie outside the receive window; 516 * by the protocol spec, this requires the 517 * correspondent TCP to respond. 
518 */ 519 tcpstat.tcps_keepprobe++; 520 if (tcp_compat_42) { 521 /* 522 * The keepalive packet must have nonzero 523 * length to get a 4.2 host to respond. 524 */ 525 (void)tcp_respond(tp, tp->t_template, 526 (struct mbuf *)NULL, NULL, tp->rcv_nxt - 1, 527 tp->snd_una - 1, 0); 528 } else { 529 (void)tcp_respond(tp, tp->t_template, 530 (struct mbuf *)NULL, NULL, tp->rcv_nxt, 531 tp->snd_una - 1, 0); 532 } 533 TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl); 534 } else 535 TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle); 536 537 #ifdef TCP_DEBUG 538 if (tp && so->so_options & SO_DEBUG) 539 tcp_trace(TA_USER, ostate, tp, NULL, 540 PRU_SLOWTIMO | (TCPT_KEEP << 8)); 541 #endif 542 splx(s); 543 return; 544 545 dropit: 546 tcpstat.tcps_keepdrops++; 547 (void) tcp_drop(tp, ETIMEDOUT); 548 splx(s); 549 } 550 551 void 552 tcp_timer_2msl(void *arg) 553 { 554 struct tcpcb *tp = arg; 555 struct socket *so; 556 int s; 557 #ifdef TCP_DEBUG 558 short ostate; 559 #endif 560 561 s = splsoftnet(); 562 563 callout_deactivate(&tp->t_timer[TCPT_2MSL]); 564 565 #ifdef INET 566 if (tp->t_inpcb) 567 so = tp->t_inpcb->inp_socket; 568 #endif 569 #ifdef INET6 570 if (tp->t_in6pcb) 571 so = tp->t_in6pcb->in6p_socket; 572 #endif 573 574 #ifdef TCP_DEBUG 575 ostate = tp->t_state; 576 #endif 577 578 /* 579 * 2 MSL timeout in shutdown went off. If we're closed but 580 * still waiting for peer to close and connection has been idle 581 * too long, or if 2MSL time is up from TIME_WAIT, delete connection 582 * control block. Otherwise, check again in a bit. 583 */ 584 if (tp->t_state != TCPS_TIME_WAIT && 585 ((tcp_maxidle == 0) || ((tcp_now - tp->t_rcvtime) <= tcp_maxidle))) 586 TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_keepintvl); 587 else 588 tp = tcp_close(tp); 589 590 #ifdef TCP_DEBUG 591 if (tp && so->so_options & SO_DEBUG) 592 tcp_trace(TA_USER, ostate, tp, NULL, 593 PRU_SLOWTIMO | (TCPT_2MSL << 8)); 594 #endif 595 splx(s); 596 } 597