1 /* $NetBSD: tcp_timer.c,v 1.69 2005/02/03 23:51:56 perry Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1997, 1998, 2001 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to The NetBSD Foundation 37 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation 38 * Facility, NASA Ames Research Center. 
39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. All advertising materials mentioning features or use of this software 49 * must display the following acknowledgement: 50 * This product includes software developed by the NetBSD 51 * Foundation, Inc. and its contributors. 52 * 4. Neither the name of The NetBSD Foundation nor the names of its 53 * contributors may be used to endorse or promote products derived 54 * from this software without specific prior written permission. 55 * 56 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 57 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 58 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 59 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 60 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 61 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 62 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 63 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 64 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 65 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 66 * POSSIBILITY OF SUCH DAMAGE. 67 */ 68 69 /* 70 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 71 * The Regents of the University of California. All rights reserved. 
72 * 73 * Redistribution and use in source and binary forms, with or without 74 * modification, are permitted provided that the following conditions 75 * are met: 76 * 1. Redistributions of source code must retain the above copyright 77 * notice, this list of conditions and the following disclaimer. 78 * 2. Redistributions in binary form must reproduce the above copyright 79 * notice, this list of conditions and the following disclaimer in the 80 * documentation and/or other materials provided with the distribution. 81 * 3. Neither the name of the University nor the names of its contributors 82 * may be used to endorse or promote products derived from this software 83 * without specific prior written permission. 84 * 85 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 86 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 87 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 88 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 89 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 90 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 91 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 92 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 93 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 94 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 95 * SUCH DAMAGE. 
 *
 *	@(#)tcp_timer.c	8.2 (Berkeley) 5/24/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tcp_timer.c,v 1.69 2005/02/03 23:51:56 perry Exp $");

#include "opt_inet.h"
#include "opt_tcp_debug.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
#include <sys/errno.h>
#include <sys/kernel.h>

#include <net/if.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>

#ifdef INET6
#ifndef INET
#include <netinet/in.h>
#endif
#include <netinet/ip6.h>
#include <netinet6/in6_pcb.h>
#endif

#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
#ifdef TCP_DEBUG
#include <netinet/tcp_debug.h>
#endif

/*
 * Various tunable timer parameters.  Any of these that is still zero
 * when tcp_timer_init() runs is given its compiled-in default there;
 * a value that has been patched to something non-zero is left alone.
 */
int	tcp_keepidle = 0;
int	tcp_keepintvl = 0;
int	tcp_keepcnt = 0;		/* max idle probes */
int	tcp_maxpersistidle = 0;		/* max idle time in persist */
int	tcp_maxidle;			/* computed in tcp_slowtimo() */

/*
 * Time to delay the ACK.  As with the parameters above, a zero value
 * is replaced by the default in tcp_timer_init(), unless it has been
 * patched.
 */
int	tcp_delack_ticks = 0;

/* Expiry handlers for the individual TCP timers; defined further down. */
void	tcp_timer_rexmt(void *);
void	tcp_timer_persist(void *);
void	tcp_timer_keep(void *);
void	tcp_timer_2msl(void *);

/*
 * Table of the expiry handlers above, in the order rexmt, persist,
 * keep, 2msl.
 * NOTE(review): presumably indexed by the TCPT_* timer numbers, so the
 * order must match their definitions -- confirm against tcp_timer.h.
 */
const tcp_timer_func_t tcp_timer_funcs[TCPT_NTIMERS] = {
	tcp_timer_rexmt,
	tcp_timer_persist,
	tcp_timer_keep,
	tcp_timer_2msl,
};

/*
 * Timer state initialization, called from tcp_init().
173 */ 174 void 175 tcp_timer_init(void) 176 { 177 178 if (tcp_keepidle == 0) 179 tcp_keepidle = TCPTV_KEEP_IDLE; 180 181 if (tcp_keepintvl == 0) 182 tcp_keepintvl = TCPTV_KEEPINTVL; 183 184 if (tcp_keepcnt == 0) 185 tcp_keepcnt = TCPTV_KEEPCNT; 186 187 if (tcp_maxpersistidle == 0) 188 tcp_maxpersistidle = TCPTV_KEEP_IDLE; 189 190 if (tcp_delack_ticks == 0) 191 tcp_delack_ticks = TCP_DELACK_TICKS; 192 } 193 194 /* 195 * Return how many timers are currently being invoked. 196 */ 197 int 198 tcp_timers_invoking(struct tcpcb *tp) 199 { 200 int i; 201 int count = 0; 202 203 for (i = 0; i < TCPT_NTIMERS; i++) 204 if (callout_invoking(&tp->t_timer[i])) 205 count++; 206 if (callout_invoking(&tp->t_delack_ch)) 207 count++; 208 209 return count; 210 } 211 212 /* 213 * Callout to process delayed ACKs for a TCPCB. 214 */ 215 void 216 tcp_delack(void *arg) 217 { 218 struct tcpcb *tp = arg; 219 int s; 220 221 /* 222 * If tcp_output() wasn't able to transmit the ACK 223 * for whatever reason, it will restart the delayed 224 * ACK callout. 225 */ 226 227 s = splsoftnet(); 228 callout_ack(&tp->t_delack_ch); 229 if (tcp_isdead(tp)) { 230 splx(s); 231 return; 232 } 233 234 tp->t_flags |= TF_ACKNOW; 235 (void) tcp_output(tp); 236 splx(s); 237 } 238 239 /* 240 * Tcp protocol timeout routine called every 500 ms. 241 * Updates the timers in all active tcb's and 242 * causes finite state machine actions if timers expire. 243 */ 244 void 245 tcp_slowtimo(void) 246 { 247 int s; 248 249 s = splsoftnet(); 250 tcp_maxidle = tcp_keepcnt * tcp_keepintvl; 251 tcp_iss_seq += TCP_ISSINCR; /* increment iss */ 252 tcp_now++; /* for timestamps */ 253 splx(s); 254 } 255 256 /* 257 * Cancel all timers for TCP tp. 
258 */ 259 void 260 tcp_canceltimers(struct tcpcb *tp) 261 { 262 int i; 263 264 for (i = 0; i < TCPT_NTIMERS; i++) 265 TCP_TIMER_DISARM(tp, i); 266 } 267 268 const int tcp_backoff[TCP_MAXRXTSHIFT + 1] = 269 { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 }; 270 271 const int tcp_totbackoff = 511; /* sum of tcp_backoff[] */ 272 273 /* 274 * TCP timer processing. 275 */ 276 277 void 278 tcp_timer_rexmt(void *arg) 279 { 280 struct tcpcb *tp = arg; 281 uint32_t rto; 282 int s; 283 #ifdef TCP_DEBUG 284 struct socket *so = NULL; 285 short ostate; 286 #endif 287 288 s = splsoftnet(); 289 callout_ack(&tp->t_timer[TCPT_REXMT]); 290 if (tcp_isdead(tp)) { 291 splx(s); 292 return; 293 } 294 295 #ifdef TCP_DEBUG 296 #ifdef INET 297 if (tp->t_inpcb) 298 so = tp->t_inpcb->inp_socket; 299 #endif 300 #ifdef INET6 301 if (tp->t_in6pcb) 302 so = tp->t_in6pcb->in6p_socket; 303 #endif 304 ostate = tp->t_state; 305 #endif /* TCP_DEBUG */ 306 307 /* 308 * Retransmission timer went off. Message has not 309 * been acked within retransmit interval. Back off 310 * to a longer retransmit interval and retransmit one segment. 311 */ 312 313 if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { 314 tp->t_rxtshift = TCP_MAXRXTSHIFT; 315 tcpstat.tcps_timeoutdrop++; 316 tp = tcp_drop(tp, tp->t_softerror ? 317 tp->t_softerror : ETIMEDOUT); 318 goto out; 319 } 320 tcpstat.tcps_rexmttimeo++; 321 rto = TCP_REXMTVAL(tp); 322 if (rto < tp->t_rttmin) 323 rto = tp->t_rttmin; 324 TCPT_RANGESET(tp->t_rxtcur, rto * tcp_backoff[tp->t_rxtshift], 325 tp->t_rttmin, TCPTV_REXMTMAX); 326 TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur); 327 328 /* 329 * If we are losing and we are trying path MTU discovery, 330 * try turning it off. This will avoid black holes in 331 * the network which suppress or fail to send "packet 332 * too big" ICMP messages. We should ideally do 333 * lots more sophisticated searching to find the right 334 * value here... 
335 */ 336 if (tp->t_mtudisc && tp->t_rxtshift > TCP_MAXRXTSHIFT / 6) { 337 tcpstat.tcps_pmtublackhole++; 338 339 #ifdef INET 340 /* try turning PMTUD off */ 341 if (tp->t_inpcb) 342 tp->t_mtudisc = 0; 343 #endif 344 #ifdef INET6 345 /* try using IPv6 minimum MTU */ 346 if (tp->t_in6pcb) 347 tp->t_mtudisc = 0; 348 #endif 349 350 /* XXX: more sophisticated Black hole recovery code? */ 351 } 352 353 /* 354 * If losing, let the lower level know and try for 355 * a better route. Also, if we backed off this far, 356 * our srtt estimate is probably bogus. Clobber it 357 * so we'll take the next rtt measurement as our srtt; 358 * move the current srtt into rttvar to keep the current 359 * retransmit times until then. 360 */ 361 if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { 362 #ifdef INET 363 if (tp->t_inpcb) 364 in_losing(tp->t_inpcb); 365 #endif 366 #ifdef INET6 367 if (tp->t_in6pcb) 368 in6_losing(tp->t_in6pcb); 369 #endif 370 tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); 371 tp->t_srtt = 0; 372 } 373 tp->snd_nxt = tp->snd_una; 374 tp->snd_high = tp->snd_max; 375 /* 376 * If timing a segment in this window, stop the timer. 377 */ 378 tp->t_rtttime = 0; 379 /* 380 * Remember if we are retransmitting a SYN, because if 381 * we do, set the initial congestion window must be set 382 * to 1 segment. 383 */ 384 if (tp->t_state == TCPS_SYN_SENT) 385 tp->t_flags |= TF_SYN_REXMT; 386 /* 387 * Close the congestion window down to one segment 388 * (we'll open it by one segment for each ack we get). 389 * Since we probably have a window's worth of unacked 390 * data accumulated, this "slow start" keeps us from 391 * dumping all that data as back-to-back packets (which 392 * might overwhelm an intermediate gateway). 393 * 394 * There are two phases to the opening: Initially we 395 * open by one mss on each ack. This makes the window 396 * size increase exponentially with time. 
If the 397 * window is larger than the path can handle, this 398 * exponential growth results in dropped packet(s) 399 * almost immediately. To get more time between 400 * drops but still "push" the network to take advantage 401 * of improving conditions, we switch from exponential 402 * to linear window opening at some threshhold size. 403 * For a threshhold, we use half the current window 404 * size, truncated to a multiple of the mss. 405 * 406 * (the minimum cwnd that will give us exponential 407 * growth is 2 mss. We don't allow the threshhold 408 * to go below this.) 409 */ 410 { 411 u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_segsz; 412 if (win < 2) 413 win = 2; 414 /* Loss Window MUST be one segment. */ 415 tp->snd_cwnd = tp->t_segsz; 416 tp->snd_ssthresh = win * tp->t_segsz; 417 tp->t_partialacks = -1; 418 tp->t_dupacks = 0; 419 } 420 (void) tcp_output(tp); 421 422 out: 423 #ifdef TCP_DEBUG 424 if (tp && so->so_options & SO_DEBUG) 425 tcp_trace(TA_USER, ostate, tp, NULL, 426 PRU_SLOWTIMO | (TCPT_REXMT << 8)); 427 #endif 428 splx(s); 429 } 430 431 void 432 tcp_timer_persist(void *arg) 433 { 434 struct tcpcb *tp = arg; 435 uint32_t rto; 436 int s; 437 #ifdef TCP_DEBUG 438 struct socket *so = NULL; 439 short ostate; 440 #endif 441 442 s = splsoftnet(); 443 callout_ack(&tp->t_timer[TCPT_PERSIST]); 444 if (tcp_isdead(tp)) { 445 splx(s); 446 return; 447 } 448 449 #ifdef TCP_DEBUG 450 #ifdef INET 451 if (tp->t_inpcb) 452 so = tp->t_inpcb->inp_socket; 453 #endif 454 #ifdef INET6 455 if (tp->t_in6pcb) 456 so = tp->t_in6pcb->in6p_socket; 457 #endif 458 459 ostate = tp->t_state; 460 #endif /* TCP_DEBUG */ 461 462 /* 463 * Persistance timer into zero window. 464 * Force a byte to be output, if possible. 465 */ 466 467 /* 468 * Hack: if the peer is dead/unreachable, we do not 469 * time out if the window is closed. 
After a full 470 * backoff, drop the connection if the idle time 471 * (no responses to probes) reaches the maximum 472 * backoff that we would use if retransmitting. 473 */ 474 rto = TCP_REXMTVAL(tp); 475 if (rto < tp->t_rttmin) 476 rto = tp->t_rttmin; 477 if (tp->t_rxtshift == TCP_MAXRXTSHIFT && 478 ((tcp_now - tp->t_rcvtime) >= tcp_maxpersistidle || 479 (tcp_now - tp->t_rcvtime) >= rto * tcp_totbackoff)) { 480 tcpstat.tcps_persistdrops++; 481 tp = tcp_drop(tp, ETIMEDOUT); 482 goto out; 483 } 484 tcpstat.tcps_persisttimeo++; 485 tcp_setpersist(tp); 486 tp->t_force = 1; 487 (void) tcp_output(tp); 488 tp->t_force = 0; 489 490 out: 491 #ifdef TCP_DEBUG 492 if (tp && so->so_options & SO_DEBUG) 493 tcp_trace(TA_USER, ostate, tp, NULL, 494 PRU_SLOWTIMO | (TCPT_PERSIST << 8)); 495 #endif 496 splx(s); 497 } 498 499 void 500 tcp_timer_keep(void *arg) 501 { 502 struct tcpcb *tp = arg; 503 struct socket *so = NULL; /* Quell compiler warning */ 504 int s; 505 #ifdef TCP_DEBUG 506 short ostate; 507 #endif 508 509 s = splsoftnet(); 510 callout_ack(&tp->t_timer[TCPT_KEEP]); 511 if (tcp_isdead(tp)) { 512 splx(s); 513 return; 514 } 515 516 #ifdef TCP_DEBUG 517 ostate = tp->t_state; 518 #endif /* TCP_DEBUG */ 519 520 /* 521 * Keep-alive timer went off; send something 522 * or drop connection if idle for too long. 
523 */ 524 525 tcpstat.tcps_keeptimeo++; 526 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 527 goto dropit; 528 #ifdef INET 529 if (tp->t_inpcb) 530 so = tp->t_inpcb->inp_socket; 531 #endif 532 #ifdef INET6 533 if (tp->t_in6pcb) 534 so = tp->t_in6pcb->in6p_socket; 535 #endif 536 if (so->so_options & SO_KEEPALIVE && 537 tp->t_state <= TCPS_CLOSE_WAIT) { 538 if ((tcp_maxidle > 0) && 539 ((tcp_now - tp->t_rcvtime) >= 540 tcp_keepidle + tcp_maxidle)) 541 goto dropit; 542 /* 543 * Send a packet designed to force a response 544 * if the peer is up and reachable: 545 * either an ACK if the connection is still alive, 546 * or an RST if the peer has closed the connection 547 * due to timeout or reboot. 548 * Using sequence number tp->snd_una-1 549 * causes the transmitted zero-length segment 550 * to lie outside the receive window; 551 * by the protocol spec, this requires the 552 * correspondent TCP to respond. 553 */ 554 tcpstat.tcps_keepprobe++; 555 if (tcp_compat_42) { 556 /* 557 * The keepalive packet must have nonzero 558 * length to get a 4.2 host to respond. 
559 */ 560 (void)tcp_respond(tp, tp->t_template, 561 (struct mbuf *)NULL, NULL, tp->rcv_nxt - 1, 562 tp->snd_una - 1, 0); 563 } else { 564 (void)tcp_respond(tp, tp->t_template, 565 (struct mbuf *)NULL, NULL, tp->rcv_nxt, 566 tp->snd_una - 1, 0); 567 } 568 TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl); 569 } else 570 TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle); 571 572 #ifdef TCP_DEBUG 573 if (tp && so->so_options & SO_DEBUG) 574 tcp_trace(TA_USER, ostate, tp, NULL, 575 PRU_SLOWTIMO | (TCPT_KEEP << 8)); 576 #endif 577 splx(s); 578 return; 579 580 dropit: 581 tcpstat.tcps_keepdrops++; 582 (void) tcp_drop(tp, ETIMEDOUT); 583 splx(s); 584 } 585 586 void 587 tcp_timer_2msl(void *arg) 588 { 589 struct tcpcb *tp = arg; 590 int s; 591 #ifdef TCP_DEBUG 592 struct socket *so = NULL; 593 short ostate; 594 #endif 595 596 s = splsoftnet(); 597 callout_ack(&tp->t_timer[TCPT_2MSL]); 598 if (tcp_isdead(tp)) { 599 splx(s); 600 return; 601 } 602 603 #ifdef TCP_DEBUG 604 #ifdef INET 605 if (tp->t_inpcb) 606 so = tp->t_inpcb->inp_socket; 607 #endif 608 #ifdef INET6 609 if (tp->t_in6pcb) 610 so = tp->t_in6pcb->in6p_socket; 611 #endif 612 613 ostate = tp->t_state; 614 #endif /* TCP_DEBUG */ 615 616 /* 617 * 2 MSL timeout in shutdown went off. If we're closed but 618 * still waiting for peer to close and connection has been idle 619 * too long, or if 2MSL time is up from TIME_WAIT, delete connection 620 * control block. Otherwise, check again in a bit. 621 */ 622 if (tp->t_state != TCPS_TIME_WAIT && 623 ((tcp_maxidle == 0) || ((tcp_now - tp->t_rcvtime) <= tcp_maxidle))) 624 TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_keepintvl); 625 else 626 tp = tcp_close(tp); 627 628 #ifdef TCP_DEBUG 629 if (tp && so->so_options & SO_DEBUG) 630 tcp_trace(TA_USER, ostate, tp, NULL, 631 PRU_SLOWTIMO | (TCPT_2MSL << 8)); 632 #endif 633 splx(s); 634 } 635