1 /* $NetBSD: tcp_timer.c,v 1.48 2000/10/19 20:23:00 itojun Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to The NetBSD Foundation 37 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation 38 * Facility, NASA Ames Research Center. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. All advertising materials mentioning features or use of this software 49 * must display the following acknowledgement: 50 * This product includes software developed by the NetBSD 51 * Foundation, Inc. and its contributors. 52 * 4. Neither the name of The NetBSD Foundation nor the names of its 53 * contributors may be used to endorse or promote products derived 54 * from this software without specific prior written permission. 55 * 56 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 57 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 58 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 59 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 60 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 61 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 62 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 63 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 64 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 65 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 66 * POSSIBILITY OF SUCH DAMAGE. 67 */ 68 69 /* 70 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 71 * The Regents of the University of California. All rights reserved. 72 * 73 * Redistribution and use in source and binary forms, with or without 74 * modification, are permitted provided that the following conditions 75 * are met: 76 * 1. Redistributions of source code must retain the above copyright 77 * notice, this list of conditions and the following disclaimer. 78 * 2. Redistributions in binary form must reproduce the above copyright 79 * notice, this list of conditions and the following disclaimer in the 80 * documentation and/or other materials provided with the distribution. 81 * 3. All advertising materials mentioning features or use of this software 82 * must display the following acknowledgement: 83 * This product includes software developed by the University of 84 * California, Berkeley and its contributors. 85 * 4. Neither the name of the University nor the names of its contributors 86 * may be used to endorse or promote products derived from this software 87 * without specific prior written permission. 88 * 89 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 90 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 91 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 92 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 93 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 94 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 95 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 96 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 97 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 98 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 99 * SUCH DAMAGE. 100 * 101 * @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95 102 */ 103 104 #include "opt_inet.h" 105 106 #include <sys/param.h> 107 #include <sys/systm.h> 108 #include <sys/malloc.h> 109 #include <sys/mbuf.h> 110 #include <sys/socket.h> 111 #include <sys/socketvar.h> 112 #include <sys/protosw.h> 113 #include <sys/errno.h> 114 115 #include <net/if.h> 116 #include <net/route.h> 117 118 #include <netinet/in.h> 119 #include <netinet/in_systm.h> 120 #include <netinet/ip.h> 121 #include <netinet/in_pcb.h> 122 #include <netinet/ip_var.h> 123 124 #ifdef INET6 125 #ifndef INET 126 #include <netinet/in.h> 127 #endif 128 #include <netinet/ip6.h> 129 #include <netinet6/in6_pcb.h> 130 #endif 131 132 #include <netinet/tcp.h> 133 #include <netinet/tcp_fsm.h> 134 #include <netinet/tcp_seq.h> 135 #include <netinet/tcp_timer.h> 136 #include <netinet/tcp_var.h> 137 #include <netinet/tcpip.h> 138 139 int tcp_keepidle = TCPTV_KEEP_IDLE; 140 int tcp_keepintvl = TCPTV_KEEPINTVL; 141 int tcp_keepcnt = TCPTV_KEEPCNT; /* max idle probes */ 142 int tcp_maxpersistidle = TCPTV_KEEP_IDLE; /* max idle time in persist */ 143 int tcp_maxidle; 144 145 struct tcp_delack_head tcp_delacks; 146 147 /* 148 * Fast timeout routine for processing delayed acks 149 */ 150 void 151 tcp_fasttimo() 152 { 153 struct tcpcb *tp, *ntp; 154 int s; 155 156 s = splsoftnet(); 157 for (tp = tcp_delacks.lh_first; tp != NULL; tp = ntp) { 158 /* 159 * If tcp_output() can't transmit the ACK for whatever 160 * reason, it will remain on the queue for the next 161 * time the heartbeat ticks. 162 */ 163 ntp = tp->t_delack.le_next; 164 tp->t_flags |= TF_ACKNOW; 165 (void) tcp_output(tp); 166 } 167 splx(s); 168 } 169 170 /* 171 * Tcp protocol timeout routine called every 500 ms. 172 * Updates the timers in all active tcb's and 173 * causes finite state machine actions if timers expire. 174 */ 175 void 176 tcp_slowtimo() 177 { 178 struct inpcb *inp, *ninp; 179 struct tcpcb *tp; 180 #ifdef INET6 181 struct in6pcb *in6p, *nin6p; 182 #endif 183 int s; 184 long i; 185 static int syn_cache_last = 0; 186 int skip, mask; 187 188 skip = mask = 0; 189 190 s = splsoftnet(); 191 tcp_maxidle = tcp_keepcnt * tcp_keepintvl; 192 /* 193 * Search through tcb's and update active timers. 194 */ 195 mask |= 1; 196 inp = tcbtable.inpt_queue.cqh_first; 197 if (inp == (struct inpcb *)0) { /* XXX */ 198 skip |= 1; 199 goto dotcb6; 200 } 201 for (; inp != (struct inpcb *)&tcbtable.inpt_queue; inp = ninp) { 202 ninp = inp->inp_queue.cqe_next; 203 tp = intotcpcb(inp); 204 if (tp == 0 || tp->t_state == TCPS_LISTEN) 205 continue; 206 for (i = 0; i < TCPT_NTIMERS; i++) { 207 if (TCP_TIMER_ISEXPIRED(tp, i)) { 208 TCP_TIMER_DISARM(tp, i); 209 (void) tcp_usrreq(tp->t_inpcb->inp_socket, 210 PRU_SLOWTIMO, (struct mbuf *)0, 211 (struct mbuf *)i, (struct mbuf *)0, 212 (struct proc *)0); 213 /* XXX NOT MP SAFE */ 214 if ((ninp == (void *)&tcbtable.inpt_queue && 215 tcbtable.inpt_queue.cqh_last != inp) || 216 ninp->inp_queue.cqe_prev != inp) 217 goto tpgone; 218 } 219 } 220 tp->t_idle++; 221 if (tp->t_rtt) 222 tp->t_rtt++; 223 tpgone: 224 ; 225 } 226 dotcb6: 227 #ifdef INET6 228 mask |= 2; 229 in6p = tcb6.in6p_next; 230 if (in6p == (struct in6pcb *)0) { /* XXX */ 231 skip |= 2; 232 goto doiss; 233 } 234 for (; in6p != (struct in6pcb *)&tcb6; in6p = nin6p) { 235 nin6p = in6p->in6p_next; 236 tp = in6totcpcb(in6p); 237 if (tp == 0 || tp->t_state == TCPS_LISTEN) 238 continue; 239 for (i = 0; i < TCPT_NTIMERS; i++) { 240 if (TCP_TIMER_ISEXPIRED(tp, i)) { 241 TCP_TIMER_DISARM(tp, i); 242 (void) tcp_usrreq(tp->t_in6pcb->in6p_socket, 243 PRU_SLOWTIMO, (struct mbuf *)0, 244 (struct mbuf *)i, (struct mbuf *)0, 245 (struct proc *)0); 246 /* XXX NOT MP SAFE */ 247 if ((nin6p == (void *)&tcb6 && 248 tcb6.in6p_prev != in6p) || 249 nin6p->in6p_prev != in6p) 250 goto tp6gone; 251 } 252 } 253 tp->t_idle++; 254 if (tp->t_rtt) 255 tp->t_rtt++; 256 tp6gone: 257 ; 258 } 259 260 doiss: 261 #endif 262 if (mask == skip) 263 goto done; 264 tcp_iss_seq += TCP_ISSINCR; /* increment iss */ 265 tcp_now++; /* for timestamps */ 266 if (++syn_cache_last >= tcp_syn_cache_interval) { 267 syn_cache_timer(); 268 syn_cache_last = 0; 269 } 270 done: 271 splx(s); 272 } 273 274 /* 275 * Cancel all timers for TCP tp. 276 */ 277 void 278 tcp_canceltimers(tp) 279 struct tcpcb *tp; 280 { 281 int i; 282 283 for (i = 0; i < TCPT_NTIMERS; i++) 284 TCP_TIMER_DISARM(tp, i); 285 } 286 287 int tcp_backoff[TCP_MAXRXTSHIFT + 1] = 288 { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 }; 289 290 int tcp_totbackoff = 511; /* sum of tcp_backoff[] */ 291 292 /* 293 * TCP timer processing. 294 */ 295 struct tcpcb * 296 tcp_timers(tp, timer) 297 struct tcpcb *tp; 298 int timer; 299 { 300 short rto; 301 302 #ifdef DIAGNOSTIC 303 if (tp->t_inpcb && tp->t_in6pcb) 304 panic("tcp_timers: both t_inpcb and t_in6pcb are set"); 305 #endif 306 307 switch (timer) { 308 309 /* 310 * 2 MSL timeout in shutdown went off. If we're closed but 311 * still waiting for peer to close and connection has been idle 312 * too long, or if 2MSL time is up from TIME_WAIT, delete connection 313 * control block. Otherwise, check again in a bit. 314 */ 315 case TCPT_2MSL: 316 if (tp->t_state != TCPS_TIME_WAIT && 317 ((tcp_maxidle == 0) || (tp->t_idle <= tcp_maxidle))) 318 TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_keepintvl); 319 else 320 tp = tcp_close(tp); 321 break; 322 323 /* 324 * Retransmission timer went off. Message has not 325 * been acked within retransmit interval. Back off 326 * to a longer retransmit interval and retransmit one segment. 327 */ 328 case TCPT_REXMT: 329 if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { 330 tp->t_rxtshift = TCP_MAXRXTSHIFT; 331 tcpstat.tcps_timeoutdrop++; 332 tp = tcp_drop(tp, tp->t_softerror ? 333 tp->t_softerror : ETIMEDOUT); 334 break; 335 } 336 tcpstat.tcps_rexmttimeo++; 337 rto = TCP_REXMTVAL(tp); 338 if (rto < tp->t_rttmin) 339 rto = tp->t_rttmin; 340 TCPT_RANGESET(tp->t_rxtcur, rto * tcp_backoff[tp->t_rxtshift], 341 tp->t_rttmin, TCPTV_REXMTMAX); 342 TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur); 343 #if 0 344 /* 345 * If we are losing and we are trying path MTU discovery, 346 * try turning it off. This will avoid black holes in 347 * the network which suppress or fail to send "packet 348 * too big" ICMP messages. We should ideally do 349 * lots more sophisticated searching to find the right 350 * value here... 351 */ 352 if (ip_mtudisc && tp->t_rxtshift > TCP_MAXRXTSHIFT / 6) { 353 struct rtentry *rt = NULL; 354 355 #ifdef INET 356 if (tp->t_inpcb) 357 rt = in_pcbrtentry(tp->t_inpcb); 358 #endif 359 #ifdef INET6 360 if (tp->t_in6pcb) 361 rt = in6_pcbrtentry(tp->t_in6pcb); 362 #endif 363 364 /* XXX: Black hole recovery code goes here */ 365 } 366 #endif 367 /* 368 * If losing, let the lower level know and try for 369 * a better route. Also, if we backed off this far, 370 * our srtt estimate is probably bogus. Clobber it 371 * so we'll take the next rtt measurement as our srtt; 372 * move the current srtt into rttvar to keep the current 373 * retransmit times until then. 374 */ 375 if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { 376 #ifdef INET 377 if (tp->t_inpcb) 378 in_losing(tp->t_inpcb); 379 #endif 380 #ifdef INET6 381 if (tp->t_in6pcb) 382 in6_losing(tp->t_in6pcb); 383 #endif 384 tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); 385 tp->t_srtt = 0; 386 } 387 tp->snd_nxt = tp->snd_una; 388 /* 389 * If timing a segment in this window, stop the timer. 390 */ 391 tp->t_rtt = 0; 392 /* 393 * Remember if we are retransmitting a SYN, because if 394 * we do, set the initial congestion window must be set 395 * to 1 segment. 396 */ 397 if (tp->t_state == TCPS_SYN_SENT) 398 tp->t_flags |= TF_SYN_REXMT; 399 /* 400 * Close the congestion window down to one segment 401 * (we'll open it by one segment for each ack we get). 402 * Since we probably have a window's worth of unacked 403 * data accumulated, this "slow start" keeps us from 404 * dumping all that data as back-to-back packets (which 405 * might overwhelm an intermediate gateway). 406 * 407 * There are two phases to the opening: Initially we 408 * open by one mss on each ack. This makes the window 409 * size increase exponentially with time. If the 410 * window is larger than the path can handle, this 411 * exponential growth results in dropped packet(s) 412 * almost immediately. To get more time between 413 * drops but still "push" the network to take advantage 414 * of improving conditions, we switch from exponential 415 * to linear window opening at some threshhold size. 416 * For a threshhold, we use half the current window 417 * size, truncated to a multiple of the mss. 418 * 419 * (the minimum cwnd that will give us exponential 420 * growth is 2 mss. We don't allow the threshhold 421 * to go below this.) 422 */ 423 { 424 u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_segsz; 425 if (win < 2) 426 win = 2; 427 /* Loss Window MUST be one segment. */ 428 tp->snd_cwnd = tp->t_segsz; 429 tp->snd_ssthresh = win * tp->t_segsz; 430 tp->t_dupacks = 0; 431 } 432 (void) tcp_output(tp); 433 break; 434 435 /* 436 * Persistance timer into zero window. 437 * Force a byte to be output, if possible. 438 */ 439 case TCPT_PERSIST: 440 /* 441 * Hack: if the peer is dead/unreachable, we do not 442 * time out if the window is closed. After a full 443 * backoff, drop the connection if the idle time 444 * (no responses to probes) reaches the maximum 445 * backoff that we would use if retransmitting. 446 */ 447 rto = TCP_REXMTVAL(tp); 448 if (rto < tp->t_rttmin) 449 rto = tp->t_rttmin; 450 if (tp->t_rxtshift == TCP_MAXRXTSHIFT && 451 (tp->t_idle >= tcp_maxpersistidle || 452 tp->t_idle >= rto * tcp_totbackoff)) { 453 tcpstat.tcps_persistdrops++; 454 tp = tcp_drop(tp, ETIMEDOUT); 455 break; 456 } 457 tcpstat.tcps_persisttimeo++; 458 tcp_setpersist(tp); 459 tp->t_force = 1; 460 (void) tcp_output(tp); 461 tp->t_force = 0; 462 break; 463 464 /* 465 * Keep-alive timer went off; send something 466 * or drop connection if idle for too long. 467 */ 468 case TCPT_KEEP: 469 { 470 struct socket *so = NULL; 471 472 tcpstat.tcps_keeptimeo++; 473 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 474 goto dropit; 475 #ifdef INET 476 if (tp->t_inpcb) 477 so = tp->t_inpcb->inp_socket; 478 #endif 479 #ifdef INET6 480 if (tp->t_in6pcb) 481 so = tp->t_in6pcb->in6p_socket; 482 #endif 483 if (so->so_options & SO_KEEPALIVE && 484 tp->t_state <= TCPS_CLOSE_WAIT) { 485 if ((tcp_maxidle > 0) && 486 (tp->t_idle >= tcp_keepidle + tcp_maxidle)) 487 goto dropit; 488 /* 489 * Send a packet designed to force a response 490 * if the peer is up and reachable: 491 * either an ACK if the connection is still alive, 492 * or an RST if the peer has closed the connection 493 * due to timeout or reboot. 494 * Using sequence number tp->snd_una-1 495 * causes the transmitted zero-length segment 496 * to lie outside the receive window; 497 * by the protocol spec, this requires the 498 * correspondent TCP to respond. 499 */ 500 tcpstat.tcps_keepprobe++; 501 if (tcp_compat_42) { 502 /* 503 * The keepalive packet must have nonzero 504 * length to get a 4.2 host to respond. 505 */ 506 (void)tcp_respond(tp, tp->t_template, 507 (struct mbuf *)NULL, NULL, tp->rcv_nxt - 1, 508 tp->snd_una - 1, 0); 509 } else { 510 (void)tcp_respond(tp, tp->t_template, 511 (struct mbuf *)NULL, NULL, tp->rcv_nxt, 512 tp->snd_una - 1, 0); 513 } 514 TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl); 515 } else 516 TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle); 517 break; 518 } 519 dropit: 520 tcpstat.tcps_keepdrops++; 521 tp = tcp_drop(tp, ETIMEDOUT); 522 break; 523 } 524 return (tp); 525 } 526