1 /* 2 * Copyright (c) 2003, 2004 Jeffrey M. Hsu. All rights reserved. 3 * Copyright (c) 2003, 2004 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to The DragonFly Project 6 * by Jeffrey M. Hsu. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of The DragonFly Project nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific, prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 36 * The Regents of the University of California. All rights reserved. 
37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. All advertising materials mentioning features or use of this software 47 * must display the following acknowledgement: 48 * This product includes software developed by the University of 49 * California, Berkeley and its contributors. 50 * 4. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 
 *
 * @(#)tcp_timer.c	8.2 (Berkeley) 5/24/95
 * $FreeBSD: src/sys/netinet/tcp_timer.c,v 1.34.2.14 2003/02/03 02:33:41 hsu Exp $
 * $DragonFly: src/sys/netinet/tcp_timer.c,v 1.17 2008/03/30 20:39:01 dillon Exp $
 */

#include "opt_compat.h"
#include "opt_inet6.h"
#include "opt_tcpdebug.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
#include <sys/sysctl.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
#include <sys/thread.h>
#include <sys/globaldata.h>
#include <sys/thread2.h>
#include <sys/msgport2.h>

#include <machine/cpu.h>	/* before tcp_seq.h, for tcp_random18() */

#include <net/route.h>
#include <net/netmsg2.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_pcb.h>
#ifdef INET6
#include <netinet6/in6_pcb.h>
#endif
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif

/*
 * Per-timer task bits accumulated in netmsg_tcp_timer.tt_tasks.  Each bit
 * asks tcp_timer_handler() to run the corresponding expired-timer handler
 * in the owning TCP protocol thread.
 */
#define TCP_TIMER_REXMT		0x01
#define TCP_TIMER_PERSIST	0x02
#define TCP_TIMER_KEEP		0x04
#define TCP_TIMER_2MSL		0x08
#define TCP_TIMER_DELACK	0x10

static struct tcpcb	*tcp_timer_rexmt_handler(struct tcpcb *);
static struct tcpcb	*tcp_timer_persist_handler(struct tcpcb *);
static struct tcpcb	*tcp_timer_keep_handler(struct tcpcb *);
static struct tcpcb	*tcp_timer_2msl_handler(struct tcpcb *);
static struct tcpcb	*tcp_timer_delack_handler(struct tcpcb *);

/*
 * Dispatch table scanned by tcp_timer_handler(); the NULL handler entry
 * terminates the scan.  Table order fixes the order in which multiple
 * pending timer tasks are serviced from a single timer message.
 */
static const struct tcp_timer {
	uint32_t	tt_task;
	struct tcpcb	*(*tt_handler)(struct tcpcb *);
} tcp_timer_handlers[] = {
	{ TCP_TIMER_DELACK,	tcp_timer_delack_handler },
	{ TCP_TIMER_PERSIST,	tcp_timer_persist_handler },
	{ TCP_TIMER_REXMT,	tcp_timer_rexmt_handler },
	{ TCP_TIMER_KEEP,	tcp_timer_keep_handler },
	{ TCP_TIMER_2MSL,	tcp_timer_2msl_handler },
	{ 0, NULL }
};

/*
 * Sysctl handler exposing a tick-valued kernel variable in milliseconds.
 * Reads convert ticks -> ms for display; writes convert ms -> ticks and
 * reject any value that rounds down to less than one tick (EINVAL).
 */
static int
sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS)
{
	int error, s, tt;

	tt = *(int *)oidp->oid_arg1;
	/* widen to 64 bits so tt * 1000 cannot overflow an int */
	s = (int)((int64_t)tt * 1000 / hz);

	error = sysctl_handle_int(oidp, &s, 0, req);
	if (error || !req->newptr)
		return (error);

	tt = (int)((int64_t)s * hz / 1000);
	if (tt < 1)
		return (EINVAL);

	*(int *)oidp->oid_arg1 = tt;
	return (0);
}

int	tcp_keepinit;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "");

int	tcp_keepidle;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "");

int	tcp_keepintvl;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "");

int	tcp_delacktime;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime,
    CTLTYPE_INT|CTLFLAG_RW, &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
    "Time before a delayed ACK is sent");

int	tcp_msl;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");

int	tcp_rexmit_min;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I", "Minimum Retransmission Timeout");

int	tcp_rexmit_slop;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
    "Retransmission Timer Slop");

static int	always_keepalive = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
    &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");

static int	tcp_keepcnt = TCPTV_KEEPCNT;
	/* max idle probes */
int	tcp_maxpersistidle;
	/* max idle time in persist */
int	tcp_maxidle;

/*
 * Tcp protocol timeout routine called every 500 ms.
 * Updates timestamps used for TCP
 * causes finite state machine actions if timers expire.
 *
 * Here it only refreshes tcp_maxidle from the keepalive tunables, which
 * may have been changed via sysctl since the last tick.
 */
void
tcp_slowtimo(void)
{
	crit_enter();
	tcp_maxidle = tcp_keepcnt * tcp_keepintvl;
	crit_exit();
}

/*
 * Cancel all timers for TCP tp.
 */
void
tcp_canceltimers(struct tcpcb *tp)
{
	callout_stop(tp->tt_2msl);
	callout_stop(tp->tt_persist);
	callout_stop(tp->tt_keep);
	callout_stop(tp->tt_rexmt);
}

/*
 * Record an expired-timer task in the tcpcb's timer message and, if the
 * message is not already in flight (MSGF_DONE set), send it to the TCP
 * protocol thread for this cpu.
 *
 * Caller should be in critical section
 */
static void
tcp_send_timermsg(struct tcpcb *tp, uint32_t task)
{
	struct netmsg_tcp_timer *tmsg = tp->tt_msg;

	KKASSERT(tmsg != NULL && tmsg->tt_cpuid == mycpuid &&
		 tmsg->tt_tcb != NULL);

	tmsg->tt_tasks |= task;
	if (tmsg->tt_nmsg.nm_lmsg.ms_flags & MSGF_DONE)
		lwkt_sendmsg(tcp_cport(mycpuid), &tmsg->tt_nmsg.nm_lmsg);
}

/* RTO backoff multipliers, indexed by t_rxtshift (0..TCP_MAXRXTSHIFT) */
int	tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
    { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };

int	tcp_backoff[TCP_MAXRXTSHIFT + 1] =
    { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };

static int tcp_totbackoff = 511;	/* sum of tcp_backoff[] */

/*
 * Delayed-ACK timer expired: force an immediate ACK.
 *
 * Caller should be in critical section
 */
static struct tcpcb *
tcp_timer_delack_handler(struct tcpcb *tp)
{
	tp->t_flags |= TF_ACKNOW;
	tcpstat.tcps_delack++;
	tcp_output(tp);
	return tp;
}

/*
 * TCP timer processing.
 *
 * Each tcp_timer_*() function below is a callout callback.  It only
 * validates that the callout really fired (neither rescheduled nor
 * stopped while the callback was pending) and then forwards the work
 * to the TCP protocol thread via tcp_send_timermsg(); the actual
 * protocol action runs in tcp_timer_*_handler().
 */
void
tcp_timer_delack(void *xtp)
{
	struct tcpcb *tp = xtp;

	crit_enter();
	/* lost a reschedule/stop race; the timer did not really expire */
	if (callout_pending(tp->tt_delack) || !callout_active(tp->tt_delack)) {
		crit_exit();
		return;
	}
	callout_deactivate(tp->tt_delack);
	tcp_send_timermsg(tp, TCP_TIMER_DELACK);
	crit_exit();
}

/* Caller should be in critical section */
static struct tcpcb *
tcp_timer_2msl_handler(struct tcpcb *tp)
{
#ifdef TCPDEBUG
	int ostate;
#endif

#ifdef TCPDEBUG
	ostate = tp->t_state;
#endif
	/*
	 * 2 MSL timeout in shutdown went off.  If we're closed but
	 * still waiting for peer to close and connection has been idle
	 * too long, or if 2MSL time is up from TIME_WAIT, delete connection
	 * control block.  Otherwise, check again in a bit.
	 */
	if (tp->t_state != TCPS_TIME_WAIT &&
	    (ticks - tp->t_rcvtime) <= tcp_maxidle)
		callout_reset(tp->tt_2msl, tcp_keepintvl,
			      tcp_timer_2msl, tp);
	else
		tp = tcp_close(tp);	/* NULL return: tcpcb is gone */

#ifdef TCPDEBUG
	if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
#endif
	return tp;
}

void
tcp_timer_2msl(void *xtp)
{
	struct tcpcb *tp = xtp;

	crit_enter();
	/* lost a reschedule/stop race; the timer did not really expire */
	if (callout_pending(tp->tt_2msl) || !callout_active(tp->tt_2msl)) {
		crit_exit();
		return;
	}
	callout_deactivate(tp->tt_2msl);
	tcp_send_timermsg(tp, TCP_TIMER_2MSL);
	crit_exit();
}

/* Caller should be in critical section */
static struct tcpcb *
tcp_timer_keep_handler(struct tcpcb *tp)
{
	struct tcptemp *t_template;
#ifdef TCPDEBUG
	int ostate;
#endif

#ifdef TCPDEBUG
	ostate = tp->t_state;
#endif
	/*
	 * Keep-alive timer went off; send something
	 * or drop connection if idle for too long.
	 */
	tcpstat.tcps_keeptimeo++;
	if (tp->t_state < TCPS_ESTABLISHED)
		goto dropit;	/* connect attempt never completed */
	if ((always_keepalive ||
	     tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) &&
	    tp->t_state <= TCPS_CLOSING) {
		if ((ticks - tp->t_rcvtime) >= tcp_keepidle + tcp_maxidle)
			goto dropit;
		/*
		 * Send a packet designed to force a response
		 * if the peer is up and reachable:
		 * either an ACK if the connection is still alive,
		 * or an RST if the peer has closed the connection
		 * due to timeout or reboot.
		 * Using sequence number tp->snd_una-1
		 * causes the transmitted zero-length segment
		 * to lie outside the receive window;
		 * by the protocol spec, this requires the
		 * correspondent TCP to respond.
		 */
		tcpstat.tcps_keepprobe++;
		t_template = tcp_maketemplate(tp);
		if (t_template) {
			tcp_respond(tp, t_template->tt_ipgen,
				    &t_template->tt_t, (struct mbuf *)NULL,
				    tp->rcv_nxt, tp->snd_una - 1, 0);
			tcp_freetemplate(t_template);
		}
		callout_reset(tp->tt_keep, tcp_keepintvl, tcp_timer_keep, tp);
	} else
		callout_reset(tp->tt_keep, tcp_keepidle, tcp_timer_keep, tp);

#ifdef TCPDEBUG
	if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
#endif
	return tp;

dropit:
	tcpstat.tcps_keepdrops++;
	tp = tcp_drop(tp, ETIMEDOUT);	/* may return NULL */

#ifdef TCPDEBUG
	if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
#endif
	return tp;
}

void
tcp_timer_keep(void *xtp)
{
	struct tcpcb *tp = xtp;

	crit_enter();
	/* lost a reschedule/stop race; the timer did not really expire */
	if (callout_pending(tp->tt_keep) || !callout_active(tp->tt_keep)) {
		crit_exit();
		return;
	}
	callout_deactivate(tp->tt_keep);
	tcp_send_timermsg(tp, TCP_TIMER_KEEP);
	crit_exit();
}

/* Caller should be in critical section */
static struct tcpcb *
tcp_timer_persist_handler(struct tcpcb *tp)
{
#ifdef TCPDEBUG
	int ostate;
#endif

#ifdef TCPDEBUG
	ostate = tp->t_state;
#endif
	/*
	 * Persistence timer into zero window.
	 * Force a byte to be output, if possible.
	 */
	tcpstat.tcps_persisttimeo++;
	/*
	 * Hack: if the peer is dead/unreachable, we do not
	 * time out if the window is closed.  After a full
	 * backoff, drop the connection if the idle time
	 * (no responses to probes) reaches the maximum
	 * backoff that we would use if retransmitting.
	 */
	if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
	    ((ticks - tp->t_rcvtime) >= tcp_maxpersistidle ||
	     (ticks - tp->t_rcvtime) >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
		tcpstat.tcps_persistdrop++;
		tp = tcp_drop(tp, ETIMEDOUT);	/* may return NULL */
		goto out;
	}
	/* re-arm the persist timer and force out one window probe */
	tcp_setpersist(tp);
	tp->t_flags |= TF_FORCE;
	tcp_output(tp);
	tp->t_flags &= ~TF_FORCE;

out:
#ifdef TCPDEBUG
	if (tp && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
#endif
	return tp;
}

void
tcp_timer_persist(void *xtp)
{
	struct tcpcb *tp = xtp;

	crit_enter();
	/* lost a reschedule/stop race; the timer did not really expire */
	if (callout_pending(tp->tt_persist) || !callout_active(tp->tt_persist)){
		crit_exit();
		return;
	}
	callout_deactivate(tp->tt_persist);
	tcp_send_timermsg(tp, TCP_TIMER_PERSIST);
	crit_exit();
}

/*
 * Snapshot the congestion-control state (cwnd, ssthresh, recovery point)
 * before the first retransmit so tcp_revert_congestion_state() can undo
 * the changes if the retransmit later proves to have been spurious.
 */
void
tcp_save_congestion_state(struct tcpcb *tp)
{
	tp->snd_cwnd_prev = tp->snd_cwnd;
	tp->snd_wacked_prev = tp->snd_wacked;
	tp->snd_ssthresh_prev = tp->snd_ssthresh;
	tp->snd_recover_prev = tp->snd_recover;
	if (IN_FASTRECOVERY(tp))
		tp->t_flags |= TF_WASFRECOVERY;
	else
		tp->t_flags &= ~TF_WASFRECOVERY;
	if (tp->t_flags & TF_RCVD_TSTMP) {
		/* remember when we retransmitted, for Eifel-style detection */
		tp->t_rexmtTS = ticks;
		tp->t_flags |= TF_FIRSTACCACK;
	}
#ifdef later
	tcp_sack_save_scoreboard(&tp->scb);
#endif
}

void
tcp_revert_congestion_state(struct tcpcb *tp)
{
	/* undo the changes made since tcp_save_congestion_state() */
	tp->snd_cwnd = tp->snd_cwnd_prev;
	tp->snd_wacked = tp->snd_wacked_prev;
	tp->snd_ssthresh = tp->snd_ssthresh_prev;
	tp->snd_recover = tp->snd_recover_prev;
	if (tp->t_flags & TF_WASFRECOVERY)
		ENTER_FASTRECOVERY(tp);
	if (tp->t_flags & TF_FASTREXMT) {
		++tcpstat.tcps_sndfastrexmitbad;
		if (tp->t_flags & TF_EARLYREXMT)
			++tcpstat.tcps_sndearlyrexmitbad;
	} else
		++tcpstat.tcps_sndrtobad;
	tp->t_badrxtwin = 0;
	tp->t_rxtshift = 0;
	tp->snd_nxt = tp->snd_max;
#ifdef later
	tcp_sack_revert_scoreboard(&tp->scb, tp->snd_una);
#endif
}

/* Caller should be in critical section */
static struct tcpcb *
tcp_timer_rexmt_handler(struct tcpcb *tp)
{
	int rexmt;
#ifdef TCPDEBUG
	int ostate;
#endif

#ifdef TCPDEBUG
	ostate = tp->t_state;
#endif
	/*
	 * Retransmission timer went off.  Message has not
	 * been acked within retransmit interval.  Back off
	 * to a longer retransmit interval and retransmit one segment.
	 */
	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
		tp->t_rxtshift = TCP_MAXRXTSHIFT;
		tcpstat.tcps_timeoutdrop++;
		tp = tcp_drop(tp, tp->t_softerror ?
			      tp->t_softerror : ETIMEDOUT);
		goto out;
	}
	if (tp->t_rxtshift == 1) {
		/*
		 * first retransmit; record ssthresh and cwnd so they can
		 * be recovered if this turns out to be a "bad" retransmit.
		 * A retransmit is considered "bad" if an ACK for this
		 * segment is received within RTT/2 interval; the assumption
		 * here is that the ACK was already in flight.  See
		 * "On Estimating End-to-End Network Path Properties" by
		 * Allman and Paxson for more details.
		 */
		tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
		tcp_save_congestion_state(tp);
		tp->t_flags &= ~(TF_FASTREXMT | TF_EARLYREXMT);
	}
	/* Throw away SACK blocks on a RTO, as specified by RFC2018. */
	tcp_sack_cleanup(&tp->scb);
	tcpstat.tcps_rexmttimeo++;
	if (tp->t_state == TCPS_SYN_SENT)
		rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift];
	else
		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
	TCPT_RANGESET(tp->t_rxtcur, rexmt,
		      tp->t_rttmin, TCPTV_REXMTMAX);
	/*
	 * Disable rfc1323 and rfc1644 if we haven't got any response to
	 * our third SYN to work-around some broken terminal servers
	 * (most of which have hopefully been retired) that have bad VJ
	 * header compression code which trashes TCP segments containing
	 * unknown-to-them TCP options.
	 */
	if ((tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3))
		tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_REQ_CC);
	/*
	 * If losing, let the lower level know and try for
	 * a better route.  Also, if we backed off this far,
	 * our srtt estimate is probably bogus.  Clobber it
	 * so we'll take the next rtt measurement as our srtt;
	 * move the current srtt into rttvar to keep the current
	 * retransmit times until then.
	 */
	if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
#ifdef INET6
		if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
			in6_losing(tp->t_inpcb);
		else
#endif
		in_losing(tp->t_inpcb);
		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
		tp->t_srtt = 0;
	}
	tp->snd_nxt = tp->snd_una;
	tp->rexmt_high = tp->snd_una;
	tp->snd_recover = tp->snd_max;
	/*
	 * Force a segment to be sent.
	 */
	tp->t_flags |= TF_ACKNOW;
	/*
	 * If timing a segment in this window, stop the timer.
	 */
	tp->t_rtttime = 0;
	/*
	 * Close the congestion window down to one segment
	 * (we'll open it by one segment for each ack we get).
	 * Since we probably have a window's worth of unacked
	 * data accumulated, this "slow start" keeps us from
	 * dumping all that data as back-to-back packets (which
	 * might overwhelm an intermediate gateway).
	 *
	 * There are two phases to the opening: Initially we
	 * open by one mss on each ack.  This makes the window
	 * size increase exponentially with time.  If the
	 * window is larger than the path can handle, this
	 * exponential growth results in dropped packet(s)
	 * almost immediately.  To get more time between
	 * drops but still "push" the network to take advantage
	 * of improving conditions, we switch from exponential
	 * to linear window opening at some threshold size.
	 * For a threshold, we use half the current window
	 * size, truncated to a multiple of the mss.
	 *
	 * (the minimum cwnd that will give us exponential
	 * growth is 2 mss.  We don't allow the threshold
	 * to go below this.)
	 */
	{
		u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;

		if (win < 2)
			win = 2;
		tp->snd_cwnd = tp->t_maxseg;
		tp->snd_wacked = 0;
		tp->snd_ssthresh = win * tp->t_maxseg;
		tp->t_dupacks = 0;
	}
	EXIT_FASTRECOVERY(tp);
	tcp_output(tp);

out:
#ifdef TCPDEBUG
	if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
#endif
	return tp;
}

void
tcp_timer_rexmt(void *xtp)
{
	struct tcpcb *tp = xtp;

	crit_enter();
	/* lost a reschedule/stop race; the timer did not really expire */
	if (callout_pending(tp->tt_rexmt) || !callout_active(tp->tt_rexmt)) {
		crit_exit();
		return;
	}
	callout_deactivate(tp->tt_rexmt);
	tcp_send_timermsg(tp, TCP_TIMER_REXMT);
	crit_exit();
}

/*
 * Netmsg handler run in the TCP protocol thread: service every timer
 * task bit accumulated in the message, stopping early if a handler
 * returns NULL (the tcpcb was dropped/closed).
 */
static void
tcp_timer_handler(struct netmsg *nmsg)
{
	struct netmsg_tcp_timer *tmsg = (struct netmsg_tcp_timer *)nmsg;
	const struct tcp_timer *tt;
	struct tcpcb *tp;
	uint32_t tasks;

	crit_enter();

	KKASSERT(tmsg->tt_cpuid == mycpuid && tmsg->tt_tcb != NULL);
	tp = tmsg->tt_tcb;

	/* Save pending tasks and reset the tasks in message */
	tasks = tmsg->tt_tasks;
	tmsg->tt_tasks = 0;

	/* Reply ASAP, so the message can be reused for new timer tasks */
	lwkt_replymsg(&tmsg->tt_nmsg.nm_lmsg, 0);

	for (tt = tcp_timer_handlers; tt->tt_handler != NULL; ++tt) {
		if ((tasks & tt->tt_task) == 0)
			continue;

		tp = tt->tt_handler(tp);
		if (tp == NULL)	/* tcpcb destroyed; stop processing */
			break;

		tasks &= ~tt->tt_task;
		if (tasks == 0)	/* nothing left to do */
			break;
	}

	crit_exit();
}

/*
 * Initialize the tcpcb's timer message, binding it to this cpu.
 * MSGF_DROPABLE allows tcp_destroy_timermsg() to drop an in-flight
 * message when the tcpcb goes away.
 */
void
tcp_create_timermsg(struct tcpcb *tp)
{
	struct netmsg_tcp_timer *tmsg = tp->tt_msg;

	netmsg_init(&tmsg->tt_nmsg, &netisr_adone_rport,
		    MSGF_DROPABLE | MSGF_PRIORITY, tcp_timer_handler);
	tmsg->tt_cpuid = mycpuid;
	tmsg->tt_tcb = tp;
	tmsg->tt_tasks = 0;
}

/*
 * Tear down the tcpcb's timer message, dropping it from the target
 * message port if it is still queued there.
 */
void
tcp_destroy_timermsg(struct tcpcb *tp)
{
	struct netmsg_tcp_timer *tmsg = tp->tt_msg;

	if (tmsg == NULL ||		/* listen socket */
	    tmsg->tt_tcb == NULL)	/* only tcp_attach() is called */
		return;

	KKASSERT(tmsg->tt_cpuid == mycpuid);
	crit_enter();
	if ((tmsg->tt_nmsg.nm_lmsg.ms_flags & MSGF_DONE) == 0) {
		/*
		 * This message is still pending to be processed;
		 * drop it.
		 */
		lwkt_dropmsg(&tmsg->tt_nmsg.nm_lmsg);
	}
	crit_exit();
}