/*	$NetBSD: tcp_congctl.c,v 1.14 2008/02/29 07:39:17 matt Exp $	*/

/*-
 * Copyright (c) 1997, 1998, 1999, 2001, 2005, 2006 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation
 * Facility, NASA Ames Research Center.
 * This code is derived from software contributed to The NetBSD Foundation
 * by Charles M. Hannum.
 * This code is derived from software contributed to The NetBSD Foundation
 * by Rui Paulo.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * @(#)COPYRIGHT	1.1 (NRL) 17 January 1995
 *
 * NRL grants permission for redistribution and use in source and binary
 * forms, with or without modification, of the software and documentation
 * created at NRL provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgements:
 *        This product includes software developed by the University of
 *        California, Berkeley and its contributors.
 *        This product includes software developed at the Information
 *        Technology Division, US Naval Research Laboratory.
 * 4. Neither the name of the NRL nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation
 * are those of the authors and should not be interpreted as representing
 * official policies, either expressed or implied, of the US Naval
 * Research Laboratory (NRL).
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)tcp_input.c	8.12 (Berkeley) 5/24/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tcp_congctl.c,v 1.14 2008/02/29 07:39:17 matt Exp $");

#include "opt_inet.h"
#include "opt_tcp_debug.h"
#include "opt_tcp_congctl.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/errno.h>
#include <sys/syslog.h>
#include <sys/pool.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/mutex.h>

#include <net/if.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>

#ifdef INET6
#ifndef INET
#include <netinet/in.h>
#endif
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/ip6_var.h>
#include <netinet6/in6_var.h>
#include <netinet/icmp6.h>
#include <netinet6/nd6.h>
#endif

#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
#include <netinet/tcp_congctl.h>
#ifdef TCP_DEBUG
#include <netinet/tcp_debug.h>
#endif

/*
 * TODO:
 *   consider separating the actual implementations in another file.
 */

static int  tcp_reno_fast_retransmit(struct tcpcb *, const struct tcphdr *);
static void tcp_reno_slow_retransmit(struct tcpcb *);
static void tcp_reno_fast_retransmit_newack(struct tcpcb *,
    const struct tcphdr *);
static void tcp_reno_newack(struct tcpcb *, const struct tcphdr *);
static void tcp_reno_congestion_exp(struct tcpcb *tp);

static int  tcp_newreno_fast_retransmit(struct tcpcb *, const struct tcphdr *);
static void tcp_newreno_fast_retransmit_newack(struct tcpcb *,
    const struct tcphdr *);
static void tcp_newreno_newack(struct tcpcb *, const struct tcphdr *);


static void tcp_congctl_fillnames(void);

extern int tcprexmtthresh;

MALLOC_DEFINE(M_TCPCONGCTL, "tcpcongctl", "TCP congestion control structures");

/* currently selected global congestion control */
char tcp_congctl_global_name[TCPCC_MAXLEN];

/* available global congestion control algorithms */
char tcp_congctl_avail[10 * TCPCC_MAXLEN];

/*
 * Used to list the available congestion control algorithms.
 */
TAILQ_HEAD(, tcp_congctlent) tcp_congctlhd =
    TAILQ_HEAD_INITIALIZER(tcp_congctlhd);

static struct tcp_congctlent * tcp_congctl_global;

static kmutex_t tcp_congctl_mtx;

void
tcp_congctl_init(void)
{
        int r;

        mutex_init(&tcp_congctl_mtx, MUTEX_DEFAULT, IPL_NONE);

        /* Base algorithms. */
        r = tcp_congctl_register("reno", &tcp_reno_ctl);
        KASSERT(r == 0);
        r = tcp_congctl_register("newreno", &tcp_newreno_ctl);
        KASSERT(r == 0);

        /* NewReno is the default. */
#ifndef TCP_CONGCTL_DEFAULT
#define TCP_CONGCTL_DEFAULT "newreno"
#endif

        r = tcp_congctl_select(NULL, TCP_CONGCTL_DEFAULT);
        KASSERT(r == 0);
}

/*
 * Register a congestion algorithm and select it if we have none.
 */
int
tcp_congctl_register(const char *name, const struct tcp_congctl *tcc)
{
        struct tcp_congctlent *ntcc, *tccp;

        TAILQ_FOREACH(tccp, &tcp_congctlhd, congctl_ent)
                if (!strcmp(name, tccp->congctl_name)) {
                        /* name already registered */
                        return EEXIST;
                }

        ntcc = malloc(sizeof(*ntcc), M_TCPCONGCTL, M_WAITOK|M_ZERO);

        strlcpy(ntcc->congctl_name, name, sizeof(ntcc->congctl_name) - 1);
        ntcc->congctl_ctl = tcc;

        TAILQ_INSERT_TAIL(&tcp_congctlhd, ntcc, congctl_ent);
        tcp_congctl_fillnames();

        if (TAILQ_FIRST(&tcp_congctlhd) == ntcc)
                tcp_congctl_select(NULL, name);

        return 0;
}

int
tcp_congctl_unregister(const char *name)
{
        struct tcp_congctlent *tccp, *rtccp;
        unsigned int size;

        rtccp = NULL;
        size = 0;
        TAILQ_FOREACH(tccp, &tcp_congctlhd, congctl_ent) {
                if (!strcmp(name, tccp->congctl_name))
                        rtccp = tccp;
                size++;
        }

        if (!rtccp)
                return ENOENT;

        if (size <= 1 || tcp_congctl_global == rtccp || rtccp->congctl_refcnt)
                return EBUSY;

        TAILQ_REMOVE(&tcp_congctlhd, rtccp, congctl_ent);
        free(rtccp, M_TCPCONGCTL);
        tcp_congctl_fillnames();

        return 0;
}
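
/*
 * Illustrative sketch (added here for documentation, not original code) of
 * how an additional algorithm would hook into the registry above.  The
 * "examplecc" name and the tcp_examplecc_ctl symbol are hypothetical:
 *
 *	extern const struct tcp_congctl tcp_examplecc_ctl;
 *
 *	error = tcp_congctl_register("examplecc", &tcp_examplecc_ctl);
 *	if (error == 0)
 *		error = tcp_congctl_select(NULL, "examplecc");
 *	...
 *	error = tcp_congctl_unregister("examplecc");
 *
 * Passing a NULL tcpcb to tcp_congctl_select() changes the global default,
 * as tcp_congctl_init() does above.  tcp_congctl_unregister() returns EBUSY
 * while the algorithm is the global default, is still referenced by a
 * connection, or is the only one registered.
 */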

/*
 * Select a congestion algorithm by name.
 */
int
tcp_congctl_select(struct tcpcb *tp, const char *name)
{
        struct tcp_congctlent *tccp, *old_tccp, *new_tccp;
        bool old_found, new_found;

        KASSERT(name);

        old_found = (tp == NULL || tp->t_congctl == NULL);
        old_tccp = NULL;
        new_found = false;
        new_tccp = NULL;

        TAILQ_FOREACH(tccp, &tcp_congctlhd, congctl_ent) {
                if (!old_found && tccp->congctl_ctl == tp->t_congctl) {
                        old_tccp = tccp;
                        old_found = true;
                }

                if (!new_found && !strcmp(name, tccp->congctl_name)) {
                        new_tccp = tccp;
                        new_found = true;
                }

                if (new_found && old_found) {
                        if (tp) {
                                mutex_enter(&tcp_congctl_mtx);
                                if (old_tccp)
                                        old_tccp->congctl_refcnt--;
                                tp->t_congctl = new_tccp->congctl_ctl;
                                new_tccp->congctl_refcnt++;
                                mutex_exit(&tcp_congctl_mtx);
                        } else {
                                tcp_congctl_global = new_tccp;
                                strlcpy(tcp_congctl_global_name,
                                    new_tccp->congctl_name,
                                    sizeof(tcp_congctl_global_name) - 1);
                        }
                        return 0;
                }
        }

        return EINVAL;
}

void
tcp_congctl_release(struct tcpcb *tp)
{
        struct tcp_congctlent *tccp;

        KASSERT(tp->t_congctl);

        TAILQ_FOREACH(tccp, &tcp_congctlhd, congctl_ent) {
                if (tccp->congctl_ctl == tp->t_congctl) {
                        tccp->congctl_refcnt--;
                        return;
                }
        }
}

/*
 * Returns the name of a congestion algorithm.
 */
const char *
tcp_congctl_bystruct(const struct tcp_congctl *tcc)
{
        struct tcp_congctlent *tccp;

        KASSERT(tcc);

        TAILQ_FOREACH(tccp, &tcp_congctlhd, congctl_ent)
                if (tccp->congctl_ctl == tcc)
                        return tccp->congctl_name;

        return NULL;
}

static void
tcp_congctl_fillnames(void)
{
        struct tcp_congctlent *tccp;
        const char *delim = " ";

        tcp_congctl_avail[0] = '\0';
        TAILQ_FOREACH(tccp, &tcp_congctlhd, congctl_ent) {
                strlcat(tcp_congctl_avail, tccp->congctl_name,
                    sizeof(tcp_congctl_avail) - 1);
                if (TAILQ_NEXT(tccp, congctl_ent))
                        strlcat(tcp_congctl_avail, delim,
                            sizeof(tcp_congctl_avail) - 1);
        }

}

/* ------------------------------------------------------------------------ */

/*
 * TCP/Reno congestion control.
 */
static void
tcp_reno_congestion_exp(struct tcpcb *tp)
{
        u_int win;

        /*
         * Halve the congestion window and reduce the
         * slow start threshold.
         */
        win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_segsz;
        if (win < 2)
                win = 2;

        tp->snd_ssthresh = win * tp->t_segsz;
        tp->snd_recover = tp->snd_max;
        tp->snd_cwnd = tp->snd_ssthresh;

        /*
         * When using TCP ECN, notify the peer that
         * we reduced the cwnd.
         */
        if (TCP_ECN_ALLOWED(tp))
                tp->t_flags |= TF_ECN_SND_CWR;
}
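
/*
 * Worked example for the halving above (illustrative numbers only): with
 * t_segsz = 1460 bytes and min(snd_wnd, snd_cwnd) = 14600 bytes (10
 * segments), win = 5, so snd_ssthresh = snd_cwnd = 7300 bytes.  The
 * "win < 2" clamp keeps ssthresh at no less than two segments, the minimum
 * window that still allows exponential slow-start growth.
 */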


static int
tcp_reno_fast_retransmit(struct tcpcb *tp, const struct tcphdr *th)
{
        /*
         * We know we're losing at the current
         * window size so do congestion avoidance
         * (set ssthresh to half the current window
         * and pull our congestion window back to
         * the new ssthresh).
         *
         * Dup acks mean that packets have left the
         * network (they're now cached at the receiver)
         * so bump cwnd by the amount in the receiver
         * to keep a constant cwnd packets in the
         * network.
         *
         * If we are using TCP/SACK, then enter
         * Fast Recovery if the receiver SACKs
         * data that is tcprexmtthresh * MSS
         * bytes past the last ACKed segment,
         * irrespective of the number of DupAcks.
         */

        tcp_seq onxt;

        onxt = tp->snd_nxt;
        tcp_reno_congestion_exp(tp);
        tp->t_partialacks = 0;
        TCP_TIMER_DISARM(tp, TCPT_REXMT);
        tp->t_rtttime = 0;
        if (TCP_SACK_ENABLED(tp)) {
                tp->t_dupacks = tcprexmtthresh;
                tp->sack_newdata = tp->snd_nxt;
                tp->snd_cwnd = tp->t_segsz;
                (void) tcp_output(tp);
                return 0;
        }
        tp->snd_nxt = th->th_ack;
        tp->snd_cwnd = tp->t_segsz;
        (void) tcp_output(tp);
        tp->snd_cwnd = tp->snd_ssthresh + tp->t_segsz * tp->t_dupacks;
        if (SEQ_GT(onxt, tp->snd_nxt))
                tp->snd_nxt = onxt;

        return 0;
}

static void
tcp_reno_slow_retransmit(struct tcpcb *tp)
{
        u_int win;

        /*
         * Close the congestion window down to one segment
         * (we'll open it by one segment for each ack we get).
         * Since we probably have a window's worth of unacked
         * data accumulated, this "slow start" keeps us from
         * dumping all that data as back-to-back packets (which
         * might overwhelm an intermediate gateway).
         *
         * There are two phases to the opening: Initially we
         * open by one mss on each ack.  This makes the window
         * size increase exponentially with time.  If the
         * window is larger than the path can handle, this
         * exponential growth results in dropped packet(s)
         * almost immediately.  To get more time between
         * drops but still "push" the network to take advantage
         * of improving conditions, we switch from exponential
         * to linear window opening at some threshold size.
         * For a threshold, we use half the current window
         * size, truncated to a multiple of the mss.
         *
         * (the minimum cwnd that will give us exponential
         * growth is 2 mss.  We don't allow the threshold
         * to go below this.)
         */

        win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_segsz;
        if (win < 2)
                win = 2;
        /* Loss Window MUST be one segment. */
        tp->snd_cwnd = tp->t_segsz;
        tp->snd_ssthresh = win * tp->t_segsz;
        tp->t_partialacks = -1;
        tp->t_dupacks = 0;
        tp->t_bytes_acked = 0;
}

static void
tcp_reno_fast_retransmit_newack(struct tcpcb *tp,
    const struct tcphdr *th)
{
        if (tp->t_partialacks < 0) {
                /*
                 * We were not in fast recovery.  Reset the duplicate ack
                 * counter.
                 */
                tp->t_dupacks = 0;
        } else {
                /*
                 * Clamp the congestion window to the crossover point and
                 * exit fast recovery.
                 */
                if (tp->snd_cwnd > tp->snd_ssthresh)
                        tp->snd_cwnd = tp->snd_ssthresh;
                tp->t_partialacks = -1;
                tp->t_dupacks = 0;
                tp->t_bytes_acked = 0;
        }
}
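
/*
 * Worked example of Reno fast recovery (illustrative numbers only, non-SACK
 * case): assume t_segsz = 1460 and snd_ssthresh = 7300 after
 * tcp_reno_congestion_exp().  After tcprexmtthresh (normally 3) duplicate
 * ACKs, tcp_reno_fast_retransmit() resends the missing segment with cwnd
 * forced to one segment and then inflates cwnd to
 * ssthresh + 3 * t_segsz = 11680 bytes, so further duplicate ACKs can keep
 * clocking data out.  When the retransmission is finally acknowledged,
 * tcp_reno_fast_retransmit_newack() clamps cwnd back down to ssthresh
 * (7300 bytes) and leaves fast recovery.
 */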

static void
tcp_reno_newack(struct tcpcb *tp, const struct tcphdr *th)
{
        /*
         * When new data is acked, open the congestion window.
         */

        u_int cw = tp->snd_cwnd;
        u_int incr = tp->t_segsz;

        if (tcp_do_abc) {

                /*
                 * RFC 3465 Appropriate Byte Counting (ABC)
                 */

                int acked = th->th_ack - tp->snd_una;

                if (cw >= tp->snd_ssthresh) {
                        tp->t_bytes_acked += acked;
                        if (tp->t_bytes_acked >= cw) {
                                /* Time to increase the window. */
                                tp->t_bytes_acked -= cw;
                        } else {
                                /* No need to increase yet. */
                                incr = 0;
                        }
                } else {
                        /*
                         * use 2*SMSS or 1*SMSS for the "L" param,
                         * depending on sysctl setting.
                         *
                         * (See RFC 3465 2.3 Choosing the Limit)
                         */
                        u_int abc_lim;

                        abc_lim = (tcp_abc_aggressive == 0 ||
                            tp->snd_nxt != tp->snd_max) ? incr : incr * 2;
                        incr = min(acked, abc_lim);
                }
        } else {

                /*
                 * If the window gives us less than ssthresh packets
                 * in flight, open exponentially (segsz per packet).
                 * Otherwise open linearly: segsz per window
                 * (segsz^2 / cwnd per packet).
                 */

                if (cw >= tp->snd_ssthresh) {
                        incr = incr * incr / cw;
                }
        }

        tp->snd_cwnd = min(cw + incr, TCP_MAXWIN << tp->snd_scale);
}

const struct tcp_congctl tcp_reno_ctl = {
        .fast_retransmit = tcp_reno_fast_retransmit,
        .slow_retransmit = tcp_reno_slow_retransmit,
        .fast_retransmit_newack = tcp_reno_fast_retransmit_newack,
        .newack = tcp_reno_newack,
        .cong_exp = tcp_reno_congestion_exp,
};
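
/*
 * Worked example of the window opening in tcp_reno_newack() above
 * (illustrative numbers only, non-ABC case): with t_segsz = 1460, while
 * cw < snd_ssthresh every new ACK grows cwnd by a full segment, roughly
 * doubling it per RTT (slow start).  Once cw >= snd_ssthresh, each ACK adds
 * only segsz * segsz / cw bytes, e.g. 1460 * 1460 / 14600 = 146 bytes when
 * cw = 14600, i.e. about one segment per window per RTT (congestion
 * avoidance).  With tcp_do_abc set, the same growth is driven by the number
 * of bytes acknowledged (t_bytes_acked) rather than by ACK arrivals, as
 * described in RFC 3465.
 */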

/*
 * TCP/NewReno Congestion control.
 */
static int
tcp_newreno_fast_retransmit(struct tcpcb *tp, const struct tcphdr *th)
{
        if (SEQ_LT(th->th_ack, tp->snd_high)) {
                /*
                 * False fast retransmit after timeout.
                 * Do not enter fast recovery.
                 */
                tp->t_dupacks = 0;
                return 1;
        } else {
                /*
                 * Fast retransmit is the same as Reno.
                 */
                return tcp_reno_fast_retransmit(tp, th);
        }

        return 0;
}

/*
 * Implement the NewReno response to a new ack, checking for partial acks in
 * fast recovery.
 */
static void
tcp_newreno_fast_retransmit_newack(struct tcpcb *tp, const struct tcphdr *th)
{
        if (tp->t_partialacks < 0) {
                /*
                 * We were not in fast recovery.  Reset the duplicate ack
                 * counter.
                 */
                tp->t_dupacks = 0;
        } else if (SEQ_LT(th->th_ack, tp->snd_recover)) {
                /*
                 * This is a partial ack.  Retransmit the first unacknowledged
                 * segment and deflate the congestion window by the amount of
                 * acknowledged data.  Do not exit fast recovery.
                 */
                tcp_seq onxt = tp->snd_nxt;
                u_long ocwnd = tp->snd_cwnd;

                /*
                 * snd_una has not yet been updated and the socket's send
                 * buffer has not yet drained off the ACK'd data, so we
                 * have to leave snd_una as it was to get the correct data
                 * offset in tcp_output().
                 */
                if (++tp->t_partialacks == 1)
                        TCP_TIMER_DISARM(tp, TCPT_REXMT);
                tp->t_rtttime = 0;
                tp->snd_nxt = th->th_ack;
                /*
                 * Set snd_cwnd to one segment beyond ACK'd offset.  snd_una
                 * is not yet updated when we're called.
                 */
                tp->snd_cwnd = tp->t_segsz + (th->th_ack - tp->snd_una);
                (void) tcp_output(tp);
                tp->snd_cwnd = ocwnd;
                if (SEQ_GT(onxt, tp->snd_nxt))
                        tp->snd_nxt = onxt;
                /*
                 * Partial window deflation.  Relies on fact that tp->snd_una
                 * not updated yet.
                 */
                tp->snd_cwnd -= (th->th_ack - tp->snd_una - tp->t_segsz);
        } else {
                /*
                 * Complete ack.  Inflate the congestion window to ssthresh
                 * and exit fast recovery.
                 *
                 * Window inflation should have left us with approx.
                 * snd_ssthresh outstanding data.  But in case we
                 * would be inclined to send a burst, better to do
                 * it via the slow start mechanism.
                 */
                if (SEQ_SUB(tp->snd_max, th->th_ack) < tp->snd_ssthresh)
                        tp->snd_cwnd = SEQ_SUB(tp->snd_max, th->th_ack)
                            + tp->t_segsz;
                else
                        tp->snd_cwnd = tp->snd_ssthresh;
                tp->t_partialacks = -1;
                tp->t_dupacks = 0;
                tp->t_bytes_acked = 0;
        }
}

static void
tcp_newreno_newack(struct tcpcb *tp, const struct tcphdr *th)
{
        /*
         * If we are still in fast recovery (meaning we are using
         * NewReno and we have only received partial acks), do not
         * inflate the window yet.
         */
        if (tp->t_partialacks < 0)
                tcp_reno_newack(tp, th);
}


const struct tcp_congctl tcp_newreno_ctl = {
        .fast_retransmit = tcp_newreno_fast_retransmit,
        .slow_retransmit = tcp_reno_slow_retransmit,
        .fast_retransmit_newack = tcp_newreno_fast_retransmit_newack,
        .newack = tcp_newreno_newack,
        .cong_exp = tcp_reno_congestion_exp,
};
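
/*
 * A minimal sketch (illustrative only, not original code) of what a third
 * algorithm's hook table could look like.  The names are hypothetical and
 * most hooks simply reuse the Reno helpers defined in this file; only the
 * new-ack policy is assumed to differ:
 *
 *	static const struct tcp_congctl tcp_example_ctl = {
 *		.fast_retransmit = tcp_reno_fast_retransmit,
 *		.slow_retransmit = tcp_reno_slow_retransmit,
 *		.fast_retransmit_newack = tcp_reno_fast_retransmit_newack,
 *		.newack = tcp_example_newack,		(custom, hypothetical)
 *		.cong_exp = tcp_reno_congestion_exp,
 *	};
 *
 * Such a table would be registered with tcp_congctl_register("example",
 * &tcp_example_ctl), after which it appears in tcp_congctl_avail and can be
 * chosen with tcp_congctl_select().
 */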