xref: /netbsd-src/sys/netinet/tcp_congctl.c (revision 8ac07aec990b9d2e483062509d0a9fa5b4f57cf2)
1 /*	$NetBSD: tcp_congctl.c,v 1.14 2008/02/29 07:39:17 matt Exp $	*/
2 
3 /*-
4  * Copyright (c) 1997, 1998, 1999, 2001, 2005, 2006 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation
9  * Facility, NASA Ames Research Center.
10  * This code is derived from software contributed to The NetBSD Foundation
11  * by Charles M. Hannum.
12  * This code is derived from software contributed to The NetBSD Foundation
13  * by Rui Paulo.
14  *
15  * Redistribution and use in source and binary forms, with or without
16  * modification, are permitted provided that the following conditions
17  * are met:
18  * 1. Redistributions of source code must retain the above copyright
19  *    notice, this list of conditions and the following disclaimer.
20  * 2. Redistributions in binary form must reproduce the above copyright
21  *    notice, this list of conditions and the following disclaimer in the
22  *    documentation and/or other materials provided with the distribution.
23  * 3. All advertising materials mentioning features or use of this software
24  *    must display the following acknowledgement:
25  *	This product includes software developed by the NetBSD
26  *	Foundation, Inc. and its contributors.
27  * 4. Neither the name of The NetBSD Foundation nor the names of its
28  *    contributors may be used to endorse or promote products derived
29  *    from this software without specific prior written permission.
30  *
31  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
32  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
33  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
34  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
35  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
38  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
39  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41  * POSSIBILITY OF SUCH DAMAGE.
42  */
43 
44 /*
45  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
46  * All rights reserved.
47  *
48  * Redistribution and use in source and binary forms, with or without
49  * modification, are permitted provided that the following conditions
50  * are met:
51  * 1. Redistributions of source code must retain the above copyright
52  *    notice, this list of conditions and the following disclaimer.
53  * 2. Redistributions in binary form must reproduce the above copyright
54  *    notice, this list of conditions and the following disclaimer in the
55  *    documentation and/or other materials provided with the distribution.
56  * 3. Neither the name of the project nor the names of its contributors
57  *    may be used to endorse or promote products derived from this software
58  *    without specific prior written permission.
59  *
60  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
61  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
62  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
63  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
64  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
65  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
66  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
67  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
68  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
69  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
70  * SUCH DAMAGE.
71  */
72 
73 /*
74  *      @(#)COPYRIGHT   1.1 (NRL) 17 January 1995
75  *
76  * NRL grants permission for redistribution and use in source and binary
77  * forms, with or without modification, of the software and documentation
78  * created at NRL provided that the following conditions are met:
79  *
80  * 1. Redistributions of source code must retain the above copyright
81  *    notice, this list of conditions and the following disclaimer.
82  * 2. Redistributions in binary form must reproduce the above copyright
83  *    notice, this list of conditions and the following disclaimer in the
84  *    documentation and/or other materials provided with the distribution.
85  * 3. All advertising materials mentioning features or use of this software
86  *    must display the following acknowledgements:
87  *      This product includes software developed by the University of
88  *      California, Berkeley and its contributors.
89  *      This product includes software developed at the Information
90  *      Technology Division, US Naval Research Laboratory.
91  * 4. Neither the name of the NRL nor the names of its contributors
92  *    may be used to endorse or promote products derived from this software
93  *    without specific prior written permission.
94  *
95  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
96  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
97  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
98  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
99  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
100  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
101  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
102  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
103  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
104  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
105  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
106  *
107  * The views and conclusions contained in the software and documentation
108  * are those of the authors and should not be interpreted as representing
109  * official policies, either expressed or implied, of the US Naval
110  * Research Laboratory (NRL).
111  */
112 
113 /*
114  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
115  *	The Regents of the University of California.  All rights reserved.
116  *
117  * Redistribution and use in source and binary forms, with or without
118  * modification, are permitted provided that the following conditions
119  * are met:
120  * 1. Redistributions of source code must retain the above copyright
121  *    notice, this list of conditions and the following disclaimer.
122  * 2. Redistributions in binary form must reproduce the above copyright
123  *    notice, this list of conditions and the following disclaimer in the
124  *    documentation and/or other materials provided with the distribution.
125  * 3. Neither the name of the University nor the names of its contributors
126  *    may be used to endorse or promote products derived from this software
127  *    without specific prior written permission.
128  *
129  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
130  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
131  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
132  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
133  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
134  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
135  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
136  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
137  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
138  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
139  * SUCH DAMAGE.
140  *
141  *	@(#)tcp_input.c	8.12 (Berkeley) 5/24/95
142  */
143 
144 #include <sys/cdefs.h>
145 __KERNEL_RCSID(0, "$NetBSD: tcp_congctl.c,v 1.14 2008/02/29 07:39:17 matt Exp $");
146 
147 #include "opt_inet.h"
148 #include "opt_tcp_debug.h"
149 #include "opt_tcp_congctl.h"
150 
151 #include <sys/param.h>
152 #include <sys/systm.h>
153 #include <sys/malloc.h>
154 #include <sys/mbuf.h>
155 #include <sys/protosw.h>
156 #include <sys/socket.h>
157 #include <sys/socketvar.h>
158 #include <sys/errno.h>
159 #include <sys/syslog.h>
160 #include <sys/pool.h>
161 #include <sys/domain.h>
162 #include <sys/kernel.h>
163 #include <sys/mutex.h>
164 
165 #include <net/if.h>
166 #include <net/route.h>
167 
168 #include <netinet/in.h>
169 #include <netinet/in_systm.h>
170 #include <netinet/ip.h>
171 #include <netinet/in_pcb.h>
172 #include <netinet/in_var.h>
173 #include <netinet/ip_var.h>
174 
175 #ifdef INET6
176 #ifndef INET
177 #include <netinet/in.h>
178 #endif
179 #include <netinet/ip6.h>
180 #include <netinet6/ip6_var.h>
181 #include <netinet6/in6_pcb.h>
182 #include <netinet6/ip6_var.h>
183 #include <netinet6/in6_var.h>
184 #include <netinet/icmp6.h>
185 #include <netinet6/nd6.h>
186 #endif
187 
188 #include <netinet/tcp.h>
189 #include <netinet/tcp_fsm.h>
190 #include <netinet/tcp_seq.h>
191 #include <netinet/tcp_timer.h>
192 #include <netinet/tcp_var.h>
193 #include <netinet/tcpip.h>
194 #include <netinet/tcp_congctl.h>
195 #ifdef TCP_DEBUG
196 #include <netinet/tcp_debug.h>
197 #endif
198 
199 /*
200  * TODO:
201  *   consider separating the actual implementations in another file.
202  */
203 
204 static int  tcp_reno_fast_retransmit(struct tcpcb *, const struct tcphdr *);
205 static void tcp_reno_slow_retransmit(struct tcpcb *);
206 static void tcp_reno_fast_retransmit_newack(struct tcpcb *,
207     const struct tcphdr *);
208 static void tcp_reno_newack(struct tcpcb *, const struct tcphdr *);
209 static void tcp_reno_congestion_exp(struct tcpcb *tp);
210 
211 static int  tcp_newreno_fast_retransmit(struct tcpcb *, const struct tcphdr *);
212 static void tcp_newreno_fast_retransmit_newack(struct tcpcb *,
213 	const struct tcphdr *);
214 static void tcp_newreno_newack(struct tcpcb *, const struct tcphdr *);
215 
216 
217 static void tcp_congctl_fillnames(void);
218 
219 extern int tcprexmtthresh;
220 
221 MALLOC_DEFINE(M_TCPCONGCTL, "tcpcongctl", "TCP congestion control structures");
222 
223 /* currently selected global congestion control */
224 char tcp_congctl_global_name[TCPCC_MAXLEN];
225 
226 /* available global congestion control algorithms */
227 char tcp_congctl_avail[10 * TCPCC_MAXLEN];
228 
229 /*
230  * Used to list the available congestion control algorithms.
231  */
232 TAILQ_HEAD(, tcp_congctlent) tcp_congctlhd =
233     TAILQ_HEAD_INITIALIZER(tcp_congctlhd);
234 
235 static struct tcp_congctlent * tcp_congctl_global;
236 
237 static kmutex_t tcp_congctl_mtx;
238 
239 void
240 tcp_congctl_init(void)
241 {
242 	int r;
243 
244 	mutex_init(&tcp_congctl_mtx, MUTEX_DEFAULT, IPL_NONE);
245 
246 	/* Base algorithms. */
247 	r = tcp_congctl_register("reno", &tcp_reno_ctl);
248 	KASSERT(r == 0);
249 	r = tcp_congctl_register("newreno", &tcp_newreno_ctl);
250 	KASSERT(r == 0);
251 
252 	/* NewReno is the default. */
253 #ifndef TCP_CONGCTL_DEFAULT
254 #define TCP_CONGCTL_DEFAULT "newreno"
255 #endif
256 
257 	r = tcp_congctl_select(NULL, TCP_CONGCTL_DEFAULT);
258 	KASSERT(r == 0);
259 }
260 
261 /*
262  * Register a congestion algorithm and select it if we have none.
263  */
264 int
265 tcp_congctl_register(const char *name, const struct tcp_congctl *tcc)
266 {
267 	struct tcp_congctlent *ntcc, *tccp;
268 
269 	TAILQ_FOREACH(tccp, &tcp_congctlhd, congctl_ent)
270 		if (!strcmp(name, tccp->congctl_name)) {
271 			/* name already registered */
272 			return EEXIST;
273 		}
274 
275 	ntcc = malloc(sizeof(*ntcc), M_TCPCONGCTL, M_WAITOK|M_ZERO);
276 
277 	strlcpy(ntcc->congctl_name, name, sizeof(ntcc->congctl_name) - 1);
278 	ntcc->congctl_ctl = tcc;
279 
280 	TAILQ_INSERT_TAIL(&tcp_congctlhd, ntcc, congctl_ent);
281 	tcp_congctl_fillnames();
282 
283 	if (TAILQ_FIRST(&tcp_congctlhd) == ntcc)
284 		tcp_congctl_select(NULL, name);
285 
286 	return 0;
287 }
288 
289 int
290 tcp_congctl_unregister(const char *name)
291 {
292 	struct tcp_congctlent *tccp, *rtccp;
293 	unsigned int size;
294 
295 	rtccp = NULL;
296 	size = 0;
297 	TAILQ_FOREACH(tccp, &tcp_congctlhd, congctl_ent) {
298 		if (!strcmp(name, tccp->congctl_name))
299 			rtccp = tccp;
300 		size++;
301 	}
302 
303 	if (!rtccp)
304 		return ENOENT;
305 
306 	if (size <= 1 || tcp_congctl_global == rtccp || rtccp->congctl_refcnt)
307 		return EBUSY;
308 
309 	TAILQ_REMOVE(&tcp_congctlhd, rtccp, congctl_ent);
310 	free(rtccp, M_TCPCONGCTL);
311 	tcp_congctl_fillnames();
312 
313 	return 0;
314 }
315 
316 /*
317  * Select a congestion algorithm by name.
318  */
319 int
320 tcp_congctl_select(struct tcpcb *tp, const char *name)
321 {
322 	struct tcp_congctlent *tccp, *old_tccp, *new_tccp;
323 	bool old_found, new_found;
324 
325 	KASSERT(name);
326 
327 	old_found = (tp == NULL || tp->t_congctl == NULL);
328 	old_tccp = NULL;
329 	new_found = false;
330 	new_tccp = NULL;
331 
332 	TAILQ_FOREACH(tccp, &tcp_congctlhd, congctl_ent) {
333 		if (!old_found && tccp->congctl_ctl == tp->t_congctl) {
334 			old_tccp = tccp;
335 			old_found = true;
336 		}
337 
338 		if (!new_found && !strcmp(name, tccp->congctl_name)) {
339 			new_tccp = tccp;
340 			new_found = true;
341 		}
342 
343 		if (new_found && old_found) {
344 			if (tp) {
345 				mutex_enter(&tcp_congctl_mtx);
346 				if (old_tccp)
347 					old_tccp->congctl_refcnt--;
348 				tp->t_congctl = new_tccp->congctl_ctl;
349 				new_tccp->congctl_refcnt++;
350 				mutex_exit(&tcp_congctl_mtx);
351 			} else {
352 				tcp_congctl_global = new_tccp;
353 				strlcpy(tcp_congctl_global_name,
354 				    new_tccp->congctl_name,
355 				    sizeof(tcp_congctl_global_name) - 1);
356 			}
357 			return 0;
358 		}
359 	}
360 
361 	return EINVAL;
362 }
363 
364 void
365 tcp_congctl_release(struct tcpcb *tp)
366 {
367 	struct tcp_congctlent *tccp;
368 
369 	KASSERT(tp->t_congctl);
370 
371 	TAILQ_FOREACH(tccp, &tcp_congctlhd, congctl_ent) {
372 		if (tccp->congctl_ctl == tp->t_congctl) {
373 			tccp->congctl_refcnt--;
374 			return;
375 		}
376 	}
377 }
378 
379 /*
380  * Returns the name of a congestion algorithm.
381  */
382 const char *
383 tcp_congctl_bystruct(const struct tcp_congctl *tcc)
384 {
385 	struct tcp_congctlent *tccp;
386 
387 	KASSERT(tcc);
388 
389 	TAILQ_FOREACH(tccp, &tcp_congctlhd, congctl_ent)
390 		if (tccp->congctl_ctl == tcc)
391 			return tccp->congctl_name;
392 
393 	return NULL;
394 }
395 
396 static void
397 tcp_congctl_fillnames(void)
398 {
399 	struct tcp_congctlent *tccp;
400 	const char *delim = " ";
401 
402 	tcp_congctl_avail[0] = '\0';
403 	TAILQ_FOREACH(tccp, &tcp_congctlhd, congctl_ent) {
404 		strlcat(tcp_congctl_avail, tccp->congctl_name,
405 		    sizeof(tcp_congctl_avail) - 1);
406 		if (TAILQ_NEXT(tccp, congctl_ent))
407 			strlcat(tcp_congctl_avail, delim,
408 			    sizeof(tcp_congctl_avail) - 1);
409 	}
410 
411 }
412 
413 /* ------------------------------------------------------------------------ */
414 
415 /*
416  * TCP/Reno congestion control.
417  */
418 static void
419 tcp_reno_congestion_exp(struct tcpcb *tp)
420 {
421 	u_int win;
422 
423 	/*
424 	 * Halve the congestion window and reduce the
425 	 * slow start threshold.
426 	 */
427 	win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_segsz;
428 	if (win < 2)
429 		win = 2;
430 
431 	tp->snd_ssthresh = win * tp->t_segsz;
432 	tp->snd_recover = tp->snd_max;
433 	tp->snd_cwnd = tp->snd_ssthresh;
434 
435 	/*
436 	 * When using TCP ECN, notify the peer that
437 	 * we reduced the cwnd.
438 	 */
439 	if (TCP_ECN_ALLOWED(tp))
440 		tp->t_flags |= TF_ECN_SND_CWR;
441 }
442 
443 
444 
static int
tcp_reno_fast_retransmit(struct tcpcb *tp, const struct tcphdr *th)
{
	/*
	 * We know we're losing at the current
	 * window size so do congestion avoidance
	 * (set ssthresh to half the current window
	 * and pull our congestion window back to
	 * the new ssthresh).
	 *
	 * Dup acks mean that packets have left the
	 * network (they're now cached at the receiver)
	 * so bump cwnd by the amount in the receiver
	 * to keep a constant cwnd packets in the
	 * network.
	 *
	 * If we are using TCP/SACK, then enter
	 * Fast Recovery if the receiver SACKs
	 * data that is tcprexmtthresh * MSS
	 * bytes past the last ACKed segment,
	 * irrespective of the number of DupAcks.
	 */

	tcp_seq onxt;

	/* Remember where we were about to send before rewinding. */
	onxt = tp->snd_nxt;
	/* Halve cwnd/ssthresh and (with ECN) schedule a CWR notice. */
	tcp_reno_congestion_exp(tp);
	tp->t_partialacks = 0;
	TCP_TIMER_DISARM(tp, TCPT_REXMT);
	tp->t_rtttime = 0;
	if (TCP_SACK_ENABLED(tp)) {
		/*
		 * SACK path: force out one segment from snd_nxt and let
		 * SACK-based recovery drive the rest.
		 */
		tp->t_dupacks = tcprexmtthresh;
		tp->sack_newdata = tp->snd_nxt;
		tp->snd_cwnd = tp->t_segsz;
		(void) tcp_output(tp);
		return 0;
	}
	/*
	 * Non-SACK path: rewind to the ACKed point, squeeze out exactly
	 * one segment by clamping cwnd, then re-inflate cwnd by one
	 * segment per duplicate ACK (segments known to have left the
	 * network) and restore the send point.
	 */
	tp->snd_nxt = th->th_ack;
	tp->snd_cwnd = tp->t_segsz;
	(void) tcp_output(tp);
	tp->snd_cwnd = tp->snd_ssthresh + tp->t_segsz * tp->t_dupacks;
	if (SEQ_GT(onxt, tp->snd_nxt))
		tp->snd_nxt = onxt;

	return 0;
}
491 
492 static void
493 tcp_reno_slow_retransmit(struct tcpcb *tp)
494 {
495 	u_int win;
496 
497 	/*
498 	 * Close the congestion window down to one segment
499 	 * (we'll open it by one segment for each ack we get).
500 	 * Since we probably have a window's worth of unacked
501 	 * data accumulated, this "slow start" keeps us from
502 	 * dumping all that data as back-to-back packets (which
503 	 * might overwhelm an intermediate gateway).
504 	 *
505 	 * There are two phases to the opening: Initially we
506 	 * open by one mss on each ack.  This makes the window
507 	 * size increase exponentially with time.  If the
508 	 * window is larger than the path can handle, this
509 	 * exponential growth results in dropped packet(s)
510 	 * almost immediately.  To get more time between
511 	 * drops but still "push" the network to take advantage
512 	 * of improving conditions, we switch from exponential
513 	 * to linear window opening at some threshhold size.
514 	 * For a threshhold, we use half the current window
515 	 * size, truncated to a multiple of the mss.
516 	 *
517 	 * (the minimum cwnd that will give us exponential
518 	 * growth is 2 mss.  We don't allow the threshhold
519 	 * to go below this.)
520 	 */
521 
522 	win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_segsz;
523 	if (win < 2)
524 		win = 2;
525 	/* Loss Window MUST be one segment. */
526 	tp->snd_cwnd = tp->t_segsz;
527 	tp->snd_ssthresh = win * tp->t_segsz;
528 	tp->t_partialacks = -1;
529 	tp->t_dupacks = 0;
530 	tp->t_bytes_acked = 0;
531 }
532 
533 static void
534 tcp_reno_fast_retransmit_newack(struct tcpcb *tp,
535     const struct tcphdr *th)
536 {
537 	if (tp->t_partialacks < 0) {
538 		/*
539 		 * We were not in fast recovery.  Reset the duplicate ack
540 		 * counter.
541 		 */
542 		tp->t_dupacks = 0;
543 	} else {
544 		/*
545 		 * Clamp the congestion window to the crossover point and
546 		 * exit fast recovery.
547 		 */
548 		if (tp->snd_cwnd > tp->snd_ssthresh)
549 			tp->snd_cwnd = tp->snd_ssthresh;
550 		tp->t_partialacks = -1;
551 		tp->t_dupacks = 0;
552 		tp->t_bytes_acked = 0;
553 	}
554 }
555 
556 static void
557 tcp_reno_newack(struct tcpcb *tp, const struct tcphdr *th)
558 {
559 	/*
560 	 * When new data is acked, open the congestion window.
561 	 */
562 
563 	u_int cw = tp->snd_cwnd;
564 	u_int incr = tp->t_segsz;
565 
566 	if (tcp_do_abc) {
567 
568 		/*
569 		 * RFC 3465 Appropriate Byte Counting (ABC)
570 		 */
571 
572 		int acked = th->th_ack - tp->snd_una;
573 
574 		if (cw >= tp->snd_ssthresh) {
575 			tp->t_bytes_acked += acked;
576 			if (tp->t_bytes_acked >= cw) {
577 				/* Time to increase the window. */
578 				tp->t_bytes_acked -= cw;
579 			} else {
580 				/* No need to increase yet. */
581 				incr = 0;
582 			}
583 		} else {
584 			/*
585 			 * use 2*SMSS or 1*SMSS for the "L" param,
586 			 * depending on sysctl setting.
587 			 *
588 			 * (See RFC 3465 2.3 Choosing the Limit)
589 			 */
590 			u_int abc_lim;
591 
592 			abc_lim = (tcp_abc_aggressive == 0 ||
593 			    tp->snd_nxt != tp->snd_max) ? incr : incr * 2;
594 			incr = min(acked, abc_lim);
595 		}
596 	} else {
597 
598 		/*
599 		 * If the window gives us less than ssthresh packets
600 		 * in flight, open exponentially (segsz per packet).
601 		 * Otherwise open linearly: segsz per window
602 		 * (segsz^2 / cwnd per packet).
603 		 */
604 
605 		if (cw >= tp->snd_ssthresh) {
606 			incr = incr * incr / cw;
607 		}
608 	}
609 
610 	tp->snd_cwnd = min(cw + incr, TCP_MAXWIN << tp->snd_scale);
611 }
612 
/* TCP/Reno congestion-control method table. */
const struct tcp_congctl tcp_reno_ctl = {
	.fast_retransmit = tcp_reno_fast_retransmit,
	.slow_retransmit = tcp_reno_slow_retransmit,
	.fast_retransmit_newack = tcp_reno_fast_retransmit_newack,
	.newack = tcp_reno_newack,
	.cong_exp = tcp_reno_congestion_exp,
};
620 
621 /*
622  * TCP/NewReno Congestion control.
623  */
624 static int
625 tcp_newreno_fast_retransmit(struct tcpcb *tp, const struct tcphdr *th)
626 {
627 	if (SEQ_LT(th->th_ack, tp->snd_high)) {
628 		/*
629 		 * False fast retransmit after timeout.
630 		 * Do not enter fast recovery
631 		 */
632 		tp->t_dupacks = 0;
633 		return 1;
634 	} else {
635 		/*
636 		 * Fast retransmit is same as reno.
637 		 */
638 		return tcp_reno_fast_retransmit(tp, th);
639 	}
640 
641 	return 0;
642 }
643 
/*
 * Implement the NewReno response to a new ack, checking for partial acks in
 * fast recovery.  NOTE: statement order matters throughout; snd_una has
 * not been advanced yet when this runs, and several computations below
 * rely on that.
 */
static void
tcp_newreno_fast_retransmit_newack(struct tcpcb *tp, const struct tcphdr *th)
{
	if (tp->t_partialacks < 0) {
		/*
		 * We were not in fast recovery.  Reset the duplicate ack
		 * counter.
		 */
		tp->t_dupacks = 0;
	} else if (SEQ_LT(th->th_ack, tp->snd_recover)) {
		/*
		 * This is a partial ack.  Retransmit the first unacknowledged
		 * segment and deflate the congestion window by the amount of
		 * acknowledged data.  Do not exit fast recovery.
		 */
		tcp_seq onxt = tp->snd_nxt;
		u_long ocwnd = tp->snd_cwnd;

		/*
		 * snd_una has not yet been updated and the socket's send
		 * buffer has not yet drained off the ACK'd data, so we
		 * have to leave snd_una as it was to get the correct data
		 * offset in tcp_output().
		 */
		if (++tp->t_partialacks == 1)
			TCP_TIMER_DISARM(tp, TCPT_REXMT);
		tp->t_rtttime = 0;
		tp->snd_nxt = th->th_ack;
		/*
		 * Set snd_cwnd to one segment beyond ACK'd offset.  snd_una
		 * is not yet updated when we're called.
		 */
		tp->snd_cwnd = tp->t_segsz + (th->th_ack - tp->snd_una);
		(void) tcp_output(tp);
		/* Restore the original cwnd and the pre-rewind send point. */
		tp->snd_cwnd = ocwnd;
		if (SEQ_GT(onxt, tp->snd_nxt))
			tp->snd_nxt = onxt;
		/*
		 * Partial window deflation.  Relies on fact that tp->snd_una
		 * not updated yet.
		 */
		tp->snd_cwnd -= (th->th_ack - tp->snd_una - tp->t_segsz);
	} else {
		/*
		 * Complete ack.  Inflate the congestion window to ssthresh
		 * and exit fast recovery.
		 *
		 * Window inflation should have left us with approx.
		 * snd_ssthresh outstanding data.  But in case we
		 * would be inclined to send a burst, better to do
		 * it via the slow start mechanism.
		 */
		if (SEQ_SUB(tp->snd_max, th->th_ack) < tp->snd_ssthresh)
			tp->snd_cwnd = SEQ_SUB(tp->snd_max, th->th_ack)
			    + tp->t_segsz;
		else
			tp->snd_cwnd = tp->snd_ssthresh;
		tp->t_partialacks = -1;
		tp->t_dupacks = 0;
		tp->t_bytes_acked = 0;
	}
}
710 
711 static void
712 tcp_newreno_newack(struct tcpcb *tp, const struct tcphdr *th)
713 {
714 	/*
715 	 * If we are still in fast recovery (meaning we are using
716 	 * NewReno and we have only received partial acks), do not
717 	 * inflate the window yet.
718 	 */
719 	if (tp->t_partialacks < 0)
720 		tcp_reno_newack(tp, th);
721 }
722 
723 
/* TCP/NewReno congestion-control method table (shares Reno's
 * slow-retransmit and congestion-experienced handlers). */
const struct tcp_congctl tcp_newreno_ctl = {
	.fast_retransmit = tcp_newreno_fast_retransmit,
	.slow_retransmit = tcp_reno_slow_retransmit,
	.fast_retransmit_newack = tcp_newreno_fast_retransmit_newack,
	.newack = tcp_newreno_newack,
	.cong_exp = tcp_reno_congestion_exp,
};
731 
732 
733