xref: /freebsd-src/sys/netinet/cc/cc_chd.c (revision 22dcc81293854c4d39df639a329fecded175b2b0)
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2009-2010
 *	Swinburne University of Technology, Melbourne, Australia
 * Copyright (c) 2010-2011 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed at the Centre for Advanced Internet
 * Architectures, Swinburne University of Technology, by David Hayes and
 * Lawrence Stewart, made possible in part by a grant from the Cisco University
 * Research Program Fund at Community Foundation Silicon Valley.
 *
 * Portions of this software were developed at the Centre for Advanced Internet
 * Architectures, Swinburne University of Technology, Melbourne, Australia by
 * David Hayes under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
390927e1a1SLawrence Stewart 
/*
 * An implementation of the CAIA-Hamilton delay based congestion control
 * algorithm, based on "Improved coexistence and loss tolerance for delay based
 * TCP congestion control" by D. A. Hayes and G. Armitage, in 35th Annual IEEE
 * Conference on Local Computer Networks (LCN 2010), Denver, Colorado, USA,
 * 11-14 October 2010.
 *
 * Originally released as part of the NewTCP research project at Swinburne
 * University of Technology's Centre for Advanced Internet Architectures,
 * Melbourne, Australia, which was made possible in part by a grant from the
 * Cisco University Research Program Fund at Community Foundation Silicon
 * Valley. More details are available at:
 *   http://caia.swin.edu.au/urp/newtcp/
 */
540927e1a1SLawrence Stewart 
550927e1a1SLawrence Stewart #include <sys/param.h>
560927e1a1SLawrence Stewart #include <sys/kernel.h>
570927e1a1SLawrence Stewart #include <sys/khelp.h>
580927e1a1SLawrence Stewart #include <sys/limits.h>
590927e1a1SLawrence Stewart #include <sys/malloc.h>
600927e1a1SLawrence Stewart #include <sys/module.h>
61674956e1SHenrich Hartzer #include <sys/prng.h>
620927e1a1SLawrence Stewart #include <sys/queue.h>
630927e1a1SLawrence Stewart #include <sys/socket.h>
640927e1a1SLawrence Stewart #include <sys/socketvar.h>
650927e1a1SLawrence Stewart #include <sys/sysctl.h>
660927e1a1SLawrence Stewart #include <sys/systm.h>
670927e1a1SLawrence Stewart 
680927e1a1SLawrence Stewart #include <net/vnet.h>
690927e1a1SLawrence Stewart 
70b8d60729SRandall Stewart #include <net/route.h>
71b8d60729SRandall Stewart #include <net/route/nhop.h>
72b8d60729SRandall Stewart 
73b8d60729SRandall Stewart #include <netinet/in_pcb.h>
742de3e790SGleb Smirnoff #include <netinet/tcp.h>
750927e1a1SLawrence Stewart #include <netinet/tcp_seq.h>
760927e1a1SLawrence Stewart #include <netinet/tcp_timer.h>
770927e1a1SLawrence Stewart #include <netinet/tcp_var.h>
784644fda3SGleb Smirnoff #include <netinet/cc/cc.h>
790927e1a1SLawrence Stewart #include <netinet/cc/cc_module.h>
800927e1a1SLawrence Stewart 
810927e1a1SLawrence Stewart #include <netinet/khelp/h_ertt.h>
820927e1a1SLawrence Stewart 
/*
 * Private signal type used to report a delay based congestion indication to
 * chd_cong_signal().
 * See <netinet/cc/cc.h> for appropriate bit-range to use for private signals.
 */
#define	CC_CHD_DELAY	0x02000000

/* Largest possible number returned by prng32(). */
#define	RANDOM_MAX	UINT32_MAX
910927e1a1SLawrence Stewart 
92f74352fbSRichard Scheffenegger static void	chd_ack_received(struct cc_var *ccv, ccsignal_t ack_type);
930927e1a1SLawrence Stewart static void	chd_cb_destroy(struct cc_var *ccv);
94b8d60729SRandall Stewart static int	chd_cb_init(struct cc_var *ccv, void *ptr);
95f74352fbSRichard Scheffenegger static void	chd_cong_signal(struct cc_var *ccv, ccsignal_t signal_type);
960927e1a1SLawrence Stewart static void	chd_conn_init(struct cc_var *ccv);
970927e1a1SLawrence Stewart static int	chd_mod_init(void);
98b8d60729SRandall Stewart static size_t	chd_data_sz(void);
990927e1a1SLawrence Stewart 
/* Per-connection CHD state, stored in ccv->cc_data. */
struct chd {
	/*
	 * Shadow window - what the NewReno congestion window would have been
	 * had no delay-based cwnd backoffs been made. It helps this flow
	 * coexist with loss-based TCP flows sharing links along the path.
	 * A value of 0 means the shadow window is not currently tracked.
	 */
	unsigned long shadow_w;
	/*
	 * Loss-based TCP compatibility flag - when set, the shadow window
	 * functionality is turned on.
	 */
	int loss_compete;
	/*
	 * The maximum round trip time seen within a measured rtt period;
	 * cleared each time a new per-RTT measurement is consumed.
	 */
	int maxrtt_in_rtt;
	/* The previous qdly that caused cwnd to backoff. */
	int prev_backoff_qdly;
};
1180927e1a1SLawrence Stewart 
1190927e1a1SLawrence Stewart static int ertt_id;
1200927e1a1SLawrence Stewart 
1215f901c92SAndrew Turner VNET_DEFINE_STATIC(uint32_t, chd_qmin) = 5;
1225f901c92SAndrew Turner VNET_DEFINE_STATIC(uint32_t, chd_pmax) = 50;
1235f901c92SAndrew Turner VNET_DEFINE_STATIC(uint32_t, chd_loss_fair) = 1;
1245f901c92SAndrew Turner VNET_DEFINE_STATIC(uint32_t, chd_use_max) = 1;
1255f901c92SAndrew Turner VNET_DEFINE_STATIC(uint32_t, chd_qthresh) = 20;
1260927e1a1SLawrence Stewart #define	V_chd_qthresh	VNET(chd_qthresh)
1270927e1a1SLawrence Stewart #define	V_chd_qmin	VNET(chd_qmin)
1280927e1a1SLawrence Stewart #define	V_chd_pmax	VNET(chd_pmax)
1290927e1a1SLawrence Stewart #define	V_chd_loss_fair	VNET(chd_loss_fair)
1300927e1a1SLawrence Stewart #define	V_chd_use_max	VNET(chd_use_max)
1310927e1a1SLawrence Stewart 
1320927e1a1SLawrence Stewart 
1330927e1a1SLawrence Stewart struct cc_algo chd_cc_algo = {
1340927e1a1SLawrence Stewart 	.name = "chd",
1350927e1a1SLawrence Stewart 	.ack_received = chd_ack_received,
1360927e1a1SLawrence Stewart 	.cb_destroy = chd_cb_destroy,
1370927e1a1SLawrence Stewart 	.cb_init = chd_cb_init,
1380927e1a1SLawrence Stewart 	.cong_signal = chd_cong_signal,
1390927e1a1SLawrence Stewart 	.conn_init = chd_conn_init,
140b8d60729SRandall Stewart 	.mod_init = chd_mod_init,
141b8d60729SRandall Stewart 	.cc_data_sz = chd_data_sz,
142b8d60729SRandall Stewart 	.after_idle = newreno_cc_after_idle,
143b8d60729SRandall Stewart 	.post_recovery = newreno_cc_post_recovery,
1440927e1a1SLawrence Stewart };
1450927e1a1SLawrence Stewart 
1460927e1a1SLawrence Stewart static __inline void
1470927e1a1SLawrence Stewart chd_window_decrease(struct cc_var *ccv)
1480927e1a1SLawrence Stewart {
1490927e1a1SLawrence Stewart 	unsigned long win;
150*22dcc812SRichard Scheffenegger 	uint32_t mss = tcp_fixed_maxseg(ccv->tp);
1510927e1a1SLawrence Stewart 
152*22dcc812SRichard Scheffenegger 	win = min(CCV(ccv, snd_wnd), CCV(ccv, snd_cwnd)) / mss;
1530927e1a1SLawrence Stewart 	win -= max((win / 2), 1);
154*22dcc812SRichard Scheffenegger 	CCV(ccv, snd_ssthresh) = max(win, 2) * mss;
1550927e1a1SLawrence Stewart }
1560927e1a1SLawrence Stewart 
1570927e1a1SLawrence Stewart /*
1580927e1a1SLawrence Stewart  * Probabilistic backoff function. Returns 1 if we should backoff or 0
1590927e1a1SLawrence Stewart  * otherwise. The calculation of p is similar to the calculation of p in cc_hd.
1600927e1a1SLawrence Stewart  */
1610927e1a1SLawrence Stewart static __inline int
1620927e1a1SLawrence Stewart should_backoff(int qdly, int maxqdly, struct chd *chd_data)
1630927e1a1SLawrence Stewart {
164674956e1SHenrich Hartzer 	uint32_t rand, p;
1650927e1a1SLawrence Stewart 
166674956e1SHenrich Hartzer 	rand = prng32();
1670927e1a1SLawrence Stewart 
1680927e1a1SLawrence Stewart 	if (qdly < V_chd_qthresh) {
1690927e1a1SLawrence Stewart 		chd_data->loss_compete = 0;
1700927e1a1SLawrence Stewart 		p = (((RANDOM_MAX / 100) * V_chd_pmax) /
1710927e1a1SLawrence Stewart 		    (V_chd_qthresh - V_chd_qmin)) *
1720927e1a1SLawrence Stewart 		    (qdly - V_chd_qmin);
1730927e1a1SLawrence Stewart 	} else {
1740927e1a1SLawrence Stewart 		if (qdly > V_chd_qthresh) {
1750927e1a1SLawrence Stewart 			p = (((RANDOM_MAX / 100) * V_chd_pmax) /
1760927e1a1SLawrence Stewart 			    (maxqdly - V_chd_qthresh)) *
1770927e1a1SLawrence Stewart 			    (maxqdly - qdly);
1780927e1a1SLawrence Stewart 			if (V_chd_loss_fair && rand < p)
1790927e1a1SLawrence Stewart 				chd_data->loss_compete = 1;
1800927e1a1SLawrence Stewart 		} else {
1810927e1a1SLawrence Stewart 			p = (RANDOM_MAX / 100) * V_chd_pmax;
1820927e1a1SLawrence Stewart 			chd_data->loss_compete = 0;
1830927e1a1SLawrence Stewart 		}
1840927e1a1SLawrence Stewart 	}
1850927e1a1SLawrence Stewart 
1860927e1a1SLawrence Stewart 	return (rand < p);
1870927e1a1SLawrence Stewart }
1880927e1a1SLawrence Stewart 
1890927e1a1SLawrence Stewart static __inline void
1900927e1a1SLawrence Stewart chd_window_increase(struct cc_var *ccv, int new_measurement)
1910927e1a1SLawrence Stewart {
1920927e1a1SLawrence Stewart 	struct chd *chd_data;
1930927e1a1SLawrence Stewart 	int incr;
194*22dcc812SRichard Scheffenegger 	uint32_t mss = tcp_fixed_maxseg(ccv->tp);
1950927e1a1SLawrence Stewart 
1960927e1a1SLawrence Stewart 	chd_data = ccv->cc_data;
1970927e1a1SLawrence Stewart 	incr = 0;
1980927e1a1SLawrence Stewart 
1990927e1a1SLawrence Stewart 	if (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh)) {
2000927e1a1SLawrence Stewart 		/* Adapted from NewReno slow start. */
2010927e1a1SLawrence Stewart 		if (V_tcp_do_rfc3465) {
2020927e1a1SLawrence Stewart 			/* In slow-start with ABC enabled. */
2030927e1a1SLawrence Stewart 			if (CCV(ccv, snd_nxt) == CCV(ccv, snd_max)) {
2040927e1a1SLawrence Stewart 				/* Not due to RTO. */
2050927e1a1SLawrence Stewart 				incr = min(ccv->bytes_this_ack,
206*22dcc812SRichard Scheffenegger 				    V_tcp_abc_l_var * mss);
2070927e1a1SLawrence Stewart 			} else {
2080927e1a1SLawrence Stewart 				/* Due to RTO. */
209*22dcc812SRichard Scheffenegger 				incr = min(ccv->bytes_this_ack, mss);
2100927e1a1SLawrence Stewart 			}
2110927e1a1SLawrence Stewart 		} else
212*22dcc812SRichard Scheffenegger 			incr = mss;
2130927e1a1SLawrence Stewart 
2140927e1a1SLawrence Stewart 	} else { /* Congestion avoidance. */
2150927e1a1SLawrence Stewart 		if (V_tcp_do_rfc3465) {
2160927e1a1SLawrence Stewart 			if (ccv->flags & CCF_ABC_SENTAWND) {
2170927e1a1SLawrence Stewart 				ccv->flags &= ~CCF_ABC_SENTAWND;
218*22dcc812SRichard Scheffenegger 				incr = mss;
2190927e1a1SLawrence Stewart 			}
2200927e1a1SLawrence Stewart 		} else if (new_measurement)
221*22dcc812SRichard Scheffenegger 			incr = mss;
2220927e1a1SLawrence Stewart 	}
2230927e1a1SLawrence Stewart 
2240927e1a1SLawrence Stewart 	if (chd_data->shadow_w > 0) {
2250927e1a1SLawrence Stewart 		/* Track NewReno window. */
2260927e1a1SLawrence Stewart 		chd_data->shadow_w = min(chd_data->shadow_w + incr,
2270927e1a1SLawrence Stewart 		    TCP_MAXWIN << CCV(ccv, snd_scale));
2280927e1a1SLawrence Stewart 	}
2290927e1a1SLawrence Stewart 
2300927e1a1SLawrence Stewart 	CCV(ccv,snd_cwnd) = min(CCV(ccv, snd_cwnd) + incr,
2310927e1a1SLawrence Stewart 	    TCP_MAXWIN << CCV(ccv, snd_scale));
2320927e1a1SLawrence Stewart }
2330927e1a1SLawrence Stewart 
2340927e1a1SLawrence Stewart /*
2350927e1a1SLawrence Stewart  * All ACK signals are used for timing measurements to determine delay-based
2360927e1a1SLawrence Stewart  * congestion. However, window increases are only performed when
2370927e1a1SLawrence Stewart  * ack_type == CC_ACK.
2380927e1a1SLawrence Stewart  */
2390927e1a1SLawrence Stewart static void
240f74352fbSRichard Scheffenegger chd_ack_received(struct cc_var *ccv, ccsignal_t ack_type)
2410927e1a1SLawrence Stewart {
2420927e1a1SLawrence Stewart 	struct chd *chd_data;
2430927e1a1SLawrence Stewart 	struct ertt *e_t;
2440927e1a1SLawrence Stewart 	int backoff, new_measurement, qdly, rtt;
2450927e1a1SLawrence Stewart 
246e68b3792SGleb Smirnoff 	e_t = khelp_get_osd(&CCV(ccv, t_osd), ertt_id);
2470927e1a1SLawrence Stewart 	chd_data = ccv->cc_data;
2480927e1a1SLawrence Stewart 	new_measurement = e_t->flags & ERTT_NEW_MEASUREMENT;
2490927e1a1SLawrence Stewart 	backoff = qdly = 0;
2500927e1a1SLawrence Stewart 
2510927e1a1SLawrence Stewart 	chd_data->maxrtt_in_rtt = imax(e_t->rtt, chd_data->maxrtt_in_rtt);
2520927e1a1SLawrence Stewart 
2530927e1a1SLawrence Stewart 	if (new_measurement) {
2540927e1a1SLawrence Stewart 		/*
2550927e1a1SLawrence Stewart 		 * There is a new per RTT measurement, so check to see if there
2560927e1a1SLawrence Stewart 		 * is delay based congestion.
2570927e1a1SLawrence Stewart 		 */
2580927e1a1SLawrence Stewart 		rtt = V_chd_use_max ? chd_data->maxrtt_in_rtt : e_t->rtt;
2590927e1a1SLawrence Stewart 		chd_data->maxrtt_in_rtt = 0;
2600927e1a1SLawrence Stewart 
2610927e1a1SLawrence Stewart 		if (rtt && e_t->minrtt && !IN_RECOVERY(CCV(ccv, t_flags))) {
2620927e1a1SLawrence Stewart 			qdly = rtt - e_t->minrtt;
2630927e1a1SLawrence Stewart 			if (qdly > V_chd_qmin) {
2640927e1a1SLawrence Stewart 				/*
2650927e1a1SLawrence Stewart 				 * Probabilistic delay based congestion
2660927e1a1SLawrence Stewart 				 * indication.
2670927e1a1SLawrence Stewart 				 */
2680927e1a1SLawrence Stewart 				backoff = should_backoff(qdly,
2690927e1a1SLawrence Stewart 				    e_t->maxrtt - e_t->minrtt, chd_data);
2700927e1a1SLawrence Stewart 			} else
2710927e1a1SLawrence Stewart 				chd_data->loss_compete = 0;
2720927e1a1SLawrence Stewart 		}
2730927e1a1SLawrence Stewart 		/* Reset per RTT measurement flag to start a new measurement. */
2740927e1a1SLawrence Stewart 		e_t->flags &= ~ERTT_NEW_MEASUREMENT;
2750927e1a1SLawrence Stewart 	}
2760927e1a1SLawrence Stewart 
2770927e1a1SLawrence Stewart 	if (backoff) {
2780927e1a1SLawrence Stewart 		/*
2790927e1a1SLawrence Stewart 		 * Update shadow_w before delay based backoff.
2800927e1a1SLawrence Stewart 		 */
2810927e1a1SLawrence Stewart 		if (chd_data->loss_compete ||
2820927e1a1SLawrence Stewart 		    qdly > chd_data->prev_backoff_qdly) {
2830927e1a1SLawrence Stewart 			/*
2840927e1a1SLawrence Stewart 			 * Delay is higher than when we backed off previously,
2850927e1a1SLawrence Stewart 			 * so it is possible that this flow is competing with
2860927e1a1SLawrence Stewart 			 * loss based flows.
2870927e1a1SLawrence Stewart 			 */
2880927e1a1SLawrence Stewart 			chd_data->shadow_w = max(CCV(ccv, snd_cwnd),
2890927e1a1SLawrence Stewart 			    chd_data->shadow_w);
2900927e1a1SLawrence Stewart 		} else {
2910927e1a1SLawrence Stewart 			/*
2920927e1a1SLawrence Stewart 			 * Reset shadow_w, as it is probable that this flow is
2930927e1a1SLawrence Stewart 			 * not competing with loss based flows at the moment.
2940927e1a1SLawrence Stewart 			 */
2950927e1a1SLawrence Stewart 			chd_data->shadow_w = 0;
2960927e1a1SLawrence Stewart 		}
2970927e1a1SLawrence Stewart 
2980927e1a1SLawrence Stewart 		chd_data->prev_backoff_qdly = qdly;
2990927e1a1SLawrence Stewart 		/*
3000927e1a1SLawrence Stewart 		 * Send delay-based congestion signal to the congestion signal
3010927e1a1SLawrence Stewart 		 * handler.
3020927e1a1SLawrence Stewart 		 */
3030927e1a1SLawrence Stewart 		chd_cong_signal(ccv, CC_CHD_DELAY);
3040927e1a1SLawrence Stewart 
3050927e1a1SLawrence Stewart 	} else if (ack_type == CC_ACK)
3060927e1a1SLawrence Stewart 		chd_window_increase(ccv, new_measurement);
3070927e1a1SLawrence Stewart }
3080927e1a1SLawrence Stewart 
3090927e1a1SLawrence Stewart static void
3100927e1a1SLawrence Stewart chd_cb_destroy(struct cc_var *ccv)
3110927e1a1SLawrence Stewart {
312b8d60729SRandall Stewart 	free(ccv->cc_data, M_CC_MEM);
313b8d60729SRandall Stewart }
3140927e1a1SLawrence Stewart 
315b8d60729SRandall Stewart size_t
316b8d60729SRandall Stewart chd_data_sz(void)
317b8d60729SRandall Stewart {
318b8d60729SRandall Stewart 	return (sizeof(struct chd));
3190927e1a1SLawrence Stewart }
3200927e1a1SLawrence Stewart 
3210927e1a1SLawrence Stewart static int
322b8d60729SRandall Stewart chd_cb_init(struct cc_var *ccv, void *ptr)
3230927e1a1SLawrence Stewart {
3240927e1a1SLawrence Stewart 	struct chd *chd_data;
3250927e1a1SLawrence Stewart 
32600d3b744SMichael Tuexen 	INP_WLOCK_ASSERT(tptoinpcb(ccv->tp));
327b8d60729SRandall Stewart 	if (ptr == NULL) {
328b8d60729SRandall Stewart 		chd_data = malloc(sizeof(struct chd), M_CC_MEM, M_NOWAIT);
3290927e1a1SLawrence Stewart 		if (chd_data == NULL)
3300927e1a1SLawrence Stewart 			return (ENOMEM);
331b8d60729SRandall Stewart 	} else
332b8d60729SRandall Stewart 		chd_data = ptr;
3330927e1a1SLawrence Stewart 
3340927e1a1SLawrence Stewart 	chd_data->shadow_w = 0;
3350927e1a1SLawrence Stewart 	ccv->cc_data = chd_data;
3360927e1a1SLawrence Stewart 
3370927e1a1SLawrence Stewart 	return (0);
3380927e1a1SLawrence Stewart }
3390927e1a1SLawrence Stewart 
3400927e1a1SLawrence Stewart static void
341f74352fbSRichard Scheffenegger chd_cong_signal(struct cc_var *ccv, ccsignal_t signal_type)
3420927e1a1SLawrence Stewart {
3430927e1a1SLawrence Stewart 	struct ertt *e_t;
3440927e1a1SLawrence Stewart 	struct chd *chd_data;
3450927e1a1SLawrence Stewart 	int qdly;
3460927e1a1SLawrence Stewart 
347e68b3792SGleb Smirnoff 	e_t = khelp_get_osd(&CCV(ccv, t_osd), ertt_id);
3480927e1a1SLawrence Stewart 	chd_data = ccv->cc_data;
3490927e1a1SLawrence Stewart 	qdly = imax(e_t->rtt, chd_data->maxrtt_in_rtt) - e_t->minrtt;
3500927e1a1SLawrence Stewart 
351f74352fbSRichard Scheffenegger 	switch((int)signal_type) {
3520927e1a1SLawrence Stewart 	case CC_CHD_DELAY:
3530927e1a1SLawrence Stewart 		chd_window_decrease(ccv); /* Set new ssthresh. */
3540927e1a1SLawrence Stewart 		CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh);
3550927e1a1SLawrence Stewart 		CCV(ccv, snd_recover) = CCV(ccv, snd_max);
3560927e1a1SLawrence Stewart 		ENTER_CONGRECOVERY(CCV(ccv, t_flags));
3570927e1a1SLawrence Stewart 		break;
3580927e1a1SLawrence Stewart 
3590927e1a1SLawrence Stewart 	case CC_NDUPACK: /* Packet loss. */
3600927e1a1SLawrence Stewart 		/*
3610927e1a1SLawrence Stewart 		 * Only react to loss as a congestion signal if qdly >
3620927e1a1SLawrence Stewart 		 * V_chd_qthresh.  If qdly is less than qthresh, presume that
3630927e1a1SLawrence Stewart 		 * this is a non congestion related loss. If qdly is greater
3640927e1a1SLawrence Stewart 		 * than qthresh, assume that we are competing with loss based
3650927e1a1SLawrence Stewart 		 * tcp flows and restore window from any unnecessary backoffs,
3660927e1a1SLawrence Stewart 		 * before the decrease.
3670927e1a1SLawrence Stewart 		 */
3680927e1a1SLawrence Stewart 		if (!IN_RECOVERY(CCV(ccv, t_flags)) && qdly > V_chd_qthresh) {
3690927e1a1SLawrence Stewart 			if (chd_data->loss_compete) {
3700927e1a1SLawrence Stewart 				CCV(ccv, snd_cwnd) = max(CCV(ccv, snd_cwnd),
3710927e1a1SLawrence Stewart 				    chd_data->shadow_w);
3720927e1a1SLawrence Stewart 			}
3730927e1a1SLawrence Stewart 			chd_window_decrease(ccv);
3740927e1a1SLawrence Stewart 		} else {
3750927e1a1SLawrence Stewart 			 /*
3760927e1a1SLawrence Stewart 			  * This loss isn't congestion related, or already
3770927e1a1SLawrence Stewart 			  * recovering from congestion.
3780927e1a1SLawrence Stewart 			  */
3790927e1a1SLawrence Stewart 			CCV(ccv, snd_ssthresh) = CCV(ccv, snd_cwnd);
3800927e1a1SLawrence Stewart 			CCV(ccv, snd_recover) = CCV(ccv, snd_max);
3810927e1a1SLawrence Stewart 		}
3820927e1a1SLawrence Stewart 
3830927e1a1SLawrence Stewart 		if (chd_data->shadow_w > 0) {
384*22dcc812SRichard Scheffenegger 			uint32_t mss = tcp_fixed_maxseg(ccv->tp);
3850927e1a1SLawrence Stewart 			chd_data->shadow_w = max(chd_data->shadow_w /
386*22dcc812SRichard Scheffenegger 			    mss / 2, 2) * mss;
3870927e1a1SLawrence Stewart 		}
3880927e1a1SLawrence Stewart 		ENTER_FASTRECOVERY(CCV(ccv, t_flags));
3890927e1a1SLawrence Stewart 		break;
3900927e1a1SLawrence Stewart 
3910927e1a1SLawrence Stewart 	default:
392b8d60729SRandall Stewart 		newreno_cc_cong_signal(ccv, signal_type);
393f74352fbSRichard Scheffenegger 		break;
3940927e1a1SLawrence Stewart 	}
3950927e1a1SLawrence Stewart }
3960927e1a1SLawrence Stewart 
3970927e1a1SLawrence Stewart static void
3980927e1a1SLawrence Stewart chd_conn_init(struct cc_var *ccv)
3990927e1a1SLawrence Stewart {
4000927e1a1SLawrence Stewart 	struct chd *chd_data;
4010927e1a1SLawrence Stewart 
4020927e1a1SLawrence Stewart 	chd_data = ccv->cc_data;
4030927e1a1SLawrence Stewart 	chd_data->prev_backoff_qdly = 0;
4040927e1a1SLawrence Stewart 	chd_data->maxrtt_in_rtt = 0;
4050927e1a1SLawrence Stewart 	chd_data->loss_compete = 0;
4060927e1a1SLawrence Stewart 	/*
4070927e1a1SLawrence Stewart 	 * Initialise the shadow_cwnd to be equal to snd_cwnd in case we are
4080927e1a1SLawrence Stewart 	 * competing with loss based flows from the start.
4090927e1a1SLawrence Stewart 	 */
4100927e1a1SLawrence Stewart 	chd_data->shadow_w = CCV(ccv, snd_cwnd);
4110927e1a1SLawrence Stewart }
4120927e1a1SLawrence Stewart 
4130927e1a1SLawrence Stewart static int
4140927e1a1SLawrence Stewart chd_mod_init(void)
4150927e1a1SLawrence Stewart {
4160927e1a1SLawrence Stewart 
4170927e1a1SLawrence Stewart 	ertt_id = khelp_get_id("ertt");
4180927e1a1SLawrence Stewart 	if (ertt_id <= 0) {
4190927e1a1SLawrence Stewart 		printf("%s: h_ertt module not found\n", __func__);
4200927e1a1SLawrence Stewart 		return (ENOENT);
4210927e1a1SLawrence Stewart 	}
4220927e1a1SLawrence Stewart 	return (0);
4230927e1a1SLawrence Stewart }
4240927e1a1SLawrence Stewart 
4250927e1a1SLawrence Stewart static int
4260927e1a1SLawrence Stewart chd_loss_fair_handler(SYSCTL_HANDLER_ARGS)
4270927e1a1SLawrence Stewart {
4280927e1a1SLawrence Stewart 	int error;
4290927e1a1SLawrence Stewart 	uint32_t new;
4300927e1a1SLawrence Stewart 
4310927e1a1SLawrence Stewart 	new = V_chd_loss_fair;
4320927e1a1SLawrence Stewart 	error = sysctl_handle_int(oidp, &new, 0, req);
4330927e1a1SLawrence Stewart 	if (error == 0 && req->newptr != NULL) {
434855acb84SBrooks Davis 		if (new > 1)
4350927e1a1SLawrence Stewart 			error = EINVAL;
4360927e1a1SLawrence Stewart 		else
4370927e1a1SLawrence Stewart 			V_chd_loss_fair = new;
4380927e1a1SLawrence Stewart 	}
4390927e1a1SLawrence Stewart 
4400927e1a1SLawrence Stewart 	return (error);
4410927e1a1SLawrence Stewart }
4420927e1a1SLawrence Stewart 
4430927e1a1SLawrence Stewart static int
4440927e1a1SLawrence Stewart chd_pmax_handler(SYSCTL_HANDLER_ARGS)
4450927e1a1SLawrence Stewart {
4460927e1a1SLawrence Stewart 	int error;
4470927e1a1SLawrence Stewart 	uint32_t new;
4480927e1a1SLawrence Stewart 
4490927e1a1SLawrence Stewart 	new = V_chd_pmax;
4500927e1a1SLawrence Stewart 	error = sysctl_handle_int(oidp, &new, 0, req);
4510927e1a1SLawrence Stewart 	if (error == 0 && req->newptr != NULL) {
452855acb84SBrooks Davis 		if (new == 0 || new > 100)
4530927e1a1SLawrence Stewart 			error = EINVAL;
4540927e1a1SLawrence Stewart 		else
4550927e1a1SLawrence Stewart 			V_chd_pmax = new;
4560927e1a1SLawrence Stewart 	}
4570927e1a1SLawrence Stewart 
4580927e1a1SLawrence Stewart 	return (error);
4590927e1a1SLawrence Stewart }
4600927e1a1SLawrence Stewart 
4610927e1a1SLawrence Stewart static int
4620927e1a1SLawrence Stewart chd_qthresh_handler(SYSCTL_HANDLER_ARGS)
4630927e1a1SLawrence Stewart {
4640927e1a1SLawrence Stewart 	int error;
4650927e1a1SLawrence Stewart 	uint32_t new;
4660927e1a1SLawrence Stewart 
4670927e1a1SLawrence Stewart 	new = V_chd_qthresh;
4680927e1a1SLawrence Stewart 	error = sysctl_handle_int(oidp, &new, 0, req);
4690927e1a1SLawrence Stewart 	if (error == 0 && req->newptr != NULL) {
470855acb84SBrooks Davis 		if (new <= V_chd_qmin)
4710927e1a1SLawrence Stewart 			error = EINVAL;
4720927e1a1SLawrence Stewart 		else
4730927e1a1SLawrence Stewart 			V_chd_qthresh = new;
4740927e1a1SLawrence Stewart 	}
4750927e1a1SLawrence Stewart 
4760927e1a1SLawrence Stewart 	return (error);
4770927e1a1SLawrence Stewart }
4780927e1a1SLawrence Stewart 
4790927e1a1SLawrence Stewart SYSCTL_DECL(_net_inet_tcp_cc_chd);
4807029da5cSPawel Biernacki SYSCTL_NODE(_net_inet_tcp_cc, OID_AUTO, chd, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
4810927e1a1SLawrence Stewart     "CAIA Hamilton delay-based congestion control related settings");
4820927e1a1SLawrence Stewart 
4836df8a710SGleb Smirnoff SYSCTL_PROC(_net_inet_tcp_cc_chd, OID_AUTO, loss_fair,
4847029da5cSPawel Biernacki     CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
4856df8a710SGleb Smirnoff     &VNET_NAME(chd_loss_fair), 1, &chd_loss_fair_handler,
4860927e1a1SLawrence Stewart     "IU", "Flag to enable shadow window functionality.");
4870927e1a1SLawrence Stewart 
4886df8a710SGleb Smirnoff SYSCTL_PROC(_net_inet_tcp_cc_chd, OID_AUTO, pmax,
4897029da5cSPawel Biernacki     CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
4906df8a710SGleb Smirnoff     &VNET_NAME(chd_pmax), 5, &chd_pmax_handler,
4910927e1a1SLawrence Stewart     "IU", "Per RTT maximum backoff probability as a percentage");
4920927e1a1SLawrence Stewart 
4936df8a710SGleb Smirnoff SYSCTL_PROC(_net_inet_tcp_cc_chd, OID_AUTO, queue_threshold,
4947029da5cSPawel Biernacki     CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
4956df8a710SGleb Smirnoff     &VNET_NAME(chd_qthresh), 20, &chd_qthresh_handler,
4960927e1a1SLawrence Stewart     "IU", "Queueing congestion threshold in ticks");
4970927e1a1SLawrence Stewart 
4986df8a710SGleb Smirnoff SYSCTL_UINT(_net_inet_tcp_cc_chd, OID_AUTO, queue_min,
4996df8a710SGleb Smirnoff     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(chd_qmin), 5,
5000927e1a1SLawrence Stewart     "Minimum queueing delay threshold in ticks");
5010927e1a1SLawrence Stewart 
5026df8a710SGleb Smirnoff SYSCTL_UINT(_net_inet_tcp_cc_chd,  OID_AUTO, use_max,
5036df8a710SGleb Smirnoff     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(chd_use_max), 1,
5040927e1a1SLawrence Stewart     "Use the maximum RTT seen within the measurement period (RTT) "
5050927e1a1SLawrence Stewart     "as the basic delay measurement for the algorithm.");
5060927e1a1SLawrence Stewart 
5070927e1a1SLawrence Stewart DECLARE_CC_MODULE(chd, &chd_cc_algo);
508b8d60729SRandall Stewart MODULE_VERSION(chd, 2);
5090927e1a1SLawrence Stewart MODULE_DEPEND(chd, ertt, 1, 1, 1);
510