10927e1a1SLawrence Stewart /*- 24d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause 3fe267a55SPedro F. Giffuni * 40927e1a1SLawrence Stewart * Copyright (c) 2009-2010 50927e1a1SLawrence Stewart * Swinburne University of Technology, Melbourne, Australia 60927e1a1SLawrence Stewart * Copyright (c) 2010-2011 The FreeBSD Foundation 70927e1a1SLawrence Stewart * All rights reserved. 80927e1a1SLawrence Stewart * 90927e1a1SLawrence Stewart * This software was developed at the Centre for Advanced Internet 10891b8ed4SLawrence Stewart * Architectures, Swinburne University of Technology, by David Hayes and 11891b8ed4SLawrence Stewart * Lawrence Stewart, made possible in part by a grant from the Cisco University 12891b8ed4SLawrence Stewart * Research Program Fund at Community Foundation Silicon Valley. 130927e1a1SLawrence Stewart * 140927e1a1SLawrence Stewart * Portions of this software were developed at the Centre for Advanced Internet 150927e1a1SLawrence Stewart * Architectures, Swinburne University of Technology, Melbourne, Australia by 160927e1a1SLawrence Stewart * David Hayes under sponsorship from the FreeBSD Foundation. 170927e1a1SLawrence Stewart * 180927e1a1SLawrence Stewart * Redistribution and use in source and binary forms, with or without 190927e1a1SLawrence Stewart * modification, are permitted provided that the following conditions 200927e1a1SLawrence Stewart * are met: 210927e1a1SLawrence Stewart * 1. Redistributions of source code must retain the above copyright 220927e1a1SLawrence Stewart * notice, this list of conditions and the following disclaimer. 230927e1a1SLawrence Stewart * 2. Redistributions in binary form must reproduce the above copyright 240927e1a1SLawrence Stewart * notice, this list of conditions and the following disclaimer in the 250927e1a1SLawrence Stewart * documentation and/or other materials provided with the distribution. 260927e1a1SLawrence Stewart * 270927e1a1SLawrence Stewart * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 280927e1a1SLawrence Stewart * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 290927e1a1SLawrence Stewart * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 300927e1a1SLawrence Stewart * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 310927e1a1SLawrence Stewart * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 320927e1a1SLawrence Stewart * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 330927e1a1SLawrence Stewart * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 340927e1a1SLawrence Stewart * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 350927e1a1SLawrence Stewart * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 360927e1a1SLawrence Stewart * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 370927e1a1SLawrence Stewart * SUCH DAMAGE. 380927e1a1SLawrence Stewart */ 390927e1a1SLawrence Stewart 400927e1a1SLawrence Stewart /* 410927e1a1SLawrence Stewart * An implementation of the CAIA-Hamilton delay based congestion control 420927e1a1SLawrence Stewart * algorithm, based on "Improved coexistence and loss tolerance for delay based 430927e1a1SLawrence Stewart * TCP congestion control" by D. A. Hayes and G. Armitage., in 35th Annual IEEE 440927e1a1SLawrence Stewart * Conference on Local Computer Networks (LCN 2010), Denver, Colorado, USA, 450927e1a1SLawrence Stewart * 11-14 October 2010. 460927e1a1SLawrence Stewart * 470927e1a1SLawrence Stewart * Originally released as part of the NewTCP research project at Swinburne 48891b8ed4SLawrence Stewart * University of Technology's Centre for Advanced Internet Architectures, 49891b8ed4SLawrence Stewart * Melbourne, Australia, which was made possible in part by a grant from the 50891b8ed4SLawrence Stewart * Cisco University Research Program Fund at Community Foundation Silicon 51891b8ed4SLawrence Stewart * Valley. More details are available at: 520927e1a1SLawrence Stewart * http://caia.swin.edu.au/urp/newtcp/ 530927e1a1SLawrence Stewart */ 540927e1a1SLawrence Stewart 550927e1a1SLawrence Stewart #include <sys/param.h> 560927e1a1SLawrence Stewart #include <sys/kernel.h> 570927e1a1SLawrence Stewart #include <sys/khelp.h> 580927e1a1SLawrence Stewart #include <sys/limits.h> 590927e1a1SLawrence Stewart #include <sys/malloc.h> 600927e1a1SLawrence Stewart #include <sys/module.h> 61674956e1SHenrich Hartzer #include <sys/prng.h> 620927e1a1SLawrence Stewart #include <sys/queue.h> 630927e1a1SLawrence Stewart #include <sys/socket.h> 640927e1a1SLawrence Stewart #include <sys/socketvar.h> 650927e1a1SLawrence Stewart #include <sys/sysctl.h> 660927e1a1SLawrence Stewart #include <sys/systm.h> 670927e1a1SLawrence Stewart 680927e1a1SLawrence Stewart #include <net/vnet.h> 690927e1a1SLawrence Stewart 70b8d60729SRandall Stewart #include <net/route.h> 71b8d60729SRandall Stewart #include <net/route/nhop.h> 72b8d60729SRandall Stewart 73b8d60729SRandall Stewart #include <netinet/in_pcb.h> 742de3e790SGleb Smirnoff #include <netinet/tcp.h> 750927e1a1SLawrence Stewart #include <netinet/tcp_seq.h> 760927e1a1SLawrence Stewart #include <netinet/tcp_timer.h> 770927e1a1SLawrence Stewart #include <netinet/tcp_var.h> 784644fda3SGleb Smirnoff #include <netinet/cc/cc.h> 790927e1a1SLawrence Stewart #include <netinet/cc/cc_module.h> 800927e1a1SLawrence Stewart 810927e1a1SLawrence Stewart #include <netinet/khelp/h_ertt.h> 820927e1a1SLawrence Stewart 830927e1a1SLawrence Stewart /* 840927e1a1SLawrence Stewart * Private signal type for rate based congestion signal. 850927e1a1SLawrence Stewart * See <netinet/cc.h> for appropriate bit-range to use for private signals. 860927e1a1SLawrence Stewart */ 870927e1a1SLawrence Stewart #define CC_CHD_DELAY 0x02000000 880927e1a1SLawrence Stewart 89674956e1SHenrich Hartzer /* Largest possible number returned by prng32(). */ 90674956e1SHenrich Hartzer #define RANDOM_MAX UINT32_MAX 910927e1a1SLawrence Stewart 92f74352fbSRichard Scheffenegger static void chd_ack_received(struct cc_var *ccv, ccsignal_t ack_type); 930927e1a1SLawrence Stewart static void chd_cb_destroy(struct cc_var *ccv); 94b8d60729SRandall Stewart static int chd_cb_init(struct cc_var *ccv, void *ptr); 95f74352fbSRichard Scheffenegger static void chd_cong_signal(struct cc_var *ccv, ccsignal_t signal_type); 960927e1a1SLawrence Stewart static void chd_conn_init(struct cc_var *ccv); 970927e1a1SLawrence Stewart static int chd_mod_init(void); 98b8d60729SRandall Stewart static size_t chd_data_sz(void); 990927e1a1SLawrence Stewart 1000927e1a1SLawrence Stewart struct chd { 1010927e1a1SLawrence Stewart /* 1020927e1a1SLawrence Stewart * Shadow window - keeps track of what the NewReno congestion window 1030927e1a1SLawrence Stewart * would have been if delay-based cwnd backoffs had not been made. This 1040927e1a1SLawrence Stewart * functionality aids coexistence with loss-based TCP flows which may be 1050927e1a1SLawrence Stewart * sharing links along the path. 1060927e1a1SLawrence Stewart */ 1070927e1a1SLawrence Stewart unsigned long shadow_w; 1080927e1a1SLawrence Stewart /* 1090927e1a1SLawrence Stewart * Loss-based TCP compatibility flag - When set, it turns on the shadow 1100927e1a1SLawrence Stewart * window functionality. 1110927e1a1SLawrence Stewart */ 1120927e1a1SLawrence Stewart int loss_compete; 1130927e1a1SLawrence Stewart /* The maximum round trip time seen within a measured rtt period. */ 1140927e1a1SLawrence Stewart int maxrtt_in_rtt; 1150927e1a1SLawrence Stewart /* The previous qdly that caused cwnd to backoff. */ 1160927e1a1SLawrence Stewart int prev_backoff_qdly; 1170927e1a1SLawrence Stewart }; 1180927e1a1SLawrence Stewart 1190927e1a1SLawrence Stewart static int ertt_id; 1200927e1a1SLawrence Stewart 1215f901c92SAndrew Turner VNET_DEFINE_STATIC(uint32_t, chd_qmin) = 5; 1225f901c92SAndrew Turner VNET_DEFINE_STATIC(uint32_t, chd_pmax) = 50; 1235f901c92SAndrew Turner VNET_DEFINE_STATIC(uint32_t, chd_loss_fair) = 1; 1245f901c92SAndrew Turner VNET_DEFINE_STATIC(uint32_t, chd_use_max) = 1; 1255f901c92SAndrew Turner VNET_DEFINE_STATIC(uint32_t, chd_qthresh) = 20; 1260927e1a1SLawrence Stewart #define V_chd_qthresh VNET(chd_qthresh) 1270927e1a1SLawrence Stewart #define V_chd_qmin VNET(chd_qmin) 1280927e1a1SLawrence Stewart #define V_chd_pmax VNET(chd_pmax) 1290927e1a1SLawrence Stewart #define V_chd_loss_fair VNET(chd_loss_fair) 1300927e1a1SLawrence Stewart #define V_chd_use_max VNET(chd_use_max) 1310927e1a1SLawrence Stewart 1320927e1a1SLawrence Stewart 1330927e1a1SLawrence Stewart struct cc_algo chd_cc_algo = { 1340927e1a1SLawrence Stewart .name = "chd", 1350927e1a1SLawrence Stewart .ack_received = chd_ack_received, 1360927e1a1SLawrence Stewart .cb_destroy = chd_cb_destroy, 1370927e1a1SLawrence Stewart .cb_init = chd_cb_init, 1380927e1a1SLawrence Stewart .cong_signal = chd_cong_signal, 1390927e1a1SLawrence Stewart .conn_init = chd_conn_init, 140b8d60729SRandall Stewart .mod_init = chd_mod_init, 141b8d60729SRandall Stewart .cc_data_sz = chd_data_sz, 142b8d60729SRandall Stewart .after_idle = newreno_cc_after_idle, 143b8d60729SRandall Stewart .post_recovery = newreno_cc_post_recovery, 1440927e1a1SLawrence Stewart }; 1450927e1a1SLawrence Stewart 1460927e1a1SLawrence Stewart static __inline void 1470927e1a1SLawrence Stewart chd_window_decrease(struct cc_var *ccv) 1480927e1a1SLawrence Stewart { 1490927e1a1SLawrence Stewart unsigned long win; 150*22dcc812SRichard Scheffenegger uint32_t mss = tcp_fixed_maxseg(ccv->tp); 1510927e1a1SLawrence Stewart 152*22dcc812SRichard Scheffenegger win = min(CCV(ccv, snd_wnd), CCV(ccv, snd_cwnd)) / mss; 1530927e1a1SLawrence Stewart win -= max((win / 2), 1); 154*22dcc812SRichard Scheffenegger CCV(ccv, snd_ssthresh) = max(win, 2) * mss; 1550927e1a1SLawrence Stewart } 1560927e1a1SLawrence Stewart 1570927e1a1SLawrence Stewart /* 1580927e1a1SLawrence Stewart * Probabilistic backoff function. Returns 1 if we should backoff or 0 1590927e1a1SLawrence Stewart * otherwise. The calculation of p is similar to the calculation of p in cc_hd. 1600927e1a1SLawrence Stewart */ 1610927e1a1SLawrence Stewart static __inline int 1620927e1a1SLawrence Stewart should_backoff(int qdly, int maxqdly, struct chd *chd_data) 1630927e1a1SLawrence Stewart { 164674956e1SHenrich Hartzer uint32_t rand, p; 1650927e1a1SLawrence Stewart 166674956e1SHenrich Hartzer rand = prng32(); 1670927e1a1SLawrence Stewart 1680927e1a1SLawrence Stewart if (qdly < V_chd_qthresh) { 1690927e1a1SLawrence Stewart chd_data->loss_compete = 0; 1700927e1a1SLawrence Stewart p = (((RANDOM_MAX / 100) * V_chd_pmax) / 1710927e1a1SLawrence Stewart (V_chd_qthresh - V_chd_qmin)) * 1720927e1a1SLawrence Stewart (qdly - V_chd_qmin); 1730927e1a1SLawrence Stewart } else { 1740927e1a1SLawrence Stewart if (qdly > V_chd_qthresh) { 1750927e1a1SLawrence Stewart p = (((RANDOM_MAX / 100) * V_chd_pmax) / 1760927e1a1SLawrence Stewart (maxqdly - V_chd_qthresh)) * 1770927e1a1SLawrence Stewart (maxqdly - qdly); 1780927e1a1SLawrence Stewart if (V_chd_loss_fair && rand < p) 1790927e1a1SLawrence Stewart chd_data->loss_compete = 1; 1800927e1a1SLawrence Stewart } else { 1810927e1a1SLawrence Stewart p = (RANDOM_MAX / 100) * V_chd_pmax; 1820927e1a1SLawrence Stewart chd_data->loss_compete = 0; 1830927e1a1SLawrence Stewart } 1840927e1a1SLawrence Stewart } 1850927e1a1SLawrence Stewart 1860927e1a1SLawrence Stewart return (rand < p); 1870927e1a1SLawrence Stewart } 1880927e1a1SLawrence Stewart 1890927e1a1SLawrence Stewart static __inline void 1900927e1a1SLawrence Stewart chd_window_increase(struct cc_var *ccv, int new_measurement) 1910927e1a1SLawrence Stewart { 1920927e1a1SLawrence Stewart struct chd *chd_data; 1930927e1a1SLawrence Stewart int incr; 194*22dcc812SRichard Scheffenegger uint32_t mss = tcp_fixed_maxseg(ccv->tp); 1950927e1a1SLawrence Stewart 1960927e1a1SLawrence Stewart chd_data = ccv->cc_data; 1970927e1a1SLawrence Stewart incr = 0; 1980927e1a1SLawrence Stewart 1990927e1a1SLawrence Stewart if (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh)) { 2000927e1a1SLawrence Stewart /* Adapted from NewReno slow start. */ 2010927e1a1SLawrence Stewart if (V_tcp_do_rfc3465) { 2020927e1a1SLawrence Stewart /* In slow-start with ABC enabled. */ 2030927e1a1SLawrence Stewart if (CCV(ccv, snd_nxt) == CCV(ccv, snd_max)) { 2040927e1a1SLawrence Stewart /* Not due to RTO. */ 2050927e1a1SLawrence Stewart incr = min(ccv->bytes_this_ack, 206*22dcc812SRichard Scheffenegger V_tcp_abc_l_var * mss); 2070927e1a1SLawrence Stewart } else { 2080927e1a1SLawrence Stewart /* Due to RTO. */ 209*22dcc812SRichard Scheffenegger incr = min(ccv->bytes_this_ack, mss); 2100927e1a1SLawrence Stewart } 2110927e1a1SLawrence Stewart } else 212*22dcc812SRichard Scheffenegger incr = mss; 2130927e1a1SLawrence Stewart 2140927e1a1SLawrence Stewart } else { /* Congestion avoidance. */ 2150927e1a1SLawrence Stewart if (V_tcp_do_rfc3465) { 2160927e1a1SLawrence Stewart if (ccv->flags & CCF_ABC_SENTAWND) { 2170927e1a1SLawrence Stewart ccv->flags &= ~CCF_ABC_SENTAWND; 218*22dcc812SRichard Scheffenegger incr = mss; 2190927e1a1SLawrence Stewart } 2200927e1a1SLawrence Stewart } else if (new_measurement) 221*22dcc812SRichard Scheffenegger incr = mss; 2220927e1a1SLawrence Stewart } 2230927e1a1SLawrence Stewart 2240927e1a1SLawrence Stewart if (chd_data->shadow_w > 0) { 2250927e1a1SLawrence Stewart /* Track NewReno window. */ 2260927e1a1SLawrence Stewart chd_data->shadow_w = min(chd_data->shadow_w + incr, 2270927e1a1SLawrence Stewart TCP_MAXWIN << CCV(ccv, snd_scale)); 2280927e1a1SLawrence Stewart } 2290927e1a1SLawrence Stewart 2300927e1a1SLawrence Stewart CCV(ccv,snd_cwnd) = min(CCV(ccv, snd_cwnd) + incr, 2310927e1a1SLawrence Stewart TCP_MAXWIN << CCV(ccv, snd_scale)); 2320927e1a1SLawrence Stewart } 2330927e1a1SLawrence Stewart 2340927e1a1SLawrence Stewart /* 2350927e1a1SLawrence Stewart * All ACK signals are used for timing measurements to determine delay-based 2360927e1a1SLawrence Stewart * congestion. However, window increases are only performed when 2370927e1a1SLawrence Stewart * ack_type == CC_ACK. 2380927e1a1SLawrence Stewart */ 2390927e1a1SLawrence Stewart static void 240f74352fbSRichard Scheffenegger chd_ack_received(struct cc_var *ccv, ccsignal_t ack_type) 2410927e1a1SLawrence Stewart { 2420927e1a1SLawrence Stewart struct chd *chd_data; 2430927e1a1SLawrence Stewart struct ertt *e_t; 2440927e1a1SLawrence Stewart int backoff, new_measurement, qdly, rtt; 2450927e1a1SLawrence Stewart 246e68b3792SGleb Smirnoff e_t = khelp_get_osd(&CCV(ccv, t_osd), ertt_id); 2470927e1a1SLawrence Stewart chd_data = ccv->cc_data; 2480927e1a1SLawrence Stewart new_measurement = e_t->flags & ERTT_NEW_MEASUREMENT; 2490927e1a1SLawrence Stewart backoff = qdly = 0; 2500927e1a1SLawrence Stewart 2510927e1a1SLawrence Stewart chd_data->maxrtt_in_rtt = imax(e_t->rtt, chd_data->maxrtt_in_rtt); 2520927e1a1SLawrence Stewart 2530927e1a1SLawrence Stewart if (new_measurement) { 2540927e1a1SLawrence Stewart /* 2550927e1a1SLawrence Stewart * There is a new per RTT measurement, so check to see if there 2560927e1a1SLawrence Stewart * is delay based congestion. 2570927e1a1SLawrence Stewart */ 2580927e1a1SLawrence Stewart rtt = V_chd_use_max ? chd_data->maxrtt_in_rtt : e_t->rtt; 2590927e1a1SLawrence Stewart chd_data->maxrtt_in_rtt = 0; 2600927e1a1SLawrence Stewart 2610927e1a1SLawrence Stewart if (rtt && e_t->minrtt && !IN_RECOVERY(CCV(ccv, t_flags))) { 2620927e1a1SLawrence Stewart qdly = rtt - e_t->minrtt; 2630927e1a1SLawrence Stewart if (qdly > V_chd_qmin) { 2640927e1a1SLawrence Stewart /* 2650927e1a1SLawrence Stewart * Probabilistic delay based congestion 2660927e1a1SLawrence Stewart * indication. 2670927e1a1SLawrence Stewart */ 2680927e1a1SLawrence Stewart backoff = should_backoff(qdly, 2690927e1a1SLawrence Stewart e_t->maxrtt - e_t->minrtt, chd_data); 2700927e1a1SLawrence Stewart } else 2710927e1a1SLawrence Stewart chd_data->loss_compete = 0; 2720927e1a1SLawrence Stewart } 2730927e1a1SLawrence Stewart /* Reset per RTT measurement flag to start a new measurement. */ 2740927e1a1SLawrence Stewart e_t->flags &= ~ERTT_NEW_MEASUREMENT; 2750927e1a1SLawrence Stewart } 2760927e1a1SLawrence Stewart 2770927e1a1SLawrence Stewart if (backoff) { 2780927e1a1SLawrence Stewart /* 2790927e1a1SLawrence Stewart * Update shadow_w before delay based backoff. 2800927e1a1SLawrence Stewart */ 2810927e1a1SLawrence Stewart if (chd_data->loss_compete || 2820927e1a1SLawrence Stewart qdly > chd_data->prev_backoff_qdly) { 2830927e1a1SLawrence Stewart /* 2840927e1a1SLawrence Stewart * Delay is higher than when we backed off previously, 2850927e1a1SLawrence Stewart * so it is possible that this flow is competing with 2860927e1a1SLawrence Stewart * loss based flows. 2870927e1a1SLawrence Stewart */ 2880927e1a1SLawrence Stewart chd_data->shadow_w = max(CCV(ccv, snd_cwnd), 2890927e1a1SLawrence Stewart chd_data->shadow_w); 2900927e1a1SLawrence Stewart } else { 2910927e1a1SLawrence Stewart /* 2920927e1a1SLawrence Stewart * Reset shadow_w, as it is probable that this flow is 2930927e1a1SLawrence Stewart * not competing with loss based flows at the moment. 2940927e1a1SLawrence Stewart */ 2950927e1a1SLawrence Stewart chd_data->shadow_w = 0; 2960927e1a1SLawrence Stewart } 2970927e1a1SLawrence Stewart 2980927e1a1SLawrence Stewart chd_data->prev_backoff_qdly = qdly; 2990927e1a1SLawrence Stewart /* 3000927e1a1SLawrence Stewart * Send delay-based congestion signal to the congestion signal 3010927e1a1SLawrence Stewart * handler. 3020927e1a1SLawrence Stewart */ 3030927e1a1SLawrence Stewart chd_cong_signal(ccv, CC_CHD_DELAY); 3040927e1a1SLawrence Stewart 3050927e1a1SLawrence Stewart } else if (ack_type == CC_ACK) 3060927e1a1SLawrence Stewart chd_window_increase(ccv, new_measurement); 3070927e1a1SLawrence Stewart } 3080927e1a1SLawrence Stewart 3090927e1a1SLawrence Stewart static void 3100927e1a1SLawrence Stewart chd_cb_destroy(struct cc_var *ccv) 3110927e1a1SLawrence Stewart { 312b8d60729SRandall Stewart free(ccv->cc_data, M_CC_MEM); 313b8d60729SRandall Stewart } 3140927e1a1SLawrence Stewart 315b8d60729SRandall Stewart size_t 316b8d60729SRandall Stewart chd_data_sz(void) 317b8d60729SRandall Stewart { 318b8d60729SRandall Stewart return (sizeof(struct chd)); 3190927e1a1SLawrence Stewart } 3200927e1a1SLawrence Stewart 3210927e1a1SLawrence Stewart static int 322b8d60729SRandall Stewart chd_cb_init(struct cc_var *ccv, void *ptr) 3230927e1a1SLawrence Stewart { 3240927e1a1SLawrence Stewart struct chd *chd_data; 3250927e1a1SLawrence Stewart 32600d3b744SMichael Tuexen INP_WLOCK_ASSERT(tptoinpcb(ccv->tp)); 327b8d60729SRandall Stewart if (ptr == NULL) { 328b8d60729SRandall Stewart chd_data = malloc(sizeof(struct chd), M_CC_MEM, M_NOWAIT); 3290927e1a1SLawrence Stewart if (chd_data == NULL) 3300927e1a1SLawrence Stewart return (ENOMEM); 331b8d60729SRandall Stewart } else 332b8d60729SRandall Stewart chd_data = ptr; 3330927e1a1SLawrence Stewart 3340927e1a1SLawrence Stewart chd_data->shadow_w = 0; 3350927e1a1SLawrence Stewart ccv->cc_data = chd_data; 3360927e1a1SLawrence Stewart 3370927e1a1SLawrence Stewart return (0); 3380927e1a1SLawrence Stewart } 3390927e1a1SLawrence Stewart 3400927e1a1SLawrence Stewart static void 341f74352fbSRichard Scheffenegger chd_cong_signal(struct cc_var *ccv, ccsignal_t signal_type) 3420927e1a1SLawrence Stewart { 3430927e1a1SLawrence Stewart struct ertt *e_t; 3440927e1a1SLawrence Stewart struct chd *chd_data; 3450927e1a1SLawrence Stewart int qdly; 3460927e1a1SLawrence Stewart 347e68b3792SGleb Smirnoff e_t = khelp_get_osd(&CCV(ccv, t_osd), ertt_id); 3480927e1a1SLawrence Stewart chd_data = ccv->cc_data; 3490927e1a1SLawrence Stewart qdly = imax(e_t->rtt, chd_data->maxrtt_in_rtt) - e_t->minrtt; 3500927e1a1SLawrence Stewart 351f74352fbSRichard Scheffenegger switch((int)signal_type) { 3520927e1a1SLawrence Stewart case CC_CHD_DELAY: 3530927e1a1SLawrence Stewart chd_window_decrease(ccv); /* Set new ssthresh. */ 3540927e1a1SLawrence Stewart CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh); 3550927e1a1SLawrence Stewart CCV(ccv, snd_recover) = CCV(ccv, snd_max); 3560927e1a1SLawrence Stewart ENTER_CONGRECOVERY(CCV(ccv, t_flags)); 3570927e1a1SLawrence Stewart break; 3580927e1a1SLawrence Stewart 3590927e1a1SLawrence Stewart case CC_NDUPACK: /* Packet loss. */ 3600927e1a1SLawrence Stewart /* 3610927e1a1SLawrence Stewart * Only react to loss as a congestion signal if qdly > 3620927e1a1SLawrence Stewart * V_chd_qthresh. If qdly is less than qthresh, presume that 3630927e1a1SLawrence Stewart * this is a non congestion related loss. If qdly is greater 3640927e1a1SLawrence Stewart * than qthresh, assume that we are competing with loss based 3650927e1a1SLawrence Stewart * tcp flows and restore window from any unnecessary backoffs, 3660927e1a1SLawrence Stewart * before the decrease. 3670927e1a1SLawrence Stewart */ 3680927e1a1SLawrence Stewart if (!IN_RECOVERY(CCV(ccv, t_flags)) && qdly > V_chd_qthresh) { 3690927e1a1SLawrence Stewart if (chd_data->loss_compete) { 3700927e1a1SLawrence Stewart CCV(ccv, snd_cwnd) = max(CCV(ccv, snd_cwnd), 3710927e1a1SLawrence Stewart chd_data->shadow_w); 3720927e1a1SLawrence Stewart } 3730927e1a1SLawrence Stewart chd_window_decrease(ccv); 3740927e1a1SLawrence Stewart } else { 3750927e1a1SLawrence Stewart /* 3760927e1a1SLawrence Stewart * This loss isn't congestion related, or already 3770927e1a1SLawrence Stewart * recovering from congestion. 3780927e1a1SLawrence Stewart */ 3790927e1a1SLawrence Stewart CCV(ccv, snd_ssthresh) = CCV(ccv, snd_cwnd); 3800927e1a1SLawrence Stewart CCV(ccv, snd_recover) = CCV(ccv, snd_max); 3810927e1a1SLawrence Stewart } 3820927e1a1SLawrence Stewart 3830927e1a1SLawrence Stewart if (chd_data->shadow_w > 0) { 384*22dcc812SRichard Scheffenegger uint32_t mss = tcp_fixed_maxseg(ccv->tp); 3850927e1a1SLawrence Stewart chd_data->shadow_w = max(chd_data->shadow_w / 386*22dcc812SRichard Scheffenegger mss / 2, 2) * mss; 3870927e1a1SLawrence Stewart } 3880927e1a1SLawrence Stewart ENTER_FASTRECOVERY(CCV(ccv, t_flags)); 3890927e1a1SLawrence Stewart break; 3900927e1a1SLawrence Stewart 3910927e1a1SLawrence Stewart default: 392b8d60729SRandall Stewart newreno_cc_cong_signal(ccv, signal_type); 393f74352fbSRichard Scheffenegger break; 3940927e1a1SLawrence Stewart } 3950927e1a1SLawrence Stewart } 3960927e1a1SLawrence Stewart 3970927e1a1SLawrence Stewart static void 3980927e1a1SLawrence Stewart chd_conn_init(struct cc_var *ccv) 3990927e1a1SLawrence Stewart { 4000927e1a1SLawrence Stewart struct chd *chd_data; 4010927e1a1SLawrence Stewart 4020927e1a1SLawrence Stewart chd_data = ccv->cc_data; 4030927e1a1SLawrence Stewart chd_data->prev_backoff_qdly = 0; 4040927e1a1SLawrence Stewart chd_data->maxrtt_in_rtt = 0; 4050927e1a1SLawrence Stewart chd_data->loss_compete = 0; 4060927e1a1SLawrence Stewart /* 4070927e1a1SLawrence Stewart * Initialise the shadow_cwnd to be equal to snd_cwnd in case we are 4080927e1a1SLawrence Stewart * competing with loss based flows from the start. 4090927e1a1SLawrence Stewart */ 4100927e1a1SLawrence Stewart chd_data->shadow_w = CCV(ccv, snd_cwnd); 4110927e1a1SLawrence Stewart } 4120927e1a1SLawrence Stewart 4130927e1a1SLawrence Stewart static int 4140927e1a1SLawrence Stewart chd_mod_init(void) 4150927e1a1SLawrence Stewart { 4160927e1a1SLawrence Stewart 4170927e1a1SLawrence Stewart ertt_id = khelp_get_id("ertt"); 4180927e1a1SLawrence Stewart if (ertt_id <= 0) { 4190927e1a1SLawrence Stewart printf("%s: h_ertt module not found\n", __func__); 4200927e1a1SLawrence Stewart return (ENOENT); 4210927e1a1SLawrence Stewart } 4220927e1a1SLawrence Stewart return (0); 4230927e1a1SLawrence Stewart } 4240927e1a1SLawrence Stewart 4250927e1a1SLawrence Stewart static int 4260927e1a1SLawrence Stewart chd_loss_fair_handler(SYSCTL_HANDLER_ARGS) 4270927e1a1SLawrence Stewart { 4280927e1a1SLawrence Stewart int error; 4290927e1a1SLawrence Stewart uint32_t new; 4300927e1a1SLawrence Stewart 4310927e1a1SLawrence Stewart new = V_chd_loss_fair; 4320927e1a1SLawrence Stewart error = sysctl_handle_int(oidp, &new, 0, req); 4330927e1a1SLawrence Stewart if (error == 0 && req->newptr != NULL) { 434855acb84SBrooks Davis if (new > 1) 4350927e1a1SLawrence Stewart error = EINVAL; 4360927e1a1SLawrence Stewart else 4370927e1a1SLawrence Stewart V_chd_loss_fair = new; 4380927e1a1SLawrence Stewart } 4390927e1a1SLawrence Stewart 4400927e1a1SLawrence Stewart return (error); 4410927e1a1SLawrence Stewart } 4420927e1a1SLawrence Stewart 4430927e1a1SLawrence Stewart static int 4440927e1a1SLawrence Stewart chd_pmax_handler(SYSCTL_HANDLER_ARGS) 4450927e1a1SLawrence Stewart { 4460927e1a1SLawrence Stewart int error; 4470927e1a1SLawrence Stewart uint32_t new; 4480927e1a1SLawrence Stewart 4490927e1a1SLawrence Stewart new = V_chd_pmax; 4500927e1a1SLawrence Stewart error = sysctl_handle_int(oidp, &new, 0, req); 4510927e1a1SLawrence Stewart if (error == 0 && req->newptr != NULL) { 452855acb84SBrooks Davis if (new == 0 || new > 100) 4530927e1a1SLawrence Stewart error = EINVAL; 4540927e1a1SLawrence Stewart else 4550927e1a1SLawrence Stewart V_chd_pmax = new; 4560927e1a1SLawrence Stewart } 4570927e1a1SLawrence Stewart 4580927e1a1SLawrence Stewart return (error); 4590927e1a1SLawrence Stewart } 4600927e1a1SLawrence Stewart 4610927e1a1SLawrence Stewart static int 4620927e1a1SLawrence Stewart chd_qthresh_handler(SYSCTL_HANDLER_ARGS) 4630927e1a1SLawrence Stewart { 4640927e1a1SLawrence Stewart int error; 4650927e1a1SLawrence Stewart uint32_t new; 4660927e1a1SLawrence Stewart 4670927e1a1SLawrence Stewart new = V_chd_qthresh; 4680927e1a1SLawrence Stewart error = sysctl_handle_int(oidp, &new, 0, req); 4690927e1a1SLawrence Stewart if (error == 0 && req->newptr != NULL) { 470855acb84SBrooks Davis if (new <= V_chd_qmin) 4710927e1a1SLawrence Stewart error = EINVAL; 4720927e1a1SLawrence Stewart else 4730927e1a1SLawrence Stewart V_chd_qthresh = new; 4740927e1a1SLawrence Stewart } 4750927e1a1SLawrence Stewart 4760927e1a1SLawrence Stewart return (error); 4770927e1a1SLawrence Stewart } 4780927e1a1SLawrence Stewart 4790927e1a1SLawrence Stewart SYSCTL_DECL(_net_inet_tcp_cc_chd); 4807029da5cSPawel Biernacki SYSCTL_NODE(_net_inet_tcp_cc, OID_AUTO, chd, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 4810927e1a1SLawrence Stewart "CAIA Hamilton delay-based congestion control related settings"); 4820927e1a1SLawrence Stewart 4836df8a710SGleb Smirnoff SYSCTL_PROC(_net_inet_tcp_cc_chd, OID_AUTO, loss_fair, 4847029da5cSPawel Biernacki CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, 4856df8a710SGleb Smirnoff &VNET_NAME(chd_loss_fair), 1, &chd_loss_fair_handler, 4860927e1a1SLawrence Stewart "IU", "Flag to enable shadow window functionality."); 4870927e1a1SLawrence Stewart 4886df8a710SGleb Smirnoff SYSCTL_PROC(_net_inet_tcp_cc_chd, OID_AUTO, pmax, 4897029da5cSPawel Biernacki CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, 4906df8a710SGleb Smirnoff &VNET_NAME(chd_pmax), 5, &chd_pmax_handler, 4910927e1a1SLawrence Stewart "IU", "Per RTT maximum backoff probability as a percentage"); 4920927e1a1SLawrence Stewart 4936df8a710SGleb Smirnoff SYSCTL_PROC(_net_inet_tcp_cc_chd, OID_AUTO, queue_threshold, 4947029da5cSPawel Biernacki CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, 4956df8a710SGleb Smirnoff &VNET_NAME(chd_qthresh), 20, &chd_qthresh_handler, 4960927e1a1SLawrence Stewart "IU", "Queueing congestion threshold in ticks"); 4970927e1a1SLawrence Stewart 4986df8a710SGleb Smirnoff SYSCTL_UINT(_net_inet_tcp_cc_chd, OID_AUTO, queue_min, 4996df8a710SGleb Smirnoff CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(chd_qmin), 5, 5000927e1a1SLawrence Stewart "Minimum queueing delay threshold in ticks"); 5010927e1a1SLawrence Stewart 5026df8a710SGleb Smirnoff SYSCTL_UINT(_net_inet_tcp_cc_chd, OID_AUTO, use_max, 5036df8a710SGleb Smirnoff CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(chd_use_max), 1, 5040927e1a1SLawrence Stewart "Use the maximum RTT seen within the measurement period (RTT) " 5050927e1a1SLawrence Stewart "as the basic delay measurement for the algorithm."); 5060927e1a1SLawrence Stewart 5070927e1a1SLawrence Stewart DECLARE_CC_MODULE(chd, &chd_cc_algo); 508b8d60729SRandall Stewart MODULE_VERSION(chd, 2); 5090927e1a1SLawrence Stewart MODULE_DEPEND(chd, ertt, 1, 1, 1); 510