1*a1d8c752Smaxv /* $NetBSD: tcp_sack.c,v 1.36 2018/05/18 18:58:51 maxv Exp $ */
24ae1f36dSjonathan
34ae1f36dSjonathan /*
44ae1f36dSjonathan * Copyright (c) 2005 The NetBSD Foundation, Inc.
54ae1f36dSjonathan * All rights reserved.
64ae1f36dSjonathan *
74ae1f36dSjonathan * This code is derived from software contributed to The NetBSD Foundation
84ae1f36dSjonathan * by Kentaro A. Kurahone.
94ae1f36dSjonathan *
104ae1f36dSjonathan * Redistribution and use in source and binary forms, with or without
114ae1f36dSjonathan * modification, are permitted provided that the following conditions
124ae1f36dSjonathan * are met:
134ae1f36dSjonathan * 1. Redistributions of source code must retain the above copyright
144ae1f36dSjonathan * notice, this list of conditions and the following disclaimer.
154ae1f36dSjonathan * 2. Redistributions in binary form must reproduce the above copyright
164ae1f36dSjonathan * notice, this list of conditions and the following disclaimer in the
174ae1f36dSjonathan * documentation and/or other materials provided with the distribution.
184ae1f36dSjonathan *
194ae1f36dSjonathan * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
204ae1f36dSjonathan * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
214ae1f36dSjonathan * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
224ae1f36dSjonathan * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
234ae1f36dSjonathan * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
244ae1f36dSjonathan * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
254ae1f36dSjonathan * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
264ae1f36dSjonathan * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
274ae1f36dSjonathan * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
284ae1f36dSjonathan * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
294ae1f36dSjonathan * POSSIBILITY OF SUCH DAMAGE.
304ae1f36dSjonathan */
314ae1f36dSjonathan
324ae1f36dSjonathan /*
334ae1f36dSjonathan * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
344ae1f36dSjonathan * The Regents of the University of California. All rights reserved.
354ae1f36dSjonathan *
364ae1f36dSjonathan * Redistribution and use in source and binary forms, with or without
374ae1f36dSjonathan * modification, are permitted provided that the following conditions
384ae1f36dSjonathan * are met:
394ae1f36dSjonathan * 1. Redistributions of source code must retain the above copyright
404ae1f36dSjonathan * notice, this list of conditions and the following disclaimer.
414ae1f36dSjonathan * 2. Redistributions in binary form must reproduce the above copyright
424ae1f36dSjonathan * notice, this list of conditions and the following disclaimer in the
434ae1f36dSjonathan * documentation and/or other materials provided with the distribution.
444ae1f36dSjonathan * 4. Neither the name of the University nor the names of its contributors
454ae1f36dSjonathan * may be used to endorse or promote products derived from this software
464ae1f36dSjonathan * without specific prior written permission.
474ae1f36dSjonathan *
484ae1f36dSjonathan * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
494ae1f36dSjonathan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
504ae1f36dSjonathan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
514ae1f36dSjonathan * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
524ae1f36dSjonathan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
534ae1f36dSjonathan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
544ae1f36dSjonathan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
554ae1f36dSjonathan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
564ae1f36dSjonathan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
574ae1f36dSjonathan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
584ae1f36dSjonathan * SUCH DAMAGE.
594ae1f36dSjonathan *
604ae1f36dSjonathan * @(#)tcp_sack.c 8.12 (Berkeley) 5/24/95
614ae1f36dSjonathan * $FreeBSD: src/sys/netinet/tcp_sack.c,v 1.3.2.2 2004/12/25 23:02:57 rwatson Exp $
624ae1f36dSjonathan */
634ae1f36dSjonathan
644ae1f36dSjonathan /*
654ae1f36dSjonathan * @@(#)COPYRIGHT 1.1 (NRL) 17 January 1995
664ae1f36dSjonathan *
674ae1f36dSjonathan * NRL grants permission for redistribution and use in source and binary
684ae1f36dSjonathan * forms, with or without modification, of the software and documentation
694ae1f36dSjonathan * created at NRL provided that the following conditions are met:
704ae1f36dSjonathan *
714ae1f36dSjonathan * 1. Redistributions of source code must retain the above copyright
724ae1f36dSjonathan * notice, this list of conditions and the following disclaimer.
734ae1f36dSjonathan * 2. Redistributions in binary form must reproduce the above copyright
744ae1f36dSjonathan * notice, this list of conditions and the following disclaimer in the
754ae1f36dSjonathan * documentation and/or other materials provided with the distribution.
764ae1f36dSjonathan * 3. All advertising materials mentioning features or use of this software
774ae1f36dSjonathan * must display the following acknowledgements:
784ae1f36dSjonathan * This product includes software developed by the University of
794ae1f36dSjonathan * California, Berkeley and its contributors.
804ae1f36dSjonathan * This product includes software developed at the Information
814ae1f36dSjonathan * Technology Division, US Naval Research Laboratory.
824ae1f36dSjonathan * 4. Neither the name of the NRL nor the names of its contributors
834ae1f36dSjonathan * may be used to endorse or promote products derived from this software
844ae1f36dSjonathan * without specific prior written permission.
854ae1f36dSjonathan *
864ae1f36dSjonathan * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
874ae1f36dSjonathan * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
884ae1f36dSjonathan * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
894ae1f36dSjonathan * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR
904ae1f36dSjonathan * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
914ae1f36dSjonathan * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
924ae1f36dSjonathan * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
934ae1f36dSjonathan * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
944ae1f36dSjonathan * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
954ae1f36dSjonathan * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
964ae1f36dSjonathan * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
974ae1f36dSjonathan *
984ae1f36dSjonathan * The views and conclusions contained in the software and documentation
994ae1f36dSjonathan * are those of the authors and should not be interpreted as representing
1004ae1f36dSjonathan * official policies, either expressed or implied, of the US Naval
1014ae1f36dSjonathan * Research Laboratory (NRL).
1024ae1f36dSjonathan */
1034ae1f36dSjonathan
1044ae1f36dSjonathan #include <sys/cdefs.h>
105*a1d8c752Smaxv __KERNEL_RCSID(0, "$NetBSD: tcp_sack.c,v 1.36 2018/05/18 18:58:51 maxv Exp $");
1064ae1f36dSjonathan
1071c4a50f1Spooka #ifdef _KERNEL_OPT
1084ae1f36dSjonathan #include "opt_inet.h"
1094ae1f36dSjonathan #include "opt_inet_csum.h"
1104ae1f36dSjonathan #include "opt_tcp_debug.h"
11180e1bbb7Syamt #include "opt_ddb.h"
1121c4a50f1Spooka #endif
1134ae1f36dSjonathan
1144ae1f36dSjonathan #include <sys/param.h>
1154ae1f36dSjonathan #include <sys/systm.h>
1164ae1f36dSjonathan #include <sys/mbuf.h>
1174ae1f36dSjonathan #include <sys/protosw.h>
1184ae1f36dSjonathan #include <sys/socket.h>
1194ae1f36dSjonathan #include <sys/socketvar.h>
1204ae1f36dSjonathan #include <sys/errno.h>
1214ae1f36dSjonathan #include <sys/syslog.h>
1224ae1f36dSjonathan #include <sys/pool.h>
1234ae1f36dSjonathan #include <sys/domain.h>
1244ae1f36dSjonathan #include <sys/kernel.h>
1254ae1f36dSjonathan
1264ae1f36dSjonathan #include <net/if.h>
1274ae1f36dSjonathan #include <net/route.h>
1284ae1f36dSjonathan #include <net/if_types.h>
1294ae1f36dSjonathan
1304ae1f36dSjonathan #include <netinet/in.h>
1314ae1f36dSjonathan #include <netinet/in_systm.h>
1324ae1f36dSjonathan #include <netinet/ip.h>
1334ae1f36dSjonathan #include <netinet/in_pcb.h>
1344ae1f36dSjonathan #include <netinet/in_var.h>
1354ae1f36dSjonathan #include <netinet/ip_var.h>
1364ae1f36dSjonathan
1374ae1f36dSjonathan #ifdef INET6
1384ae1f36dSjonathan #include <netinet/ip6.h>
1394ae1f36dSjonathan #include <netinet6/ip6_var.h>
1404ae1f36dSjonathan #include <netinet6/in6_pcb.h>
1414ae1f36dSjonathan #include <netinet6/ip6_var.h>
1424ae1f36dSjonathan #include <netinet6/in6_var.h>
1434ae1f36dSjonathan #include <netinet/icmp6.h>
1444ae1f36dSjonathan #endif
1454ae1f36dSjonathan
1464ae1f36dSjonathan #ifndef INET6
1474ae1f36dSjonathan #include <netinet/ip6.h>
1484ae1f36dSjonathan #endif
1494ae1f36dSjonathan
1504ae1f36dSjonathan #include <netinet/tcp.h>
1514ae1f36dSjonathan #include <netinet/tcp_fsm.h>
1524ae1f36dSjonathan #include <netinet/tcp_seq.h>
1534ae1f36dSjonathan #include <netinet/tcp_timer.h>
1544ae1f36dSjonathan #include <netinet/tcp_var.h>
1554ae1f36dSjonathan #include <netinet/tcp_debug.h>
1564ae1f36dSjonathan
1574ae1f36dSjonathan /* SACK block pool. */
1589d2101a2Spooka static struct pool sackhole_pool;
1599d2101a2Spooka
1609d2101a2Spooka void
tcp_sack_init(void)1614b50cb78Smatt tcp_sack_init(void)
1629d2101a2Spooka {
1639d2101a2Spooka
1649d2101a2Spooka pool_init(&sackhole_pool, sizeof(struct sackhole), 0, 0, 0,
1659d2101a2Spooka "sackholepl", NULL, IPL_SOFTNET);
1669d2101a2Spooka }
16794e70819Syamt
16894e70819Syamt static struct sackhole *
sack_allochole(struct tcpcb * tp)16994e70819Syamt sack_allochole(struct tcpcb *tp)
17094e70819Syamt {
17194e70819Syamt struct sackhole *hole;
17294e70819Syamt
17394e70819Syamt if (tp->snd_numholes >= tcp_sack_tp_maxholes ||
17494e70819Syamt tcp_sack_globalholes >= tcp_sack_globalmaxholes) {
17594e70819Syamt return NULL;
17694e70819Syamt }
17794e70819Syamt hole = pool_get(&sackhole_pool, PR_NOWAIT);
17894e70819Syamt if (hole == NULL) {
17994e70819Syamt return NULL;
18094e70819Syamt }
18194e70819Syamt tp->snd_numholes++;
18294e70819Syamt tcp_sack_globalholes++;
18394e70819Syamt
18494e70819Syamt return hole;
18594e70819Syamt }
18694e70819Syamt
18794e70819Syamt static struct sackhole *
sack_inserthole(struct tcpcb * tp,tcp_seq start,tcp_seq end,struct sackhole * prev)18894e70819Syamt sack_inserthole(struct tcpcb *tp, tcp_seq start, tcp_seq end,
18994e70819Syamt struct sackhole *prev)
19094e70819Syamt {
19194e70819Syamt struct sackhole *hole;
19294e70819Syamt
19394e70819Syamt hole = sack_allochole(tp);
19494e70819Syamt if (hole == NULL) {
19594e70819Syamt return NULL;
19694e70819Syamt }
19794e70819Syamt hole->start = hole->rxmit = start;
19894e70819Syamt hole->end = end;
19994e70819Syamt if (prev != NULL) {
20094e70819Syamt TAILQ_INSERT_AFTER(&tp->snd_holes, prev, hole, sackhole_q);
20194e70819Syamt } else {
20294e70819Syamt TAILQ_INSERT_TAIL(&tp->snd_holes, hole, sackhole_q);
20394e70819Syamt }
20494e70819Syamt return hole;
20594e70819Syamt }
20694e70819Syamt
20794e70819Syamt static struct sackhole *
sack_removehole(struct tcpcb * tp,struct sackhole * hole)20894e70819Syamt sack_removehole(struct tcpcb *tp, struct sackhole *hole)
20994e70819Syamt {
21094e70819Syamt struct sackhole *next;
21194e70819Syamt
21294e70819Syamt next = TAILQ_NEXT(hole, sackhole_q);
21394e70819Syamt tp->snd_numholes--;
21494e70819Syamt tcp_sack_globalholes--;
21594e70819Syamt TAILQ_REMOVE(&tp->snd_holes, hole, sackhole_q);
21694e70819Syamt pool_put(&sackhole_pool, hole);
21794e70819Syamt
21894e70819Syamt return next;
21994e70819Syamt }
2204ae1f36dSjonathan
221c9cf49acSyamt /*
222c9cf49acSyamt * tcp_new_dsack: record the reception of a duplicated segment.
223c9cf49acSyamt */
224c9cf49acSyamt
2254ae1f36dSjonathan void
tcp_new_dsack(struct tcpcb * tp,tcp_seq seq,u_int32_t len)2264ae1f36dSjonathan tcp_new_dsack(struct tcpcb *tp, tcp_seq seq, u_int32_t len)
2274ae1f36dSjonathan {
228c9cf49acSyamt
2294ae1f36dSjonathan if (TCP_SACK_ENABLED(tp)) {
2304ae1f36dSjonathan tp->rcv_dsack_block.left = seq;
2314ae1f36dSjonathan tp->rcv_dsack_block.right = seq + len;
2324ae1f36dSjonathan tp->rcv_sack_flags |= TCPSACK_HAVED;
2334ae1f36dSjonathan }
2344ae1f36dSjonathan }
2354ae1f36dSjonathan
236c9cf49acSyamt /*
237c9cf49acSyamt * tcp_sack_option: parse the given SACK option and update the scoreboard.
238c9cf49acSyamt */
239c9cf49acSyamt
2404ae1f36dSjonathan void
tcp_sack_option(struct tcpcb * tp,const struct tcphdr * th,const u_char * cp,int optlen)241c31e2223Syamt tcp_sack_option(struct tcpcb *tp, const struct tcphdr *th, const u_char *cp,
242c31e2223Syamt int optlen)
2434ae1f36dSjonathan {
244ed8b840fSyamt struct sackblk
245ed8b840fSyamt t_sack_block[(MAX_TCPOPTLEN - 2) / (sizeof(u_int32_t) * 2)];
2464ae1f36dSjonathan struct sackblk *sack = NULL;
2474ae1f36dSjonathan struct sackhole *cur = NULL;
2484ae1f36dSjonathan struct sackhole *tmp = NULL;
249c31e2223Syamt const char *lp = cp + 2;
2505a0a4d9dSyamt int i, j, num_sack_blks;
2514ae1f36dSjonathan tcp_seq left, right, acked;
2524ae1f36dSjonathan
2534ae1f36dSjonathan /*
2540eb940bcSkurahone * If we aren't processing SACK responses, this is not an ACK
2550eb940bcSkurahone * or the peer sends us a sack option with invalid length, don't
2564ae1f36dSjonathan * update the scoreboard.
2574ae1f36dSjonathan */
2580eb940bcSkurahone if (!TCP_SACK_ENABLED(tp) || ((th->th_flags & TH_ACK) == 0) ||
2590eb940bcSkurahone (optlen % 8 != 2 || optlen < 10)) {
2604ae1f36dSjonathan return;
2614ae1f36dSjonathan }
2624ae1f36dSjonathan
263f7707899Skurahone /*
264f7707899Skurahone * If we don't want any SACK holes to be allocated, just return.
265f7707899Skurahone */
266f7707899Skurahone if (tcp_sack_globalmaxholes == 0 || tcp_sack_tp_maxholes == 0) {
267f7707899Skurahone return;
268f7707899Skurahone }
269f7707899Skurahone
2700eb940bcSkurahone /* If the ACK is outside [snd_una, snd_max], ignore the SACK options. */
2710eb940bcSkurahone if (SEQ_LT(th->th_ack, tp->snd_una) || SEQ_GT(th->th_ack, tp->snd_max))
2720eb940bcSkurahone return;
2730eb940bcSkurahone
2744ae1f36dSjonathan /*
2754ae1f36dSjonathan * Extract SACK blocks.
2764ae1f36dSjonathan *
2774ae1f36dSjonathan * Note that t_sack_block is sorted so that we only need to do
2784ae1f36dSjonathan * one pass over the sequence number space. (SACK "fast-path")
2794ae1f36dSjonathan */
2804ae1f36dSjonathan num_sack_blks = optlen / 8;
2814ae1f36dSjonathan acked = (SEQ_GT(th->th_ack, tp->snd_una)) ? th->th_ack : tp->snd_una;
28278f5b5f9Sreinoud for (i = 0; i < num_sack_blks; i++, lp += sizeof(uint32_t) * 2) {
28378f5b5f9Sreinoud memcpy(&left, lp, sizeof(uint32_t));
28478f5b5f9Sreinoud memcpy(&right, lp + sizeof(uint32_t), sizeof(uint32_t));
285fd5005e8Syamt left = ntohl(left);
286fd5005e8Syamt right = ntohl(right);
2874ae1f36dSjonathan
288b8690cc2Syamt if (SEQ_LEQ(right, acked) || SEQ_GT(right, tp->snd_max) ||
2894ae1f36dSjonathan SEQ_GEQ(left, right)) {
2904ae1f36dSjonathan /* SACK entry that's old, or invalid. */
2914ae1f36dSjonathan i--;
2924ae1f36dSjonathan num_sack_blks--;
2934ae1f36dSjonathan continue;
2944ae1f36dSjonathan }
2954ae1f36dSjonathan
2964ae1f36dSjonathan /* Insertion sort. */
2971152380aSyamt for (j = i; (j > 0) && SEQ_LT(left, t_sack_block[j - 1].left);
2981152380aSyamt j--) {
2994ae1f36dSjonathan t_sack_block[j].left = t_sack_block[j - 1].left;
3004ae1f36dSjonathan t_sack_block[j].right = t_sack_block[j - 1].right;
3014ae1f36dSjonathan }
3024ae1f36dSjonathan t_sack_block[j].left = left;
3034ae1f36dSjonathan t_sack_block[j].right = right;
3044ae1f36dSjonathan }
3054ae1f36dSjonathan
3064ae1f36dSjonathan /* Update the scoreboard. */
3074ae1f36dSjonathan cur = TAILQ_FIRST(&tp->snd_holes);
3084ae1f36dSjonathan for (i = 0; i < num_sack_blks; i++) {
3094ae1f36dSjonathan sack = &t_sack_block[i];
3104ae1f36dSjonathan /*
3114ae1f36dSjonathan * FACK TCP. Update snd_fack so we can enter Fast
3124ae1f36dSjonathan * Recovery early.
3134ae1f36dSjonathan */
3144ae1f36dSjonathan if (SEQ_GEQ(sack->right, tp->snd_fack))
3154ae1f36dSjonathan tp->snd_fack = sack->right;
3164ae1f36dSjonathan
3174ae1f36dSjonathan if (TAILQ_EMPTY(&tp->snd_holes)) {
3184ae1f36dSjonathan /* First hole. */
31994e70819Syamt cur = sack_inserthole(tp, th->th_ack, sack->left, NULL);
3204ae1f36dSjonathan if (cur == NULL) {
3214ae1f36dSjonathan /* ENOBUFS, bail out*/
3224ae1f36dSjonathan return;
3234ae1f36dSjonathan }
3244ae1f36dSjonathan tp->rcv_lastsack = sack->right;
3254ae1f36dSjonathan continue; /* With next sack block */
3264ae1f36dSjonathan }
3274ae1f36dSjonathan
3284ae1f36dSjonathan /* Go through the list of holes. */
3294ae1f36dSjonathan while (cur) {
330ff614e11Syamt if (SEQ_LEQ(sack->right, cur->start))
3314ae1f36dSjonathan /* SACKs data before the current hole */
3324ae1f36dSjonathan break; /* No use going through more holes */
3334ae1f36dSjonathan
3344ae1f36dSjonathan if (SEQ_GEQ(sack->left, cur->end)) {
3354ae1f36dSjonathan /* SACKs data beyond the current hole */
3364ae1f36dSjonathan cur = TAILQ_NEXT(cur, sackhole_q);
3374ae1f36dSjonathan continue;
3384ae1f36dSjonathan }
3394ae1f36dSjonathan
3404ae1f36dSjonathan if (SEQ_LEQ(sack->left, cur->start)) {
3414ae1f36dSjonathan /* Data acks at least the beginning of hole */
3424ae1f36dSjonathan if (SEQ_GEQ(sack->right, cur->end)) {
3434ae1f36dSjonathan /* Acks entire hole, so delete hole */
34494e70819Syamt cur = sack_removehole(tp, cur);
3454ae1f36dSjonathan break;
3464ae1f36dSjonathan }
3474ae1f36dSjonathan
3484ae1f36dSjonathan /* Otherwise, move start of hole forward */
3494ae1f36dSjonathan cur->start = sack->right;
3504ae1f36dSjonathan cur->rxmit = SEQ_MAX(cur->rxmit, cur->start);
3514ae1f36dSjonathan break;
3524ae1f36dSjonathan }
3534ae1f36dSjonathan
3544ae1f36dSjonathan if (SEQ_GEQ(sack->right, cur->end)) {
3554ae1f36dSjonathan /* Move end of hole backward. */
3564ae1f36dSjonathan cur->end = sack->left;
3574ae1f36dSjonathan cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
3584ae1f36dSjonathan cur = TAILQ_NEXT(cur, sackhole_q);
3594ae1f36dSjonathan break;
3604ae1f36dSjonathan }
3614ae1f36dSjonathan
3624ae1f36dSjonathan if (SEQ_LT(cur->start, sack->left) &&
3634ae1f36dSjonathan SEQ_GT(cur->end, sack->right)) {
3644ae1f36dSjonathan /*
3654ae1f36dSjonathan * ACKs some data in middle of a hole; need to
3664ae1f36dSjonathan * split current hole
3674ae1f36dSjonathan */
36894e70819Syamt tmp = sack_inserthole(tp, sack->right, cur->end,
36994e70819Syamt cur);
3704ae1f36dSjonathan if (tmp == NULL) {
3714ae1f36dSjonathan return;
3724ae1f36dSjonathan }
3734ae1f36dSjonathan tmp->rxmit = SEQ_MAX(cur->rxmit, tmp->start);
3744ae1f36dSjonathan cur->end = sack->left;
3754ae1f36dSjonathan cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
376e55b9169Syamt cur = tmp;
3774ae1f36dSjonathan break;
3784ae1f36dSjonathan }
3794ae1f36dSjonathan }
3804ae1f36dSjonathan
3814ae1f36dSjonathan /* At this point, we have reached the tail of the list. */
3824ae1f36dSjonathan if (SEQ_LT(tp->rcv_lastsack, sack->left)) {
3834ae1f36dSjonathan /*
3844ae1f36dSjonathan * Need to append new hole at end.
3854ae1f36dSjonathan */
38694e70819Syamt cur = sack_inserthole(tp, tp->rcv_lastsack, sack->left,
38794e70819Syamt NULL);
38894e70819Syamt if (cur == NULL) {
389f7707899Skurahone return;
390f7707899Skurahone }
3914ae1f36dSjonathan }
392a0f802e2Syamt if (SEQ_LT(tp->rcv_lastsack, sack->right)) {
393a0f802e2Syamt tp->rcv_lastsack = sack->right;
394a0f802e2Syamt }
3954ae1f36dSjonathan }
3964ae1f36dSjonathan }
3974ae1f36dSjonathan
398c9cf49acSyamt /*
399c9cf49acSyamt * tcp_del_sackholes: remove holes covered by a cumulative ACK.
400c9cf49acSyamt */
401c9cf49acSyamt
4024ae1f36dSjonathan void
tcp_del_sackholes(struct tcpcb * tp,const struct tcphdr * th)403c31e2223Syamt tcp_del_sackholes(struct tcpcb *tp, const struct tcphdr *th)
4044ae1f36dSjonathan {
4054ae1f36dSjonathan /* Max because this could be an older ack that just arrived. */
4064ae1f36dSjonathan tcp_seq lastack = SEQ_GT(th->th_ack, tp->snd_una) ?
4074ae1f36dSjonathan th->th_ack : tp->snd_una;
4084ae1f36dSjonathan struct sackhole *cur = TAILQ_FIRST(&tp->snd_holes);
4094ae1f36dSjonathan
4104ae1f36dSjonathan while (cur) {
4114ae1f36dSjonathan if (SEQ_LEQ(cur->end, lastack)) {
41294e70819Syamt cur = sack_removehole(tp, cur);
4134ae1f36dSjonathan } else if (SEQ_LT(cur->start, lastack)) {
4144ae1f36dSjonathan cur->start = lastack;
4154ae1f36dSjonathan if (SEQ_LT(cur->rxmit, cur->start))
4164ae1f36dSjonathan cur->rxmit = cur->start;
4174ae1f36dSjonathan break;
4184ae1f36dSjonathan } else
4194ae1f36dSjonathan break;
4204ae1f36dSjonathan }
4214ae1f36dSjonathan }
4224ae1f36dSjonathan
423c9cf49acSyamt /*
424c9cf49acSyamt * tcp_free_sackholes: clear the scoreboard.
425c9cf49acSyamt */
426c9cf49acSyamt
4274ae1f36dSjonathan void
tcp_free_sackholes(struct tcpcb * tp)4284ae1f36dSjonathan tcp_free_sackholes(struct tcpcb *tp)
4294ae1f36dSjonathan {
4304ae1f36dSjonathan struct sackhole *sack;
4314ae1f36dSjonathan
4324ae1f36dSjonathan /* Free up the SACK hole list. */
43394e70819Syamt while ((sack = TAILQ_FIRST(&tp->snd_holes)) != NULL) {
43494e70819Syamt sack_removehole(tp, sack);
4354ae1f36dSjonathan }
43694e70819Syamt KASSERT(tp->snd_numholes == 0);
4374ae1f36dSjonathan }
4384ae1f36dSjonathan
4394ae1f36dSjonathan /*
4404ae1f36dSjonathan * Returns pointer to a sackhole if there are any pending retransmissions;
4414ae1f36dSjonathan * NULL otherwise.
4424ae1f36dSjonathan */
4434ae1f36dSjonathan struct sackhole *
tcp_sack_output(struct tcpcb * tp,int * sack_bytes_rexmt)4444ae1f36dSjonathan tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt)
4454ae1f36dSjonathan {
4464ae1f36dSjonathan struct sackhole *cur = NULL;
4474ae1f36dSjonathan
4484ae1f36dSjonathan if (!TCP_SACK_ENABLED(tp))
4494ae1f36dSjonathan return (NULL);
4504ae1f36dSjonathan
4514ae1f36dSjonathan *sack_bytes_rexmt = 0;
4524ae1f36dSjonathan TAILQ_FOREACH(cur, &tp->snd_holes, sackhole_q) {
4534ae1f36dSjonathan if (SEQ_LT(cur->rxmit, cur->end)) {
4541152380aSyamt if (SEQ_LT(cur->rxmit, tp->snd_una)) {
4551152380aSyamt /* old SACK hole */
4564ae1f36dSjonathan continue;
4574ae1f36dSjonathan }
4584ae1f36dSjonathan *sack_bytes_rexmt += (cur->rxmit - cur->start);
4594ae1f36dSjonathan break;
4604ae1f36dSjonathan }
4614ae1f36dSjonathan *sack_bytes_rexmt += (cur->rxmit - cur->start);
4624ae1f36dSjonathan }
4634ae1f36dSjonathan
4644ae1f36dSjonathan return (cur);
4654ae1f36dSjonathan }
4664ae1f36dSjonathan
4674ae1f36dSjonathan /*
4684ae1f36dSjonathan * After a timeout, the SACK list may be rebuilt. This SACK information
4694ae1f36dSjonathan * should be used to avoid retransmitting SACKed data. This function
4704ae1f36dSjonathan * traverses the SACK list to see if snd_nxt should be moved forward.
4714ae1f36dSjonathan */
4724ae1f36dSjonathan void
tcp_sack_adjust(struct tcpcb * tp)4734ae1f36dSjonathan tcp_sack_adjust(struct tcpcb *tp)
4744ae1f36dSjonathan {
4754ae1f36dSjonathan struct sackhole *cur = TAILQ_FIRST(&tp->snd_holes);
4764ae1f36dSjonathan struct sackhole *n = NULL;
4774ae1f36dSjonathan
4784ae1f36dSjonathan if (TAILQ_EMPTY(&tp->snd_holes))
4794ae1f36dSjonathan return; /* No holes */
4804ae1f36dSjonathan if (SEQ_GEQ(tp->snd_nxt, tp->rcv_lastsack))
4814ae1f36dSjonathan return; /* We're already beyond any SACKed blocks */
4824ae1f36dSjonathan
4834ae1f36dSjonathan /*
4844ae1f36dSjonathan * Two cases for which we want to advance snd_nxt:
4854ae1f36dSjonathan * i) snd_nxt lies between end of one hole and beginning of another
4864ae1f36dSjonathan * ii) snd_nxt lies between end of last hole and rcv_lastsack
4874ae1f36dSjonathan */
4884ae1f36dSjonathan while ((n = TAILQ_NEXT(cur, sackhole_q)) != NULL) {
4894ae1f36dSjonathan if (SEQ_LT(tp->snd_nxt, cur->end))
4904ae1f36dSjonathan return;
4914ae1f36dSjonathan if (SEQ_GEQ(tp->snd_nxt, n->start))
4924ae1f36dSjonathan cur = n;
4934ae1f36dSjonathan else {
4944ae1f36dSjonathan tp->snd_nxt = n->start;
4954ae1f36dSjonathan return;
4964ae1f36dSjonathan }
4974ae1f36dSjonathan }
4984ae1f36dSjonathan if (SEQ_LT(tp->snd_nxt, cur->end))
4994ae1f36dSjonathan return;
5004ae1f36dSjonathan tp->snd_nxt = tp->rcv_lastsack;
5014ae1f36dSjonathan
5024ae1f36dSjonathan return;
5034ae1f36dSjonathan }
5040446b7c3Syamt
505c9cf49acSyamt /*
506c9cf49acSyamt * tcp_sack_numblks: return the number of SACK blocks to send.
507c9cf49acSyamt */
508c9cf49acSyamt
5090446b7c3Syamt int
tcp_sack_numblks(const struct tcpcb * tp)510df05ca70Syamt tcp_sack_numblks(const struct tcpcb *tp)
5110446b7c3Syamt {
512df05ca70Syamt int numblks;
5130446b7c3Syamt
514df05ca70Syamt if (!TCP_SACK_ENABLED(tp)) {
5150446b7c3Syamt return 0;
5160446b7c3Syamt }
5170446b7c3Syamt
518df05ca70Syamt numblks = (((tp->rcv_sack_flags & TCPSACK_HAVED) != 0) ? 1 : 0) +
519df05ca70Syamt tp->t_segqlen;
520df05ca70Syamt
521df05ca70Syamt if (numblks == 0) {
522df05ca70Syamt return 0;
523df05ca70Syamt }
524df05ca70Syamt
525df05ca70Syamt if (numblks > TCP_SACK_MAX) {
526df05ca70Syamt numblks = TCP_SACK_MAX;
527df05ca70Syamt }
528df05ca70Syamt
529df05ca70Syamt return numblks;
5300446b7c3Syamt }
53180e1bbb7Syamt
53280e1bbb7Syamt #if defined(DDB)
53380e1bbb7Syamt void sack_dump(const struct tcpcb *);
53480e1bbb7Syamt
53580e1bbb7Syamt void
sack_dump(const struct tcpcb * tp)53680e1bbb7Syamt sack_dump(const struct tcpcb *tp)
53780e1bbb7Syamt {
53880e1bbb7Syamt const struct sackhole *cur;
53980e1bbb7Syamt
54080e1bbb7Syamt printf("snd_una=%" PRIu32 ", snd_max=%" PRIu32 "\n",
54180e1bbb7Syamt tp->snd_una, tp->snd_max);
54280e1bbb7Syamt printf("rcv_lastsack=%" PRIu32 ", snd_fack=%" PRIu32 "\n",
54380e1bbb7Syamt tp->rcv_lastsack, tp->snd_fack);
54480e1bbb7Syamt printf("numholes=%d\n", tp->snd_numholes);
54580e1bbb7Syamt TAILQ_FOREACH(cur, &tp->snd_holes, sackhole_q) {
54680e1bbb7Syamt printf("\t%" PRIu32 "-%" PRIu32 ", rxmit=%" PRIu32 "\n",
54780e1bbb7Syamt cur->start, cur->end, cur->rxmit);
54880e1bbb7Syamt }
54980e1bbb7Syamt }
55080e1bbb7Syamt #endif /* defined(DDB) */
551