xref: /netbsd-src/sys/netinet/tcp_sack.c (revision a1d8c752e75fcdde41a6259b4581cc96e7d480b3)
1*a1d8c752Smaxv /* $NetBSD: tcp_sack.c,v 1.36 2018/05/18 18:58:51 maxv Exp $ */
24ae1f36dSjonathan 
34ae1f36dSjonathan /*
44ae1f36dSjonathan  * Copyright (c) 2005 The NetBSD Foundation, Inc.
54ae1f36dSjonathan  * All rights reserved.
64ae1f36dSjonathan  *
74ae1f36dSjonathan  * This code is derived from software contributed to The NetBSD Foundation
84ae1f36dSjonathan  * by Kentaro A. Kurahone.
94ae1f36dSjonathan  *
104ae1f36dSjonathan  * Redistribution and use in source and binary forms, with or without
114ae1f36dSjonathan  * modification, are permitted provided that the following conditions
124ae1f36dSjonathan  * are met:
134ae1f36dSjonathan  * 1. Redistributions of source code must retain the above copyright
144ae1f36dSjonathan  *    notice, this list of conditions and the following disclaimer.
154ae1f36dSjonathan  * 2. Redistributions in binary form must reproduce the above copyright
164ae1f36dSjonathan  *    notice, this list of conditions and the following disclaimer in the
174ae1f36dSjonathan  *    documentation and/or other materials provided with the distribution.
184ae1f36dSjonathan  *
194ae1f36dSjonathan  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
204ae1f36dSjonathan  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
214ae1f36dSjonathan  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
224ae1f36dSjonathan  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
234ae1f36dSjonathan  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
244ae1f36dSjonathan  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
254ae1f36dSjonathan  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
264ae1f36dSjonathan  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
274ae1f36dSjonathan  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
284ae1f36dSjonathan  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
294ae1f36dSjonathan  * POSSIBILITY OF SUCH DAMAGE.
304ae1f36dSjonathan  */
314ae1f36dSjonathan 
324ae1f36dSjonathan /*
334ae1f36dSjonathan  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
344ae1f36dSjonathan  *	The Regents of the University of California.  All rights reserved.
354ae1f36dSjonathan  *
364ae1f36dSjonathan  * Redistribution and use in source and binary forms, with or without
374ae1f36dSjonathan  * modification, are permitted provided that the following conditions
384ae1f36dSjonathan  * are met:
394ae1f36dSjonathan  * 1. Redistributions of source code must retain the above copyright
404ae1f36dSjonathan  *    notice, this list of conditions and the following disclaimer.
414ae1f36dSjonathan  * 2. Redistributions in binary form must reproduce the above copyright
424ae1f36dSjonathan  *    notice, this list of conditions and the following disclaimer in the
434ae1f36dSjonathan  *    documentation and/or other materials provided with the distribution.
444ae1f36dSjonathan  * 4. Neither the name of the University nor the names of its contributors
454ae1f36dSjonathan  *    may be used to endorse or promote products derived from this software
464ae1f36dSjonathan  *    without specific prior written permission.
474ae1f36dSjonathan  *
484ae1f36dSjonathan  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
494ae1f36dSjonathan  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
504ae1f36dSjonathan  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
514ae1f36dSjonathan  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
524ae1f36dSjonathan  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
534ae1f36dSjonathan  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
544ae1f36dSjonathan  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
554ae1f36dSjonathan  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
564ae1f36dSjonathan  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
574ae1f36dSjonathan  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
584ae1f36dSjonathan  * SUCH DAMAGE.
594ae1f36dSjonathan  *
604ae1f36dSjonathan  *	@(#)tcp_sack.c	8.12 (Berkeley) 5/24/95
614ae1f36dSjonathan  * $FreeBSD: src/sys/netinet/tcp_sack.c,v 1.3.2.2 2004/12/25 23:02:57 rwatson Exp $
624ae1f36dSjonathan  */
634ae1f36dSjonathan 
644ae1f36dSjonathan /*
654ae1f36dSjonathan  *	@@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
664ae1f36dSjonathan  *
674ae1f36dSjonathan  * NRL grants permission for redistribution and use in source and binary
684ae1f36dSjonathan  * forms, with or without modification, of the software and documentation
694ae1f36dSjonathan  * created at NRL provided that the following conditions are met:
704ae1f36dSjonathan  *
714ae1f36dSjonathan  * 1. Redistributions of source code must retain the above copyright
724ae1f36dSjonathan  *    notice, this list of conditions and the following disclaimer.
734ae1f36dSjonathan  * 2. Redistributions in binary form must reproduce the above copyright
744ae1f36dSjonathan  *    notice, this list of conditions and the following disclaimer in the
754ae1f36dSjonathan  *    documentation and/or other materials provided with the distribution.
764ae1f36dSjonathan  * 3. All advertising materials mentioning features or use of this software
774ae1f36dSjonathan  *    must display the following acknowledgements:
784ae1f36dSjonathan  *	This product includes software developed by the University of
794ae1f36dSjonathan  *	California, Berkeley and its contributors.
804ae1f36dSjonathan  *	This product includes software developed at the Information
814ae1f36dSjonathan  *	Technology Division, US Naval Research Laboratory.
824ae1f36dSjonathan  * 4. Neither the name of the NRL nor the names of its contributors
834ae1f36dSjonathan  *    may be used to endorse or promote products derived from this software
844ae1f36dSjonathan  *    without specific prior written permission.
854ae1f36dSjonathan  *
864ae1f36dSjonathan  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
874ae1f36dSjonathan  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
884ae1f36dSjonathan  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
894ae1f36dSjonathan  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
904ae1f36dSjonathan  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
914ae1f36dSjonathan  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
924ae1f36dSjonathan  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
934ae1f36dSjonathan  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
944ae1f36dSjonathan  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
954ae1f36dSjonathan  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
964ae1f36dSjonathan  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
974ae1f36dSjonathan  *
984ae1f36dSjonathan  * The views and conclusions contained in the software and documentation
994ae1f36dSjonathan  * are those of the authors and should not be interpreted as representing
1004ae1f36dSjonathan  * official policies, either expressed or implied, of the US Naval
1014ae1f36dSjonathan  * Research Laboratory (NRL).
1024ae1f36dSjonathan  */
1034ae1f36dSjonathan 
1044ae1f36dSjonathan #include <sys/cdefs.h>
105*a1d8c752Smaxv __KERNEL_RCSID(0, "$NetBSD: tcp_sack.c,v 1.36 2018/05/18 18:58:51 maxv Exp $");
1064ae1f36dSjonathan 
1071c4a50f1Spooka #ifdef _KERNEL_OPT
1084ae1f36dSjonathan #include "opt_inet.h"
1094ae1f36dSjonathan #include "opt_inet_csum.h"
1104ae1f36dSjonathan #include "opt_tcp_debug.h"
11180e1bbb7Syamt #include "opt_ddb.h"
1121c4a50f1Spooka #endif
1134ae1f36dSjonathan 
1144ae1f36dSjonathan #include <sys/param.h>
1154ae1f36dSjonathan #include <sys/systm.h>
1164ae1f36dSjonathan #include <sys/mbuf.h>
1174ae1f36dSjonathan #include <sys/protosw.h>
1184ae1f36dSjonathan #include <sys/socket.h>
1194ae1f36dSjonathan #include <sys/socketvar.h>
1204ae1f36dSjonathan #include <sys/errno.h>
1214ae1f36dSjonathan #include <sys/syslog.h>
1224ae1f36dSjonathan #include <sys/pool.h>
1234ae1f36dSjonathan #include <sys/domain.h>
1244ae1f36dSjonathan #include <sys/kernel.h>
1254ae1f36dSjonathan 
1264ae1f36dSjonathan #include <net/if.h>
1274ae1f36dSjonathan #include <net/route.h>
1284ae1f36dSjonathan #include <net/if_types.h>
1294ae1f36dSjonathan 
1304ae1f36dSjonathan #include <netinet/in.h>
1314ae1f36dSjonathan #include <netinet/in_systm.h>
1324ae1f36dSjonathan #include <netinet/ip.h>
1334ae1f36dSjonathan #include <netinet/in_pcb.h>
1344ae1f36dSjonathan #include <netinet/in_var.h>
1354ae1f36dSjonathan #include <netinet/ip_var.h>
1364ae1f36dSjonathan 
1374ae1f36dSjonathan #ifdef INET6
1384ae1f36dSjonathan #include <netinet/ip6.h>
1394ae1f36dSjonathan #include <netinet6/ip6_var.h>
1404ae1f36dSjonathan #include <netinet6/in6_pcb.h>
1414ae1f36dSjonathan #include <netinet6/ip6_var.h>
1424ae1f36dSjonathan #include <netinet6/in6_var.h>
1434ae1f36dSjonathan #include <netinet/icmp6.h>
1444ae1f36dSjonathan #endif
1454ae1f36dSjonathan 
1464ae1f36dSjonathan #ifndef INET6
1474ae1f36dSjonathan #include <netinet/ip6.h>
1484ae1f36dSjonathan #endif
1494ae1f36dSjonathan 
1504ae1f36dSjonathan #include <netinet/tcp.h>
1514ae1f36dSjonathan #include <netinet/tcp_fsm.h>
1524ae1f36dSjonathan #include <netinet/tcp_seq.h>
1534ae1f36dSjonathan #include <netinet/tcp_timer.h>
1544ae1f36dSjonathan #include <netinet/tcp_var.h>
1554ae1f36dSjonathan #include <netinet/tcp_debug.h>
1564ae1f36dSjonathan 
1574ae1f36dSjonathan /* SACK block pool. */
1589d2101a2Spooka static struct pool sackhole_pool;
1599d2101a2Spooka 
1609d2101a2Spooka void
tcp_sack_init(void)1614b50cb78Smatt tcp_sack_init(void)
1629d2101a2Spooka {
1639d2101a2Spooka 
1649d2101a2Spooka 	pool_init(&sackhole_pool, sizeof(struct sackhole), 0, 0, 0,
1659d2101a2Spooka 	    "sackholepl", NULL, IPL_SOFTNET);
1669d2101a2Spooka }
16794e70819Syamt 
16894e70819Syamt static struct sackhole *
sack_allochole(struct tcpcb * tp)16994e70819Syamt sack_allochole(struct tcpcb *tp)
17094e70819Syamt {
17194e70819Syamt 	struct sackhole *hole;
17294e70819Syamt 
17394e70819Syamt 	if (tp->snd_numholes >= tcp_sack_tp_maxholes ||
17494e70819Syamt 	    tcp_sack_globalholes >= tcp_sack_globalmaxholes) {
17594e70819Syamt 		return NULL;
17694e70819Syamt 	}
17794e70819Syamt 	hole = pool_get(&sackhole_pool, PR_NOWAIT);
17894e70819Syamt 	if (hole == NULL) {
17994e70819Syamt 		return NULL;
18094e70819Syamt 	}
18194e70819Syamt 	tp->snd_numholes++;
18294e70819Syamt 	tcp_sack_globalholes++;
18394e70819Syamt 
18494e70819Syamt 	return hole;
18594e70819Syamt }
18694e70819Syamt 
18794e70819Syamt static struct sackhole *
sack_inserthole(struct tcpcb * tp,tcp_seq start,tcp_seq end,struct sackhole * prev)18894e70819Syamt sack_inserthole(struct tcpcb *tp, tcp_seq start, tcp_seq end,
18994e70819Syamt     struct sackhole *prev)
19094e70819Syamt {
19194e70819Syamt 	struct sackhole *hole;
19294e70819Syamt 
19394e70819Syamt 	hole = sack_allochole(tp);
19494e70819Syamt 	if (hole == NULL) {
19594e70819Syamt 		return NULL;
19694e70819Syamt 	}
19794e70819Syamt 	hole->start = hole->rxmit = start;
19894e70819Syamt 	hole->end = end;
19994e70819Syamt 	if (prev != NULL) {
20094e70819Syamt 		TAILQ_INSERT_AFTER(&tp->snd_holes, prev, hole, sackhole_q);
20194e70819Syamt 	} else {
20294e70819Syamt 		TAILQ_INSERT_TAIL(&tp->snd_holes, hole, sackhole_q);
20394e70819Syamt 	}
20494e70819Syamt 	return hole;
20594e70819Syamt }
20694e70819Syamt 
20794e70819Syamt static struct sackhole *
sack_removehole(struct tcpcb * tp,struct sackhole * hole)20894e70819Syamt sack_removehole(struct tcpcb *tp, struct sackhole *hole)
20994e70819Syamt {
21094e70819Syamt 	struct sackhole *next;
21194e70819Syamt 
21294e70819Syamt 	next = TAILQ_NEXT(hole, sackhole_q);
21394e70819Syamt 	tp->snd_numholes--;
21494e70819Syamt 	tcp_sack_globalholes--;
21594e70819Syamt 	TAILQ_REMOVE(&tp->snd_holes, hole, sackhole_q);
21694e70819Syamt 	pool_put(&sackhole_pool, hole);
21794e70819Syamt 
21894e70819Syamt 	return next;
21994e70819Syamt }
2204ae1f36dSjonathan 
221c9cf49acSyamt /*
222c9cf49acSyamt  * tcp_new_dsack: record the reception of a duplicated segment.
223c9cf49acSyamt  */
224c9cf49acSyamt 
2254ae1f36dSjonathan void
tcp_new_dsack(struct tcpcb * tp,tcp_seq seq,u_int32_t len)2264ae1f36dSjonathan tcp_new_dsack(struct tcpcb *tp, tcp_seq seq, u_int32_t len)
2274ae1f36dSjonathan {
228c9cf49acSyamt 
2294ae1f36dSjonathan 	if (TCP_SACK_ENABLED(tp)) {
2304ae1f36dSjonathan 		tp->rcv_dsack_block.left = seq;
2314ae1f36dSjonathan 		tp->rcv_dsack_block.right = seq + len;
2324ae1f36dSjonathan 		tp->rcv_sack_flags |= TCPSACK_HAVED;
2334ae1f36dSjonathan 	}
2344ae1f36dSjonathan }
2354ae1f36dSjonathan 
236c9cf49acSyamt /*
237c9cf49acSyamt  * tcp_sack_option: parse the given SACK option and update the scoreboard.
238c9cf49acSyamt  */
239c9cf49acSyamt 
2404ae1f36dSjonathan void
tcp_sack_option(struct tcpcb * tp,const struct tcphdr * th,const u_char * cp,int optlen)241c31e2223Syamt tcp_sack_option(struct tcpcb *tp, const struct tcphdr *th, const u_char *cp,
242c31e2223Syamt     int optlen)
2434ae1f36dSjonathan {
244ed8b840fSyamt 	struct sackblk
245ed8b840fSyamt 	    t_sack_block[(MAX_TCPOPTLEN - 2) / (sizeof(u_int32_t) * 2)];
2464ae1f36dSjonathan 	struct sackblk *sack = NULL;
2474ae1f36dSjonathan 	struct sackhole *cur = NULL;
2484ae1f36dSjonathan 	struct sackhole *tmp = NULL;
249c31e2223Syamt 	const char *lp = cp + 2;
2505a0a4d9dSyamt 	int i, j, num_sack_blks;
2514ae1f36dSjonathan 	tcp_seq left, right, acked;
2524ae1f36dSjonathan 
2534ae1f36dSjonathan 	/*
2540eb940bcSkurahone 	 * If we aren't processing SACK responses, this is not an ACK
2550eb940bcSkurahone 	 * or the peer sends us a sack option with invalid length, don't
2564ae1f36dSjonathan 	 * update the scoreboard.
2574ae1f36dSjonathan 	 */
2580eb940bcSkurahone 	if (!TCP_SACK_ENABLED(tp) || ((th->th_flags & TH_ACK) == 0) ||
2590eb940bcSkurahone 			(optlen % 8 != 2 || optlen < 10)) {
2604ae1f36dSjonathan 		return;
2614ae1f36dSjonathan 	}
2624ae1f36dSjonathan 
263f7707899Skurahone 	/*
264f7707899Skurahone 	 * If we don't want any SACK holes to be allocated, just return.
265f7707899Skurahone 	 */
266f7707899Skurahone 	if (tcp_sack_globalmaxholes == 0 || tcp_sack_tp_maxholes == 0) {
267f7707899Skurahone 		return;
268f7707899Skurahone 	}
269f7707899Skurahone 
2700eb940bcSkurahone 	/* If the ACK is outside [snd_una, snd_max], ignore the SACK options. */
2710eb940bcSkurahone 	if (SEQ_LT(th->th_ack, tp->snd_una) || SEQ_GT(th->th_ack, tp->snd_max))
2720eb940bcSkurahone 		return;
2730eb940bcSkurahone 
2744ae1f36dSjonathan 	/*
2754ae1f36dSjonathan 	 * Extract SACK blocks.
2764ae1f36dSjonathan 	 *
2774ae1f36dSjonathan 	 * Note that t_sack_block is sorted so that we only need to do
2784ae1f36dSjonathan 	 * one pass over the sequence number space. (SACK "fast-path")
2794ae1f36dSjonathan 	 */
2804ae1f36dSjonathan 	num_sack_blks = optlen / 8;
2814ae1f36dSjonathan 	acked = (SEQ_GT(th->th_ack, tp->snd_una)) ? th->th_ack : tp->snd_una;
28278f5b5f9Sreinoud 	for (i = 0; i < num_sack_blks; i++, lp += sizeof(uint32_t) * 2) {
28378f5b5f9Sreinoud 		memcpy(&left, lp, sizeof(uint32_t));
28478f5b5f9Sreinoud 		memcpy(&right, lp + sizeof(uint32_t), sizeof(uint32_t));
285fd5005e8Syamt 		left = ntohl(left);
286fd5005e8Syamt 		right = ntohl(right);
2874ae1f36dSjonathan 
288b8690cc2Syamt 		if (SEQ_LEQ(right, acked) || SEQ_GT(right, tp->snd_max) ||
2894ae1f36dSjonathan 		    SEQ_GEQ(left, right)) {
2904ae1f36dSjonathan 			/* SACK entry that's old, or invalid. */
2914ae1f36dSjonathan 			i--;
2924ae1f36dSjonathan 			num_sack_blks--;
2934ae1f36dSjonathan 			continue;
2944ae1f36dSjonathan 		}
2954ae1f36dSjonathan 
2964ae1f36dSjonathan 		/* Insertion sort. */
2971152380aSyamt 		for (j = i; (j > 0) && SEQ_LT(left, t_sack_block[j - 1].left);
2981152380aSyamt 		    j--) {
2994ae1f36dSjonathan 			t_sack_block[j].left = t_sack_block[j - 1].left;
3004ae1f36dSjonathan 			t_sack_block[j].right = t_sack_block[j - 1].right;
3014ae1f36dSjonathan 		}
3024ae1f36dSjonathan 		t_sack_block[j].left = left;
3034ae1f36dSjonathan 		t_sack_block[j].right = right;
3044ae1f36dSjonathan 	}
3054ae1f36dSjonathan 
3064ae1f36dSjonathan 	/* Update the scoreboard. */
3074ae1f36dSjonathan 	cur = TAILQ_FIRST(&tp->snd_holes);
3084ae1f36dSjonathan 	for (i = 0; i < num_sack_blks; i++) {
3094ae1f36dSjonathan 		sack = &t_sack_block[i];
3104ae1f36dSjonathan 		/*
3114ae1f36dSjonathan 		 * FACK TCP.  Update snd_fack so we can enter Fast
3124ae1f36dSjonathan 		 * Recovery early.
3134ae1f36dSjonathan 		 */
3144ae1f36dSjonathan 		if (SEQ_GEQ(sack->right, tp->snd_fack))
3154ae1f36dSjonathan 			tp->snd_fack = sack->right;
3164ae1f36dSjonathan 
3174ae1f36dSjonathan 		if (TAILQ_EMPTY(&tp->snd_holes)) {
3184ae1f36dSjonathan 			/* First hole. */
31994e70819Syamt 			cur = sack_inserthole(tp, th->th_ack, sack->left, NULL);
3204ae1f36dSjonathan 			if (cur == NULL) {
3214ae1f36dSjonathan 				/* ENOBUFS, bail out*/
3224ae1f36dSjonathan 				return;
3234ae1f36dSjonathan 			}
3244ae1f36dSjonathan 			tp->rcv_lastsack = sack->right;
3254ae1f36dSjonathan 			continue; /* With next sack block */
3264ae1f36dSjonathan 		}
3274ae1f36dSjonathan 
3284ae1f36dSjonathan 		/* Go through the list of holes. */
3294ae1f36dSjonathan 		while (cur) {
330ff614e11Syamt 			if (SEQ_LEQ(sack->right, cur->start))
3314ae1f36dSjonathan 				/* SACKs data before the current hole */
3324ae1f36dSjonathan 				break; /* No use going through more holes */
3334ae1f36dSjonathan 
3344ae1f36dSjonathan 			if (SEQ_GEQ(sack->left, cur->end)) {
3354ae1f36dSjonathan 				/* SACKs data beyond the current hole */
3364ae1f36dSjonathan 				cur = TAILQ_NEXT(cur, sackhole_q);
3374ae1f36dSjonathan 				continue;
3384ae1f36dSjonathan 			}
3394ae1f36dSjonathan 
3404ae1f36dSjonathan 			if (SEQ_LEQ(sack->left, cur->start)) {
3414ae1f36dSjonathan 				/* Data acks at least the beginning of hole */
3424ae1f36dSjonathan 				if (SEQ_GEQ(sack->right, cur->end)) {
3434ae1f36dSjonathan 					/* Acks entire hole, so delete hole */
34494e70819Syamt 					cur = sack_removehole(tp, cur);
3454ae1f36dSjonathan 					break;
3464ae1f36dSjonathan 				}
3474ae1f36dSjonathan 
3484ae1f36dSjonathan 				/* Otherwise, move start of hole forward */
3494ae1f36dSjonathan 				cur->start = sack->right;
3504ae1f36dSjonathan 				cur->rxmit = SEQ_MAX(cur->rxmit, cur->start);
3514ae1f36dSjonathan 				break;
3524ae1f36dSjonathan 			}
3534ae1f36dSjonathan 
3544ae1f36dSjonathan 			if (SEQ_GEQ(sack->right, cur->end)) {
3554ae1f36dSjonathan 				/* Move end of hole backward. */
3564ae1f36dSjonathan 				cur->end = sack->left;
3574ae1f36dSjonathan 				cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
3584ae1f36dSjonathan 				cur = TAILQ_NEXT(cur, sackhole_q);
3594ae1f36dSjonathan 				break;
3604ae1f36dSjonathan 			}
3614ae1f36dSjonathan 
3624ae1f36dSjonathan 			if (SEQ_LT(cur->start, sack->left) &&
3634ae1f36dSjonathan 			    SEQ_GT(cur->end, sack->right)) {
3644ae1f36dSjonathan 				/*
3654ae1f36dSjonathan 				 * ACKs some data in middle of a hole; need to
3664ae1f36dSjonathan 				 * split current hole
3674ae1f36dSjonathan 				 */
36894e70819Syamt 				tmp = sack_inserthole(tp, sack->right, cur->end,
36994e70819Syamt 				    cur);
3704ae1f36dSjonathan 				if (tmp == NULL) {
3714ae1f36dSjonathan 					return;
3724ae1f36dSjonathan 				}
3734ae1f36dSjonathan 				tmp->rxmit = SEQ_MAX(cur->rxmit, tmp->start);
3744ae1f36dSjonathan 				cur->end = sack->left;
3754ae1f36dSjonathan 				cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
376e55b9169Syamt 				cur = tmp;
3774ae1f36dSjonathan 				break;
3784ae1f36dSjonathan 			}
3794ae1f36dSjonathan 		}
3804ae1f36dSjonathan 
3814ae1f36dSjonathan 		/* At this point, we have reached the tail of the list. */
3824ae1f36dSjonathan 		if (SEQ_LT(tp->rcv_lastsack, sack->left)) {
3834ae1f36dSjonathan 			/*
3844ae1f36dSjonathan 			 * Need to append new hole at end.
3854ae1f36dSjonathan 			 */
38694e70819Syamt 			cur = sack_inserthole(tp, tp->rcv_lastsack, sack->left,
38794e70819Syamt 			    NULL);
38894e70819Syamt 			if (cur == NULL) {
389f7707899Skurahone 				return;
390f7707899Skurahone 			}
3914ae1f36dSjonathan 		}
392a0f802e2Syamt 		if (SEQ_LT(tp->rcv_lastsack, sack->right)) {
393a0f802e2Syamt 			tp->rcv_lastsack = sack->right;
394a0f802e2Syamt 		}
3954ae1f36dSjonathan 	}
3964ae1f36dSjonathan }
3974ae1f36dSjonathan 
398c9cf49acSyamt /*
399c9cf49acSyamt  * tcp_del_sackholes: remove holes covered by a cumulative ACK.
400c9cf49acSyamt  */
401c9cf49acSyamt 
4024ae1f36dSjonathan void
tcp_del_sackholes(struct tcpcb * tp,const struct tcphdr * th)403c31e2223Syamt tcp_del_sackholes(struct tcpcb *tp, const struct tcphdr *th)
4044ae1f36dSjonathan {
4054ae1f36dSjonathan 	/* Max because this could be an older ack that just arrived. */
4064ae1f36dSjonathan 	tcp_seq lastack = SEQ_GT(th->th_ack, tp->snd_una) ?
4074ae1f36dSjonathan 		th->th_ack : tp->snd_una;
4084ae1f36dSjonathan 	struct sackhole *cur = TAILQ_FIRST(&tp->snd_holes);
4094ae1f36dSjonathan 
4104ae1f36dSjonathan 	while (cur) {
4114ae1f36dSjonathan 		if (SEQ_LEQ(cur->end, lastack)) {
41294e70819Syamt 			cur = sack_removehole(tp, cur);
4134ae1f36dSjonathan 		} else if (SEQ_LT(cur->start, lastack)) {
4144ae1f36dSjonathan 			cur->start = lastack;
4154ae1f36dSjonathan 			if (SEQ_LT(cur->rxmit, cur->start))
4164ae1f36dSjonathan 				cur->rxmit = cur->start;
4174ae1f36dSjonathan 			break;
4184ae1f36dSjonathan 		} else
4194ae1f36dSjonathan 			break;
4204ae1f36dSjonathan 	}
4214ae1f36dSjonathan }
4224ae1f36dSjonathan 
423c9cf49acSyamt /*
424c9cf49acSyamt  * tcp_free_sackholes: clear the scoreboard.
425c9cf49acSyamt  */
426c9cf49acSyamt 
4274ae1f36dSjonathan void
tcp_free_sackholes(struct tcpcb * tp)4284ae1f36dSjonathan tcp_free_sackholes(struct tcpcb *tp)
4294ae1f36dSjonathan {
4304ae1f36dSjonathan 	struct sackhole *sack;
4314ae1f36dSjonathan 
4324ae1f36dSjonathan 	/* Free up the SACK hole list. */
43394e70819Syamt 	while ((sack = TAILQ_FIRST(&tp->snd_holes)) != NULL) {
43494e70819Syamt 		sack_removehole(tp, sack);
4354ae1f36dSjonathan 	}
43694e70819Syamt 	KASSERT(tp->snd_numholes == 0);
4374ae1f36dSjonathan }
4384ae1f36dSjonathan 
4394ae1f36dSjonathan /*
4404ae1f36dSjonathan  * Returns pointer to a sackhole if there are any pending retransmissions;
4414ae1f36dSjonathan  * NULL otherwise.
4424ae1f36dSjonathan  */
4434ae1f36dSjonathan struct sackhole *
tcp_sack_output(struct tcpcb * tp,int * sack_bytes_rexmt)4444ae1f36dSjonathan tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt)
4454ae1f36dSjonathan {
4464ae1f36dSjonathan 	struct sackhole *cur = NULL;
4474ae1f36dSjonathan 
4484ae1f36dSjonathan 	if (!TCP_SACK_ENABLED(tp))
4494ae1f36dSjonathan 		return (NULL);
4504ae1f36dSjonathan 
4514ae1f36dSjonathan 	*sack_bytes_rexmt = 0;
4524ae1f36dSjonathan 	TAILQ_FOREACH(cur, &tp->snd_holes, sackhole_q) {
4534ae1f36dSjonathan 		if (SEQ_LT(cur->rxmit, cur->end)) {
4541152380aSyamt 			if (SEQ_LT(cur->rxmit, tp->snd_una)) {
4551152380aSyamt 				/* old SACK hole */
4564ae1f36dSjonathan 				continue;
4574ae1f36dSjonathan 			}
4584ae1f36dSjonathan 			*sack_bytes_rexmt += (cur->rxmit - cur->start);
4594ae1f36dSjonathan 			break;
4604ae1f36dSjonathan 		}
4614ae1f36dSjonathan 		*sack_bytes_rexmt += (cur->rxmit - cur->start);
4624ae1f36dSjonathan 	}
4634ae1f36dSjonathan 
4644ae1f36dSjonathan 	return (cur);
4654ae1f36dSjonathan }
4664ae1f36dSjonathan 
4674ae1f36dSjonathan /*
4684ae1f36dSjonathan  * After a timeout, the SACK list may be rebuilt.  This SACK information
4694ae1f36dSjonathan  * should be used to avoid retransmitting SACKed data.  This function
4704ae1f36dSjonathan  * traverses the SACK list to see if snd_nxt should be moved forward.
4714ae1f36dSjonathan  */
4724ae1f36dSjonathan void
tcp_sack_adjust(struct tcpcb * tp)4734ae1f36dSjonathan tcp_sack_adjust(struct tcpcb *tp)
4744ae1f36dSjonathan {
4754ae1f36dSjonathan 	struct sackhole *cur = TAILQ_FIRST(&tp->snd_holes);
4764ae1f36dSjonathan 	struct sackhole *n = NULL;
4774ae1f36dSjonathan 
4784ae1f36dSjonathan 	if (TAILQ_EMPTY(&tp->snd_holes))
4794ae1f36dSjonathan 		return; /* No holes */
4804ae1f36dSjonathan 	if (SEQ_GEQ(tp->snd_nxt, tp->rcv_lastsack))
4814ae1f36dSjonathan 		return; /* We're already beyond any SACKed blocks */
4824ae1f36dSjonathan 
4834ae1f36dSjonathan 	/*
4844ae1f36dSjonathan 	 * Two cases for which we want to advance snd_nxt:
4854ae1f36dSjonathan 	 * i) snd_nxt lies between end of one hole and beginning of another
4864ae1f36dSjonathan 	 * ii) snd_nxt lies between end of last hole and rcv_lastsack
4874ae1f36dSjonathan 	 */
4884ae1f36dSjonathan 	while ((n = TAILQ_NEXT(cur, sackhole_q)) != NULL) {
4894ae1f36dSjonathan 		if (SEQ_LT(tp->snd_nxt, cur->end))
4904ae1f36dSjonathan 			return;
4914ae1f36dSjonathan 		if (SEQ_GEQ(tp->snd_nxt, n->start))
4924ae1f36dSjonathan 			cur = n;
4934ae1f36dSjonathan 		else {
4944ae1f36dSjonathan 			tp->snd_nxt = n->start;
4954ae1f36dSjonathan 			return;
4964ae1f36dSjonathan 		}
4974ae1f36dSjonathan 	}
4984ae1f36dSjonathan 	if (SEQ_LT(tp->snd_nxt, cur->end))
4994ae1f36dSjonathan 		return;
5004ae1f36dSjonathan 	tp->snd_nxt = tp->rcv_lastsack;
5014ae1f36dSjonathan 
5024ae1f36dSjonathan 	return;
5034ae1f36dSjonathan }
5040446b7c3Syamt 
505c9cf49acSyamt /*
506c9cf49acSyamt  * tcp_sack_numblks: return the number of SACK blocks to send.
507c9cf49acSyamt  */
508c9cf49acSyamt 
5090446b7c3Syamt int
tcp_sack_numblks(const struct tcpcb * tp)510df05ca70Syamt tcp_sack_numblks(const struct tcpcb *tp)
5110446b7c3Syamt {
512df05ca70Syamt 	int numblks;
5130446b7c3Syamt 
514df05ca70Syamt 	if (!TCP_SACK_ENABLED(tp)) {
5150446b7c3Syamt 		return 0;
5160446b7c3Syamt 	}
5170446b7c3Syamt 
518df05ca70Syamt 	numblks = (((tp->rcv_sack_flags & TCPSACK_HAVED) != 0) ? 1 : 0) +
519df05ca70Syamt 	    tp->t_segqlen;
520df05ca70Syamt 
521df05ca70Syamt 	if (numblks == 0) {
522df05ca70Syamt 		return 0;
523df05ca70Syamt 	}
524df05ca70Syamt 
525df05ca70Syamt 	if (numblks > TCP_SACK_MAX) {
526df05ca70Syamt 		numblks = TCP_SACK_MAX;
527df05ca70Syamt 	}
528df05ca70Syamt 
529df05ca70Syamt 	return numblks;
5300446b7c3Syamt }
53180e1bbb7Syamt 
53280e1bbb7Syamt #if defined(DDB)
53380e1bbb7Syamt void sack_dump(const struct tcpcb *);
53480e1bbb7Syamt 
53580e1bbb7Syamt void
sack_dump(const struct tcpcb * tp)53680e1bbb7Syamt sack_dump(const struct tcpcb *tp)
53780e1bbb7Syamt {
53880e1bbb7Syamt 	const struct sackhole *cur;
53980e1bbb7Syamt 
54080e1bbb7Syamt 	printf("snd_una=%" PRIu32 ", snd_max=%" PRIu32 "\n",
54180e1bbb7Syamt 	    tp->snd_una, tp->snd_max);
54280e1bbb7Syamt 	printf("rcv_lastsack=%" PRIu32 ", snd_fack=%" PRIu32 "\n",
54380e1bbb7Syamt 	    tp->rcv_lastsack, tp->snd_fack);
54480e1bbb7Syamt 	printf("numholes=%d\n", tp->snd_numholes);
54580e1bbb7Syamt 	TAILQ_FOREACH(cur, &tp->snd_holes, sackhole_q) {
54680e1bbb7Syamt 		printf("\t%" PRIu32 "-%" PRIu32 ", rxmit=%" PRIu32 "\n",
54780e1bbb7Syamt 		    cur->start, cur->end, cur->rxmit);
54880e1bbb7Syamt 	}
54980e1bbb7Syamt }
55080e1bbb7Syamt #endif /* defined(DDB) */
551