1 /*	$OpenBSD: tcp_input.c,v 1.57 2000/02/21 21:42:13 provos Exp $	*/
2 /*	$NetBSD: tcp_input.c,v 1.23 1996/02/13 23:43:44 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)tcp_input.c	8.5 (Berkeley) 4/10/94
37  */
38 
39 /*
40 %%% portions-copyright-nrl-95
41 Portions of this software are Copyright 1995-1998 by Randall Atkinson,
42 Ronald Lee, Daniel McDonald, Bao Phan, and Chris Winters. All Rights
43 Reserved. All rights under this copyright have been assigned to the US
44 Naval Research Laboratory (NRL). The NRL Copyright Notice and License
45 Agreement Version 1.1 (January 17, 1995) applies to these portions of the
46 software.
47 You should have received a copy of the license with this software. If you
48 didn't get a copy, you may request one from <license@ipv6.nrl.navy.mil>.
49 */
50 
51 #ifndef TUBA_INCLUDE
52 #include <sys/param.h>
53 #include <sys/systm.h>
54 #include <sys/malloc.h>
55 #include <sys/mbuf.h>
56 #include <sys/protosw.h>
57 #include <sys/socket.h>
58 #include <sys/socketvar.h>
59 #include <sys/errno.h>
60 
61 #include <net/if.h>
62 #include <net/route.h>
63 
64 #include <netinet/in.h>
65 #include <netinet/in_systm.h>
66 #include <netinet/ip.h>
67 #include <netinet/in_pcb.h>
68 #include <netinet/ip_var.h>
69 #include <netinet/tcp.h>
70 #include <netinet/tcp_fsm.h>
71 #include <netinet/tcp_seq.h>
72 #include <netinet/tcp_timer.h>
73 #include <netinet/tcp_var.h>
74 #include <netinet/tcpip.h>
75 #include <netinet/tcp_debug.h>
76 #include <dev/rndvar.h>
77 #include <machine/stdarg.h>
78 #include <sys/md5k.h>
79 
80 #ifdef IPSEC
81 #include <netinet/ip_ipsp.h>
82 #endif /* IPSEC */
83 
84 #ifdef INET6
85 #ifndef INET
86 #include <netinet/in.h>
87 #endif
88 #include <sys/domain.h>
89 #include <netinet6/in6_var.h>
90 #include <netinet/ip6.h>
91 #include <netinet6/ip6_var.h>
92 #include <netinet6/tcpipv6.h>
93 #include <netinet/icmp6.h>
94 #include <netinet6/nd6.h>
95 
96 #ifndef CREATE_IPV6_MAPPED
97 #define CREATE_IPV6_MAPPED(a6, a4) \
98 do { \
99 	bzero(&(a6), sizeof(a6));			\
100 	(a6).s6_addr[10] = (a6).s6_addr[11] = 0xff;	\
101 	*(u_int32_t *)&(a6).s6_addr[12] = (a4);		\
102 } while (0)
103 #endif
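/*
 * A v4-mapped address (RFC 2373) has the form ::ffff:a.b.c.d: bytes
 * 0-9 are zero, bytes 10-11 are 0xff, and bytes 12-15 carry the v4
 * address, so 10.0.0.1 maps to s6_addr
 * { 0,0,0,0, 0,0,0,0, 0,0,0xff,0xff, 10,0,0,1 }.
 */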
104 
106 struct	tcpipv6hdr tcp_saveti6;
107 
108 /* for the packet header length in the mbuf */
109 #define M_PH_LEN(m)      (((struct mbuf *)(m))->m_pkthdr.len)
110 #define M_V6_LEN(m)      (M_PH_LEN(m) - sizeof(struct ip6_hdr))
111 #define M_V4_LEN(m)      (M_PH_LEN(m) - sizeof(struct ip))
112 #endif /* INET6 */
113 
114 int	tcprexmtthresh = 3;
115 struct	tcpiphdr tcp_saveti;
116 int	tcptv_keep_init = TCPTV_KEEP_INIT;
117 
118 extern u_long sb_max;
119 
120 #endif /* TUBA_INCLUDE */
121 #define TCP_PAWS_IDLE	(24 * 24 * 60 * 60 * PR_SLOWHZ)
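/*
 * 24 days expressed in slow-timeout ticks: with PR_SLOWHZ == 2 this is
 * 24 * 86400 * 2 = 4147200 ticks.  PAWS uses this below to decide when
 * ts_recent is too old to be trusted.
 */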
122 
123 /* for modulo comparisons of timestamps */
124 #define TSTMP_LT(a,b)	((int)((a)-(b)) < 0)
125 #define TSTMP_GEQ(a,b)	((int)((a)-(b)) >= 0)
126 
127 /*
128  * Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint.
129  */
130 #ifdef INET6
131 #define ND6_HINT(tp) \
132 do { \
133 	if (tp && tp->t_inpcb && (tp->t_inpcb->inp_flags & INP_IPV6) \
134 	 && !(tp->t_inpcb->inp_flags & INP_IPV6_MAPPED) \
135 	 && tp->t_inpcb->inp_route6.ro_rt) { \
136 		nd6_nud_hint(tp->t_inpcb->inp_route6.ro_rt, NULL); \
137 	} \
138 } while (0)
139 #else
140 #define ND6_HINT(tp)
141 #endif
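/*
 * Forward progress on a TCP connection counts as upper-layer
 * confirmation of neighbor reachability (RFC 2461, section 7.3.1),
 * letting ND skip needless unicast Neighbor Solicitation probes.
 */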
142 
143 /*
144  * Insert segment th into the reassembly queue of tcp with control
145  * block tp.  Return TH_FIN if reassembly now includes a segment with
146  * FIN.  The common case (segment is the next to be received on an
147  * established connection and the queue is empty) is handled inline
148  * by the header-prediction code in tcp_input(), avoiding linkage
149  * into and removal from the queue and repetition of various conversions.
150  * Set DELACK for segments received in order, but ack immediately
151  * when segments are out of order (so fast retransmit can work).
152  */
153 
154 #ifndef TUBA_INCLUDE
155 
156 int
157 tcp_reass(tp, th, m, tlen)
158 	register struct tcpcb *tp;
159 	register struct tcphdr *th;
160 	struct mbuf *m;
161 	int *tlen;
162 {
163 	register struct ipqent *p, *q, *nq, *tiqe;
164 	struct socket *so = tp->t_inpcb->inp_socket;
165 	int flags;
166 
167 	/*
168 	 * Call with th==0 after becoming established to
169 	 * force pre-ESTABLISHED data up to the user socket.
170 	 */
171 	if (th == 0)
172 		goto present;
173 
174 	/*
175 	 * Allocate a new queue entry, before we throw away any data.
176 	 * If we can't, just drop the packet.  XXX
177 	 */
178 	MALLOC(tiqe, struct ipqent *, sizeof (struct ipqent), M_IPQ, M_NOWAIT);
179 	if (tiqe == NULL) {
180 		tcpstat.tcps_rcvmemdrop++;
181 		m_freem(m);
182 		return (0);
183 	}
184 
185 	/*
186 	 * Find a segment which begins after this one does.
187 	 */
188 	for (p = NULL, q = tp->segq.lh_first; q != NULL;
189 	    p = q, q = q->ipqe_q.le_next)
190 		if (SEQ_GT(q->ipqe_tcp->th_seq, th->th_seq))
191 			break;
192 
193 	/*
194 	 * If there is a preceding segment, it may provide some of
195 	 * our data already.  If so, drop the data from the incoming
196 	 * segment.  If it provides all of our data, drop us.
197 	 */
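	/*
	 * Example: if p covers sequence space [100, 300) and the new
	 * segment arrives with th_seq 200 and *tlen 250, then i is
	 * 300 - 200 = 100; the first 100 bytes duplicate p, so they are
	 * trimmed and we continue with th_seq 300 and *tlen 150.
	 */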
198 	if (p != NULL) {
199 		register struct tcphdr *phdr = p->ipqe_tcp;
200 		register int i;
201 
202 		/* conversion to int (in i) handles seq wraparound */
203 		i = phdr->th_seq + phdr->th_reseqlen - th->th_seq;
204 		if (i > 0) {
205 		        if (i >= *tlen) {
206 				tcpstat.tcps_rcvduppack++;
207 				tcpstat.tcps_rcvdupbyte += *tlen;
208 				m_freem(m);
209 				FREE(tiqe, M_IPQ);
210 				return (0);
211 			}
212 			m_adj(m, i);
213 			*tlen -= i;
214 			th->th_seq += i;
215 		}
216 	}
217 	tcpstat.tcps_rcvoopack++;
218 	tcpstat.tcps_rcvoobyte += *tlen;
219 
220 	/*
221 	 * While we overlap succeeding segments trim them or,
222 	 * if they are completely covered, dequeue them.
223 	 */
224 	for (; q != NULL; q = nq) {
225 		register struct tcphdr *qhdr = q->ipqe_tcp;
226 		register int i = (th->th_seq + *tlen) - qhdr->th_seq;
227 
228 		if (i <= 0)
229 			break;
230 		if (i < qhdr->th_reseqlen) {
231 			qhdr->th_seq += i;
232 			qhdr->th_reseqlen -= i;
233 			m_adj(q->ipqe_m, i);
234 			break;
235 		}
236 		nq = q->ipqe_q.le_next;
237 		m_freem(q->ipqe_m);
238 		LIST_REMOVE(q, ipqe_q);
239 		FREE(q, M_IPQ);
240 	}
241 
242 	/* Insert the new fragment queue entry into place. */
243 	tiqe->ipqe_m = m;
244 	th->th_reseqlen = *tlen;
245 	tiqe->ipqe_tcp = th;
246 	if (p == NULL) {
247 		LIST_INSERT_HEAD(&tp->segq, tiqe, ipqe_q);
248 	} else {
249 		LIST_INSERT_AFTER(p, tiqe, ipqe_q);
250 	}
251 
252 present:
253 	/*
254 	 * Present data to user, advancing rcv_nxt through
255 	 * completed sequence space.
256 	 */
257 	if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
258 		return (0);
259 	q = tp->segq.lh_first;
260 	if (q == NULL || q->ipqe_tcp->th_seq != tp->rcv_nxt)
261 		return (0);
262 	if (tp->t_state == TCPS_SYN_RECEIVED && q->ipqe_tcp->th_reseqlen)
263 		return (0);
264 	do {
265 		tp->rcv_nxt += q->ipqe_tcp->th_reseqlen;
266 		flags = q->ipqe_tcp->th_flags & TH_FIN;
267 
268 		nq = q->ipqe_q.le_next;
269 		LIST_REMOVE(q, ipqe_q);
270 		ND6_HINT(tp);
271 		if (so->so_state & SS_CANTRCVMORE)
272 			m_freem(q->ipqe_m);
273 		else
274 			sbappend(&so->so_rcv, q->ipqe_m);
275 		FREE(q, M_IPQ);
276 		q = nq;
277 	} while (q != NULL && q->ipqe_tcp->th_seq == tp->rcv_nxt);
278 	sorwakeup(so);
279 	return (flags);
280 }
281 
282 /*
283  * First check for a port-specific bomb. We do not want to drop half-opens
284  * for other ports if this is the only port being bombed.  We only check
285  * the bottom 40 half-open connections, to avoid wasting too much time.
286  *
287  * Otherwise it is more likely a generic SYN bomb, so delete the oldest
288  * half-open connection.
289  */
290 void
291 tcpdropoldhalfopen(avoidtp, port)
292 	struct tcpcb *avoidtp;
293 	u_int16_t port;
294 {
295 	register struct inpcb *inp;
296 	register struct tcpcb *tp;
297 	int ncheck = 40;
298 	int s;
299 
300 	s = splnet();
301 	inp = tcbtable.inpt_queue.cqh_first;
302 	if (inp)						/* XXX */
303 	for (; inp != (struct inpcb *)&tcbtable.inpt_queue && --ncheck;
304 	    inp = inp->inp_queue.cqe_prev) {
305 		if ((tp = (struct tcpcb *)inp->inp_ppcb) &&
306 		    tp != avoidtp &&
307 		    tp->t_state == TCPS_SYN_RECEIVED &&
308 		    port == inp->inp_lport) {
309 			tcp_close(tp);
310 			goto done;
311 		}
312 	}
313 
314 	inp = tcbtable.inpt_queue.cqh_first;
315 	if (inp)						/* XXX */
316 	for (; inp != (struct inpcb *)&tcbtable.inpt_queue;
317 	    inp = inp->inp_queue.cqe_prev) {
318 		if ((tp = (struct tcpcb *)inp->inp_ppcb) &&
319 		    tp != avoidtp &&
320 		    tp->t_state == TCPS_SYN_RECEIVED) {
321 			tcp_close(tp);
322 			goto done;
323 		}
324 	}
325 done:
326 	splx(s);
327 }
328 
329 #if defined(INET6) && !defined(TCP6)
330 int
331 tcp6_input(mp, offp, proto)
332 	struct mbuf **mp;
333 	int *offp, proto;
334 {
335 	struct mbuf *m = *mp;
336 
337 #if defined(NFAITH) && 0 < NFAITH
338 	if (m->m_pkthdr.rcvif) {
339 		if (m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
340 			/* XXX send icmp6 host/port unreach? */
341 			m_freem(m);
342 			return IPPROTO_DONE;
343 		}
344 	}
345 #endif
346 
347 	/*
348 	 * draft-itojun-ipv6-tcp-to-anycast
349 	 * is there a better place to put this?
350 	 */
351 	if (m->m_flags & M_ANYCAST6) {
352 		if (m->m_len >= sizeof(struct ip6_hdr)) {
353 			struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
354 			icmp6_error(m, ICMP6_DST_UNREACH,
355 				ICMP6_DST_UNREACH_ADDR,
356 				(caddr_t)&ip6->ip6_dst - (caddr_t)ip6);
357 		} else
358 			m_freem(m);
359 		return IPPROTO_DONE;
360 	}
361 
362 	tcp_input(m, *offp, proto);
363 	return IPPROTO_DONE;
364 }
365 #endif
366 
367 /*
368  * TCP input routine, follows pages 65-76 of the
369  * protocol specification dated September, 1981 very closely.
370  */
371 void
372 #if __STDC__
373 tcp_input(struct mbuf *m, ...)
374 #else
375 tcp_input(m, va_alist)
376 	register struct mbuf *m;
377 #endif
378 {
379 	register struct tcpiphdr *ti;
380 	register struct inpcb *inp;
381 	caddr_t optp = NULL;
382 	int optlen = 0;
383 	int len, tlen, off;
384 	register struct tcpcb *tp = 0;
385 	register int tiflags;
386 	struct socket *so = NULL;
387 	int todrop, acked, ourfinisacked, needoutput = 0;
388 	int hdroptlen = 0;
389 	short ostate = 0;
390 	struct in_addr laddr;
391 	int dropsocket = 0;
392 	int iss = 0;
393 	u_long tiwin;
394 	u_int32_t ts_val, ts_ecr;
395 	int ts_present = 0;
396 	int iphlen;
397 	va_list ap;
398 	register struct tcphdr *th;
399 #ifdef IPSEC
400 	struct tdb *tdb = NULL;
401 #endif /* IPSEC */
402 #ifdef INET6
403 	struct in6_addr laddr6;
404 	unsigned short is_ipv6;     /* Type of incoming datagram. */
405 	struct ip6_hdr *ipv6 = NULL;
406 #endif /* INET6 */
407 
408 	va_start(ap, m);
409 	iphlen = va_arg(ap, int);
410 	va_end(ap);
411 
412 	tcpstat.tcps_rcvtotal++;
413 
414 #ifdef IPSEC
415 	/* Save the last SA which was used to process the mbuf */
416 	if ((m->m_flags & (M_CONF|M_AUTH)) && m->m_pkthdr.tdbi) {
417 		struct tdb_ident *tdbi = m->m_pkthdr.tdbi;
418 		/* XXX gettdb() should really be called at spltdb().      */
419 		/* XXX this is splsoftnet(), currently they are the same. */
420 		tdb = gettdb(tdbi->spi, &tdbi->dst, tdbi->proto);
421 		free(m->m_pkthdr.tdbi, M_TEMP);
422 		m->m_pkthdr.tdbi = NULL;
423 	}
424 #endif /* IPSEC */
425 #ifdef INET6
426 	/*
427 	 * Before we do ANYTHING, we have to figure out if it's TCP/IPv6 or
428 	 * TCP/IPv4.
429 	 */
430 	is_ipv6 = mtod(m, struct ip *)->ip_v == 6;
431 #endif /* INET6 */
432 
433 	/*
434 	 * Get IP and TCP header together in first mbuf.
435 	 * Note: IP leaves IP header in first mbuf.
436 	 */
437 #ifndef INET6
438 	ti = mtod(m, struct tcpiphdr *);
439 #else /* INET6 */
440 	if (!is_ipv6)
441 #endif /* INET6 */
442 	if (iphlen > sizeof (struct ip)) {
443 #if 0	/*XXX*/
444 		ip_stripoptions(m, (struct mbuf *)0);
445 #else
446 		printf("IP options are not allowed\n");
447 		m_freem(m);
448 		return;
449 #endif
450 	}
451 	if (m->m_len < iphlen + sizeof(struct tcphdr)) {
452 		if ((m = m_pullup2(m, iphlen + sizeof(struct tcphdr))) == 0) {
453 			tcpstat.tcps_rcvshort++;
454 			return;
455 		}
456 #ifndef INET6
457 		ti = mtod(m, struct tcpiphdr *);
458 #endif /* INET6 */
459 	}
460 
461 	tlen = m->m_pkthdr.len - iphlen;
462 
463 #ifdef INET6
464 	/*
465 	 * After that, do initial segment processing which is still very
466 	 * dependent on what IP version you're using.
467 	 */
468 
469 	if (is_ipv6) {
470 #ifdef DIAGNOSTIC
471 	  if (iphlen < sizeof(struct ip6_hdr)) {
472 	    m_freem(m);
473 	    return;
474 	  }
475 #endif /* DIAGNOSTIC */
476 
477 	  /* strip off any options */
478 	  if (iphlen > sizeof(struct ip6_hdr)) {
479 #if 0 /*XXX*/
480 	    ipv6_stripoptions(m, iphlen);
481 #else
482 		printf("extension headers are not allowed\n");
483 		m_freem(m);
484 		return;
485 #endif
486 	    iphlen = sizeof(struct ip6_hdr);
487 	  }
488 
489 	  ti = NULL;
490 	  ipv6 = mtod(m, struct ip6_hdr *);
491 
492 		/* Be proactive about malicious use of IPv4 mapped addresses */
493 		if (IN6_IS_ADDR_V4MAPPED(&ipv6->ip6_src) ||
494 		    IN6_IS_ADDR_V4MAPPED(&ipv6->ip6_dst)) {
495 			/* XXX stat */
496 			goto drop;
497 		}
498 
499 	  if (in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr), tlen)) {
500 	    tcpstat.tcps_rcvbadsum++;
501 	    goto drop;
502 	  } /* endif in6_cksum */
503 	} else {
504 	  ti = mtod(m, struct tcpiphdr *);
505 #endif /* INET6 */
506 
507 	/*
508 	 * Checksum extended TCP header and data.
509 	 */
510 #ifndef INET6
511 	tlen = ((struct ip *)ti)->ip_len;
512 #endif /* INET6 */
513 	len = sizeof (struct ip) + tlen;
514 	bzero(ti->ti_x1, sizeof ti->ti_x1);
515 	ti->ti_len = (u_int16_t)tlen;
516 	HTONS(ti->ti_len);
517 	if ((ti->ti_sum = in_cksum(m, len)) != 0) {
518 		tcpstat.tcps_rcvbadsum++;
519 		goto drop;
520 	}
521 #ifdef INET6
522 	}
523 #endif /* INET6 */
524 #endif /* TUBA_INCLUDE */
525 
526 	th = (struct tcphdr *)(mtod(m, caddr_t) + iphlen);
527 
528 	/*
529 	 * Check that TCP offset makes sense,
530 	 * pull out TCP options and adjust length.		XXX
531 	 */
532 	off = th->th_off << 2;
533 	if (off < sizeof (struct tcphdr) || off > tlen) {
534 		tcpstat.tcps_rcvbadoff++;
535 		goto drop;
536 	}
537 	tlen -= off;
538 	if (off > sizeof (struct tcphdr)) {
539 		if (m->m_len < iphlen + off) {
540 			if ((m = m_pullup2(m, iphlen + off)) == 0) {
541 				tcpstat.tcps_rcvshort++;
542 				return;
543 			}
544 #ifdef INET6
545 			if (is_ipv6)
546 			  ipv6 = mtod(m, struct ip6_hdr *);
547 			else
548 #endif /* INET6 */
549 			ti = mtod(m, struct tcpiphdr *);
550 			th = (struct tcphdr *)(mtod(m, caddr_t) + iphlen);
551 		}
552 		optlen = off - sizeof (struct tcphdr);
553 		optp = mtod(m, caddr_t) + iphlen + sizeof(struct tcphdr);
554 		/*
555 		 * Do quick retrieval of timestamp options ("options
556 		 * prediction?").  If timestamp is the only option and it's
557 		 * formatted as recommended in RFC 1323 appendix A, we
558 		 * quickly get the values now and not bother calling
559 		 * tcp_dooptions(), etc.
560 		 */
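		/*
		 * That layout is <NOP,NOP,TIMESTAMP,10,ts_val,ts_ecr>, so
		 * the leading 32-bit word is TCPOPT_TSTAMP_HDR (0x0101080a)
		 * and the two timestamps sit at byte offsets 4 and 8.
		 */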
561 		if ((optlen == TCPOLEN_TSTAMP_APPA ||
562 		     (optlen > TCPOLEN_TSTAMP_APPA &&
563 			optp[TCPOLEN_TSTAMP_APPA] == TCPOPT_EOL)) &&
564 		     *(u_int32_t *)optp == htonl(TCPOPT_TSTAMP_HDR) &&
565 		     (th->th_flags & TH_SYN) == 0) {
566 			ts_present = 1;
567 			ts_val = ntohl(*(u_int32_t *)(optp + 4));
568 			ts_ecr = ntohl(*(u_int32_t *)(optp + 8));
569 			optp = NULL;	/* we've parsed the options */
570 		}
571 	}
572 	tiflags = th->th_flags;
573 
574 	/*
575 	 * Convert TCP protocol specific fields to host format.
576 	 */
577 	NTOHL(th->th_seq);
578 	NTOHL(th->th_ack);
579 	NTOHS(th->th_win);
580 	NTOHS(th->th_urp);
581 
582 	/*
583 	 * Locate pcb for segment.
584 	 */
585 findpcb:
586 #ifdef INET6
587 	if (is_ipv6) {
588 	  inp = in6_pcbhashlookup(&tcbtable, &ipv6->ip6_src, th->th_sport,
589 				 &ipv6->ip6_dst, th->th_dport);
590 	} else
591 #endif /* INET6 */
592 	inp = in_pcbhashlookup(&tcbtable, ti->ti_src, ti->ti_sport,
593 	    ti->ti_dst, ti->ti_dport);
594 	if (inp == 0) {
595 		++tcpstat.tcps_pcbhashmiss;
596 #ifdef INET6
597 		if (is_ipv6)
598 			inp = in_pcblookup(&tcbtable, &ipv6->ip6_src,
599 			    th->th_sport, &ipv6->ip6_dst, th->th_dport,
600 			    INPLOOKUP_WILDCARD | INPLOOKUP_IPV6);
601 		else
602 #endif /* INET6 */
603 		inp = in_pcblookup(&tcbtable, &ti->ti_src, ti->ti_sport,
604 		    &ti->ti_dst, ti->ti_dport, INPLOOKUP_WILDCARD);
605 		/*
606 		 * If the state is CLOSED (i.e., TCB does not exist) then
607 		 * all data in the incoming segment is discarded.
608 		 * If the TCB exists but is in CLOSED state, it is embryonic,
609 		 * but should either do a listen or a connect soon.
610 		 */
611 		if (inp == 0) {
612 			++tcpstat.tcps_noport;
613 			goto dropwithreset;
614 		}
615 	}
616 
617 	tp = intotcpcb(inp);
618 	if (tp == 0)
619 		goto dropwithreset;
620 	if (tp->t_state == TCPS_CLOSED)
621 		goto drop;
622 
623 	/* Unscale the window into a 32-bit value. */
624 	if ((tiflags & TH_SYN) == 0)
625 		tiwin = th->th_win << tp->snd_scale;
626 	else
627 		tiwin = th->th_win;
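	/*
	 * E.g. with snd_scale 3, an advertised th_win of 8192 unscales
	 * to a 65536-byte send window.
	 */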
628 
629 	so = inp->inp_socket;
630 	if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) {
631 		if (so->so_options & SO_DEBUG) {
632 			ostate = tp->t_state;
633 #ifdef INET6
634 			if (is_ipv6)
635 			  tcp_saveti6 = *(mtod(m, struct tcpipv6hdr *));
636 			else
637 #endif /* INET6 */
638 			tcp_saveti = *ti;
639 		}
640 		if (so->so_options & SO_ACCEPTCONN) {
641 			struct socket *so1;
642 
643 			so1 = sonewconn(so, 0);
644 			if (so1 == NULL) {
645 				tcpdropoldhalfopen(tp, th->th_dport);
646 				so1 = sonewconn(so, 0);
647 				if (so1 == NULL)
648 					goto drop;
649 			}
650 			so = so1;
651 			/*
652 			 * This is ugly, but ....
653 			 *
654 			 * Mark socket as temporary until we're
655 			 * committed to keeping it.  The code at
656 			 * ``drop'' and ``dropwithreset'' checks the
657 			 * flag dropsocket to see if the temporary
658 			 * socket created here should be discarded.
659 			 * We mark the socket as discardable until
660 			 * we're committed to it below in TCPS_LISTEN.
661 			 */
662 			dropsocket++;
663 #ifdef IPSEC
664 			/*
665 			 * We need to copy the required security levels
666 			 * from the old pcb.
667 			 */
668 			{
669 			  struct inpcb *newinp = (struct inpcb *)so->so_pcb;
670 			  bcopy(inp->inp_seclevel, newinp->inp_seclevel,
671 				sizeof(inp->inp_seclevel));
672 			  newinp->inp_secrequire = inp->inp_secrequire;
673 			}
674 #endif /* IPSEC */
675 #ifdef INET6
676 			/*
677 			 * inp still has the OLD in_pcb stuff, set the
678 			 * v6-related flags on the new guy, too.   This is
679 			 * done particularly for the case where an AF_INET6
680 			 * socket is bound only to a port, and a v4 connection
681 			 * comes in on that port.
682 			 * we also copy the flowinfo from the original pcb
683 			 * to the new one.
684 			 */
685 			{
686 			  int flags = inp->inp_flags;
687 			  struct inpcb *oldinpcb = inp;
688 
689 			  inp = (struct inpcb *)so->so_pcb;
690 			  inp->inp_flags |= (flags & (INP_IPV6 | INP_IPV6_UNDEC
691 						      | INP_IPV6_MAPPED));
692 			  if ((inp->inp_flags & INP_IPV6) &&
693 			      !(inp->inp_flags & INP_IPV6_MAPPED)) {
694 			    inp->inp_ipv6.ip6_hlim =
695 			      oldinpcb->inp_ipv6.ip6_hlim;
696 			    inp->inp_ipv6.ip6_flow =
697 			      oldinpcb->inp_ipv6.ip6_flow;
698 			  }
699 			}
700 #else /* INET6 */
701 			inp = (struct inpcb *)so->so_pcb;
702 #endif /* INET6 */
703 			inp->inp_lport = th->th_dport;
704 #ifdef INET6
705 			if (is_ipv6) {
706 			  inp->inp_laddr6 = ipv6->ip6_dst;
707 			  inp->inp_fflowinfo = htonl(0x0fffffff) &
708 			    ipv6->ip6_flow;
709 
710 			  /*inp->inp_options = ip6_srcroute();*/ /* soon. */
711 			  /* still need to tweak outbound options
712 			     processing to include this mbuf in
713 			     the right place and put the correct
714 			     NextHdr values in the right places.
715 			     XXX  rja */
716 			} else {
717 			  if (inp->inp_flags & INP_IPV6) {/* v4 to v6 socket */
718 			    CREATE_IPV6_MAPPED(inp->inp_laddr6,
719 			      ti->ti_dst.s_addr);
720 			  } else {
721 #endif /* INET6 */
722 			    inp->inp_laddr = ti->ti_dst;
723 			    inp->inp_options = ip_srcroute();
724 #ifdef INET6
725 			  }
726 			}
727 #endif /* INET6 */
728 			in_pcbrehash(inp);
729 			tp = intotcpcb(inp);
730 			tp->t_state = TCPS_LISTEN;
731 
732 			/* Compute proper scaling value from buffer space
733 			 */
734 			while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
735 			   TCP_MAXWIN << tp->request_r_scale < so->so_rcv.sb_hiwat)
736 				tp->request_r_scale++;
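			/*
			 * E.g. a 128 kB socket buffer yields
			 * request_r_scale 2: 65535 << 1 is still two
			 * bytes short of 131072, while 65535 << 2 is not.
			 */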
737 		}
738 	}
739 
740 #ifdef IPSEC
741 	/* Check if this socket requires security for incoming packets */
742 	if ((inp->inp_seclevel[SL_AUTH] >= IPSEC_LEVEL_REQUIRE &&
743 	     !(m->m_flags & M_AUTH)) ||
744 	    (inp->inp_seclevel[SL_ESP_TRANS] >= IPSEC_LEVEL_REQUIRE &&
745 	     !(m->m_flags & M_CONF))) {
746 #ifdef notyet
747 #ifdef INET6
748 		if (is_ipv6)
749 			icmp6_error(m, ICMPV6_BLAH, ICMPV6_BLAH, 0);
750 		else
751 #endif /* INET6 */
752 		icmp_error(m, ICMP_BLAH, ICMP_BLAH, 0, 0);
753 #endif /* notyet */
754 		tcpstat.tcps_rcvnosec++;
755 		goto drop;
756 	}
757 	/* Use tdb_bind_out for this inp's outbound communication */
758 	if (tdb)
759 		tdb_add_inp(tdb, inp);
760 #endif /*IPSEC */
761 
762 	/*
763 	 * Segment received on connection.
764 	 * Reset idle time and keep-alive timer.
765 	 */
766 	tp->t_idle = 0;
767 	if (tp->t_state != TCPS_SYN_RECEIVED)
768 		tp->t_timer[TCPT_KEEP] = tcp_keepidle;
769 
770 #ifdef TCP_SACK
771 	if (!tp->sack_disable)
772 		tcp_del_sackholes(tp, th); /* Delete stale SACK holes */
773 #endif /* TCP_SACK */
774 
775 	/*
776 	 * Process options if not in LISTEN state,
777 	 * else do it below (after getting remote address).
778 	 */
779 	if (optp && tp->t_state != TCPS_LISTEN)
780 		tcp_dooptions(tp, optp, optlen, th,
781 			&ts_present, &ts_val, &ts_ecr);
782 
783 #ifdef TCP_SACK
784 	if (!tp->sack_disable) {
785 		tp->rcv_laststart = th->th_seq; /* last rec'vd segment*/
786 		tp->rcv_lastend = th->th_seq + tlen;
787 	}
788 #endif /* TCP_SACK */
789 	/*
790 	 * Header prediction: check for the two common cases
791 	 * of a uni-directional data xfer.  If the packet has
792 	 * no control flags, is in-sequence, the window didn't
793 	 * change and we're not retransmitting, it's a
794 	 * candidate.  If the length is zero and the ack moved
795 	 * forward, we're the sender side of the xfer.  Just
796 	 * free the data acked & wake any higher level process
797 	 * that was blocked waiting for space.  If the length
798 	 * is non-zero and the ack didn't move, we're the
799 	 * receiver side.  If we're getting packets in-order
800 	 * (the reassembly queue is empty), add the data to
801 	 * the socket buffer and note that we need a delayed ack.
802 	 */
803 	if (tp->t_state == TCPS_ESTABLISHED &&
804 	    (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
805 	    (!ts_present || TSTMP_GEQ(ts_val, tp->ts_recent)) &&
806 	    th->th_seq == tp->rcv_nxt &&
807 	    tiwin && tiwin == tp->snd_wnd &&
808 	    tp->snd_nxt == tp->snd_max) {
809 
810 		/*
811 		 * If last ACK falls within this segment's sequence numbers,
812 		 *  record the timestamp.
813 		 * Fix from Braden, see Stevens p. 870
814 		 */
815 		if (ts_present && SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
816 			tp->ts_recent_age = tcp_now;
817 			tp->ts_recent = ts_val;
818 		}
819 
820 		if (tlen == 0) {
821 			if (SEQ_GT(th->th_ack, tp->snd_una) &&
822 			    SEQ_LEQ(th->th_ack, tp->snd_max) &&
823 			    tp->snd_cwnd >= tp->snd_wnd &&
824 			    tp->t_dupacks == 0) {
825 				/*
826 				 * this is a pure ack for outstanding data.
827 				 */
828 				++tcpstat.tcps_predack;
829 				if (ts_present)
830 					tcp_xmit_timer(tp, tcp_now-ts_ecr+1);
831 				else if (tp->t_rtt &&
832 					    SEQ_GT(th->th_ack, tp->t_rtseq))
833 					tcp_xmit_timer(tp, tp->t_rtt);
834 				acked = th->th_ack - tp->snd_una;
835 				tcpstat.tcps_rcvackpack++;
836 				tcpstat.tcps_rcvackbyte += acked;
837 				ND6_HINT(tp);
838 				sbdrop(&so->so_snd, acked);
839 				tp->snd_una = th->th_ack;
840 #if defined(TCP_SACK)
841 				/*
842 				 * We want snd_last to track snd_una so
843 				 * as to avoid sequence wraparound problems
844 				 * for very large transfers.
845 				 */
846 				tp->snd_last = tp->snd_una;
847 #endif /* TCP_SACK */
848 #if defined(TCP_SACK) && defined(TCP_FACK)
849 				tp->snd_fack = tp->snd_una;
850 				tp->retran_data = 0;
851 #endif /* TCP_FACK */
852 				m_freem(m);
853 
854 				/*
855 				 * If all outstanding data are acked, stop
856 				 * retransmit timer, otherwise restart timer
857 				 * using current (possibly backed-off) value.
858 				 * If process is waiting for space,
859 				 * wakeup/selwakeup/signal.  If data
860 				 * are ready to send, let tcp_output
861 				 * decide between more output or persist.
862 				 */
863 				if (tp->snd_una == tp->snd_max)
864 					tp->t_timer[TCPT_REXMT] = 0;
865 				else if (tp->t_timer[TCPT_PERSIST] == 0)
866 					tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
867 
868 				if (sb_notify(&so->so_snd))
869 					sowwakeup(so);
870 				if (so->so_snd.sb_cc)
871 					(void) tcp_output(tp);
872 				return;
873 			}
874 		} else if (th->th_ack == tp->snd_una &&
875 		    tp->segq.lh_first == NULL &&
876 		    tlen <= sbspace(&so->so_rcv)) {
877 			/*
878 			 * This is a pure, in-sequence data packet
879 			 * with nothing on the reassembly queue and
880 			 * we have enough buffer space to take it.
881 			 */
882 #ifdef TCP_SACK
883 			/* Clean receiver SACK report if present */
884 			if (!tp->sack_disable && tp->rcv_numsacks)
885 				tcp_clean_sackreport(tp);
886 #endif /* TCP_SACK */
887 			++tcpstat.tcps_preddat;
888 			tp->rcv_nxt += tlen;
889 			tcpstat.tcps_rcvpack++;
890 			tcpstat.tcps_rcvbyte += tlen;
891 			ND6_HINT(tp);
892 			/*
893 			 * Drop TCP, IP headers and TCP options then add data
894 			 * to socket buffer.
895 			 */
896 			m_adj(m, iphlen + off);
897 			sbappend(&so->so_rcv, m);
898 			sorwakeup(so);
899 			if (th->th_flags & TH_PUSH)
900 				tp->t_flags |= TF_ACKNOW;
901 			else
902 				tp->t_flags |= TF_DELACK;
903 			return;
904 		}
905 	}
906 
907 	/*
908 	 * Compute mbuf offset to TCP data segment.
909 	 */
910 	hdroptlen = iphlen + off;
911 
912 	/*
913 	 * Calculate amount of space in receive window,
914 	 * and then do TCP input processing.
915 	 * Receive window is amount of space in rcv queue,
916 	 * but not less than advertised window.
917 	 */
918 	{ int win;
919 
920 	win = sbspace(&so->so_rcv);
921 	if (win < 0)
922 		win = 0;
923 	tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
924 	}
925 
926 	switch (tp->t_state) {
927 
928 	/*
929 	 * If the state is LISTEN then ignore segment if it contains an RST.
930 	 * If the segment contains an ACK then it is bad and send a RST.
931 	 * If it does not contain a SYN then it is not interesting; drop it.
932 	 * If it is from this socket, drop it, it must be forged.
933 	 * Don't bother responding if the destination was a broadcast.
934 	 * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial
935 	 * tp->iss, and send a segment:
936 	 *     <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
937 	 * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.
938 	 * Fill in remote peer address fields if not previously specified.
939 	 * Enter SYN_RECEIVED state, and process any other fields of this
940 	 * segment in this state.
941 	 */
942 	case TCPS_LISTEN: {
943 		struct mbuf *am;
944 		register struct sockaddr_in *sin;
945 #ifdef INET6
946 		register struct sockaddr_in6 *sin6;
947 #endif /* INET6 */
948 
949 		if (tiflags & TH_RST)
950 			goto drop;
951 		if (tiflags & TH_ACK)
952 			goto dropwithreset;
953 		if ((tiflags & TH_SYN) == 0)
954 			goto drop;
955 		if (th->th_dport == th->th_sport) {
956 #ifdef INET6
957 		  if (is_ipv6) {
958 		    if (IN6_ARE_ADDR_EQUAL(&ipv6->ip6_src, &ipv6->ip6_dst))
959 		      goto drop;
960 		  } else {
961 #endif /* INET6 */
962 		    if (ti->ti_dst.s_addr == ti->ti_src.s_addr)
963 		      goto drop;
964 #ifdef INET6
965 		  }
966 #endif /* INET6 */
967 		}
968 
969 		/*
970 		 * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN
971 		 * in_broadcast() should never return true on a received
972 		 * packet with M_BCAST not set.
973 		 */
974 		if (m->m_flags & (M_BCAST|M_MCAST))
975 		  goto drop;
976 #ifdef INET6
977 		if (is_ipv6) {
978 			/* XXX What about IPv6 Anycasting ?? :-(  rja */
979 			if (IN6_IS_ADDR_MULTICAST(&ipv6->ip6_dst))
980 				goto drop;
981 		} else
982 #endif /* INET6 */
983 		if (IN_MULTICAST(ti->ti_dst.s_addr))
984 			goto drop;
985 		am = m_get(M_DONTWAIT, MT_SONAME);	/* XXX */
986 		if (am == NULL)
987 			goto drop;
988 #ifdef INET6
989 		if (is_ipv6) {
990 		  /*
991 		   * This is probably the place to set the tp->pf value.
992 		   * (Don't forget to do it in the v4 code as well!)
993 		   *
994 		   * Also, remember to blank out things like flowlabel, or
995 		   * set flowlabel for accepted sockets in v6.
996 		   *
997 		   * FURTHERMORE, this is PROBABLY the place where the whole
998 		   * business of key munging is set up for passive
999 		   * connections.
1000 		   */
1001 		  am->m_len = sizeof(struct sockaddr_in6);
1002 		  sin6 = mtod(am, struct sockaddr_in6 *);
1003 		  sin6->sin6_family = AF_INET6;
1004 		  sin6->sin6_len = sizeof(struct sockaddr_in6);
1005 		  sin6->sin6_addr = ipv6->ip6_src;
1006 		  sin6->sin6_port = th->th_sport;
1007 		  sin6->sin6_flowinfo = htonl(0x0fffffff) &
1008 		    inp->inp_ipv6.ip6_flow;
1009 		  laddr6 = inp->inp_laddr6;
1010 		  if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6))
1011 		    inp->inp_laddr6 = ipv6->ip6_dst;
1012 		  /* This is a good optimization. */
1013 		  if (in6_pcbconnect(inp, am)) {
1014 		    inp->inp_laddr6 = laddr6;
1015 		    (void) m_free(am);
1016 		    goto drop;
1017 		  } /* endif in6_pcbconnect() */
1018 		  tp->pf = PF_INET6;
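		  /*
		   * XXX Unlike the v4 path below, am does not appear to
		   * be freed on a successful in6_pcbconnect(); this
		   * looks like a small mbuf leak.
		   */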
1019 		} else {
1020 		  /*
1021 		   * Letting incoming v4 datagrams reach valid
1022 		   * PF_INET6 sockets causes some overhead here.
1023 		   */
1024 		  if (inp->inp_flags & INP_IPV6) {
1025 		    if (!(inp->inp_flags & (INP_IPV6_UNDEC|INP_IPV6_MAPPED))) {
1026 		      (void) m_free(am);
1027 		      goto drop;
1028 		    }
1029 
1030 		    am->m_len = sizeof(struct sockaddr_in6);
1031 
1032 		    sin6 = mtod(am, struct sockaddr_in6 *);
1033 		    sin6->sin6_family = AF_INET6;
1034 		    sin6->sin6_len = sizeof(*sin6);
1035 		    CREATE_IPV6_MAPPED(sin6->sin6_addr, ti->ti_src.s_addr);
1036 		    sin6->sin6_port = th->th_sport;
1037 		    sin6->sin6_flowinfo = 0;
1038 
1039 		    laddr6 = inp->inp_laddr6;
1040 		    if (inp->inp_laddr.s_addr == INADDR_ANY)
1041 		      CREATE_IPV6_MAPPED(inp->inp_laddr6, ti->ti_dst.s_addr);
1042 
1043 		    /*
1044 		     * The pcb initially has the v6 default hoplimit
1045 		     * set. We're sending v4 packets so we need to set
1046 		     * the v4 ttl and tos.
1047 		     */
1048 		    inp->inp_ip.ip_ttl = ip_defttl;
1049 		    inp->inp_ip.ip_tos = 0;
1050 
1051 		    if (in6_pcbconnect(inp, am)) {
1052 		      inp->inp_laddr6 = laddr6;
1053 		      (void) m_freem(am);
1054 		      goto drop;
1055 		    }
1056 		    tp->pf = PF_INET;
1057 		  } else {
1058 #endif /* INET6 */
1059 		am->m_len = sizeof (struct sockaddr_in);
1060 		sin = mtod(am, struct sockaddr_in *);
1061 		sin->sin_family = AF_INET;
1062 		sin->sin_len = sizeof(*sin);
1063 		sin->sin_addr = ti->ti_src;
1064 		sin->sin_port = ti->ti_sport;
1065 		bzero((caddr_t)sin->sin_zero, sizeof(sin->sin_zero));
1066 		laddr = inp->inp_laddr;
1067 		if (inp->inp_laddr.s_addr == INADDR_ANY)
1068 			inp->inp_laddr = ti->ti_dst;
1069 		if (in_pcbconnect(inp, am)) {
1070 			inp->inp_laddr = laddr;
1071 			(void) m_free(am);
1072 			goto drop;
1073 		}
1074 		(void) m_free(am);
1075 		tp->pf = PF_INET;
1076 #ifdef INET6
1077 		  }  /* if (inp->inp_flags & INP_IPV6) */
1078 		} /* if (is_ipv6) */
1079 #endif /* INET6 */
1080 		tp->t_template = tcp_template(tp);
1081 		if (tp->t_template == 0) {
1082 			tp = tcp_drop(tp, ENOBUFS);
1083 			dropsocket = 0;		/* socket is already gone */
1084 			goto drop;
1085 		}
1086 		if (optp)
1087 			tcp_dooptions(tp, optp, optlen, th,
1088 				&ts_present, &ts_val, &ts_ecr);
1089 #ifdef TCP_SACK
1090 		/*
1091 		 * If peer did not send a SACK_PERMITTED option (i.e., if
1092 		 * tcp_dooptions() did not set TF_SACK_PERMIT), set
1093 		 * sack_disable to 1 if it is currently 0.
1094 		 */
1095 		if (!tp->sack_disable)
1096 			if ((tp->t_flags & TF_SACK_PERMIT) == 0)
1097 				tp->sack_disable = 1;
1098 #endif
1099 
1100 		if (iss)
1101 			tp->iss = iss;
1102 		else
1103 			tp->iss = tcp_iss;
1104 #ifdef TCP_COMPAT_42
1105 		tcp_iss += TCP_ISSINCR/2;
1106 #else /* TCP_COMPAT_42 */
1107 		tcp_iss += arc4random() % TCP_ISSINCR + 1;
1108 #endif /* !TCP_COMPAT_42 */
1109 		tp->irs = th->th_seq;
1110 		tcp_sendseqinit(tp);
1111 #if defined (TCP_SACK)
1112 		tp->snd_last = tp->snd_una;
1113 #endif /* TCP_SACK */
1114 #if defined(TCP_SACK) && defined(TCP_FACK)
1115 		tp->snd_fack = tp->snd_una;
1116 		tp->retran_data = 0;
1117 		tp->snd_awnd = 0;
1118 #endif /* TCP_FACK */
1119 		tcp_rcvseqinit(tp);
1120 		tp->t_flags |= TF_ACKNOW;
1121 		tp->t_state = TCPS_SYN_RECEIVED;
1122 		tp->t_timer[TCPT_KEEP] = tcptv_keep_init;
1123 		dropsocket = 0;		/* committed to socket */
1124 		tcpstat.tcps_accepts++;
1125 		goto trimthenstep6;
1126 		}
1127 
1128 	/*
1129 	 * If the state is SYN_RECEIVED:
1130 	 * 	if seg contains SYN/ACK, send an RST.
1131 	 *	if seg contains an ACK, but not for our SYN/ACK, send an RST.
1132 	 */
1133 
1134 	case TCPS_SYN_RECEIVED:
1135 		if (tiflags & TH_ACK) {
1136 			if (tiflags & TH_SYN) {
1137 				tcpstat.tcps_badsyn++;
1138 				goto dropwithreset;
1139 			}
1140 			if (SEQ_LEQ(th->th_ack, tp->snd_una) ||
1141 			    SEQ_GT(th->th_ack, tp->snd_max))
1142 				goto dropwithreset;
1143 		}
1144 		break;
1145 
1146 	/*
1147 	 * If the state is SYN_SENT:
1148 	 *	if seg contains an ACK, but not for our SYN, drop the input.
1149 	 *	if seg contains a RST, then drop the connection.
1150 	 *	if seg does not contain SYN, then drop it.
1151 	 * Otherwise this is an acceptable SYN segment
1152 	 *	initialize tp->rcv_nxt and tp->irs
1153 	 *	if seg contains ack then advance tp->snd_una
1154 	 *	if SYN has been acked change to ESTABLISHED else SYN_RCVD state
1155 	 *	arrange for segment to be acked (eventually)
1156 	 *	continue processing rest of data/controls, beginning with URG
1157 	 */
1158 	case TCPS_SYN_SENT:
1159 		if ((tiflags & TH_ACK) &&
1160 		    (SEQ_LEQ(th->th_ack, tp->iss) ||
1161 		     SEQ_GT(th->th_ack, tp->snd_max)))
1162 			goto dropwithreset;
1163 		if (tiflags & TH_RST) {
1164 			if (tiflags & TH_ACK)
1165 				tp = tcp_drop(tp, ECONNREFUSED);
1166 			goto drop;
1167 		}
1168 		if ((tiflags & TH_SYN) == 0)
1169 			goto drop;
1170 		if (tiflags & TH_ACK) {
1171 			tp->snd_una = th->th_ack;
1172 			if (SEQ_LT(tp->snd_nxt, tp->snd_una))
1173 				tp->snd_nxt = tp->snd_una;
1174 		}
1175 		tp->t_timer[TCPT_REXMT] = 0;
1176 		tp->irs = th->th_seq;
1177 		tcp_rcvseqinit(tp);
1178 		tp->t_flags |= TF_ACKNOW;
1179 #ifdef TCP_SACK
1180 		/*
1181 		 * If we've sent a SACK_PERMITTED option, and the peer
1182 		 * also replied with one, then TF_SACK_PERMIT should have
1183 		 * been set in tcp_dooptions().  If it was not, disable SACKs.
1184 		 */
1185 		if (!tp->sack_disable)
1186 			if ((tp->t_flags & TF_SACK_PERMIT) == 0)
1187 				tp->sack_disable = 1;
1188 #endif
1189 		if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) {
1190 			tcpstat.tcps_connects++;
1191 			soisconnected(so);
1192 			tp->t_state = TCPS_ESTABLISHED;
1193 			/* Do window scaling on this connection? */
1194 			if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
1195 				(TF_RCVD_SCALE|TF_REQ_SCALE)) {
1196 				tp->snd_scale = tp->requested_s_scale;
1197 				tp->rcv_scale = tp->request_r_scale;
1198 			}
1199 			(void) tcp_reass(tp, (struct tcphdr *)0,
1200 				(struct mbuf *)0, &tlen);
1201 			/*
1202 			 * if we didn't have to retransmit the SYN,
1203 			 * use its rtt as our initial srtt & rtt var.
1204 			 */
1205 			if (tp->t_rtt)
1206 				tcp_xmit_timer(tp, tp->t_rtt);
1207 			/*
1208 			 * Since new data was acked (the SYN), open the
1209 			 * congestion window by one MSS.  We do this
1210 			 * here, because we won't go through the normal
1211 			 * ACK processing below.  And since this is the
1212 			 * start of the connection, we know we are in
1213 			 * the exponential phase of slow-start.
1214 			 */
1215 			tp->snd_cwnd += tp->t_maxseg;
1216 		} else
1217 			tp->t_state = TCPS_SYN_RECEIVED;
1218 
1219 trimthenstep6:
1220 		/*
1221 		 * Advance th->th_seq to correspond to first data byte.
1222 		 * If data, trim to stay within window,
1223 		 * dropping FIN if necessary.
1224 		 */
1225 		th->th_seq++;
1226 		if (tlen > tp->rcv_wnd) {
1227 			todrop = tlen - tp->rcv_wnd;
1228 			m_adj(m, -todrop);
1229 			tlen = tp->rcv_wnd;
1230 			tiflags &= ~TH_FIN;
1231 			tcpstat.tcps_rcvpackafterwin++;
1232 			tcpstat.tcps_rcvbyteafterwin += todrop;
1233 		}
1234 		tp->snd_wl1 = th->th_seq - 1;
1235 		tp->rcv_up = th->th_seq;
1236 		goto step6;
1237 	}
1238 
1239 	/*
1240 	 * States other than LISTEN or SYN_SENT.
1241 	 * First check timestamp, if present.
1242 	 * Then check that at least some bytes of segment are within
1243 	 * receive window.  If segment begins before rcv_nxt,
1244 	 * drop leading data (and SYN); if nothing left, just ack.
1245 	 *
1246 	 * RFC 1323 PAWS: If we have a timestamp reply on this segment
1247 	 * and it's less than ts_recent, drop it.
1248 	 */
1249 	if (ts_present && (tiflags & TH_RST) == 0 && tp->ts_recent &&
1250 	    TSTMP_LT(ts_val, tp->ts_recent)) {
1251 
1252 		/* Check to see if ts_recent is over 24 days old.  */
1253 		if ((int)(tcp_now - tp->ts_recent_age) > TCP_PAWS_IDLE) {
1254 			/*
1255 			 * Invalidate ts_recent.  If this segment updates
1256 			 * ts_recent, the age will be reset later and ts_recent
1257 			 * will get a valid value.  If it does not, setting
1258 			 * ts_recent to zero will at least satisfy the
1259 			 * requirement that zero be placed in the timestamp
1260 			 * echo reply when ts_recent isn't valid.  The
1261 			 * age isn't reset until we get a valid ts_recent
1262 			 * because we don't want out-of-order segments to be
1263 			 * dropped when ts_recent is old.
1264 			 */
1265 			tp->ts_recent = 0;
1266 		} else {
1267 			tcpstat.tcps_rcvduppack++;
1268 			tcpstat.tcps_rcvdupbyte += tlen;
1269 			tcpstat.tcps_pawsdrop++;
1270 			goto dropafterack;
1271 		}
1272 	}
1273 
1274 	todrop = tp->rcv_nxt - th->th_seq;
1275 	if (todrop > 0) {
1276 		if (tiflags & TH_SYN) {
1277 			tiflags &= ~TH_SYN;
1278 			th->th_seq++;
1279 			if (th->th_urp > 1)
1280 				th->th_urp--;
1281 			else
1282 				tiflags &= ~TH_URG;
1283 			todrop--;
1284 		}
1285 		if (todrop >= tlen ||
1286 		    (todrop == tlen && (tiflags & TH_FIN) == 0)) {
1287 			/*
1288 			 * Any valid FIN must be to the left of the
1289 			 * window.  At this point, FIN must be a
1290 			 * duplicate or out-of-sequence, so drop it.
1291 			 */
1292 			tiflags &= ~TH_FIN;
1293 			/*
1294 			 * Send ACK to resynchronize, and drop any data,
1295 			 * but keep on processing for RST or ACK.
1296 			 */
1297 			tp->t_flags |= TF_ACKNOW;
1298 			tcpstat.tcps_rcvdupbyte += todrop = tlen;
1299 			tcpstat.tcps_rcvduppack++;
1300 		} else {
1301 			tcpstat.tcps_rcvpartduppack++;
1302 			tcpstat.tcps_rcvpartdupbyte += todrop;
1303 		}
1304 		hdroptlen += todrop;	/* drop from head afterwards */
1305 		th->th_seq += todrop;
1306 		tlen -= todrop;
1307 		if (th->th_urp > todrop)
1308 			th->th_urp -= todrop;
1309 		else {
1310 			tiflags &= ~TH_URG;
1311 			th->th_urp = 0;
1312 		}
1313 	}
1314 
1315 	/*
1316 	 * If new data are received on a connection after the
1317 	 * user processes are gone, then RST the other end.
1318 	 */
1319 	if ((so->so_state & SS_NOFDREF) &&
1320 	    tp->t_state > TCPS_CLOSE_WAIT && tlen) {
1321 		tp = tcp_close(tp);
1322 		tcpstat.tcps_rcvafterclose++;
1323 		goto dropwithreset;
1324 	}
1325 
1326 	/*
1327 	 * If segment ends after window, drop trailing data
1328 	 * (and PUSH and FIN); if nothing left, just ACK.
1329 	 */
1330 	todrop = (th->th_seq + tlen) - (tp->rcv_nxt+tp->rcv_wnd);
1331 	if (todrop > 0) {
1332 		tcpstat.tcps_rcvpackafterwin++;
1333 		if (todrop >= tlen) {
1334 			tcpstat.tcps_rcvbyteafterwin += tlen;
1335 			/*
1336 			 * If a new connection request is received
1337 			 * while in TIME_WAIT, drop the old connection
1338 			 * and start over if the sequence numbers
1339 			 * are above the previous ones.
1340 			 */
1341 			if (tiflags & TH_SYN &&
1342 			    tp->t_state == TCPS_TIME_WAIT &&
1343 			    SEQ_GT(th->th_seq, tp->rcv_nxt)) {
1344 				iss = tp->snd_nxt + TCP_ISSINCR;
1345 				tp = tcp_close(tp);
1346 				goto findpcb;
1347 			}
1348 			/*
1349 			 * If window is closed can only take segments at
1350 			 * window edge, and have to drop data and PUSH from
1351 			 * incoming segments.  Continue processing, but
1352 			 * remember to ack.  Otherwise, drop segment
1353 			 * and ack.
1354 			 */
1355 			if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) {
1356 				tp->t_flags |= TF_ACKNOW;
1357 				tcpstat.tcps_rcvwinprobe++;
1358 			} else
1359 				goto dropafterack;
1360 		} else
1361 			tcpstat.tcps_rcvbyteafterwin += todrop;
1362 		m_adj(m, -todrop);
1363 		tlen -= todrop;
1364 		tiflags &= ~(TH_PUSH|TH_FIN);
1365 	}
1366 
1367 	/*
1368 	 * If last ACK falls within this segment's sequence numbers,
1369 	 * record its timestamp.
1370 	 * Fix from Braden, see Stevens p. 870
1371 	 */
1372 	if (ts_present && TSTMP_GEQ(ts_val, tp->ts_recent) &&
1373 	    SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
1374 		tp->ts_recent_age = tcp_now;
1375 		tp->ts_recent = ts_val;
1376 	}
1377 
1378 	/*
1379 	 * If the RST bit is set examine the state:
1380 	 *    SYN_RECEIVED STATE:
1381 	 *	If passive open, return to LISTEN state.
1382 	 *	If active open, inform user that connection was refused.
1383 	 *    ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES:
1384 	 *	Inform user that connection was reset, and close tcb.
1385 	 *    CLOSING, LAST_ACK, TIME_WAIT STATES
1386 	 *	Close the tcb.
1387 	 */
1388 	if (tiflags & TH_RST) {
1392 		if (th->th_seq != tp->last_ack_sent)
1394 			goto drop;
1395 
1396 		switch (tp->t_state) {
1397 		case TCPS_SYN_RECEIVED:
1398 			so->so_error = ECONNREFUSED;
1399 			goto close;
1400 
1401 		case TCPS_ESTABLISHED:
1402 		case TCPS_FIN_WAIT_1:
1403 		case TCPS_FIN_WAIT_2:
1404 		case TCPS_CLOSE_WAIT:
1405 			so->so_error = ECONNRESET;
1406 		close:
1407 			tp->t_state = TCPS_CLOSED;
1408 			tcpstat.tcps_drops++;
1409 			tp = tcp_close(tp);
1410 			goto drop;
1411 		case TCPS_CLOSING:
1412 		case TCPS_LAST_ACK:
1413 		case TCPS_TIME_WAIT:
1414 			tp = tcp_close(tp);
1415 			goto drop;
1416 		}
1417 	}
1418 
1419 	/*
1420 	 * If a SYN is in the window, then this is an
1421 	 * error and we send an RST and drop the connection.
1422 	 */
1423 	if (tiflags & TH_SYN) {
1424 		tp = tcp_drop(tp, ECONNRESET);
1425 		goto dropwithreset;
1426 	}
1427 
1428 	/*
1429 	 * If the ACK bit is off we drop the segment and return.
1430 	 */
1431 	if ((tiflags & TH_ACK) == 0) {
1432 		if (tp->t_flags & TF_ACKNOW)
1433 			goto dropafterack;
1434 		else
1435 			goto drop;
1436 	}
1437 
1438 	/*
1439 	 * Ack processing.
1440 	 */
1441 	switch (tp->t_state) {
1442 
1443 	/*
1444 	 * In SYN_RECEIVED state, the ack ACKs our SYN, so enter
1445 	 * ESTABLISHED state and continue processing.
1446 	 * The ACK was checked above.
1447 	 */
1448 	case TCPS_SYN_RECEIVED:
1449 		tcpstat.tcps_connects++;
1450 		soisconnected(so);
1451 		tp->t_state = TCPS_ESTABLISHED;
1452 		/* Do window scaling? */
1453 		if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
1454 			(TF_RCVD_SCALE|TF_REQ_SCALE)) {
1455 			tp->snd_scale = tp->requested_s_scale;
1456 			tp->rcv_scale = tp->request_r_scale;
1457 		}
1458 		(void) tcp_reass(tp, (struct tcphdr *)0, (struct mbuf *)0,
1459 				 &tlen);
1460 		tp->snd_wl1 = th->th_seq - 1;
1461 		/* fall into ... */
1462 
1463 	/*
1464 	 * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
1465 	 * ACKs.  If the ack is in the range
1466 	 *	tp->snd_una < th->th_ack <= tp->snd_max
1467 	 * then advance tp->snd_una to th->th_ack and drop
1468 	 * data from the retransmission queue.  If this ACK reflects
1469 	 * more up to date window information we update our window information.
1470 	 */
1471 	case TCPS_ESTABLISHED:
1472 	case TCPS_FIN_WAIT_1:
1473 	case TCPS_FIN_WAIT_2:
1474 	case TCPS_CLOSE_WAIT:
1475 	case TCPS_CLOSING:
1476 	case TCPS_LAST_ACK:
1477 	case TCPS_TIME_WAIT:
1478 		if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
1479 			/*
1480 			 * Duplicate/old ACK processing.
1481 			 * Increments t_dupacks:
1482 			 *	Pure duplicate (same seq/ack/window, no data)
1483 			 * Doesn't affect t_dupacks:
1484 			 *	Data packets.
1485 			 *	Normal window updates (window opens)
1486 			 * Resets t_dupacks:
1487 			 *	New data ACKed.
1488 			 *	Window shrinks
1489 			 *	Old ACK
1490 			 */
1491 			if (tlen)
1492 				break;
1493 			/*
1494 			 * If we get an old ACK, there is probably packet
1495 			 * reordering going on.  Be conservative and reset
1496 			 * t_dupacks so that we are less aggressive in
1497 			 * doing a fast retransmit.
1498 			 */
1499 			if (th->th_ack != tp->snd_una) {
1500 				tp->t_dupacks = 0;
1501 				break;
1502 			}
1503 			if (tiwin == tp->snd_wnd) {
1504 				tcpstat.tcps_rcvdupack++;
1505 				/*
1506 				 * If we have outstanding data (other than
1507 				 * a window probe), this is a completely
1508 				 * duplicate ack (i.e., window info didn't
1509 				 * change), the ack is the biggest we've
1510 				 * seen and we've seen exactly our rexmt
1511 				 * threshold of them, assume a packet
1512 				 * has been dropped and retransmit it.
1513 				 * Kludge snd_nxt & the congestion
1514 				 * window so we send only this one
1515 				 * packet.
1516 				 *
1517 				 * We know we're losing at the current
1518 				 * window size so do congestion avoidance
1519 				 * (set ssthresh to half the current window
1520 				 * and pull our congestion window back to
1521 				 * the new ssthresh).
1522 				 *
1523 				 * Dup acks mean that packets have left the
1524 				 * network (they're now cached at the receiver)
1525 				 * so bump cwnd by the amount in the receiver
1526 				 * to keep a constant cwnd packets in the
1527 				 * network.
1528 				 */
1529 				if (tp->t_timer[TCPT_REXMT] == 0)
1530 					tp->t_dupacks = 0;
1531 #if defined(TCP_SACK) && defined(TCP_FACK)
1532 				/*
1533 				 * In FACK, can enter fast rec. if the receiver
1534 				 * reports a reass. queue longer than 3 segs.
1535 				 */
1536 				else if (++tp->t_dupacks == tcprexmtthresh ||
1537 				    ((SEQ_GT(tp->snd_fack, tcprexmtthresh *
1538 				    tp->t_maxseg + tp->snd_una)) &&
1539 				    SEQ_GT(tp->snd_una, tp->snd_last))) {
1540 #else
1541 				else if (++tp->t_dupacks == tcprexmtthresh) {
1542 #endif /* TCP_FACK */
1543 					tcp_seq onxt = tp->snd_nxt;
1544 					u_long win =
1545 					    ulmin(tp->snd_wnd, tp->snd_cwnd) /
1546 						2 / tp->t_maxseg;
1547 
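					/*
					 * E.g. with snd_cwnd 11680 and
					 * t_maxseg 1460, win is
					 * 11680 / 2 / 1460 = 4, so
					 * ssthresh below becomes 5840
					 * bytes.
					 */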
1548 #if defined(TCP_SACK)
1549 					if (SEQ_LT(th->th_ack, tp->snd_last)){
1550 					    	/*
1551 						 * False fast retx after
1552 						 * timeout.  Do not cut window.
1553 						 */
1554 						tp->snd_cwnd += tp->t_maxseg;
1555 						tp->t_dupacks = 0;
1556 						(void) tcp_output(tp);
1557 						goto drop;
1558 					}
1559 #endif
1560 					if (win < 2)
1561 						win = 2;
1562 					tp->snd_ssthresh = win * tp->t_maxseg;
1563 #if defined(TCP_SACK)
1564 					tp->snd_last = tp->snd_max;
1565 #endif
1566 #ifdef TCP_SACK
1567 					if (!tp->sack_disable) {
1568 						tp->t_timer[TCPT_REXMT] = 0;
1569 						tp->t_rtt = 0;
1570 						tcpstat.tcps_sndrexmitfast++;
1571 #if defined(TCP_SACK) && defined(TCP_FACK)
1572 						(void) tcp_output(tp);
1573 						/*
1574 						 * During FR, snd_cwnd is held
1575 						 * constant for FACK.
1576 						 */
1577 						tp->snd_cwnd = tp->snd_ssthresh;
1578 						tp->t_dupacks = tcprexmtthresh;
1579 #else
1580 						/*
1581 						 * tcp_output() will send
1582 						 * oldest SACK-eligible rtx.
1583 						 */
1584 						(void) tcp_output(tp);
1585 						tp->snd_cwnd = tp->snd_ssthresh+
1586 					           tp->t_maxseg * tp->t_dupacks;
1587 #endif /* TCP_FACK */
1588 						goto drop;
1589 					}
1590 #endif /* TCP_SACK */
1591 					tp->t_timer[TCPT_REXMT] = 0;
1592 					tp->t_rtt = 0;
1593 					tp->snd_nxt = th->th_ack;
1594 					tp->snd_cwnd = tp->t_maxseg;
1595 					tcpstat.tcps_sndrexmitfast++;
1596 					(void) tcp_output(tp);
1597 
1598 					tp->snd_cwnd = tp->snd_ssthresh +
1599 					    tp->t_maxseg * tp->t_dupacks;
1600 					if (SEQ_GT(onxt, tp->snd_nxt))
1601 						tp->snd_nxt = onxt;
1602 					goto drop;
1603 				} else if (tp->t_dupacks > tcprexmtthresh) {
1604 #if defined(TCP_SACK) && defined(TCP_FACK)
1605 					/*
1606 					 * while (awnd < cwnd)
1607 					 *         sendsomething();
1608 					 */
1609 					if (!tp->sack_disable) {
1610 						if (tp->snd_awnd < tp->snd_cwnd)
1611 							tcp_output(tp);
1612 						goto drop;
1613 					}
1614 #endif /* TCP_FACK */
1615 					tp->snd_cwnd += tp->t_maxseg;
1616 					(void) tcp_output(tp);
1617 					goto drop;
1618 				}
1619 			} else if (tiwin < tp->snd_wnd) {
1620 				/*
1621 				 * The window was retracted!  Previous dup
1622 				 * ACKs may have been due to packets arriving
1623 				 * after the shrunken window, not a missing
1624 				 * packet, so play it safe and reset t_dupacks
1625 				 */
1626 				tp->t_dupacks = 0;
1627 			}
1628 			break;
1629 		}
1630 		/*
1631 		 * If the congestion window was inflated to account
1632 		 * for the other side's cached packets, retract it.
1633 		 */
1634 #if defined(TCP_SACK)
1635 		if (!tp->sack_disable) {
1636 			if (tp->t_dupacks >= tcprexmtthresh) {
1637 				/* Check for a partial ACK */
1638 				if (tcp_sack_partialack(tp, th)) {
1639 #if defined(TCP_SACK) && defined(TCP_FACK)
1640 					/* Force call to tcp_output */
1641 					if (tp->snd_awnd < tp->snd_cwnd)
1642 						needoutput = 1;
1643 #else
1644 					tp->snd_cwnd += tp->t_maxseg;
1645 					needoutput = 1;
1646 #endif /* TCP_FACK */
1647 				} else {
1648 					/* Out of fast recovery */
1649 					tp->snd_cwnd = tp->snd_ssthresh;
1650 					if (tcp_seq_subtract(tp->snd_max,
1651 					    th->th_ack) < tp->snd_ssthresh)
1652 						tp->snd_cwnd =
1653 						   tcp_seq_subtract(tp->snd_max,
1654 					           th->th_ack) + tp->t_maxseg;
1655 					tp->t_dupacks = 0;
1656 #if defined(TCP_SACK) && defined(TCP_FACK)
1657 					if (SEQ_GT(th->th_ack, tp->snd_fack))
1658 						tp->snd_fack = th->th_ack;
1659 #endif /* TCP_FACK */
1660 				}
1661 			}
1662 		} else {
1663 			if (tp->t_dupacks >= tcprexmtthresh &&
1664 			    !tcp_newreno(tp, th)) {
1665 				/* Out of fast recovery */
1666 				tp->snd_cwnd = tp->snd_ssthresh;
1667 				if (tcp_seq_subtract(tp->snd_max, th->th_ack) <
1668 			  	    tp->snd_ssthresh)
1669 					tp->snd_cwnd =
1670 					    tcp_seq_subtract(tp->snd_max,
1671 					    th->th_ack) + tp->t_maxseg;
1672 				tp->t_dupacks = 0;
1673 			}
1674 		}
1675 		if (tp->t_dupacks < tcprexmtthresh)
1676 			tp->t_dupacks = 0;
1677 #else /* else no TCP_SACK */
1678 		if (tp->t_dupacks >= tcprexmtthresh &&
1679 		    tp->snd_cwnd > tp->snd_ssthresh)
1680 			tp->snd_cwnd = tp->snd_ssthresh;
1681 		tp->t_dupacks = 0;
1682 #endif
1683 		if (SEQ_GT(th->th_ack, tp->snd_max)) {
1684 			tcpstat.tcps_rcvacktoomuch++;
1685 			goto dropafterack;
1686 		}
1687 		acked = th->th_ack - tp->snd_una;
1688 		tcpstat.tcps_rcvackpack++;
1689 		tcpstat.tcps_rcvackbyte += acked;
1690 
1691 		/*
1692 		 * If we have a timestamp reply, update smoothed
1693 		 * round trip time.  If no timestamp is present but
1694 		 * transmit timer is running and timed sequence
1695 		 * number was acked, update smoothed round trip time.
1696 		 * Since we now have an rtt measurement, cancel the
1697 		 * timer backoff (cf., Phil Karn's retransmit alg.).
1698 		 * Recompute the initial retransmit timer.
1699 		 */
1700 		if (ts_present)
1701 			tcp_xmit_timer(tp, tcp_now-ts_ecr+1);
1702 		else if (tp->t_rtt && SEQ_GT(th->th_ack, tp->t_rtseq))
1703 			tcp_xmit_timer(tp, tp->t_rtt);
1704 
1705 		/*
1706 		 * If all outstanding data is acked, stop retransmit
1707 		 * timer and remember to restart (more output or persist).
1708 		 * If there is more data to be acked, restart retransmit
1709 		 * timer, using current (possibly backed-off) value.
1710 		 */
1711 		if (th->th_ack == tp->snd_max) {
1712 			tp->t_timer[TCPT_REXMT] = 0;
1713 			needoutput = 1;
1714 		} else if (tp->t_timer[TCPT_PERSIST] == 0)
1715 			tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
1716 		/*
1717 		 * When new data is acked, open the congestion window.
1718 		 * If the window gives us less than ssthresh packets
1719 		 * in flight, open exponentially (maxseg per packet).
1720 		 * Otherwise open linearly: maxseg per window
1721 		 * (maxseg^2 / cwnd per packet).
1722 		 */
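		/*
		 * E.g. with t_maxseg 1460 and snd_cwnd 14600 (above
		 * ssthresh), incr becomes 1460 * 1460 / 14600 = 146 bytes,
		 * i.e. roughly one maxseg per window's worth of acks.
		 */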
1723 		{
1724 		register u_int cw = tp->snd_cwnd;
1725 		register u_int incr = tp->t_maxseg;
1726 
1727 		if (cw > tp->snd_ssthresh)
1728 			incr = incr * incr / cw;
1729 #if defined (TCP_SACK)
1730 		if (SEQ_GEQ(th->th_ack, tp->snd_last))
1731 #endif
1732 		tp->snd_cwnd = min(cw + incr, TCP_MAXWIN<<tp->snd_scale);
1733 		}
1734 		ND6_HINT(tp);
1735 		if (acked > so->so_snd.sb_cc) {
1736 			tp->snd_wnd -= so->so_snd.sb_cc;
1737 			sbdrop(&so->so_snd, (int)so->so_snd.sb_cc);
1738 			ourfinisacked = 1;
1739 		} else {
1740 			sbdrop(&so->so_snd, acked);
1741 			tp->snd_wnd -= acked;
1742 			ourfinisacked = 0;
1743 		}
1744 		if (sb_notify(&so->so_snd))
1745 			sowwakeup(so);
1746 		tp->snd_una = th->th_ack;
1747 		if (SEQ_LT(tp->snd_nxt, tp->snd_una))
1748 			tp->snd_nxt = tp->snd_una;
1749 #if defined (TCP_SACK) && defined (TCP_FACK)
1750 		if (SEQ_GT(tp->snd_una, tp->snd_fack))
1751 			tp->snd_fack = tp->snd_una;
1752 #endif
1753 
1754 		switch (tp->t_state) {
1755 
1756 		/*
1757 		 * In FIN_WAIT_1 STATE in addition to the processing
1758 		 * for the ESTABLISHED state if our FIN is now acknowledged
1759 		 * then enter FIN_WAIT_2.
1760 		 */
1761 		case TCPS_FIN_WAIT_1:
1762 			if (ourfinisacked) {
1763 				/*
1764 				 * If we can't receive any more
1765 				 * data, then closing user can proceed.
1766 				 * Starting the timer is contrary to the
1767 				 * specification, but if we don't get a FIN
1768 				 * we'll hang forever.
1769 				 */
1770 				if (so->so_state & SS_CANTRCVMORE) {
1771 					soisdisconnected(so);
1772 					tp->t_timer[TCPT_2MSL] = tcp_maxidle;
1773 				}
1774 				tp->t_state = TCPS_FIN_WAIT_2;
1775 			}
1776 			break;
1777 
1778 		/*
1779 		 * In CLOSING STATE in addition to the processing for
1780 		 * the ESTABLISHED state if the ACK acknowledges our FIN
1781 		 * then enter the TIME-WAIT state, otherwise ignore
1782 		 * the segment.
1783 		 */
1784 		case TCPS_CLOSING:
1785 			if (ourfinisacked) {
1786 				tp->t_state = TCPS_TIME_WAIT;
1787 				tcp_canceltimers(tp);
1788 				tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1789 				soisdisconnected(so);
1790 			}
1791 			break;
1792 
1793 		/*
1794 		 * In LAST_ACK, we may still be waiting for data to drain
1795 		 * and/or to be acked, as well as for the ack of our FIN.
1796 		 * If our FIN is now acknowledged, delete the TCB,
1797 		 * enter the closed state and return.
1798 		 */
1799 		case TCPS_LAST_ACK:
1800 			if (ourfinisacked) {
1801 				tp = tcp_close(tp);
1802 				goto drop;
1803 			}
1804 			break;
1805 
1806 		/*
1807 		 * In TIME_WAIT state the only thing that should arrive
1808 		 * is a retransmission of the remote FIN.  Acknowledge
1809 		 * it and restart the finack timer.
1810 		 */
1811 		case TCPS_TIME_WAIT:
1812 			tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1813 			goto dropafterack;
1814 		}
1815 	}
1816 
1817 step6:
1818 	/*
1819 	 * Update window information.
1820 	 * Don't look at window if no ACK: TAC's send garbage on first SYN.
1821 	 */
1822 	if ((tiflags & TH_ACK) && (SEQ_LT(tp->snd_wl1, th->th_seq) ||
1823 	    (tp->snd_wl1 == th->th_seq && SEQ_LT(tp->snd_wl2, th->th_ack)) ||
1824 	    (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))) {
1825 		/* keep track of pure window updates */
1826 		if (tlen == 0 &&
1827 		    tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd)
1828 			tcpstat.tcps_rcvwinupd++;
1829 		tp->snd_wnd = tiwin;
1830 		tp->snd_wl1 = th->th_seq;
1831 		tp->snd_wl2 = th->th_ack;
1832 		if (tp->snd_wnd > tp->max_sndwnd)
1833 			tp->max_sndwnd = tp->snd_wnd;
1834 		needoutput = 1;
1835 	}
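	/*
	 * Editor's note (illustrative): the three-way test above is the
	 * RFC 793 rule against stale window updates -- a strictly newer
	 * sequence number, the same sequence with a newer ack, or the
	 * same sequence and ack with a larger advertised window are all
	 * accepted as fresh window information.
	 */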
1836 
1837 	/*
1838 	 * Process segments with URG.
1839 	 */
1840 	if ((tiflags & TH_URG) && th->th_urp &&
1841 	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
1842 		/*
1843 		 * This is a kludge, but if we receive and accept
1844 		 * random urgent pointers, we'll crash in
1845 		 * soreceive.  It's hard to imagine someone
1846 		 * actually wanting to send this much urgent data.
1847 		 */
1848 		if (th->th_urp + so->so_rcv.sb_cc > sb_max) {
1849 			th->th_urp = 0;			/* XXX */
1850 			tiflags &= ~TH_URG;		/* XXX */
1851 			goto dodata;			/* XXX */
1852 		}
1853 		/*
1854 		 * If this segment advances the known urgent pointer,
1855 		 * then mark the data stream.  This should not happen
1856 		 * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
1857 		 * a FIN has been received from the remote side.
1858 		 * In these states we ignore the URG.
1859 		 *
1860 		 * According to RFC961 (Assigned Protocols),
1861 		 * the urgent pointer points to the last octet
1862 		 * of urgent data.  We continue, however,
1863 		 * to consider it to indicate the first octet
1864 		 * of data past the urgent section as the original
1865 		 * spec states (in one of two places).
1866 		 */
1867 		if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) {
1868 			tp->rcv_up = th->th_seq + th->th_urp;
1869 			so->so_oobmark = so->so_rcv.sb_cc +
1870 			    (tp->rcv_up - tp->rcv_nxt) - 1;
1871 			if (so->so_oobmark == 0)
1872 				so->so_state |= SS_RCVATMARK;
1873 			sohasoutofband(so);
1874 			tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA);
1875 		}
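		/*
		 * Editor's example (illustrative): with rcv_nxt = th_seq =
		 * 1000, th_urp = 10 and 50 bytes already queued in so_rcv,
		 * rcv_up becomes 1010 and so_oobmark =
		 * 50 + (1010 - 1000) - 1 = 59, i.e. the urgent byte will
		 * be the 60th byte presented to the user.
		 */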
1876 		/*
1877 		 * Remove out of band data so doesn't get presented to user.
1878 		 * This can happen independent of advancing the URG pointer,
1879 		 * but if two URG's are pending at once, some out-of-band
1880 		 * data may creep in... ick.
1881 		 */
1882 		if (th->th_urp <= (u_int16_t) tlen
1883 #ifdef SO_OOBINLINE
1884 		     && (so->so_options & SO_OOBINLINE) == 0
1885 #endif
1886 		     )
1887 			tcp_pulloutofband(so, th->th_urp, m, hdroptlen);
1888 	} else
1889 		/*
1890 		 * If no out of band data is expected,
1891 		 * pull receive urgent pointer along
1892 		 * with the receive window.
1893 		 */
1894 		if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
1895 			tp->rcv_up = tp->rcv_nxt;
1896 dodata:							/* XXX */
1897 
1898 	/*
1899 	 * Process the segment text, merging it into the TCP sequencing queue,
1900 	 * and arranging for acknowledgment of receipt if necessary.
1901 	 * This process logically involves adjusting tp->rcv_wnd as data
1902 	 * is presented to the user (this happens in tcp_usrreq.c,
1903 	 * case PRU_RCVD).  If a FIN has already been received on this
1904 	 * connection then we just ignore the text.
1905 	 */
1906 	if ((tlen || (tiflags & TH_FIN)) &&
1907 	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
1908 		if (th->th_seq == tp->rcv_nxt && tp->segq.lh_first == NULL &&
1909 		    tp->t_state == TCPS_ESTABLISHED) {
1910 			if (th->th_flags & TH_PUSH)
1911 				tp->t_flags |= TF_ACKNOW;
1912 			else
1913 				tp->t_flags |= TF_DELACK;
1914 			tp->rcv_nxt += tlen;
1915 			tiflags = th->th_flags & TH_FIN;
1916 			tcpstat.tcps_rcvpack++;
1917 			tcpstat.tcps_rcvbyte += tlen;
1918 			ND6_HINT(tp);
1919 			m_adj(m, hdroptlen);
1920 			sbappend(&so->so_rcv, m);
1921 			sorwakeup(so);
1922 		} else {
1923 			m_adj(m, hdroptlen);
1924 			tiflags = tcp_reass(tp, th, m, &tlen);
1925 			tp->t_flags |= TF_ACKNOW;
1926 		}
1927 #ifdef TCP_SACK
1928 		if (!tp->sack_disable)
1929 			tcp_update_sack_list(tp);
1930 #endif
1931 
1932 		/*
1933 		 * The variable len is never referenced again in modern BSD,
1934 		 * so there is no point in computing it.
1935 		 */
1936 #if 0
1937 		/*
1938 		 * Note the amount of data that peer has sent into
1939 		 * our window, in order to estimate the sender's
1940 		 * buffer size.
1941 		 */
1942 		len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt);
1943 #endif /* 0 */
1944 	} else {
1945 		m_freem(m);
1946 		tiflags &= ~TH_FIN;
1947 	}
1948 
1949 	/*
1950 	 * If FIN is received ACK the FIN and let the user know
1951 	 * that the connection is closing.  Ignore a FIN received before
1952 	 * the connection is fully established.
1953 	 */
1954 	if ((tiflags & TH_FIN) && TCPS_HAVEESTABLISHED(tp->t_state)) {
1955 		if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
1956 			socantrcvmore(so);
1957 			tp->t_flags |= TF_ACKNOW;
1958 			tp->rcv_nxt++;
1959 		}
1960 		switch (tp->t_state) {
1961 
1962 		/*
1963 		 * In ESTABLISHED STATE enter the CLOSE_WAIT state.
1964 		 */
1965 		case TCPS_ESTABLISHED:
1966 			tp->t_state = TCPS_CLOSE_WAIT;
1967 			break;
1968 
1969 		/*
1970 		 * If still in FIN_WAIT_1 STATE FIN has not been acked so
1971 		 * enter the CLOSING state.
1972 		 */
1973 		case TCPS_FIN_WAIT_1:
1974 			tp->t_state = TCPS_CLOSING;
1975 			break;
1976 
1977 		/*
1978 		 * In FIN_WAIT_2 state enter the TIME_WAIT state,
1979 		 * starting the time-wait timer, turning off the other
1980 		 * standard timers.
1981 		 */
1982 		case TCPS_FIN_WAIT_2:
1983 			tp->t_state = TCPS_TIME_WAIT;
1984 			tcp_canceltimers(tp);
1985 			tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1986 			soisdisconnected(so);
1987 			break;
1988 
1989 		/*
1990 		 * In TIME_WAIT state restart the 2 MSL time_wait timer.
1991 		 */
1992 		case TCPS_TIME_WAIT:
1993 			tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1994 			break;
1995 		}
1996 	}
1997 	if (so->so_options & SO_DEBUG) {
1998 #ifdef INET6
1999 		if (tp->pf == PF_INET6)
2000 			tcp_trace(TA_INPUT, ostate, tp, (caddr_t) &tcp_saveti6, 0, tlen);
2001 		else
2002 #endif /* INET6 */
2003 			tcp_trace(TA_INPUT, ostate, tp, (caddr_t) &tcp_saveti, 0, tlen);
2004 	}
2005 
2006 	/*
2007 	 * Return any desired output.
2008 	 */
2009 	if (needoutput || (tp->t_flags & TF_ACKNOW)) {
2010 		(void) tcp_output(tp);
2011 	}
2012 	return;
2013 
2014 dropafterack:
2015 	/*
2016 	 * Generate an ACK dropping incoming segment if it occupies
2017 	 * sequence space, where the ACK reflects our state.
2018 	 */
2019 	if (tiflags & TH_RST)
2020 		goto drop;
2021 	m_freem(m);
2022 	tp->t_flags |= TF_ACKNOW;
2023 	(void) tcp_output(tp);
2024 	return;
2025 
2026 dropwithreset:
2027 	/*
2028 	 * Generate a RST, dropping incoming segment.
2029 	 * Make ACK acceptable to originator of segment.
2030 	 * Don't bother to respond if destination was broadcast/multicast.
2031 	 */
2032 	if ((tiflags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST))
2033 		goto drop;
2034 #ifdef INET6
2035 	if (is_ipv6) {
2036 	  /* For following calls to tcp_respond */
2037 	  ti = mtod(m, struct tcpiphdr *);
2038 	  if (IN6_IS_ADDR_MULTICAST(&ipv6->ip6_dst))
2039 	    goto drop;
2040 	} else {
2041 #endif /* INET6 */
2042 	    if (IN_MULTICAST(ti->ti_dst.s_addr))
2043 	      goto drop;
2044 #ifdef INET6
2045 	}
2046 #endif /* INET6 */
2047 	if (tiflags & TH_ACK)
2048 		tcp_respond(tp, (caddr_t) ti, m, (tcp_seq)0, th->th_ack, TH_RST);
2049 	else {
2050 		if (tiflags & TH_SYN)
2051 			tlen++;
2052 		tcp_respond(tp, (caddr_t) ti, m, th->th_seq+tlen, (tcp_seq)0,
2053 		    TH_RST|TH_ACK);
2054 	}
2055 	/* destroy temporarily created socket */
2056 	if (dropsocket)
2057 		(void) soabort(so);
2058 	return;
2059 
2060 drop:
2061 	/*
2062 	 * Drop space held by incoming segment and return.
2063 	 */
2064 	if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) {
2065 #ifdef INET6
2066 	  if (tp->pf == PF_INET6)
2067 	    tcp_trace(TA_DROP, ostate, tp, (caddr_t) &tcp_saveti6, 0, tlen);
2068 	  else
2069 #endif /* INET6 */
2070 	    tcp_trace(TA_DROP, ostate, tp, (caddr_t) &tcp_saveti, 0, tlen);
2071 	}
2072 
2073 	m_freem(m);
2074 	/* destroy temporarily created socket */
2075 	if (dropsocket)
2076 		(void) soabort(so);
2077 	return;
2078 #ifndef TUBA_INCLUDE
2079 }
2080 
2081 void
2082 tcp_dooptions(tp, cp, cnt, th, ts_present, ts_val, ts_ecr)
2083 	struct tcpcb *tp;
2084 	u_char *cp;
2085 	int cnt;
2086 	struct tcphdr *th;
2087 	int *ts_present;
2088 	u_int32_t *ts_val, *ts_ecr;
2089 {
2090 	u_int16_t mss = 0;
2091 	int opt, optlen;
2092 
2093 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
2094 		opt = cp[0];
2095 		if (opt == TCPOPT_EOL)
2096 			break;
2097 		if (opt == TCPOPT_NOP)
2098 			optlen = 1;
2099 		else {
2100 			optlen = cp[1];
2101 			if (optlen <= 0)
2102 			if (optlen < 2 || optlen > cnt)
2103 		}
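		/*
		 * Editor's note (illustrative): options are a stream of
		 * (kind, length, data) triples, e.g. an MSS of 1460 is
		 * carried as the bytes 02 04 05 b4.  EOL (0) terminates
		 * the list and NOP (1) is a single pad byte with no
		 * length octet, which is why both are special-cased above.
		 */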
2104 		switch (opt) {
2105 
2106 		default:
2107 			continue;
2108 
2109 		case TCPOPT_MAXSEG:
2110 			if (optlen != TCPOLEN_MAXSEG)
2111 				continue;
2112 			if (!(th->th_flags & TH_SYN))
2113 				continue;
2114 			bcopy((char *) cp + 2, (char *) &mss, sizeof(mss));
2115 			NTOHS(mss);
2116 			break;
2117 
2118 		case TCPOPT_WINDOW:
2119 			if (optlen != TCPOLEN_WINDOW)
2120 				continue;
2121 			if (!(th->th_flags & TH_SYN))
2122 				continue;
2123 			tp->t_flags |= TF_RCVD_SCALE;
2124 			tp->requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT);
2125 			break;
2126 
2127 		case TCPOPT_TIMESTAMP:
2128 			if (optlen != TCPOLEN_TIMESTAMP)
2129 				continue;
2130 			*ts_present = 1;
2131 			bcopy((char *)cp + 2, (char *) ts_val, sizeof(*ts_val));
2132 			NTOHL(*ts_val);
2133 			bcopy((char *)cp + 6, (char *) ts_ecr, sizeof(*ts_ecr));
2134 			NTOHL(*ts_ecr);
2135 
2136 			/*
2137 			 * A timestamp received in a SYN makes
2138 			 * it ok to send timestamp requests and replies.
2139 			 */
2140 			if (th->th_flags & TH_SYN) {
2141 				tp->t_flags |= TF_RCVD_TSTMP;
2142 				tp->ts_recent = *ts_val;
2143 				tp->ts_recent_age = tcp_now;
2144 			}
2145 			break;
2146 
2147 #ifdef TCP_SACK
2148 		case TCPOPT_SACK_PERMITTED:
2149 			if (tp->sack_disable || optlen != TCPOLEN_SACK_PERMITTED)
2150 				continue;
2151 			if (th->th_flags & TH_SYN)
2152 				/* MUST only be set on SYN */
2153 				tp->t_flags |= TF_SACK_PERMIT;
2154 			break;
2155 		case TCPOPT_SACK:
2156 			if (tcp_sack_option(tp, th, cp, optlen))
2157 				continue;
2158 			break;
2159 #endif
2160 		}
2161 	}
2162 	/* Update t_maxopd and t_maxseg after all options are processed */
2163 	if (th->th_flags & TH_SYN)
2164 		(void) tcp_mss(tp, mss);	/* sets t_maxseg */
2165 }
2166 
2167 #if defined(TCP_SACK)
2168 u_long
2169 tcp_seq_subtract(a, b)
2170 	u_long a, b;
2171 {
2172 	return ((long)(a - b));
2173 }
2174 #endif
2175 
2177 #ifdef TCP_SACK
2178 /*
2179  * This function is called upon receipt of new valid data (while not in header
2180  * prediction mode), and it updates the ordered list of sacks.
2181  */
2182 void
2183 tcp_update_sack_list(tp)
2184 	struct tcpcb *tp;
2185 {
2186 	/*
2187 	 * First reported block MUST be the most recent one.  Subsequent
2188 	 * blocks SHOULD be in the order in which they arrived at the
2189 	 * receiver.  These two conditions make the implementation fully
2190 	 * compliant with RFC 2018.
2191 	 */
2192 	int i, j = 0, count = 0, lastpos = -1;
2193 	struct sackblk sack, firstsack, temp[MAX_SACK_BLKS];
2194 
2195 	/* First clean up current list of sacks */
2196 	for (i = 0; i < tp->rcv_numsacks; i++) {
2197 		sack = tp->sackblks[i];
2198 		if (sack.start == 0 && sack.end == 0) {
2199 			count++; /* count = number of blocks to be discarded */
2200 			continue;
2201 		}
2202 		if (SEQ_LEQ(sack.end, tp->rcv_nxt)) {
2203 			tp->sackblks[i].start = tp->sackblks[i].end = 0;
2204 			count++;
2205 		} else {
2206 			temp[j].start = tp->sackblks[i].start;
2207 			temp[j++].end = tp->sackblks[i].end;
2208 		}
2209 	}
2210 	tp->rcv_numsacks -= count;
2211 	if (tp->rcv_numsacks == 0) { /* no sack blocks currently (fast path) */
2212 		tcp_clean_sackreport(tp);
2213 		if (SEQ_LT(tp->rcv_nxt, tp->rcv_laststart)) {
2214 			/* ==> need first sack block */
2215 			tp->sackblks[0].start = tp->rcv_laststart;
2216 			tp->sackblks[0].end = tp->rcv_lastend;
2217 			tp->rcv_numsacks = 1;
2218 		}
2219 		return;
2220 	}
2221 	/* Otherwise, sack blocks are already present. */
2222 	for (i = 0; i < tp->rcv_numsacks; i++)
2223 		tp->sackblks[i] = temp[i]; /* first copy back sack list */
2224 	if (SEQ_GEQ(tp->rcv_nxt, tp->rcv_lastend))
2225 		return;     /* sack list remains unchanged */
2226 	/*
2227 	 * From here, segment just received should be (part of) the 1st sack.
2228 	 * Go through list, possibly coalescing sack block entries.
2229 	 */
2230 	firstsack.start = tp->rcv_laststart;
2231 	firstsack.end = tp->rcv_lastend;
2232 	for (i = 0; i < tp->rcv_numsacks; i++) {
2233 		sack = tp->sackblks[i];
2234 		if (SEQ_LT(sack.end, firstsack.start) ||
2235 		    SEQ_GT(sack.start, firstsack.end))
2236 			continue; /* no overlap */
2237 		if (sack.start == firstsack.start && sack.end == firstsack.end){
2238 			/*
2239 			 * identical block; delete it here since we will
2240 			 * move it to the front of the list.
2241 			 */
2242 			tp->sackblks[i].start = tp->sackblks[i].end = 0;
2243 			lastpos = i;    /* last posn with a zero entry */
2244 			continue;
2245 		}
2246 		if (SEQ_LEQ(sack.start, firstsack.start))
2247 			firstsack.start = sack.start; /* merge blocks */
2248 		if (SEQ_GEQ(sack.end, firstsack.end))
2249 			firstsack.end = sack.end;     /* merge blocks */
2250 		tp->sackblks[i].start = tp->sackblks[i].end = 0;
2251 		lastpos = i;    /* last posn with a zero entry */
2252 	}
2253 	if (lastpos != -1) {    /* at least one merge */
2254 		for (i = 0, j = 1; i < tp->rcv_numsacks; i++) {
2255 			sack = tp->sackblks[i];
2256 			if (sack.start == 0 && sack.end == 0)
2257 				continue;
2258 			temp[j++] = sack;
2259 		}
2260 		tp->rcv_numsacks = j; /* including first blk (added later) */
2261 		for (i = 1; i < tp->rcv_numsacks; i++) /* now copy back */
2262 			tp->sackblks[i] = temp[i];
2263 	} else {        /* no merges -- shift sacks by 1 */
2264 		if (tp->rcv_numsacks < MAX_SACK_BLKS)
2265 			tp->rcv_numsacks++;
2266 		for (i = tp->rcv_numsacks-1; i > 0; i--)
2267 			tp->sackblks[i] = tp->sackblks[i-1];
2268 	}
2269 	tp->sackblks[0] = firstsack;
2270 	return;
2271 }
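/*
 * Editor's example (illustrative) of the coalescing above: with
 * rcv_nxt = 5000, an existing report [7300,8760) and a newly arrived
 * out-of-order segment recorded as rcv_laststart = 5840, rcv_lastend =
 * 7300, the blocks abut, firstsack grows to [5840,8760), the old entry
 * is zeroed out and the merged block lands in sackblks[0], keeping the
 * most recent data first as RFC 2018 requires.
 */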
2272 
2273 /*
2274  * Process the TCP SACK option.  Returns 1 if tcp_dooptions() should skip to
2275  * the next option (SACK is disabled or the option is unusable), and 0 if the
2276  * option was processed.  tp->snd_holes is an ordered list of holes (oldest
2277  * to newest, in terms of the sequence space).
2277  */
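/*
 * Editor's sketch (illustrative) of the wire format parsed below: a SACK
 * option is kind (1 byte, 5) and length (1 byte, 2 + 8*n), followed by n
 * pairs of 32-bit sequence numbers giving the left and right edge of each
 * SACKed block.  TCPOLEN_SACK is the 8-byte size of one pair, hence the
 * (optlen - 2) % TCPOLEN_SACK == 0 sanity check.
 */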
2278 int
2279 tcp_sack_option(tp, th, cp, optlen)
2280 	struct tcpcb *tp;
2281 	struct tcphdr *th;
2282 	u_char *cp;
2283 	int    optlen;
2284 {
2285 	int tmp_olen;
2286 	u_char *tmp_cp;
2287 	struct sackhole *cur, *p, *temp;
2288 
2289 	if (tp->sack_disable)
2290 		return 1;
2291 
2292 	/* Note: TCPOLEN_SACK must be 2*sizeof(tcp_seq) */
2293 	if (optlen <= 2 || (optlen - 2) % TCPOLEN_SACK != 0)
2294 		return 1;
2295 	tmp_cp = cp + 2;
2296 	tmp_olen = optlen - 2;
2297 	if (tp->snd_numholes < 0)
2298 		tp->snd_numholes = 0;
2299 	if (tp->t_maxseg == 0)
2300 		panic("tcp_sack_option"); /* Should never happen */
2301 	while (tmp_olen > 0) {
2302 		struct sackblk sack;
2303 
2304 		bcopy((char *) tmp_cp, (char *) &(sack.start), sizeof(tcp_seq));
2305 		NTOHL(sack.start);
2306 		bcopy((char *) tmp_cp + sizeof(tcp_seq),
2307 		    (char *) &(sack.end), sizeof(tcp_seq));
2308 		NTOHL(sack.end);
2309 		tmp_olen -= TCPOLEN_SACK;
2310 		tmp_cp += TCPOLEN_SACK;
2311 		if (SEQ_LEQ(sack.end, sack.start))
2312 			continue; /* bad SACK fields */
2313 		if (SEQ_LEQ(sack.end, tp->snd_una))
2314 			continue; /* old block */
2315 #if defined(TCP_SACK) && defined(TCP_FACK)
2316 		/* Updates snd_fack.  */
2317 		if (SEQ_GEQ(sack.end, tp->snd_fack))
2318 			tp->snd_fack = sack.end;
2319 #endif /* TCP_FACK */
2320 		if (SEQ_GT(th->th_ack, tp->snd_una)) {
2321 			if (SEQ_LT(sack.start, th->th_ack))
2322 				continue;
2323 		} else {
2324 			if (SEQ_LT(sack.start, tp->snd_una))
2325 				continue;
2326 		}
2327 		if (SEQ_GT(sack.end, tp->snd_max))
2328 			continue;
2329 		if (tp->snd_holes == 0) { /* first hole */
2330 			tp->snd_holes = (struct sackhole *)
2331 			    malloc(sizeof(struct sackhole), M_PCB, M_NOWAIT);
2332 			if (tp->snd_holes == NULL) {
2333 				/* ENOBUFS, so ignore SACKed block for now */
2334 				continue;
2335 			}
2336 			cur = tp->snd_holes;
2337 			cur->start = th->th_ack;
2338 			cur->end = sack.start;
2339 			cur->rxmit = cur->start;
2340 			cur->next = 0;
2341 			tp->snd_numholes = 1;
2342 			tp->rcv_lastsack = sack.end;
2343 			/*
2344 			 * dups is at least one.  If more data has been
2345 			 * SACKed, it can be greater than one.
2346 			 */
2347 			cur->dups = min(tcprexmtthresh,
2348 			    ((sack.end - cur->end)/tp->t_maxseg));
2349 			if (cur->dups < 1)
2350 				cur->dups = 1;
2351 			continue; /* with next sack block */
2352 		}
2353 		/* Go thru list of holes:  p = previous,  cur = current */
2354 		p = cur = tp->snd_holes;
2355 		while (cur) {
2356 			if (SEQ_LEQ(sack.end, cur->start))
2357 				/* SACKs data before the current hole */
2358 				break; /* no use going through more holes */
2359 			if (SEQ_GEQ(sack.start, cur->end)) {
2360 				/* SACKs data beyond the current hole */
2361 				cur->dups++;
2362 				if (((sack.end - cur->end)/tp->t_maxseg) >=
2363 				    tcprexmtthresh)
2364 					cur->dups = tcprexmtthresh;
2365 				p = cur;
2366 				cur = cur->next;
2367 				continue;
2368 			}
2369 			if (SEQ_LEQ(sack.start, cur->start)) {
2370 				/* Data acks at least the beginning of hole */
2371 #if defined(TCP_SACK) && defined(TCP_FACK)
2372 				if (SEQ_GT(sack.end, cur->rxmit))
2373 					tp->retran_data -=
2374 				    	    tcp_seq_subtract(cur->rxmit,
2375 					    cur->start);
2376 				else
2377 					tp->retran_data -=
2378 					    tcp_seq_subtract(sack.end,
2379 					    cur->start);
2380 #endif /* TCP_FACK */
2381 				if (SEQ_GEQ(sack.end, cur->end)) {
2382 					/* Acks entire hole, so delete hole */
2383 					if (p != cur) {
2384 						p->next = cur->next;
2385 						free(cur, M_PCB);
2386 						cur = p->next;
2387 					} else {
2388 						cur = cur->next;
2389 						free(p, M_PCB);
2390 						p = cur;
2391 						tp->snd_holes = p;
2392 					}
2393 					tp->snd_numholes--;
2394 					continue;
2395 				}
2396 				/* otherwise, move start of hole forward */
2397 				cur->start = sack.end;
2398 				cur->rxmit = max (cur->rxmit, cur->start);
2399 				p = cur;
2400 				cur = cur->next;
2401 				continue;
2402 			}
2403 			/* move end of hole backward */
2404 			if (SEQ_GEQ(sack.end, cur->end)) {
2405 #if defined(TCP_SACK) && defined(TCP_FACK)
2406 				if (SEQ_GT(cur->rxmit, sack.start))
2407 					tp->retran_data -=
2408 					    tcp_seq_subtract(cur->rxmit,
2409 					    sack.start);
2410 #endif /* TCP_FACK */
2411 				cur->end = sack.start;
2412 				cur->rxmit = min (cur->rxmit, cur->end);
2413 				cur->dups++;
2414 				if (((sack.end - cur->end)/tp->t_maxseg) >=
2415 				    tcprexmtthresh)
2416 					cur->dups = tcprexmtthresh;
2417 				p = cur;
2418 				cur = cur->next;
2419 				continue;
2420 			}
2421 			if (SEQ_LT(cur->start, sack.start) &&
2422 			    SEQ_GT(cur->end, sack.end)) {
2423 				/*
2424 				 * ACKs some data in middle of a hole; need to
2425 				 * split current hole
2426 				 */
2427 				temp = (struct sackhole *)malloc(sizeof(*temp),
2428 				    M_PCB, M_NOWAIT);
2429 				if (temp == NULL)
2430 					continue; /* ENOBUFS */
2431 #if defined(TCP_SACK) && defined(TCP_FACK)
2432 				if (SEQ_GT(cur->rxmit, sack.end))
2433 					tp->retran_data -=
2434 					    tcp_seq_subtract(sack.end,
2435 					    sack.start);
2436 				else if (SEQ_GT(cur->rxmit, sack.start))
2437 					tp->retran_data -=
2438 					    tcp_seq_subtract(cur->rxmit,
2439 					    sack.start);
2440 #endif /* TCP_FACK */
2441 				temp->next = cur->next;
2442 				temp->start = sack.end;
2443 				temp->end = cur->end;
2444 				temp->dups = cur->dups;
2445 				temp->rxmit = max (cur->rxmit, temp->start);
2446 				cur->end = sack.start;
2447 				cur->rxmit = min (cur->rxmit, cur->end);
2448 				cur->dups++;
2449 				if (((sack.end - cur->end)/tp->t_maxseg) >=
2450 				    tcprexmtthresh)
2451 					cur->dups = tcprexmtthresh;
2452 				cur->next = temp;
2453 				p = temp;
2454 				cur = p->next;
2455 				tp->snd_numholes++;
2456 			}
2457 		}
2458 		/* At this point, p points to the last hole on the list */
2459 		if (SEQ_LT(tp->rcv_lastsack, sack.start)) {
2460 			/*
2461 			 * Need to append new hole at end.
2462 			 * Last hole is p (and it's not NULL).
2463 			 */
2464 			temp = (struct sackhole *) malloc(sizeof(*temp),
2465 			    M_PCB, M_NOWAIT);
2466 			if (temp == NULL)
2467 				continue; /* ENOBUFS */
2468 			temp->start = tp->rcv_lastsack;
2469 			temp->end = sack.start;
2470 			temp->dups = min(tcprexmtthresh,
2471 			    ((sack.end - sack.start)/tp->t_maxseg));
2472 			if (temp->dups < 1)
2473 				temp->dups = 1;
2474 			temp->rxmit = temp->start;
2475 			temp->next = 0;
2476 			p->next = temp;
2477 			tp->rcv_lastsack = sack.end;
2478 			tp->snd_numholes++;
2479 		}
2480 	}
2481 #if defined(TCP_SACK) && defined(TCP_FACK)
2482 	/*
2483 	 * Update retran_data and snd_awnd.  Go through the list of
2484 	 * holes.   Increment retran_data by (hole->rxmit - hole->start).
2485 	 */
2486 	tp->retran_data = 0;
2487 	cur = tp->snd_holes;
2488 	while (cur) {
2489 		tp->retran_data += cur->rxmit - cur->start;
2490 		cur = cur->next;
2491 	}
2492 	tp->snd_awnd = tcp_seq_subtract(tp->snd_nxt, tp->snd_fack) +
2493 	    tp->retran_data;
2494 #endif /* TCP_FACK */
2495 
2496 	return 0;
2497 }
2498 
2499 /*
2500  * Delete stale (i.e., cumulatively acked) holes.  A hole is deleted only if
2501  * it is completely acked; otherwise, tcp_sack_option(), called from
2502  * tcp_dooptions(), will fix up the hole.
2503  */
2504 void
2505 tcp_del_sackholes(tp, th)
2506 	struct tcpcb *tp;
2507 	struct tcphdr *th;
2508 {
2509 	if (!tp->sack_disable && tp->t_state != TCPS_LISTEN) {
2510 		/* max because this could be an older ack just arrived */
2511 		tcp_seq lastack = SEQ_GT(th->th_ack, tp->snd_una) ?
2512 			th->th_ack : tp->snd_una;
2513 		struct sackhole *cur = tp->snd_holes;
2514 		struct sackhole *prev = cur;
2515 		while (cur)
2516 			if (SEQ_LEQ(cur->end, lastack)) {
2517 				cur = cur->next;
2518 				free(prev, M_PCB);
2519 				prev = cur;
2520 				tp->snd_numholes--;
2521 			} else if (SEQ_LT(cur->start, lastack)) {
2522 				cur->start = lastack;
2523 				break;
2524 			} else
2525 				break;
2526 		tp->snd_holes = cur;
2527 	}
2528 }
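/*
 * Editor's example (illustrative): with holes [1000,2000) and [3000,4000)
 * and th_ack = 3500, the first hole's end precedes the ack and it is
 * freed outright, while the second is only trimmed to start at 3500;
 * a later SACK block may shrink it further via tcp_sack_option().
 */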
2529 
2530 /*
2531  * Delete all receiver-side SACK information.
2532  */
2533 void
2534 tcp_clean_sackreport(tp)
2535 	struct tcpcb *tp;
2536 {
2537 	int i;
2538 
2539 	tp->rcv_numsacks = 0;
2540 	for (i = 0; i < MAX_SACK_BLKS; i++)
2541 		tp->sackblks[i].start = tp->sackblks[i].end = 0;
2543 }
2544 
2545 /*
2546  * Checks for partial ack.  If partial ack arrives, turn off retransmission
2547  * timer, deflate the window, do not clear tp->t_dupacks, and return 1.
2548  * If the ack advances at least to tp->snd_last, return 0.
2549  */
2550 int
2551 tcp_sack_partialack(tp, th)
2552 	struct tcpcb *tp;
2553 	struct tcphdr *th;
2554 {
2555 	if (SEQ_LT(th->th_ack, tp->snd_last)) {
2556 		/* Turn off retx. timer (will start again next segment) */
2557 		tp->t_timer[TCPT_REXMT] = 0;
2558 		tp->t_rtt = 0;
2559 #ifndef TCP_FACK
2560 		/*
2561 		 * Partial window deflation.  This statement relies on the
2562 		 * fact that tp->snd_una has not been updated yet.  In FACK
2563 		 * hold snd_cwnd constant during fast recovery.
2564 		 */
2565 		tp->snd_cwnd -= (th->th_ack - tp->snd_una - tp->t_maxseg);
2566 #endif
2567 		return 1;
2568 	}
2569 	return 0;
2570 }
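/*
 * Editor's example (illustrative) of the partial window deflation above
 * (compiled when TCP_FACK is not defined): with snd_una = 1000, th_ack =
 * 3920 and t_maxseg = 1460, the partial ack covers 2920 bytes and
 * snd_cwnd shrinks by 2920 - 1460 = 1460, i.e. by the newly acked amount
 * less one segment, so about one new segment can accompany the
 * retransmission.
 */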
2571 #endif /* TCP_SACK */
2572 
2573 /*
2574  * Pull out of band byte out of a segment so
2575  * it doesn't appear in the user's data queue.
2576  * It is still reflected in the segment length for
2577  * sequencing purposes.
2578  */
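/*
 * Editor's example (illustrative): if th_urp is 3, the urgent byte is the
 * 3rd data byte, so cnt = off + 2 below; the loop walks the mbuf chain to
 * the mbuf holding that byte, saves it in t_iobc, and closes the one-byte
 * gap with bcopy(), shortening that mbuf by one.
 */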
2579 void
2580 tcp_pulloutofband(so, urgent, m, off)
2581 	struct socket *so;
2582 	u_int urgent;
2583 	register struct mbuf *m;
2584 	int off;
2585 {
2586 	int cnt = off + urgent - 1;
2587 
2588 	while (cnt >= 0) {
2589 		if (m->m_len > cnt) {
2590 			char *cp = mtod(m, caddr_t) + cnt;
2591 			struct tcpcb *tp = sototcpcb(so);
2592 
2593 			tp->t_iobc = *cp;
2594 			tp->t_oobflags |= TCPOOB_HAVEDATA;
2595 			bcopy(cp+1, cp, (unsigned)(m->m_len - cnt - 1));
2596 			m->m_len--;
2597 			return;
2598 		}
2599 		cnt -= m->m_len;
2600 		m = m->m_next;
2601 		if (m == 0)
2602 			break;
2603 	}
2604 	panic("tcp_pulloutofband");
2605 }
2606 
2607 /*
2608  * Collect new round-trip time estimate
2609  * and update averages and current timeout.
2610  */
2611 void
2612 tcp_xmit_timer(tp, rtt)
2613 	register struct tcpcb *tp;
2614 	short rtt;
2615 {
2616 	register short delta;
2617 	short rttmin;
2618 
2619 	tcpstat.tcps_rttupdated++;
2620 	--rtt;
2621 	if (tp->t_srtt != 0) {
2622 		/*
2623 		 * srtt is stored as fixed point with 3 bits after the
2624 		 * binary point (i.e., scaled by 8).  The following magic
2625 		 * is equivalent to the smoothing algorithm in rfc793 with
2626 		 * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
2627 		 * point).  Adjust rtt to origin 0.
2628 		 */
2629 		delta = (rtt << 2) - (tp->t_srtt >> TCP_RTT_SHIFT);
2630 		if ((tp->t_srtt += delta) <= 0)
2631 			tp->t_srtt = 1;
2632 		/*
2633 		 * We accumulate a smoothed rtt variance (actually, a
2634 		 * smoothed mean difference), then set the retransmit
2635 		 * timer to smoothed rtt + 4 times the smoothed variance.
2636 		 * rttvar is stored as fixed point with 2 bits after the
2637 		 * binary point (scaled by 4).  The following is
2638 		 * equivalent to rfc793 smoothing with an alpha of .75
2639 		 * (rttvar = rttvar*3/4 + |delta| / 4).  This replaces
2640 		 * rfc793's wired-in beta.
2641 		 */
2642 		if (delta < 0)
2643 			delta = -delta;
2644 		delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT);
2645 		if ((tp->t_rttvar += delta) <= 0)
2646 			tp->t_rttvar = 1;
2647 	} else {
2648 		/*
2649 		 * No rtt measurement yet - use the unsmoothed rtt.
2650 		 * Set the variance to half the rtt (so our first
2651 		 * retransmit happens at 3*rtt).
2652 		 */
2653 		tp->t_srtt = rtt << (TCP_RTT_SHIFT + 2);
2654 		tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT + 2 - 1);
2655 	}
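	/*
	 * Editor's worked example (illustrative, assuming TCP_RTT_SHIFT
	 * is 3): with t_srtt = 256 and a measurement of 10 ticks (9 after
	 * the origin-0 adjustment above), delta = (9 << 2) - (256 >> 3) =
	 * 36 - 32 = 4 and t_srtt becomes 260, moving 1/8 of the way
	 * toward the scaled steady-state value 9 << 5 = 288, which is the
	 * alpha = .875 smoothing the comments describe.  t_rttvar is
	 * updated analogously.
	 */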
2656 	tp->t_rtt = 0;
2657 	tp->t_rxtshift = 0;
2658 
2659 	/*
2660 	 * the retransmit should happen at rtt + 4 * rttvar.
2661 	 * Because of the way we do the smoothing, srtt and rttvar
2662 	 * will each average +1/2 tick of bias.  When we compute
2663 	 * the retransmit timer, we want 1/2 tick of rounding and
2664 	 * 1 extra tick because of +-1/2 tick uncertainty in the
2665 	 * firing of the timer.  The bias will give us exactly the
2666 	 * 1.5 tick we need.  But, because the bias is
2667 	 * statistical, we have to test that we don't drop below
2668 	 * the minimum feasible timer (which is 2 ticks).
2669 	 */
2670 	if (tp->t_rttmin > rtt + 2)
2671 		rttmin = tp->t_rttmin;
2672 	else
2673 		rttmin = rtt + 2;
2674 	TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), rttmin, TCPTV_REXMTMAX);
2675 
2676 	/*
2677 	 * We received an ack for a packet that wasn't retransmitted;
2678 	 * it is probably safe to discard any error indications we've
2679 	 * received recently.  This isn't quite right, but close enough
2680 	 * for now (a route might have failed after we sent a segment,
2681 	 * and the return path might not be symmetrical).
2682 	 */
2683 	tp->t_softerror = 0;
2684 }
2685 
2686 /*
2687  * Determine a reasonable value for maxseg size.
2688  * If the route is known, check route for mtu.
2689  * If none, use an mss that can be handled on the outgoing
2690  * interface without forcing IP to fragment; if bigger than
2691  * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES
2692  * to utilize large mbufs.  If no route is found, route has no mtu,
2693  * or the destination isn't local, use a default, hopefully conservative
2694  * size (usually 512 or the default IP max size, but no more than the mtu
2695  * of the interface), as we can't discover anything about intervening
2696  * gateways or networks.  We also initialize the congestion/slow start
2697  * window to be a single segment if the destination isn't local.
2698  * While looking at the routing entry, we also initialize other path-dependent
2699  * parameters from pre-set or cached values in the routing entry.
2700  *
2701  * Also take into account the space needed for options that we
2702  * send regularly.  Make maxseg shorter by that amount to assure
2703  * that we can send maxseg amount of data even when the options
2704  * are present.  Store the upper limit of the length of options plus
2705  * data in maxopd.
2706  */
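/*
 * Editor's example (illustrative): for an IPv4 peer reached over a route
 * with rmx_mtu = 1500, the code below computes mss = 1500 -
 * sizeof(struct tcpiphdr) = 1500 - 40 = 1460; if both ends negotiated
 * timestamps, TCPOLEN_TSTAMP_APPA (12 bytes) is subtracted as well, so
 * t_maxopd ends up 1460 while t_maxseg ends up 1448.
 */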
2707 int
2708 tcp_mss(tp, offer)
2709 	register struct tcpcb *tp;
2710 	u_int offer;
2711 {
2712 	struct route *ro;
2713 	register struct rtentry *rt;
2714 	struct ifnet *ifp;
2715 	register int rtt, mss;
2716 	u_long bufsize;
2717 	struct inpcb *inp;
2718 	struct socket *so;
2719 
2720 	inp = tp->t_inpcb;
2721 	ro = &inp->inp_route;
2722 	so = inp->inp_socket;
2723 
2724 	if ((rt = ro->ro_rt) == (struct rtentry *)0) {
2725 		/* No route yet, so try to acquire one */
2726 #ifdef INET6
2727 	  /*
2728 	   * Get a new IPv6 route if this is an IPv6 destination; otherwise,
2729 	   * get an IPv4 route (including those pesky IPv4-mapped addresses).
2730 	   */
2731 	  bzero(ro,sizeof(struct route_in6));
2732 	  if (sotopf(so) == AF_INET6) {
2733 	    if (IN6_IS_ADDR_V4MAPPED(&inp->inp_faddr6)) {
2734 	      /* Get an IPv4 route. */
2735 	      ro->ro_dst.sa_family = AF_INET;
2736 	      ro->ro_dst.sa_len = sizeof(ro->ro_dst);
2737 	      ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
2738 		inp->inp_faddr;
2739 	      rtalloc(ro);
2740 	    } else {
2741 	      ro->ro_dst.sa_family = AF_INET6;
2742 	      ro->ro_dst.sa_len = sizeof(struct sockaddr_in6);
2743 	      ((struct sockaddr_in6 *) &ro->ro_dst)->sin6_addr =
2744 		inp->inp_faddr6;
2745 	      rtalloc(ro);
2746 	    }
2747 	  } else
2748 #endif /* INET6 */
2749 		if (inp->inp_faddr.s_addr != INADDR_ANY) {
2750 			ro->ro_dst.sa_family = AF_INET;
2751 			ro->ro_dst.sa_len = sizeof(ro->ro_dst);
2752 			satosin(&ro->ro_dst)->sin_addr = inp->inp_faddr;
2753 			rtalloc(ro);
2754 		}
2755 		if ((rt = ro->ro_rt) == (struct rtentry *)0) {
2756 			tp->t_maxopd = tp->t_maxseg = tcp_mssdflt;
2757 			return (tcp_mssdflt);
2758 		}
2759 	}
2760 	ifp = rt->rt_ifp;
2761 
2762 #ifdef RTV_MTU	/* if route characteristics exist ... */
2763 	/*
2764 	 * While we're here, check if there's an initial rtt
2765 	 * or rttvar.  Convert from the route-table units
2766 	 * to scaled multiples of the slow timeout timer.
2767 	 */
2768 	if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) {
2769 		/*
2770 		 * XXX the lock bit for MTU indicates that the value
2771 		 * is also a minimum value; this is subject to time.
2772 		 */
2773 		if (rt->rt_rmx.rmx_locks & RTV_RTT)
2774 			TCPT_RANGESET(tp->t_rttmin,
2775 			    rtt / (RTM_RTTUNIT / PR_SLOWHZ),
2776 			    TCPTV_MIN, TCPTV_REXMTMAX);
2777 		tp->t_srtt = rtt / (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
2778 		if (rt->rt_rmx.rmx_rttvar)
2779 			tp->t_rttvar = rt->rt_rmx.rmx_rttvar /
2780 			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE));
2781 		else
2782 			/* default variation is +- 1 rtt */
2783 			tp->t_rttvar =
2784 			    tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
2785 		TCPT_RANGESET((long) tp->t_rxtcur,
2786 		    ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
2787 		    tp->t_rttmin, TCPTV_REXMTMAX);
2788 	}
2789 	/*
2790 	 * if there's an mtu associated with the route, use it
2791 	 */
2792 	if (rt->rt_rmx.rmx_mtu)
2793 #ifdef INET6
2794 	{
2795 	  /*
2796 	   * One may wish to lower MSS to take into account options,
2797 	   * especially security-related options.
2798 	   */
2799 	  if (tp->pf == AF_INET6)
2800 	    mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcpipv6hdr);
2801 	  else
2802 #endif /* INET6 */
2803 		mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr);
2804 #ifdef INET6
2805 	}
2806 #endif /* INET6 */
2807 	else
2808 #endif /* RTV_MTU */
2809 	{
2810 	  /*
2811 	   *  ifp may be null and rmx_mtu may be zero in certain
2812 	   *  v6 cases (e.g., if ND wasn't able to resolve the
2813 	   *  destination host).
2814 	   */
2815 		mss = ifp ? ifp->if_mtu - sizeof(struct tcpiphdr) : 0;
2816 #ifdef INET6
2817 		if (tp->pf == AF_INET)
2818 #endif /* INET6 */
2819 		if (!in_localaddr(inp->inp_faddr))
2820 			mss = min(mss, tcp_mssdflt);
2821 	}
2822 	/*
2823 	 * The current mss, t_maxseg, is initialized to the default value.
2824 	 * If we compute a smaller value, reduce the current mss.
2825 	 * If we compute a larger value, return it for use in sending
2826 	 * a max seg size option, but don't store it for use
2827 	 * unless we received an offer at least that large from peer.
2828 	 * However, do not accept offers under 32 bytes.
2829 	 * However, do not accept offers under 64 bytes.
2830 	if (offer)
2831 		mss = min(mss, offer);
2832 	mss = max(mss, 64);		/* sanity - at least max opt. space */
2833 	/*
2834 	 * maxopd stores the maximum length of data AND options
2835 	 * in a segment; maxseg is the amount of data in a normal
2836 	 * segment.  We need to store this value (maxopd) apart
2837 	 * from maxseg, because now every segment carries options
2838 	 * and thus we normally have somewhat less data in segments.
2839 	 */
2840 	tp->t_maxopd = mss;
2841 
2842 	if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
2843 	    (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)
2844 		mss -= TCPOLEN_TSTAMP_APPA;
2845 
2846 #if	(MCLBYTES & (MCLBYTES - 1)) == 0
2847 	if (mss > MCLBYTES)
2848 		mss &= ~(MCLBYTES-1);
2849 #else
2850 	if (mss > MCLBYTES)
2851 		mss = mss / MCLBYTES * MCLBYTES;
2852 #endif
2853 	/*
2854 	 * If there's a pipesize, change the socket buffer
2855 	 * to that size.  Make the socket buffers an integral
2856 	 * number of mss units; if the mss is larger than
2857 	 * the socket buffer, decrease the mss.
2858 	 */
2859 #ifdef RTV_SPIPE
2860 	if ((bufsize = rt->rt_rmx.rmx_sendpipe) == 0)
2861 #endif
2862 		bufsize = so->so_snd.sb_hiwat;
2863 	if (bufsize < mss)
2864 		mss = bufsize;
2865 	else {
2866 		bufsize = roundup(bufsize, mss);
2867 		if (bufsize > sb_max)
2868 			bufsize = sb_max;
2869 		(void)sbreserve(&so->so_snd, bufsize);
2870 	}
2871 	tp->t_maxseg = mss;
2872 
2873 #ifdef RTV_RPIPE
2874 	if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0)
2875 #endif
2876 		bufsize = so->so_rcv.sb_hiwat;
2877 	if (bufsize > mss) {
2878 		bufsize = roundup(bufsize, mss);
2879 		if (bufsize > sb_max)
2880 			bufsize = sb_max;
2881 		(void)sbreserve(&so->so_rcv, bufsize);
2882 	}
2883 	tp->snd_cwnd = mss;
2884 
2885 #ifdef RTV_SSTHRESH
2886 	if (rt->rt_rmx.rmx_ssthresh) {
2887 		/*
2888 		 * There's some sort of gateway or interface
2889 		 * buffer limit on the path.  Use this to set
2890 		 * the slow start threshold, but set the
2891 		 * threshold to no less than 2*mss.
2892 		 */
2893 		tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh);
2894 	}
2895 #endif /* RTV_SSTHRESH */
2896 	return (mss);
2897 }
2898 #endif /* TUBA_INCLUDE */
2899 
2900 #if defined (TCP_SACK)
2901 /*
2902  * Checks for partial ack.  If partial ack arrives, force the retransmission
2903  * of the next unacknowledged segment, do not clear tp->t_dupacks, and return
2904  * 1.  By setting snd_nxt to ti_ack, this forces retransmission timer to
2905  * be started again.  If the ack advances at least to tp->snd_last, return 0.
2906  */
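/*
 * Editor's example (illustrative): with snd_una = 1000, snd_last = 9760,
 * t_maxseg = 1460 and a partial ack th_ack = 3920, snd_nxt is pulled back
 * to 3920 so the tcp_output() call retransmits the next unacked segment,
 * snd_cwnd is set to 1460 + 2920 = 4380 for that single call, and the
 * restored window is then deflated by 2920 - 1460 = 1460.
 */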
2907 int
2908 tcp_newreno(tp, th)
2909 	struct tcpcb *tp;
2910 	struct tcphdr *th;
2911 {
2912 	if (SEQ_LT(th->th_ack, tp->snd_last)) {
2913 		/*
2914 		 * snd_una has not been updated and the socket send buffer
2915 		 * not yet drained of the acked data, so we have to leave
2916 		 * snd_una as it was to get the correct data offset in
2917 		 * tcp_output().
2918 		 */
2919 		tcp_seq onxt = tp->snd_nxt;
2920 		u_long  ocwnd = tp->snd_cwnd;
2921 		tp->t_timer[TCPT_REXMT] = 0;
2922 		tp->t_rtt = 0;
2923 		tp->snd_nxt = th->th_ack;
2924 		/*
2925 		 * Set snd_cwnd to one segment beyond acknowledged offset
2926 		 * (tp->snd_una not yet updated when this function is called)
2927 		 */
2928 		tp->snd_cwnd = tp->t_maxseg + (th->th_ack - tp->snd_una);
2929 		(void) tcp_output(tp);
2930 		tp->snd_cwnd = ocwnd;
2931 		if (SEQ_GT(onxt, tp->snd_nxt))
2932 			tp->snd_nxt = onxt;
2933 		/*
2934 		 * Partial window deflation.  Relies on fact that tp->snd_una
2935 		 * not updated yet.
2936 		 */
2937 		tp->snd_cwnd -= (th->th_ack - tp->snd_una - tp->t_maxseg);
2938 		return 1;
2939 	}
2940 	return 0;
2941 }
2942 #endif /* TCP_SACK */
2943