xref: /onnv-gate/usr/src/uts/common/inet/tcp/tcp_time_wait.c (revision 12175:31bf438b8ab5)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
2111754SKacheong.Poon@Sun.COM 
/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */
2511754SKacheong.Poon@Sun.COM 
/*
 * This file contains functions related to TCP time wait processing.  Also
 * refer to the time wait handling comments in tcp_impl.h.
 */
3011754SKacheong.Poon@Sun.COM 
3111754SKacheong.Poon@Sun.COM #include <sys/types.h>
3211754SKacheong.Poon@Sun.COM #include <sys/strsun.h>
3311754SKacheong.Poon@Sun.COM #include <sys/squeue_impl.h>
3411754SKacheong.Poon@Sun.COM #include <sys/squeue.h>
3511754SKacheong.Poon@Sun.COM #include <sys/callo.h>
3611754SKacheong.Poon@Sun.COM 
3711754SKacheong.Poon@Sun.COM #include <inet/common.h>
3811754SKacheong.Poon@Sun.COM #include <inet/ip.h>
3911754SKacheong.Poon@Sun.COM #include <inet/tcp.h>
4011754SKacheong.Poon@Sun.COM #include <inet/tcp_impl.h>
4111754SKacheong.Poon@Sun.COM #include <inet/tcp_cluster.h>
4211754SKacheong.Poon@Sun.COM 
4311754SKacheong.Poon@Sun.COM static void	tcp_timewait_close(void *, mblk_t *, void *, ip_recv_attr_t *);
4411754SKacheong.Poon@Sun.COM 
4511754SKacheong.Poon@Sun.COM /*
4611754SKacheong.Poon@Sun.COM  * TCP_TIME_WAIT_DELAY governs how often the time_wait_collector runs.
4711754SKacheong.Poon@Sun.COM  * Running it every 5 seconds seems to give the best results.
4811754SKacheong.Poon@Sun.COM  */
4911754SKacheong.Poon@Sun.COM #define	TCP_TIME_WAIT_DELAY ((hrtime_t)5 * NANOSEC)
5011754SKacheong.Poon@Sun.COM 
5111754SKacheong.Poon@Sun.COM /*
5211754SKacheong.Poon@Sun.COM  * Remove a connection from the list of detached TIME_WAIT connections.
5311754SKacheong.Poon@Sun.COM  * It returns B_FALSE if it can't remove the connection from the list
5411754SKacheong.Poon@Sun.COM  * as the connection has already been removed from the list due to an
5511754SKacheong.Poon@Sun.COM  * earlier call to tcp_time_wait_remove(); otherwise it returns B_TRUE.
5611754SKacheong.Poon@Sun.COM  */
5711754SKacheong.Poon@Sun.COM boolean_t
tcp_time_wait_remove(tcp_t * tcp,tcp_squeue_priv_t * tcp_time_wait)5811754SKacheong.Poon@Sun.COM tcp_time_wait_remove(tcp_t *tcp, tcp_squeue_priv_t *tcp_time_wait)
5911754SKacheong.Poon@Sun.COM {
6011754SKacheong.Poon@Sun.COM 	boolean_t	locked = B_FALSE;
6111754SKacheong.Poon@Sun.COM 
6211754SKacheong.Poon@Sun.COM 	if (tcp_time_wait == NULL) {
6311754SKacheong.Poon@Sun.COM 		tcp_time_wait = *((tcp_squeue_priv_t **)
6411754SKacheong.Poon@Sun.COM 		    squeue_getprivate(tcp->tcp_connp->conn_sqp, SQPRIVATE_TCP));
6511754SKacheong.Poon@Sun.COM 		mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
6611754SKacheong.Poon@Sun.COM 		locked = B_TRUE;
6711754SKacheong.Poon@Sun.COM 	} else {
6811754SKacheong.Poon@Sun.COM 		ASSERT(MUTEX_HELD(&tcp_time_wait->tcp_time_wait_lock));
6911754SKacheong.Poon@Sun.COM 	}
7011754SKacheong.Poon@Sun.COM 
7111754SKacheong.Poon@Sun.COM 	/* 0 means that the tcp_t has not been added to the time wait list. */
7211754SKacheong.Poon@Sun.COM 	if (tcp->tcp_time_wait_expire == 0) {
7311754SKacheong.Poon@Sun.COM 		ASSERT(tcp->tcp_time_wait_next == NULL);
7411754SKacheong.Poon@Sun.COM 		ASSERT(tcp->tcp_time_wait_prev == NULL);
7511754SKacheong.Poon@Sun.COM 		if (locked)
7611754SKacheong.Poon@Sun.COM 			mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
7711754SKacheong.Poon@Sun.COM 		return (B_FALSE);
7811754SKacheong.Poon@Sun.COM 	}
7911754SKacheong.Poon@Sun.COM 	ASSERT(TCP_IS_DETACHED(tcp));
8011754SKacheong.Poon@Sun.COM 	ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);
8111754SKacheong.Poon@Sun.COM 
8211754SKacheong.Poon@Sun.COM 	if (tcp == tcp_time_wait->tcp_time_wait_head) {
8311754SKacheong.Poon@Sun.COM 		ASSERT(tcp->tcp_time_wait_prev == NULL);
8411754SKacheong.Poon@Sun.COM 		tcp_time_wait->tcp_time_wait_head = tcp->tcp_time_wait_next;
8511754SKacheong.Poon@Sun.COM 		if (tcp_time_wait->tcp_time_wait_head != NULL) {
8611754SKacheong.Poon@Sun.COM 			tcp_time_wait->tcp_time_wait_head->tcp_time_wait_prev =
8711754SKacheong.Poon@Sun.COM 			    NULL;
8811754SKacheong.Poon@Sun.COM 		} else {
8911754SKacheong.Poon@Sun.COM 			tcp_time_wait->tcp_time_wait_tail = NULL;
9011754SKacheong.Poon@Sun.COM 		}
9111754SKacheong.Poon@Sun.COM 	} else if (tcp == tcp_time_wait->tcp_time_wait_tail) {
9211754SKacheong.Poon@Sun.COM 		ASSERT(tcp->tcp_time_wait_next == NULL);
9311754SKacheong.Poon@Sun.COM 		tcp_time_wait->tcp_time_wait_tail = tcp->tcp_time_wait_prev;
9411754SKacheong.Poon@Sun.COM 		ASSERT(tcp_time_wait->tcp_time_wait_tail != NULL);
9511754SKacheong.Poon@Sun.COM 		tcp_time_wait->tcp_time_wait_tail->tcp_time_wait_next = NULL;
9611754SKacheong.Poon@Sun.COM 	} else {
9711754SKacheong.Poon@Sun.COM 		ASSERT(tcp->tcp_time_wait_prev->tcp_time_wait_next == tcp);
9811754SKacheong.Poon@Sun.COM 		ASSERT(tcp->tcp_time_wait_next->tcp_time_wait_prev == tcp);
9911754SKacheong.Poon@Sun.COM 		tcp->tcp_time_wait_prev->tcp_time_wait_next =
10011754SKacheong.Poon@Sun.COM 		    tcp->tcp_time_wait_next;
10111754SKacheong.Poon@Sun.COM 		tcp->tcp_time_wait_next->tcp_time_wait_prev =
10211754SKacheong.Poon@Sun.COM 		    tcp->tcp_time_wait_prev;
10311754SKacheong.Poon@Sun.COM 	}
10411754SKacheong.Poon@Sun.COM 	tcp->tcp_time_wait_next = NULL;
10511754SKacheong.Poon@Sun.COM 	tcp->tcp_time_wait_prev = NULL;
10611754SKacheong.Poon@Sun.COM 	tcp->tcp_time_wait_expire = 0;
10711754SKacheong.Poon@Sun.COM 
10811754SKacheong.Poon@Sun.COM 	if (locked)
10911754SKacheong.Poon@Sun.COM 		mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
11011754SKacheong.Poon@Sun.COM 	return (B_TRUE);
11111754SKacheong.Poon@Sun.COM }
11211754SKacheong.Poon@Sun.COM 
11311754SKacheong.Poon@Sun.COM /*
11411754SKacheong.Poon@Sun.COM  * Add a connection to the list of detached TIME_WAIT connections
11511754SKacheong.Poon@Sun.COM  * and set its time to expire.
11611754SKacheong.Poon@Sun.COM  */
11711754SKacheong.Poon@Sun.COM void
tcp_time_wait_append(tcp_t * tcp)11811754SKacheong.Poon@Sun.COM tcp_time_wait_append(tcp_t *tcp)
11911754SKacheong.Poon@Sun.COM {
12011754SKacheong.Poon@Sun.COM 	tcp_stack_t	*tcps = tcp->tcp_tcps;
12112056SKacheong.Poon@Sun.COM 	squeue_t	*sqp = tcp->tcp_connp->conn_sqp;
12211754SKacheong.Poon@Sun.COM 	tcp_squeue_priv_t *tcp_time_wait =
12312056SKacheong.Poon@Sun.COM 	    *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));
12411754SKacheong.Poon@Sun.COM 
12511754SKacheong.Poon@Sun.COM 	tcp_timers_stop(tcp);
12611754SKacheong.Poon@Sun.COM 
12711754SKacheong.Poon@Sun.COM 	/* Freed above */
12811754SKacheong.Poon@Sun.COM 	ASSERT(tcp->tcp_timer_tid == 0);
12911754SKacheong.Poon@Sun.COM 	ASSERT(tcp->tcp_ack_tid == 0);
13011754SKacheong.Poon@Sun.COM 
13111754SKacheong.Poon@Sun.COM 	/* must have happened at the time of detaching the tcp */
13211754SKacheong.Poon@Sun.COM 	ASSERT(tcp->tcp_ptpahn == NULL);
13311754SKacheong.Poon@Sun.COM 	ASSERT(tcp->tcp_flow_stopped == 0);
13411754SKacheong.Poon@Sun.COM 	ASSERT(tcp->tcp_time_wait_next == NULL);
13511754SKacheong.Poon@Sun.COM 	ASSERT(tcp->tcp_time_wait_prev == NULL);
13612056SKacheong.Poon@Sun.COM 	ASSERT(tcp->tcp_time_wait_expire == 0);
13711754SKacheong.Poon@Sun.COM 	ASSERT(tcp->tcp_listener == NULL);
13811754SKacheong.Poon@Sun.COM 
13912056SKacheong.Poon@Sun.COM 	tcp->tcp_time_wait_expire = ddi_get_lbolt64();
14011754SKacheong.Poon@Sun.COM 	/*
14112056SKacheong.Poon@Sun.COM 	 * Since tcp_time_wait_expire is lbolt64, it should not wrap around
14212056SKacheong.Poon@Sun.COM 	 * in practice.  Hence it cannot be 0.  Note that zero means that the
14312056SKacheong.Poon@Sun.COM 	 * tcp_t is not in the TIME_WAIT list.
14411754SKacheong.Poon@Sun.COM 	 */
14511754SKacheong.Poon@Sun.COM 	tcp->tcp_time_wait_expire += MSEC_TO_TICK(
14611754SKacheong.Poon@Sun.COM 	    tcps->tcps_time_wait_interval);
14711754SKacheong.Poon@Sun.COM 
14811754SKacheong.Poon@Sun.COM 	ASSERT(TCP_IS_DETACHED(tcp));
14911754SKacheong.Poon@Sun.COM 	ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);
15011754SKacheong.Poon@Sun.COM 	ASSERT(tcp->tcp_time_wait_next == NULL);
15111754SKacheong.Poon@Sun.COM 	ASSERT(tcp->tcp_time_wait_prev == NULL);
15211754SKacheong.Poon@Sun.COM 	TCP_DBGSTAT(tcps, tcp_time_wait);
15311754SKacheong.Poon@Sun.COM 
15411754SKacheong.Poon@Sun.COM 	mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
15511754SKacheong.Poon@Sun.COM 	if (tcp_time_wait->tcp_time_wait_head == NULL) {
15611754SKacheong.Poon@Sun.COM 		ASSERT(tcp_time_wait->tcp_time_wait_tail == NULL);
15711754SKacheong.Poon@Sun.COM 		tcp_time_wait->tcp_time_wait_head = tcp;
15812056SKacheong.Poon@Sun.COM 
15912056SKacheong.Poon@Sun.COM 		/*
16012056SKacheong.Poon@Sun.COM 		 * Even if the list was empty before, there may be a timer
16112056SKacheong.Poon@Sun.COM 		 * running since a tcp_t can be removed from the list
16212056SKacheong.Poon@Sun.COM 		 * in other places, such as tcp_clean_death().  So check if
16312056SKacheong.Poon@Sun.COM 		 * a timer is needed.
16412056SKacheong.Poon@Sun.COM 		 */
16512056SKacheong.Poon@Sun.COM 		if (tcp_time_wait->tcp_time_wait_tid == 0) {
16612056SKacheong.Poon@Sun.COM 			tcp_time_wait->tcp_time_wait_tid =
16712056SKacheong.Poon@Sun.COM 			    timeout_generic(CALLOUT_NORMAL,
16812056SKacheong.Poon@Sun.COM 			    tcp_time_wait_collector, sqp,
16912056SKacheong.Poon@Sun.COM 			    (hrtime_t)(tcps->tcps_time_wait_interval + 1) *
17012056SKacheong.Poon@Sun.COM 			    MICROSEC, CALLOUT_TCP_RESOLUTION,
17112056SKacheong.Poon@Sun.COM 			    CALLOUT_FLAG_ROUNDUP);
17212056SKacheong.Poon@Sun.COM 		}
17311754SKacheong.Poon@Sun.COM 	} else {
174*12175SKacheong.Poon@Sun.COM 		/*
175*12175SKacheong.Poon@Sun.COM 		 * The list is not empty, so a timer must be running.  If not,
176*12175SKacheong.Poon@Sun.COM 		 * tcp_time_wait_collector() must be running on this
177*12175SKacheong.Poon@Sun.COM 		 * tcp_time_wait list at the same time.
178*12175SKacheong.Poon@Sun.COM 		 */
179*12175SKacheong.Poon@Sun.COM 		ASSERT(tcp_time_wait->tcp_time_wait_tid != 0 ||
180*12175SKacheong.Poon@Sun.COM 		    tcp_time_wait->tcp_time_wait_running);
18111754SKacheong.Poon@Sun.COM 		ASSERT(tcp_time_wait->tcp_time_wait_tail != NULL);
18211754SKacheong.Poon@Sun.COM 		ASSERT(tcp_time_wait->tcp_time_wait_tail->tcp_state ==
18311754SKacheong.Poon@Sun.COM 		    TCPS_TIME_WAIT);
18411754SKacheong.Poon@Sun.COM 		tcp_time_wait->tcp_time_wait_tail->tcp_time_wait_next = tcp;
18511754SKacheong.Poon@Sun.COM 		tcp->tcp_time_wait_prev = tcp_time_wait->tcp_time_wait_tail;
18612056SKacheong.Poon@Sun.COM 
18711754SKacheong.Poon@Sun.COM 	}
18811754SKacheong.Poon@Sun.COM 	tcp_time_wait->tcp_time_wait_tail = tcp;
18911754SKacheong.Poon@Sun.COM 	mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
19011754SKacheong.Poon@Sun.COM }
19111754SKacheong.Poon@Sun.COM 
19211754SKacheong.Poon@Sun.COM /*
19311754SKacheong.Poon@Sun.COM  * Wrapper to call tcp_close_detached() via squeue to clean up TIME-WAIT
19411754SKacheong.Poon@Sun.COM  * tcp_t.  Used in tcp_time_wait_collector().
19511754SKacheong.Poon@Sun.COM  */
19611754SKacheong.Poon@Sun.COM /* ARGSUSED */
19711754SKacheong.Poon@Sun.COM static void
tcp_timewait_close(void * arg,mblk_t * mp,void * arg2,ip_recv_attr_t * dummy)19811754SKacheong.Poon@Sun.COM tcp_timewait_close(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy)
19911754SKacheong.Poon@Sun.COM {
20011754SKacheong.Poon@Sun.COM 	conn_t	*connp = (conn_t *)arg;
20111754SKacheong.Poon@Sun.COM 	tcp_t	*tcp = connp->conn_tcp;
20211754SKacheong.Poon@Sun.COM 
20311754SKacheong.Poon@Sun.COM 	ASSERT(tcp != NULL);
20411754SKacheong.Poon@Sun.COM 	if (tcp->tcp_state == TCPS_CLOSED) {
20511754SKacheong.Poon@Sun.COM 		return;
20611754SKacheong.Poon@Sun.COM 	}
20711754SKacheong.Poon@Sun.COM 
20811754SKacheong.Poon@Sun.COM 	ASSERT((connp->conn_family == AF_INET &&
20911754SKacheong.Poon@Sun.COM 	    connp->conn_ipversion == IPV4_VERSION) ||
21011754SKacheong.Poon@Sun.COM 	    (connp->conn_family == AF_INET6 &&
21111754SKacheong.Poon@Sun.COM 	    (connp->conn_ipversion == IPV4_VERSION ||
21211754SKacheong.Poon@Sun.COM 	    connp->conn_ipversion == IPV6_VERSION)));
21311754SKacheong.Poon@Sun.COM 	ASSERT(!tcp->tcp_listener);
21411754SKacheong.Poon@Sun.COM 
21511754SKacheong.Poon@Sun.COM 	ASSERT(TCP_IS_DETACHED(tcp));
21611754SKacheong.Poon@Sun.COM 
21711754SKacheong.Poon@Sun.COM 	/*
21811754SKacheong.Poon@Sun.COM 	 * Because they have no upstream client to rebind or tcp_close()
21911754SKacheong.Poon@Sun.COM 	 * them later, we axe the connection here and now.
22011754SKacheong.Poon@Sun.COM 	 */
22111754SKacheong.Poon@Sun.COM 	tcp_close_detached(tcp);
22211754SKacheong.Poon@Sun.COM }
22311754SKacheong.Poon@Sun.COM 
22411754SKacheong.Poon@Sun.COM /*
22511754SKacheong.Poon@Sun.COM  * Blows away all tcps whose TIME_WAIT has expired. List traversal
22611754SKacheong.Poon@Sun.COM  * is done forwards from the head.
22711754SKacheong.Poon@Sun.COM  * This walks all stack instances since
22811754SKacheong.Poon@Sun.COM  * tcp_time_wait remains global across all stacks.
22911754SKacheong.Poon@Sun.COM  */
23011754SKacheong.Poon@Sun.COM /* ARGSUSED */
23111754SKacheong.Poon@Sun.COM void
tcp_time_wait_collector(void * arg)23211754SKacheong.Poon@Sun.COM tcp_time_wait_collector(void *arg)
23311754SKacheong.Poon@Sun.COM {
23411754SKacheong.Poon@Sun.COM 	tcp_t *tcp;
23512056SKacheong.Poon@Sun.COM 	int64_t now;
23611754SKacheong.Poon@Sun.COM 	mblk_t *mp;
23711754SKacheong.Poon@Sun.COM 	conn_t *connp;
23811754SKacheong.Poon@Sun.COM 	kmutex_t *lock;
23911754SKacheong.Poon@Sun.COM 	boolean_t removed;
24011754SKacheong.Poon@Sun.COM 	extern void (*cl_inet_disconnect)(netstackid_t, uint8_t, sa_family_t,
24111754SKacheong.Poon@Sun.COM 	    uint8_t *, in_port_t, uint8_t *, in_port_t, void *);
24211754SKacheong.Poon@Sun.COM 
24311754SKacheong.Poon@Sun.COM 	squeue_t *sqp = (squeue_t *)arg;
24411754SKacheong.Poon@Sun.COM 	tcp_squeue_priv_t *tcp_time_wait =
24511754SKacheong.Poon@Sun.COM 	    *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));
24611754SKacheong.Poon@Sun.COM 
24711754SKacheong.Poon@Sun.COM 	mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
24811754SKacheong.Poon@Sun.COM 	tcp_time_wait->tcp_time_wait_tid = 0;
249*12175SKacheong.Poon@Sun.COM #ifdef DEBUG
250*12175SKacheong.Poon@Sun.COM 	tcp_time_wait->tcp_time_wait_running = B_TRUE;
251*12175SKacheong.Poon@Sun.COM #endif
25211754SKacheong.Poon@Sun.COM 
25311754SKacheong.Poon@Sun.COM 	if (tcp_time_wait->tcp_free_list != NULL &&
25411754SKacheong.Poon@Sun.COM 	    tcp_time_wait->tcp_free_list->tcp_in_free_list == B_TRUE) {
25511754SKacheong.Poon@Sun.COM 		TCP_G_STAT(tcp_freelist_cleanup);
25611754SKacheong.Poon@Sun.COM 		while ((tcp = tcp_time_wait->tcp_free_list) != NULL) {
25711754SKacheong.Poon@Sun.COM 			tcp_time_wait->tcp_free_list = tcp->tcp_time_wait_next;
25811754SKacheong.Poon@Sun.COM 			tcp->tcp_time_wait_next = NULL;
25911754SKacheong.Poon@Sun.COM 			tcp_time_wait->tcp_free_list_cnt--;
26011754SKacheong.Poon@Sun.COM 			ASSERT(tcp->tcp_tcps == NULL);
26111754SKacheong.Poon@Sun.COM 			CONN_DEC_REF(tcp->tcp_connp);
26211754SKacheong.Poon@Sun.COM 		}
26311754SKacheong.Poon@Sun.COM 		ASSERT(tcp_time_wait->tcp_free_list_cnt == 0);
26411754SKacheong.Poon@Sun.COM 	}
26511754SKacheong.Poon@Sun.COM 
26611754SKacheong.Poon@Sun.COM 	/*
26711754SKacheong.Poon@Sun.COM 	 * In order to reap time waits reliably, we should use a
26811754SKacheong.Poon@Sun.COM 	 * source of time that is not adjustable by the user -- hence
26912056SKacheong.Poon@Sun.COM 	 * the call to ddi_get_lbolt64().
27011754SKacheong.Poon@Sun.COM 	 */
27112056SKacheong.Poon@Sun.COM 	now = ddi_get_lbolt64();
27211754SKacheong.Poon@Sun.COM 	while ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL) {
27311754SKacheong.Poon@Sun.COM 		/*
27412056SKacheong.Poon@Sun.COM 		 * lbolt64 should not wrap around in practice...  So we can
27512056SKacheong.Poon@Sun.COM 		 * do a direct comparison.
27611754SKacheong.Poon@Sun.COM 		 */
27712056SKacheong.Poon@Sun.COM 		if (now < tcp->tcp_time_wait_expire)
27811754SKacheong.Poon@Sun.COM 			break;
27911754SKacheong.Poon@Sun.COM 
28011754SKacheong.Poon@Sun.COM 		removed = tcp_time_wait_remove(tcp, tcp_time_wait);
28111754SKacheong.Poon@Sun.COM 		ASSERT(removed);
28211754SKacheong.Poon@Sun.COM 
28311754SKacheong.Poon@Sun.COM 		connp = tcp->tcp_connp;
28411754SKacheong.Poon@Sun.COM 		ASSERT(connp->conn_fanout != NULL);
28511754SKacheong.Poon@Sun.COM 		lock = &connp->conn_fanout->connf_lock;
28611754SKacheong.Poon@Sun.COM 		/*
28711754SKacheong.Poon@Sun.COM 		 * This is essentially a TW reclaim fast path optimization for
28811754SKacheong.Poon@Sun.COM 		 * performance where the timewait collector checks under the
28911754SKacheong.Poon@Sun.COM 		 * fanout lock (so that no one else can get access to the
29011754SKacheong.Poon@Sun.COM 		 * conn_t) that the refcnt is 2 i.e. one for TCP and one for
29111754SKacheong.Poon@Sun.COM 		 * the classifier hash list. If ref count is indeed 2, we can
29211754SKacheong.Poon@Sun.COM 		 * just remove the conn under the fanout lock and avoid
29311754SKacheong.Poon@Sun.COM 		 * cleaning up the conn under the squeue, provided that
29411754SKacheong.Poon@Sun.COM 		 * clustering callbacks are not enabled. If clustering is
29511754SKacheong.Poon@Sun.COM 		 * enabled, we need to make the clustering callback before
29611754SKacheong.Poon@Sun.COM 		 * setting the CONDEMNED flag and after dropping all locks and
29711754SKacheong.Poon@Sun.COM 		 * so we forego this optimization and fall back to the slow
29811754SKacheong.Poon@Sun.COM 		 * path. Also please see the comments in tcp_closei_local
29911754SKacheong.Poon@Sun.COM 		 * regarding the refcnt logic.
30011754SKacheong.Poon@Sun.COM 		 *
30111754SKacheong.Poon@Sun.COM 		 * Since we are holding the tcp_time_wait_lock, its better
30211754SKacheong.Poon@Sun.COM 		 * not to block on the fanout_lock because other connections
30311754SKacheong.Poon@Sun.COM 		 * can't add themselves to time_wait list. So we do a
30411754SKacheong.Poon@Sun.COM 		 * tryenter instead of mutex_enter.
30511754SKacheong.Poon@Sun.COM 		 */
30611754SKacheong.Poon@Sun.COM 		if (mutex_tryenter(lock)) {
30711754SKacheong.Poon@Sun.COM 			mutex_enter(&connp->conn_lock);
30811754SKacheong.Poon@Sun.COM 			if ((connp->conn_ref == 2) &&
30911754SKacheong.Poon@Sun.COM 			    (cl_inet_disconnect == NULL)) {
31011754SKacheong.Poon@Sun.COM 				ipcl_hash_remove_locked(connp,
31111754SKacheong.Poon@Sun.COM 				    connp->conn_fanout);
31211754SKacheong.Poon@Sun.COM 				/*
31311754SKacheong.Poon@Sun.COM 				 * Set the CONDEMNED flag now itself so that
31411754SKacheong.Poon@Sun.COM 				 * the refcnt cannot increase due to any
31511754SKacheong.Poon@Sun.COM 				 * walker.
31611754SKacheong.Poon@Sun.COM 				 */
31711754SKacheong.Poon@Sun.COM 				connp->conn_state_flags |= CONN_CONDEMNED;
31811754SKacheong.Poon@Sun.COM 				mutex_exit(lock);
31911754SKacheong.Poon@Sun.COM 				mutex_exit(&connp->conn_lock);
32011754SKacheong.Poon@Sun.COM 				if (tcp_time_wait->tcp_free_list_cnt <
32111754SKacheong.Poon@Sun.COM 				    tcp_free_list_max_cnt) {
32211754SKacheong.Poon@Sun.COM 					/* Add to head of tcp_free_list */
32311754SKacheong.Poon@Sun.COM 					mutex_exit(
32411754SKacheong.Poon@Sun.COM 					    &tcp_time_wait->tcp_time_wait_lock);
32511754SKacheong.Poon@Sun.COM 					tcp_cleanup(tcp);
32611754SKacheong.Poon@Sun.COM 					ASSERT(connp->conn_latch == NULL);
32711754SKacheong.Poon@Sun.COM 					ASSERT(connp->conn_policy == NULL);
32811754SKacheong.Poon@Sun.COM 					ASSERT(tcp->tcp_tcps == NULL);
32911754SKacheong.Poon@Sun.COM 					ASSERT(connp->conn_netstack == NULL);
33011754SKacheong.Poon@Sun.COM 
33111754SKacheong.Poon@Sun.COM 					mutex_enter(
33211754SKacheong.Poon@Sun.COM 					    &tcp_time_wait->tcp_time_wait_lock);
33311754SKacheong.Poon@Sun.COM 					tcp->tcp_time_wait_next =
33411754SKacheong.Poon@Sun.COM 					    tcp_time_wait->tcp_free_list;
33511754SKacheong.Poon@Sun.COM 					tcp_time_wait->tcp_free_list = tcp;
33611754SKacheong.Poon@Sun.COM 					tcp_time_wait->tcp_free_list_cnt++;
33711754SKacheong.Poon@Sun.COM 					continue;
33811754SKacheong.Poon@Sun.COM 				} else {
33911754SKacheong.Poon@Sun.COM 					/* Do not add to tcp_free_list */
34011754SKacheong.Poon@Sun.COM 					mutex_exit(
34111754SKacheong.Poon@Sun.COM 					    &tcp_time_wait->tcp_time_wait_lock);
34211754SKacheong.Poon@Sun.COM 					tcp_bind_hash_remove(tcp);
34311754SKacheong.Poon@Sun.COM 					ixa_cleanup(tcp->tcp_connp->conn_ixa);
34411754SKacheong.Poon@Sun.COM 					tcp_ipsec_cleanup(tcp);
34511754SKacheong.Poon@Sun.COM 					CONN_DEC_REF(tcp->tcp_connp);
34611754SKacheong.Poon@Sun.COM 				}
34711754SKacheong.Poon@Sun.COM 			} else {
34811754SKacheong.Poon@Sun.COM 				CONN_INC_REF_LOCKED(connp);
34911754SKacheong.Poon@Sun.COM 				mutex_exit(lock);
35011754SKacheong.Poon@Sun.COM 				mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
35111754SKacheong.Poon@Sun.COM 				mutex_exit(&connp->conn_lock);
35211754SKacheong.Poon@Sun.COM 				/*
35311754SKacheong.Poon@Sun.COM 				 * We can reuse the closemp here since conn has
35411754SKacheong.Poon@Sun.COM 				 * detached (otherwise we wouldn't even be in
35511754SKacheong.Poon@Sun.COM 				 * time_wait list). tcp_closemp_used can safely
35611754SKacheong.Poon@Sun.COM 				 * be changed without taking a lock as no other
35711754SKacheong.Poon@Sun.COM 				 * thread can concurrently access it at this
35811754SKacheong.Poon@Sun.COM 				 * point in the connection lifecycle.
35911754SKacheong.Poon@Sun.COM 				 */
36011754SKacheong.Poon@Sun.COM 
36111754SKacheong.Poon@Sun.COM 				if (tcp->tcp_closemp.b_prev == NULL)
36211754SKacheong.Poon@Sun.COM 					tcp->tcp_closemp_used = B_TRUE;
36311754SKacheong.Poon@Sun.COM 				else
36411754SKacheong.Poon@Sun.COM 					cmn_err(CE_PANIC,
36511754SKacheong.Poon@Sun.COM 					    "tcp_timewait_collector: "
36611754SKacheong.Poon@Sun.COM 					    "concurrent use of tcp_closemp: "
36711754SKacheong.Poon@Sun.COM 					    "connp %p tcp %p\n", (void *)connp,
36811754SKacheong.Poon@Sun.COM 					    (void *)tcp);
36911754SKacheong.Poon@Sun.COM 
37011754SKacheong.Poon@Sun.COM 				TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
37111754SKacheong.Poon@Sun.COM 				mp = &tcp->tcp_closemp;
37211754SKacheong.Poon@Sun.COM 				SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
37311754SKacheong.Poon@Sun.COM 				    tcp_timewait_close, connp, NULL,
37411754SKacheong.Poon@Sun.COM 				    SQ_FILL, SQTAG_TCP_TIMEWAIT);
37511754SKacheong.Poon@Sun.COM 			}
37611754SKacheong.Poon@Sun.COM 		} else {
37711754SKacheong.Poon@Sun.COM 			mutex_enter(&connp->conn_lock);
37811754SKacheong.Poon@Sun.COM 			CONN_INC_REF_LOCKED(connp);
37911754SKacheong.Poon@Sun.COM 			mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
38011754SKacheong.Poon@Sun.COM 			mutex_exit(&connp->conn_lock);
38111754SKacheong.Poon@Sun.COM 			/*
38211754SKacheong.Poon@Sun.COM 			 * We can reuse the closemp here since conn has
38311754SKacheong.Poon@Sun.COM 			 * detached (otherwise we wouldn't even be in
38411754SKacheong.Poon@Sun.COM 			 * time_wait list). tcp_closemp_used can safely
38511754SKacheong.Poon@Sun.COM 			 * be changed without taking a lock as no other
38611754SKacheong.Poon@Sun.COM 			 * thread can concurrently access it at this
38711754SKacheong.Poon@Sun.COM 			 * point in the connection lifecycle.
38811754SKacheong.Poon@Sun.COM 			 */
38911754SKacheong.Poon@Sun.COM 
39011754SKacheong.Poon@Sun.COM 			if (tcp->tcp_closemp.b_prev == NULL)
39111754SKacheong.Poon@Sun.COM 				tcp->tcp_closemp_used = B_TRUE;
39211754SKacheong.Poon@Sun.COM 			else
39311754SKacheong.Poon@Sun.COM 				cmn_err(CE_PANIC, "tcp_timewait_collector: "
39411754SKacheong.Poon@Sun.COM 				    "concurrent use of tcp_closemp: "
39511754SKacheong.Poon@Sun.COM 				    "connp %p tcp %p\n", (void *)connp,
39611754SKacheong.Poon@Sun.COM 				    (void *)tcp);
39711754SKacheong.Poon@Sun.COM 
39811754SKacheong.Poon@Sun.COM 			TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
39911754SKacheong.Poon@Sun.COM 			mp = &tcp->tcp_closemp;
40011754SKacheong.Poon@Sun.COM 			SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
40111754SKacheong.Poon@Sun.COM 			    tcp_timewait_close, connp, NULL,
40211754SKacheong.Poon@Sun.COM 			    SQ_FILL, SQTAG_TCP_TIMEWAIT);
40311754SKacheong.Poon@Sun.COM 		}
40411754SKacheong.Poon@Sun.COM 		mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
40511754SKacheong.Poon@Sun.COM 	}
40611754SKacheong.Poon@Sun.COM 
40711754SKacheong.Poon@Sun.COM 	if (tcp_time_wait->tcp_free_list != NULL)
40811754SKacheong.Poon@Sun.COM 		tcp_time_wait->tcp_free_list->tcp_in_free_list = B_TRUE;
40911754SKacheong.Poon@Sun.COM 
41012056SKacheong.Poon@Sun.COM 	/*
41112056SKacheong.Poon@Sun.COM 	 * If the time wait list is not empty and there is no timer running,
41212056SKacheong.Poon@Sun.COM 	 * restart it.
41312056SKacheong.Poon@Sun.COM 	 */
41412056SKacheong.Poon@Sun.COM 	if ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL &&
41512056SKacheong.Poon@Sun.COM 	    tcp_time_wait->tcp_time_wait_tid == 0) {
41612056SKacheong.Poon@Sun.COM 		hrtime_t firetime;
41712056SKacheong.Poon@Sun.COM 
41812056SKacheong.Poon@Sun.COM 		firetime = TICK_TO_NSEC(tcp->tcp_time_wait_expire - now);
41912056SKacheong.Poon@Sun.COM 		/* This ensures that we won't wake up too often. */
42012056SKacheong.Poon@Sun.COM 		firetime = MAX(TCP_TIME_WAIT_DELAY, firetime);
42112056SKacheong.Poon@Sun.COM 		tcp_time_wait->tcp_time_wait_tid =
42212056SKacheong.Poon@Sun.COM 		    timeout_generic(CALLOUT_NORMAL, tcp_time_wait_collector,
42312056SKacheong.Poon@Sun.COM 		    sqp, firetime, CALLOUT_TCP_RESOLUTION,
42412056SKacheong.Poon@Sun.COM 		    CALLOUT_FLAG_ROUNDUP);
42512056SKacheong.Poon@Sun.COM 	}
426*12175SKacheong.Poon@Sun.COM #ifdef DEBUG
427*12175SKacheong.Poon@Sun.COM 	tcp_time_wait->tcp_time_wait_running = B_FALSE;
428*12175SKacheong.Poon@Sun.COM #endif
42911754SKacheong.Poon@Sun.COM 	mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
43011754SKacheong.Poon@Sun.COM }
43111754SKacheong.Poon@Sun.COM 
43211754SKacheong.Poon@Sun.COM /*
43311754SKacheong.Poon@Sun.COM  * tcp_time_wait_processing() handles processing of incoming packets when
43411754SKacheong.Poon@Sun.COM  * the tcp_t is in the TIME_WAIT state.
43511754SKacheong.Poon@Sun.COM  *
43611754SKacheong.Poon@Sun.COM  * A TIME_WAIT tcp_t that has an associated open TCP end point (not in
43711754SKacheong.Poon@Sun.COM  * detached state) is never put on the time wait list.
43811754SKacheong.Poon@Sun.COM  */
43911754SKacheong.Poon@Sun.COM void
tcp_time_wait_processing(tcp_t * tcp,mblk_t * mp,uint32_t seg_seq,uint32_t seg_ack,int seg_len,tcpha_t * tcpha,ip_recv_attr_t * ira)44011754SKacheong.Poon@Sun.COM tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq,
44111754SKacheong.Poon@Sun.COM     uint32_t seg_ack, int seg_len, tcpha_t *tcpha, ip_recv_attr_t *ira)
44211754SKacheong.Poon@Sun.COM {
44311754SKacheong.Poon@Sun.COM 	int32_t		bytes_acked;
44411754SKacheong.Poon@Sun.COM 	int32_t		gap;
44511754SKacheong.Poon@Sun.COM 	int32_t		rgap;
44611754SKacheong.Poon@Sun.COM 	tcp_opt_t	tcpopt;
44711754SKacheong.Poon@Sun.COM 	uint_t		flags;
44811754SKacheong.Poon@Sun.COM 	uint32_t	new_swnd = 0;
44911754SKacheong.Poon@Sun.COM 	conn_t		*nconnp;
45011754SKacheong.Poon@Sun.COM 	conn_t		*connp = tcp->tcp_connp;
45111754SKacheong.Poon@Sun.COM 	tcp_stack_t	*tcps = tcp->tcp_tcps;
45211754SKacheong.Poon@Sun.COM 
45311754SKacheong.Poon@Sun.COM 	BUMP_LOCAL(tcp->tcp_ibsegs);
45411754SKacheong.Poon@Sun.COM 	DTRACE_PROBE2(tcp__trace__recv, mblk_t *, mp, tcp_t *, tcp);
45511754SKacheong.Poon@Sun.COM 
45611754SKacheong.Poon@Sun.COM 	flags = (unsigned int)tcpha->tha_flags & 0xFF;
45711754SKacheong.Poon@Sun.COM 	new_swnd = ntohs(tcpha->tha_win) <<
45811754SKacheong.Poon@Sun.COM 	    ((tcpha->tha_flags & TH_SYN) ? 0 : tcp->tcp_snd_ws);
45911754SKacheong.Poon@Sun.COM 	if (tcp->tcp_snd_ts_ok) {
46011754SKacheong.Poon@Sun.COM 		if (!tcp_paws_check(tcp, tcpha, &tcpopt)) {
46111754SKacheong.Poon@Sun.COM 			tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
46211754SKacheong.Poon@Sun.COM 			    tcp->tcp_rnxt, TH_ACK);
46311754SKacheong.Poon@Sun.COM 			goto done;
46411754SKacheong.Poon@Sun.COM 		}
46511754SKacheong.Poon@Sun.COM 	}
46611754SKacheong.Poon@Sun.COM 	gap = seg_seq - tcp->tcp_rnxt;
46711754SKacheong.Poon@Sun.COM 	rgap = tcp->tcp_rwnd - (gap + seg_len);
46811754SKacheong.Poon@Sun.COM 	if (gap < 0) {
46911754SKacheong.Poon@Sun.COM 		TCPS_BUMP_MIB(tcps, tcpInDataDupSegs);
47011754SKacheong.Poon@Sun.COM 		TCPS_UPDATE_MIB(tcps, tcpInDataDupBytes,
47111754SKacheong.Poon@Sun.COM 		    (seg_len > -gap ? -gap : seg_len));
47211754SKacheong.Poon@Sun.COM 		seg_len += gap;
47311754SKacheong.Poon@Sun.COM 		if (seg_len < 0 || (seg_len == 0 && !(flags & TH_FIN))) {
47411754SKacheong.Poon@Sun.COM 			if (flags & TH_RST) {
47511754SKacheong.Poon@Sun.COM 				goto done;
47611754SKacheong.Poon@Sun.COM 			}
47711754SKacheong.Poon@Sun.COM 			if ((flags & TH_FIN) && seg_len == -1) {
47811754SKacheong.Poon@Sun.COM 				/*
47911754SKacheong.Poon@Sun.COM 				 * When TCP receives a duplicate FIN in
48011754SKacheong.Poon@Sun.COM 				 * TIME_WAIT state, restart the 2 MSL timer.
48111754SKacheong.Poon@Sun.COM 				 * See page 73 in RFC 793. Make sure this TCP
48211754SKacheong.Poon@Sun.COM 				 * is already on the TIME_WAIT list. If not,
48311754SKacheong.Poon@Sun.COM 				 * just restart the timer.
48411754SKacheong.Poon@Sun.COM 				 */
48511754SKacheong.Poon@Sun.COM 				if (TCP_IS_DETACHED(tcp)) {
48611754SKacheong.Poon@Sun.COM 					if (tcp_time_wait_remove(tcp, NULL) ==
48711754SKacheong.Poon@Sun.COM 					    B_TRUE) {
48811754SKacheong.Poon@Sun.COM 						tcp_time_wait_append(tcp);
48911754SKacheong.Poon@Sun.COM 						TCP_DBGSTAT(tcps,
49011754SKacheong.Poon@Sun.COM 						    tcp_rput_time_wait);
49111754SKacheong.Poon@Sun.COM 					}
49211754SKacheong.Poon@Sun.COM 				} else {
49311754SKacheong.Poon@Sun.COM 					ASSERT(tcp != NULL);
49411754SKacheong.Poon@Sun.COM 					TCP_TIMER_RESTART(tcp,
49511754SKacheong.Poon@Sun.COM 					    tcps->tcps_time_wait_interval);
49611754SKacheong.Poon@Sun.COM 				}
49711754SKacheong.Poon@Sun.COM 				tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
49811754SKacheong.Poon@Sun.COM 				    tcp->tcp_rnxt, TH_ACK);
49911754SKacheong.Poon@Sun.COM 				goto done;
50011754SKacheong.Poon@Sun.COM 			}
50111754SKacheong.Poon@Sun.COM 			flags |=  TH_ACK_NEEDED;
50211754SKacheong.Poon@Sun.COM 			seg_len = 0;
50311754SKacheong.Poon@Sun.COM 			goto process_ack;
50411754SKacheong.Poon@Sun.COM 		}
50511754SKacheong.Poon@Sun.COM 
50611754SKacheong.Poon@Sun.COM 		/* Fix seg_seq, and chew the gap off the front. */
50711754SKacheong.Poon@Sun.COM 		seg_seq = tcp->tcp_rnxt;
50811754SKacheong.Poon@Sun.COM 	}
50911754SKacheong.Poon@Sun.COM 
51011754SKacheong.Poon@Sun.COM 	if ((flags & TH_SYN) && gap > 0 && rgap < 0) {
51111754SKacheong.Poon@Sun.COM 		/*
51211754SKacheong.Poon@Sun.COM 		 * Make sure that when we accept the connection, pick
51311754SKacheong.Poon@Sun.COM 		 * an ISS greater than (tcp_snxt + ISS_INCR/2) for the
51411754SKacheong.Poon@Sun.COM 		 * old connection.
51511754SKacheong.Poon@Sun.COM 		 *
51611754SKacheong.Poon@Sun.COM 		 * The next ISS generated is equal to tcp_iss_incr_extra
51711754SKacheong.Poon@Sun.COM 		 * + ISS_INCR/2 + other components depending on the
51811754SKacheong.Poon@Sun.COM 		 * value of tcp_strong_iss.  We pre-calculate the new
51911754SKacheong.Poon@Sun.COM 		 * ISS here and compare with tcp_snxt to determine if
52011754SKacheong.Poon@Sun.COM 		 * we need to make adjustment to tcp_iss_incr_extra.
52111754SKacheong.Poon@Sun.COM 		 *
52211754SKacheong.Poon@Sun.COM 		 * The above calculation is ugly and is a
52311754SKacheong.Poon@Sun.COM 		 * waste of CPU cycles...
52411754SKacheong.Poon@Sun.COM 		 */
52511754SKacheong.Poon@Sun.COM 		uint32_t new_iss = tcps->tcps_iss_incr_extra;
52611754SKacheong.Poon@Sun.COM 		int32_t adj;
52711754SKacheong.Poon@Sun.COM 		ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip;
52811754SKacheong.Poon@Sun.COM 
52911754SKacheong.Poon@Sun.COM 		switch (tcps->tcps_strong_iss) {
53011754SKacheong.Poon@Sun.COM 		case 2: {
53111754SKacheong.Poon@Sun.COM 			/* Add time and MD5 components. */
53211754SKacheong.Poon@Sun.COM 			uint32_t answer[4];
53311754SKacheong.Poon@Sun.COM 			struct {
53411754SKacheong.Poon@Sun.COM 				uint32_t ports;
53511754SKacheong.Poon@Sun.COM 				in6_addr_t src;
53611754SKacheong.Poon@Sun.COM 				in6_addr_t dst;
53711754SKacheong.Poon@Sun.COM 			} arg;
53811754SKacheong.Poon@Sun.COM 			MD5_CTX context;
53911754SKacheong.Poon@Sun.COM 
54011754SKacheong.Poon@Sun.COM 			mutex_enter(&tcps->tcps_iss_key_lock);
54111754SKacheong.Poon@Sun.COM 			context = tcps->tcps_iss_key;
54211754SKacheong.Poon@Sun.COM 			mutex_exit(&tcps->tcps_iss_key_lock);
54311754SKacheong.Poon@Sun.COM 			arg.ports = connp->conn_ports;
54411754SKacheong.Poon@Sun.COM 			/* We use MAPPED addresses in tcp_iss_init */
54511754SKacheong.Poon@Sun.COM 			arg.src = connp->conn_laddr_v6;
54611754SKacheong.Poon@Sun.COM 			arg.dst = connp->conn_faddr_v6;
54711754SKacheong.Poon@Sun.COM 			MD5Update(&context, (uchar_t *)&arg,
54811754SKacheong.Poon@Sun.COM 			    sizeof (arg));
54911754SKacheong.Poon@Sun.COM 			MD5Final((uchar_t *)answer, &context);
55011754SKacheong.Poon@Sun.COM 			answer[0] ^= answer[1] ^ answer[2] ^ answer[3];
55111754SKacheong.Poon@Sun.COM 			new_iss += (gethrtime() >> ISS_NSEC_SHT) + answer[0];
55211754SKacheong.Poon@Sun.COM 			break;
55311754SKacheong.Poon@Sun.COM 		}
55411754SKacheong.Poon@Sun.COM 		case 1:
55511754SKacheong.Poon@Sun.COM 			/* Add time component and min random (i.e. 1). */
55611754SKacheong.Poon@Sun.COM 			new_iss += (gethrtime() >> ISS_NSEC_SHT) + 1;
55711754SKacheong.Poon@Sun.COM 			break;
55811754SKacheong.Poon@Sun.COM 		default:
55911754SKacheong.Poon@Sun.COM 			/* Add only time component. */
56011754SKacheong.Poon@Sun.COM 			new_iss += (uint32_t)gethrestime_sec() * ISS_INCR;
56111754SKacheong.Poon@Sun.COM 			break;
56211754SKacheong.Poon@Sun.COM 		}
56311754SKacheong.Poon@Sun.COM 		if ((adj = (int32_t)(tcp->tcp_snxt - new_iss)) > 0) {
56411754SKacheong.Poon@Sun.COM 			/*
56511754SKacheong.Poon@Sun.COM 			 * New ISS not guaranteed to be ISS_INCR/2
56611754SKacheong.Poon@Sun.COM 			 * ahead of the current tcp_snxt, so add the
56711754SKacheong.Poon@Sun.COM 			 * difference to tcp_iss_incr_extra.
56811754SKacheong.Poon@Sun.COM 			 */
56911754SKacheong.Poon@Sun.COM 			tcps->tcps_iss_incr_extra += adj;
57011754SKacheong.Poon@Sun.COM 		}
57111754SKacheong.Poon@Sun.COM 		/*
57211754SKacheong.Poon@Sun.COM 		 * If tcp_clean_death() can not perform the task now,
57311754SKacheong.Poon@Sun.COM 		 * drop the SYN packet and let the other side re-xmit.
57411754SKacheong.Poon@Sun.COM 		 * Otherwise pass the SYN packet back in, since the
57511754SKacheong.Poon@Sun.COM 		 * old tcp state has been cleaned up or freed.
57611754SKacheong.Poon@Sun.COM 		 */
57711754SKacheong.Poon@Sun.COM 		if (tcp_clean_death(tcp, 0) == -1)
57811754SKacheong.Poon@Sun.COM 			goto done;
57911754SKacheong.Poon@Sun.COM 		nconnp = ipcl_classify(mp, ira, ipst);
58011754SKacheong.Poon@Sun.COM 		if (nconnp != NULL) {
58111754SKacheong.Poon@Sun.COM 			TCP_STAT(tcps, tcp_time_wait_syn_success);
58211754SKacheong.Poon@Sun.COM 			/* Drops ref on nconnp */
58311754SKacheong.Poon@Sun.COM 			tcp_reinput(nconnp, mp, ira, ipst);
58411754SKacheong.Poon@Sun.COM 			return;
58511754SKacheong.Poon@Sun.COM 		}
58611754SKacheong.Poon@Sun.COM 		goto done;
58711754SKacheong.Poon@Sun.COM 	}
58811754SKacheong.Poon@Sun.COM 
58911754SKacheong.Poon@Sun.COM 	/*
59011754SKacheong.Poon@Sun.COM 	 * rgap is negative when the segment extends past the window; in
59111754SKacheong.Poon@Sun.COM 	 * that case -rgap is the number of bytes that are out of window.
59211754SKacheong.Poon@Sun.COM 	 */
59311754SKacheong.Poon@Sun.COM 	if (rgap < 0) {
59411754SKacheong.Poon@Sun.COM 		TCPS_BUMP_MIB(tcps, tcpInDataPastWinSegs);
59511754SKacheong.Poon@Sun.COM 		TCPS_UPDATE_MIB(tcps, tcpInDataPastWinBytes, -rgap);
59611754SKacheong.Poon@Sun.COM 		/* Fix seg_len and make sure there is something left. */
59711754SKacheong.Poon@Sun.COM 		seg_len += rgap;
59811754SKacheong.Poon@Sun.COM 		if (seg_len <= 0) {
59911754SKacheong.Poon@Sun.COM 			if (flags & TH_RST) {
60011754SKacheong.Poon@Sun.COM 				goto done;
60111754SKacheong.Poon@Sun.COM 			}
60211754SKacheong.Poon@Sun.COM 			flags |=  TH_ACK_NEEDED;
60311754SKacheong.Poon@Sun.COM 			seg_len = 0;
60411754SKacheong.Poon@Sun.COM 			goto process_ack;
60511754SKacheong.Poon@Sun.COM 		}
60611754SKacheong.Poon@Sun.COM 	}
60711754SKacheong.Poon@Sun.COM 	/*
60811754SKacheong.Poon@Sun.COM 	 * Check whether we can update tcp_ts_recent.  This test is
60911754SKacheong.Poon@Sun.COM 	 * NOT the one in RFC 1323 3.4.  It is from Braden, 1993, "TCP
61011754SKacheong.Poon@Sun.COM 	 * Extensions for High Performance: An Update", Internet Draft.
61111754SKacheong.Poon@Sun.COM 	 */
61211754SKacheong.Poon@Sun.COM 	if (tcp->tcp_snd_ts_ok &&
61311754SKacheong.Poon@Sun.COM 	    TSTMP_GEQ(tcpopt.tcp_opt_ts_val, tcp->tcp_ts_recent) &&
61411754SKacheong.Poon@Sun.COM 	    SEQ_LEQ(seg_seq, tcp->tcp_rack)) {
61511754SKacheong.Poon@Sun.COM 		tcp->tcp_ts_recent = tcpopt.tcp_opt_ts_val;
61611754SKacheong.Poon@Sun.COM 		tcp->tcp_last_rcv_lbolt = ddi_get_lbolt64();
61711754SKacheong.Poon@Sun.COM 	}
61811754SKacheong.Poon@Sun.COM 
61911754SKacheong.Poon@Sun.COM 	if (seg_seq != tcp->tcp_rnxt && seg_len > 0) {
62011754SKacheong.Poon@Sun.COM 		/* Always ack out of order packets */
62111754SKacheong.Poon@Sun.COM 		flags |= TH_ACK_NEEDED;
62211754SKacheong.Poon@Sun.COM 		seg_len = 0;
62311754SKacheong.Poon@Sun.COM 	} else if (seg_len > 0) {
62411754SKacheong.Poon@Sun.COM 		TCPS_BUMP_MIB(tcps, tcpInClosed);
62511754SKacheong.Poon@Sun.COM 		TCPS_BUMP_MIB(tcps, tcpInDataInorderSegs);
62611754SKacheong.Poon@Sun.COM 		TCPS_UPDATE_MIB(tcps, tcpInDataInorderBytes, seg_len);
62711754SKacheong.Poon@Sun.COM 	}
62811754SKacheong.Poon@Sun.COM 	if (flags & TH_RST) {
62911754SKacheong.Poon@Sun.COM 		(void) tcp_clean_death(tcp, 0);
63011754SKacheong.Poon@Sun.COM 		goto done;
63111754SKacheong.Poon@Sun.COM 	}
63211754SKacheong.Poon@Sun.COM 	if (flags & TH_SYN) {
63311754SKacheong.Poon@Sun.COM 		tcp_xmit_ctl("TH_SYN", tcp, seg_ack, seg_seq + 1,
63411754SKacheong.Poon@Sun.COM 		    TH_RST|TH_ACK);
63511754SKacheong.Poon@Sun.COM 		/*
63611754SKacheong.Poon@Sun.COM 		 * Do not delete the TCP structure if it is in
63711754SKacheong.Poon@Sun.COM 		 * TIME_WAIT state.  Refer to RFC 1122, 4.2.2.13.
63811754SKacheong.Poon@Sun.COM 		 */
63911754SKacheong.Poon@Sun.COM 		goto done;
64011754SKacheong.Poon@Sun.COM 	}
64111754SKacheong.Poon@Sun.COM process_ack:
64211754SKacheong.Poon@Sun.COM 	if (flags & TH_ACK) {
64311754SKacheong.Poon@Sun.COM 		bytes_acked = (int)(seg_ack - tcp->tcp_suna);
64411754SKacheong.Poon@Sun.COM 		if (bytes_acked <= 0) {
64511754SKacheong.Poon@Sun.COM 			if (bytes_acked == 0 && seg_len == 0 &&
64611754SKacheong.Poon@Sun.COM 			    new_swnd == tcp->tcp_swnd)
64711754SKacheong.Poon@Sun.COM 				TCPS_BUMP_MIB(tcps, tcpInDupAck);
64811754SKacheong.Poon@Sun.COM 		} else {
64911754SKacheong.Poon@Sun.COM 			/* Acks something not sent */
65011754SKacheong.Poon@Sun.COM 			flags |= TH_ACK_NEEDED;
65111754SKacheong.Poon@Sun.COM 		}
65211754SKacheong.Poon@Sun.COM 	}
65311754SKacheong.Poon@Sun.COM 	if (flags & TH_ACK_NEEDED) {
65411754SKacheong.Poon@Sun.COM 		/*
65511754SKacheong.Poon@Sun.COM 		 * Time to send an ack for some reason.
65611754SKacheong.Poon@Sun.COM 		 */
65711754SKacheong.Poon@Sun.COM 		tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
65811754SKacheong.Poon@Sun.COM 		    tcp->tcp_rnxt, TH_ACK);
65911754SKacheong.Poon@Sun.COM 	}
66011754SKacheong.Poon@Sun.COM done:
66111754SKacheong.Poon@Sun.COM 	freemsg(mp);
66211754SKacheong.Poon@Sun.COM }
663