xref: /onnv-gate/usr/src/uts/common/inet/tcp/tcp_tpi.c (revision 12644:4f9a0cd40c5f)
111755SKacheong.Poon@Sun.COM /*
211755SKacheong.Poon@Sun.COM  * CDDL HEADER START
311755SKacheong.Poon@Sun.COM  *
411755SKacheong.Poon@Sun.COM  * The contents of this file are subject to the terms of the
511755SKacheong.Poon@Sun.COM  * Common Development and Distribution License (the "License").
611755SKacheong.Poon@Sun.COM  * You may not use this file except in compliance with the License.
711755SKacheong.Poon@Sun.COM  *
811755SKacheong.Poon@Sun.COM  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
911755SKacheong.Poon@Sun.COM  * or http://www.opensolaris.org/os/licensing.
1011755SKacheong.Poon@Sun.COM  * See the License for the specific language governing permissions
1111755SKacheong.Poon@Sun.COM  * and limitations under the License.
1211755SKacheong.Poon@Sun.COM  *
1311755SKacheong.Poon@Sun.COM  * When distributing Covered Code, include this CDDL HEADER in each
1411755SKacheong.Poon@Sun.COM  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1511755SKacheong.Poon@Sun.COM  * If applicable, add the following below this CDDL HEADER, with the
1611755SKacheong.Poon@Sun.COM  * fields enclosed by brackets "[]" replaced with your own identifying
1711755SKacheong.Poon@Sun.COM  * information: Portions Copyright [yyyy] [name of copyright owner]
1811755SKacheong.Poon@Sun.COM  *
1911755SKacheong.Poon@Sun.COM  * CDDL HEADER END
2011755SKacheong.Poon@Sun.COM  */
2111755SKacheong.Poon@Sun.COM 
2211755SKacheong.Poon@Sun.COM /*
2312643SAnders.Persson@Sun.COM  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
2411755SKacheong.Poon@Sun.COM  */
2511755SKacheong.Poon@Sun.COM 
/* This file contains all TCP TLI/TPI related functions */
2711755SKacheong.Poon@Sun.COM 
2811755SKacheong.Poon@Sun.COM #include <sys/types.h>
2911755SKacheong.Poon@Sun.COM #include <sys/stream.h>
3011755SKacheong.Poon@Sun.COM #include <sys/strsun.h>
3111755SKacheong.Poon@Sun.COM #include <sys/strsubr.h>
3211755SKacheong.Poon@Sun.COM #include <sys/stropts.h>
3311755SKacheong.Poon@Sun.COM #include <sys/strlog.h>
3411755SKacheong.Poon@Sun.COM #define	_SUN_TPI_VERSION 2
3511755SKacheong.Poon@Sun.COM #include <sys/tihdr.h>
3611755SKacheong.Poon@Sun.COM #include <sys/suntpi.h>
3711755SKacheong.Poon@Sun.COM #include <sys/xti_inet.h>
3811755SKacheong.Poon@Sun.COM #include <sys/squeue_impl.h>
3911755SKacheong.Poon@Sun.COM #include <sys/squeue.h>
4011755SKacheong.Poon@Sun.COM 
4111755SKacheong.Poon@Sun.COM #include <inet/common.h>
4211755SKacheong.Poon@Sun.COM #include <inet/ip.h>
4311755SKacheong.Poon@Sun.COM #include <inet/tcp.h>
4411755SKacheong.Poon@Sun.COM #include <inet/tcp_impl.h>
4511755SKacheong.Poon@Sun.COM #include <inet/proto_set.h>
4611755SKacheong.Poon@Sun.COM 
4711755SKacheong.Poon@Sun.COM static void	tcp_accept_swap(tcp_t *, tcp_t *, tcp_t *);
4811755SKacheong.Poon@Sun.COM static int	tcp_conprim_opt_process(tcp_t *, mblk_t *, int *, int *, int *);
4911755SKacheong.Poon@Sun.COM 
5011755SKacheong.Poon@Sun.COM void
tcp_use_pure_tpi(tcp_t * tcp)5111755SKacheong.Poon@Sun.COM tcp_use_pure_tpi(tcp_t *tcp)
5211755SKacheong.Poon@Sun.COM {
5311755SKacheong.Poon@Sun.COM 	conn_t		*connp = tcp->tcp_connp;
5411755SKacheong.Poon@Sun.COM 
5511755SKacheong.Poon@Sun.COM #ifdef	_ILP32
5611755SKacheong.Poon@Sun.COM 	tcp->tcp_acceptor_id = (t_uscalar_t)connp->conn_rq;
5711755SKacheong.Poon@Sun.COM #else
5811755SKacheong.Poon@Sun.COM 	tcp->tcp_acceptor_id = connp->conn_dev;
5911755SKacheong.Poon@Sun.COM #endif
6011755SKacheong.Poon@Sun.COM 	/*
6111755SKacheong.Poon@Sun.COM 	 * Insert this socket into the acceptor hash.
6211755SKacheong.Poon@Sun.COM 	 * We might need it for T_CONN_RES message
6311755SKacheong.Poon@Sun.COM 	 */
6411755SKacheong.Poon@Sun.COM 	tcp_acceptor_hash_insert(tcp->tcp_acceptor_id, tcp);
6511755SKacheong.Poon@Sun.COM 
6611755SKacheong.Poon@Sun.COM 	tcp->tcp_issocket = B_FALSE;
6711755SKacheong.Poon@Sun.COM 	TCP_STAT(tcp->tcp_tcps, tcp_sock_fallback);
6811755SKacheong.Poon@Sun.COM }
6911755SKacheong.Poon@Sun.COM 
7011755SKacheong.Poon@Sun.COM /* Shorthand to generate and send TPI error acks to our client */
7111755SKacheong.Poon@Sun.COM void
tcp_err_ack(tcp_t * tcp,mblk_t * mp,int t_error,int sys_error)7211755SKacheong.Poon@Sun.COM tcp_err_ack(tcp_t *tcp, mblk_t *mp, int t_error, int sys_error)
7311755SKacheong.Poon@Sun.COM {
7411755SKacheong.Poon@Sun.COM 	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
7511755SKacheong.Poon@Sun.COM 		putnext(tcp->tcp_connp->conn_rq, mp);
7611755SKacheong.Poon@Sun.COM }
7711755SKacheong.Poon@Sun.COM 
7811755SKacheong.Poon@Sun.COM /* Shorthand to generate and send TPI error acks to our client */
7911755SKacheong.Poon@Sun.COM void
tcp_err_ack_prim(tcp_t * tcp,mblk_t * mp,int primitive,int t_error,int sys_error)8011755SKacheong.Poon@Sun.COM tcp_err_ack_prim(tcp_t *tcp, mblk_t *mp, int primitive,
8111755SKacheong.Poon@Sun.COM     int t_error, int sys_error)
8211755SKacheong.Poon@Sun.COM {
8311755SKacheong.Poon@Sun.COM 	struct T_error_ack	*teackp;
8411755SKacheong.Poon@Sun.COM 
8511755SKacheong.Poon@Sun.COM 	if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
8611755SKacheong.Poon@Sun.COM 	    M_PCPROTO, T_ERROR_ACK)) != NULL) {
8711755SKacheong.Poon@Sun.COM 		teackp = (struct T_error_ack *)mp->b_rptr;
8811755SKacheong.Poon@Sun.COM 		teackp->ERROR_prim = primitive;
8911755SKacheong.Poon@Sun.COM 		teackp->TLI_error = t_error;
9011755SKacheong.Poon@Sun.COM 		teackp->UNIX_error = sys_error;
9111755SKacheong.Poon@Sun.COM 		putnext(tcp->tcp_connp->conn_rq, mp);
9211755SKacheong.Poon@Sun.COM 	}
9311755SKacheong.Poon@Sun.COM }
9411755SKacheong.Poon@Sun.COM 
9511755SKacheong.Poon@Sun.COM /*
9611755SKacheong.Poon@Sun.COM  * TCP routine to get the values of options.
9711755SKacheong.Poon@Sun.COM  */
9811755SKacheong.Poon@Sun.COM int
tcp_tpi_opt_get(queue_t * q,int level,int name,uchar_t * ptr)9911755SKacheong.Poon@Sun.COM tcp_tpi_opt_get(queue_t *q, int level, int name, uchar_t *ptr)
10011755SKacheong.Poon@Sun.COM {
10111755SKacheong.Poon@Sun.COM 	return (tcp_opt_get(Q_TO_CONN(q), level, name, ptr));
10211755SKacheong.Poon@Sun.COM }
10311755SKacheong.Poon@Sun.COM 
10411755SKacheong.Poon@Sun.COM /* ARGSUSED */
10511755SKacheong.Poon@Sun.COM int
tcp_tpi_opt_set(queue_t * q,uint_t optset_context,int level,int name,uint_t inlen,uchar_t * invalp,uint_t * outlenp,uchar_t * outvalp,void * thisdg_attrs,cred_t * cr)10611755SKacheong.Poon@Sun.COM tcp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
10711755SKacheong.Poon@Sun.COM     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
10811755SKacheong.Poon@Sun.COM     void *thisdg_attrs, cred_t *cr)
10911755SKacheong.Poon@Sun.COM {
11011755SKacheong.Poon@Sun.COM 	conn_t	*connp =  Q_TO_CONN(q);
11111755SKacheong.Poon@Sun.COM 
11211755SKacheong.Poon@Sun.COM 	return (tcp_opt_set(connp, optset_context, level, name, inlen, invalp,
11311755SKacheong.Poon@Sun.COM 	    outlenp, outvalp, thisdg_attrs, cr));
11411755SKacheong.Poon@Sun.COM }
11511755SKacheong.Poon@Sun.COM 
11611755SKacheong.Poon@Sun.COM static int
tcp_conprim_opt_process(tcp_t * tcp,mblk_t * mp,int * do_disconnectp,int * t_errorp,int * sys_errorp)11711755SKacheong.Poon@Sun.COM tcp_conprim_opt_process(tcp_t *tcp, mblk_t *mp, int *do_disconnectp,
11811755SKacheong.Poon@Sun.COM     int *t_errorp, int *sys_errorp)
11911755SKacheong.Poon@Sun.COM {
12011755SKacheong.Poon@Sun.COM 	int error;
12111755SKacheong.Poon@Sun.COM 	int is_absreq_failure;
12211755SKacheong.Poon@Sun.COM 	t_scalar_t *opt_lenp;
12311755SKacheong.Poon@Sun.COM 	t_scalar_t opt_offset;
12411755SKacheong.Poon@Sun.COM 	int prim_type;
12511755SKacheong.Poon@Sun.COM 	struct T_conn_req *tcreqp;
12611755SKacheong.Poon@Sun.COM 	struct T_conn_res *tcresp;
12711755SKacheong.Poon@Sun.COM 	cred_t *cr;
12811755SKacheong.Poon@Sun.COM 
12911755SKacheong.Poon@Sun.COM 	/*
13011755SKacheong.Poon@Sun.COM 	 * All Solaris components should pass a db_credp
13111755SKacheong.Poon@Sun.COM 	 * for this TPI message, hence we ASSERT.
13211755SKacheong.Poon@Sun.COM 	 * But in case there is some other M_PROTO that looks
13311755SKacheong.Poon@Sun.COM 	 * like a TPI message sent by some other kernel
13411755SKacheong.Poon@Sun.COM 	 * component, we check and return an error.
13511755SKacheong.Poon@Sun.COM 	 */
13611755SKacheong.Poon@Sun.COM 	cr = msg_getcred(mp, NULL);
13711755SKacheong.Poon@Sun.COM 	ASSERT(cr != NULL);
13811755SKacheong.Poon@Sun.COM 	if (cr == NULL)
13911755SKacheong.Poon@Sun.COM 		return (-1);
14011755SKacheong.Poon@Sun.COM 
14111755SKacheong.Poon@Sun.COM 	prim_type = ((union T_primitives *)mp->b_rptr)->type;
14211755SKacheong.Poon@Sun.COM 	ASSERT(prim_type == T_CONN_REQ || prim_type == O_T_CONN_RES ||
14311755SKacheong.Poon@Sun.COM 	    prim_type == T_CONN_RES);
14411755SKacheong.Poon@Sun.COM 
14511755SKacheong.Poon@Sun.COM 	switch (prim_type) {
14611755SKacheong.Poon@Sun.COM 	case T_CONN_REQ:
14711755SKacheong.Poon@Sun.COM 		tcreqp = (struct T_conn_req *)mp->b_rptr;
14811755SKacheong.Poon@Sun.COM 		opt_offset = tcreqp->OPT_offset;
14911755SKacheong.Poon@Sun.COM 		opt_lenp = (t_scalar_t *)&tcreqp->OPT_length;
15011755SKacheong.Poon@Sun.COM 		break;
15111755SKacheong.Poon@Sun.COM 	case O_T_CONN_RES:
15211755SKacheong.Poon@Sun.COM 	case T_CONN_RES:
15311755SKacheong.Poon@Sun.COM 		tcresp = (struct T_conn_res *)mp->b_rptr;
15411755SKacheong.Poon@Sun.COM 		opt_offset = tcresp->OPT_offset;
15511755SKacheong.Poon@Sun.COM 		opt_lenp = (t_scalar_t *)&tcresp->OPT_length;
15611755SKacheong.Poon@Sun.COM 		break;
15711755SKacheong.Poon@Sun.COM 	}
15811755SKacheong.Poon@Sun.COM 
15911755SKacheong.Poon@Sun.COM 	*t_errorp = 0;
16011755SKacheong.Poon@Sun.COM 	*sys_errorp = 0;
16111755SKacheong.Poon@Sun.COM 	*do_disconnectp = 0;
16211755SKacheong.Poon@Sun.COM 
16311755SKacheong.Poon@Sun.COM 	error = tpi_optcom_buf(tcp->tcp_connp->conn_wq, mp, opt_lenp,
16411755SKacheong.Poon@Sun.COM 	    opt_offset, cr, &tcp_opt_obj,
16511755SKacheong.Poon@Sun.COM 	    NULL, &is_absreq_failure);
16611755SKacheong.Poon@Sun.COM 
16711755SKacheong.Poon@Sun.COM 	switch (error) {
16811755SKacheong.Poon@Sun.COM 	case  0:		/* no error */
16911755SKacheong.Poon@Sun.COM 		ASSERT(is_absreq_failure == 0);
17011755SKacheong.Poon@Sun.COM 		return (0);
17111755SKacheong.Poon@Sun.COM 	case ENOPROTOOPT:
17211755SKacheong.Poon@Sun.COM 		*t_errorp = TBADOPT;
17311755SKacheong.Poon@Sun.COM 		break;
17411755SKacheong.Poon@Sun.COM 	case EACCES:
17511755SKacheong.Poon@Sun.COM 		*t_errorp = TACCES;
17611755SKacheong.Poon@Sun.COM 		break;
17711755SKacheong.Poon@Sun.COM 	default:
17811755SKacheong.Poon@Sun.COM 		*t_errorp = TSYSERR; *sys_errorp = error;
17911755SKacheong.Poon@Sun.COM 		break;
18011755SKacheong.Poon@Sun.COM 	}
18111755SKacheong.Poon@Sun.COM 	if (is_absreq_failure != 0) {
18211755SKacheong.Poon@Sun.COM 		/*
18311755SKacheong.Poon@Sun.COM 		 * The connection request should get the local ack
18411755SKacheong.Poon@Sun.COM 		 * T_OK_ACK and then a T_DISCON_IND.
18511755SKacheong.Poon@Sun.COM 		 */
18611755SKacheong.Poon@Sun.COM 		*do_disconnectp = 1;
18711755SKacheong.Poon@Sun.COM 	}
18811755SKacheong.Poon@Sun.COM 	return (-1);
18911755SKacheong.Poon@Sun.COM }
19011755SKacheong.Poon@Sun.COM 
19111755SKacheong.Poon@Sun.COM void
tcp_tpi_bind(tcp_t * tcp,mblk_t * mp)19211755SKacheong.Poon@Sun.COM tcp_tpi_bind(tcp_t *tcp, mblk_t *mp)
19311755SKacheong.Poon@Sun.COM {
19411755SKacheong.Poon@Sun.COM 	int	error;
19511755SKacheong.Poon@Sun.COM 	conn_t	*connp = tcp->tcp_connp;
19611755SKacheong.Poon@Sun.COM 	struct sockaddr	*sa;
19711755SKacheong.Poon@Sun.COM 	mblk_t  *mp1;
19811755SKacheong.Poon@Sun.COM 	struct T_bind_req *tbr;
19911755SKacheong.Poon@Sun.COM 	int	backlog;
20011755SKacheong.Poon@Sun.COM 	socklen_t	len;
20111755SKacheong.Poon@Sun.COM 	sin_t	*sin;
20211755SKacheong.Poon@Sun.COM 	sin6_t	*sin6;
20311755SKacheong.Poon@Sun.COM 	cred_t		*cr;
20411755SKacheong.Poon@Sun.COM 
20511755SKacheong.Poon@Sun.COM 	/*
20611755SKacheong.Poon@Sun.COM 	 * All Solaris components should pass a db_credp
20711755SKacheong.Poon@Sun.COM 	 * for this TPI message, hence we ASSERT.
20811755SKacheong.Poon@Sun.COM 	 * But in case there is some other M_PROTO that looks
20911755SKacheong.Poon@Sun.COM 	 * like a TPI message sent by some other kernel
21011755SKacheong.Poon@Sun.COM 	 * component, we check and return an error.
21111755SKacheong.Poon@Sun.COM 	 */
21211755SKacheong.Poon@Sun.COM 	cr = msg_getcred(mp, NULL);
21311755SKacheong.Poon@Sun.COM 	ASSERT(cr != NULL);
21411755SKacheong.Poon@Sun.COM 	if (cr == NULL) {
21511755SKacheong.Poon@Sun.COM 		tcp_err_ack(tcp, mp, TSYSERR, EINVAL);
21611755SKacheong.Poon@Sun.COM 		return;
21711755SKacheong.Poon@Sun.COM 	}
21811755SKacheong.Poon@Sun.COM 
21911755SKacheong.Poon@Sun.COM 	ASSERT((uintptr_t)(mp->b_wptr - mp->b_rptr) <= (uintptr_t)INT_MAX);
22011755SKacheong.Poon@Sun.COM 	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
22111755SKacheong.Poon@Sun.COM 		if (connp->conn_debug) {
22211755SKacheong.Poon@Sun.COM 			(void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
22311755SKacheong.Poon@Sun.COM 			    "tcp_tpi_bind: bad req, len %u",
22411755SKacheong.Poon@Sun.COM 			    (uint_t)(mp->b_wptr - mp->b_rptr));
22511755SKacheong.Poon@Sun.COM 		}
22611755SKacheong.Poon@Sun.COM 		tcp_err_ack(tcp, mp, TPROTO, 0);
22711755SKacheong.Poon@Sun.COM 		return;
22811755SKacheong.Poon@Sun.COM 	}
22911755SKacheong.Poon@Sun.COM 	/* Make sure the largest address fits */
23011755SKacheong.Poon@Sun.COM 	mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1);
23111755SKacheong.Poon@Sun.COM 	if (mp1 == NULL) {
23211755SKacheong.Poon@Sun.COM 		tcp_err_ack(tcp, mp, TSYSERR, ENOMEM);
23311755SKacheong.Poon@Sun.COM 		return;
23411755SKacheong.Poon@Sun.COM 	}
23511755SKacheong.Poon@Sun.COM 	mp = mp1;
23611755SKacheong.Poon@Sun.COM 	tbr = (struct T_bind_req *)mp->b_rptr;
23711755SKacheong.Poon@Sun.COM 
23811755SKacheong.Poon@Sun.COM 	backlog = tbr->CONIND_number;
23911755SKacheong.Poon@Sun.COM 	len = tbr->ADDR_length;
24011755SKacheong.Poon@Sun.COM 
24111755SKacheong.Poon@Sun.COM 	switch (len) {
24211755SKacheong.Poon@Sun.COM 	case 0:		/* request for a generic port */
24311755SKacheong.Poon@Sun.COM 		tbr->ADDR_offset = sizeof (struct T_bind_req);
24411755SKacheong.Poon@Sun.COM 		if (connp->conn_family == AF_INET) {
24511755SKacheong.Poon@Sun.COM 			tbr->ADDR_length = sizeof (sin_t);
24611755SKacheong.Poon@Sun.COM 			sin = (sin_t *)&tbr[1];
24711755SKacheong.Poon@Sun.COM 			*sin = sin_null;
24811755SKacheong.Poon@Sun.COM 			sin->sin_family = AF_INET;
24911755SKacheong.Poon@Sun.COM 			sa = (struct sockaddr *)sin;
25011755SKacheong.Poon@Sun.COM 			len = sizeof (sin_t);
25111755SKacheong.Poon@Sun.COM 			mp->b_wptr = (uchar_t *)&sin[1];
25211755SKacheong.Poon@Sun.COM 		} else {
25311755SKacheong.Poon@Sun.COM 			ASSERT(connp->conn_family == AF_INET6);
25411755SKacheong.Poon@Sun.COM 			tbr->ADDR_length = sizeof (sin6_t);
25511755SKacheong.Poon@Sun.COM 			sin6 = (sin6_t *)&tbr[1];
25611755SKacheong.Poon@Sun.COM 			*sin6 = sin6_null;
25711755SKacheong.Poon@Sun.COM 			sin6->sin6_family = AF_INET6;
25811755SKacheong.Poon@Sun.COM 			sa = (struct sockaddr *)sin6;
25911755SKacheong.Poon@Sun.COM 			len = sizeof (sin6_t);
26011755SKacheong.Poon@Sun.COM 			mp->b_wptr = (uchar_t *)&sin6[1];
26111755SKacheong.Poon@Sun.COM 		}
26211755SKacheong.Poon@Sun.COM 		break;
26311755SKacheong.Poon@Sun.COM 
26411755SKacheong.Poon@Sun.COM 	case sizeof (sin_t):    /* Complete IPv4 address */
26511755SKacheong.Poon@Sun.COM 		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
26611755SKacheong.Poon@Sun.COM 		    sizeof (sin_t));
26711755SKacheong.Poon@Sun.COM 		break;
26811755SKacheong.Poon@Sun.COM 
26911755SKacheong.Poon@Sun.COM 	case sizeof (sin6_t): /* Complete IPv6 address */
27011755SKacheong.Poon@Sun.COM 		sa = (struct sockaddr *)mi_offset_param(mp,
27111755SKacheong.Poon@Sun.COM 		    tbr->ADDR_offset, sizeof (sin6_t));
27211755SKacheong.Poon@Sun.COM 		break;
27311755SKacheong.Poon@Sun.COM 
27411755SKacheong.Poon@Sun.COM 	default:
27511755SKacheong.Poon@Sun.COM 		if (connp->conn_debug) {
27611755SKacheong.Poon@Sun.COM 			(void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
27711755SKacheong.Poon@Sun.COM 			    "tcp_tpi_bind: bad address length, %d",
27811755SKacheong.Poon@Sun.COM 			    tbr->ADDR_length);
27911755SKacheong.Poon@Sun.COM 		}
28011755SKacheong.Poon@Sun.COM 		tcp_err_ack(tcp, mp, TBADADDR, 0);
28111755SKacheong.Poon@Sun.COM 		return;
28211755SKacheong.Poon@Sun.COM 	}
28311755SKacheong.Poon@Sun.COM 
28411755SKacheong.Poon@Sun.COM 	if (backlog > 0) {
28511755SKacheong.Poon@Sun.COM 		error = tcp_do_listen(connp, sa, len, backlog, DB_CRED(mp),
28611755SKacheong.Poon@Sun.COM 		    tbr->PRIM_type != O_T_BIND_REQ);
28711755SKacheong.Poon@Sun.COM 	} else {
28811755SKacheong.Poon@Sun.COM 		error = tcp_do_bind(connp, sa, len, DB_CRED(mp),
28911755SKacheong.Poon@Sun.COM 		    tbr->PRIM_type != O_T_BIND_REQ);
29011755SKacheong.Poon@Sun.COM 	}
29111755SKacheong.Poon@Sun.COM done:
29211755SKacheong.Poon@Sun.COM 	if (error > 0) {
29311755SKacheong.Poon@Sun.COM 		tcp_err_ack(tcp, mp, TSYSERR, error);
29411755SKacheong.Poon@Sun.COM 	} else if (error < 0) {
29511755SKacheong.Poon@Sun.COM 		tcp_err_ack(tcp, mp, -error, 0);
29611755SKacheong.Poon@Sun.COM 	} else {
29711755SKacheong.Poon@Sun.COM 		/*
29811755SKacheong.Poon@Sun.COM 		 * Update port information as sockfs/tpi needs it for checking
29911755SKacheong.Poon@Sun.COM 		 */
30011755SKacheong.Poon@Sun.COM 		if (connp->conn_family == AF_INET) {
30111755SKacheong.Poon@Sun.COM 			sin = (sin_t *)sa;
30211755SKacheong.Poon@Sun.COM 			sin->sin_port = connp->conn_lport;
30311755SKacheong.Poon@Sun.COM 		} else {
30411755SKacheong.Poon@Sun.COM 			sin6 = (sin6_t *)sa;
30511755SKacheong.Poon@Sun.COM 			sin6->sin6_port = connp->conn_lport;
30611755SKacheong.Poon@Sun.COM 		}
30711755SKacheong.Poon@Sun.COM 		mp->b_datap->db_type = M_PCPROTO;
30811755SKacheong.Poon@Sun.COM 		tbr->PRIM_type = T_BIND_ACK;
30911755SKacheong.Poon@Sun.COM 		putnext(connp->conn_rq, mp);
31011755SKacheong.Poon@Sun.COM 	}
31111755SKacheong.Poon@Sun.COM }
31211755SKacheong.Poon@Sun.COM 
31311755SKacheong.Poon@Sun.COM /* tcp_unbind is called by tcp_wput_proto to handle T_UNBIND_REQ messages. */
31411755SKacheong.Poon@Sun.COM void
tcp_tpi_unbind(tcp_t * tcp,mblk_t * mp)31511755SKacheong.Poon@Sun.COM tcp_tpi_unbind(tcp_t *tcp, mblk_t *mp)
31611755SKacheong.Poon@Sun.COM {
31711755SKacheong.Poon@Sun.COM 	conn_t *connp = tcp->tcp_connp;
31811755SKacheong.Poon@Sun.COM 	int error;
31911755SKacheong.Poon@Sun.COM 
32011755SKacheong.Poon@Sun.COM 	error = tcp_do_unbind(connp);
32111755SKacheong.Poon@Sun.COM 	if (error > 0) {
32211755SKacheong.Poon@Sun.COM 		tcp_err_ack(tcp, mp, TSYSERR, error);
32311755SKacheong.Poon@Sun.COM 	} else if (error < 0) {
32411755SKacheong.Poon@Sun.COM 		tcp_err_ack(tcp, mp, -error, 0);
32511755SKacheong.Poon@Sun.COM 	} else {
32611755SKacheong.Poon@Sun.COM 		/* Send M_FLUSH according to TPI */
32711755SKacheong.Poon@Sun.COM 		(void) putnextctl1(connp->conn_rq, M_FLUSH, FLUSHRW);
32811755SKacheong.Poon@Sun.COM 
32911755SKacheong.Poon@Sun.COM 		mp = mi_tpi_ok_ack_alloc(mp);
33011755SKacheong.Poon@Sun.COM 		if (mp != NULL)
33111755SKacheong.Poon@Sun.COM 			putnext(connp->conn_rq, mp);
33211755SKacheong.Poon@Sun.COM 	}
33311755SKacheong.Poon@Sun.COM }
33411755SKacheong.Poon@Sun.COM 
33511755SKacheong.Poon@Sun.COM int
tcp_tpi_close(queue_t * q,int flags)33611755SKacheong.Poon@Sun.COM tcp_tpi_close(queue_t *q, int flags)
33711755SKacheong.Poon@Sun.COM {
33811755SKacheong.Poon@Sun.COM 	conn_t		*connp;
33911755SKacheong.Poon@Sun.COM 
34011755SKacheong.Poon@Sun.COM 	ASSERT(WR(q)->q_next == NULL);
34111755SKacheong.Poon@Sun.COM 
34211755SKacheong.Poon@Sun.COM 	if (flags & SO_FALLBACK) {
34311755SKacheong.Poon@Sun.COM 		/*
34411755SKacheong.Poon@Sun.COM 		 * stream is being closed while in fallback
34511755SKacheong.Poon@Sun.COM 		 * simply free the resources that were allocated
34611755SKacheong.Poon@Sun.COM 		 */
34711755SKacheong.Poon@Sun.COM 		inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr));
34811755SKacheong.Poon@Sun.COM 		qprocsoff(q);
34911755SKacheong.Poon@Sun.COM 		goto done;
35011755SKacheong.Poon@Sun.COM 	}
35111755SKacheong.Poon@Sun.COM 
35211755SKacheong.Poon@Sun.COM 	connp = Q_TO_CONN(q);
35311755SKacheong.Poon@Sun.COM 	/*
35411755SKacheong.Poon@Sun.COM 	 * We are being closed as /dev/tcp or /dev/tcp6.
35511755SKacheong.Poon@Sun.COM 	 */
35611755SKacheong.Poon@Sun.COM 	tcp_close_common(connp, flags);
35711755SKacheong.Poon@Sun.COM 
35811755SKacheong.Poon@Sun.COM 	qprocsoff(q);
35911755SKacheong.Poon@Sun.COM 	inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
36011755SKacheong.Poon@Sun.COM 
36111755SKacheong.Poon@Sun.COM 	/*
36211755SKacheong.Poon@Sun.COM 	 * Drop IP's reference on the conn. This is the last reference
36311755SKacheong.Poon@Sun.COM 	 * on the connp if the state was less than established. If the
36411755SKacheong.Poon@Sun.COM 	 * connection has gone into timewait state, then we will have
36511755SKacheong.Poon@Sun.COM 	 * one ref for the TCP and one more ref (total of two) for the
36611755SKacheong.Poon@Sun.COM 	 * classifier connected hash list (a timewait connections stays
36711755SKacheong.Poon@Sun.COM 	 * in connected hash till closed).
36811755SKacheong.Poon@Sun.COM 	 *
36911755SKacheong.Poon@Sun.COM 	 * We can't assert the references because there might be other
37011755SKacheong.Poon@Sun.COM 	 * transient reference places because of some walkers or queued
37111755SKacheong.Poon@Sun.COM 	 * packets in squeue for the timewait state.
37211755SKacheong.Poon@Sun.COM 	 */
37311755SKacheong.Poon@Sun.COM 	CONN_DEC_REF(connp);
37411755SKacheong.Poon@Sun.COM done:
37511755SKacheong.Poon@Sun.COM 	q->q_ptr = WR(q)->q_ptr = NULL;
37611755SKacheong.Poon@Sun.COM 	return (0);
37711755SKacheong.Poon@Sun.COM }
37811755SKacheong.Poon@Sun.COM 
37911755SKacheong.Poon@Sun.COM int
tcp_tpi_close_accept(queue_t * q)38011755SKacheong.Poon@Sun.COM tcp_tpi_close_accept(queue_t *q)
38111755SKacheong.Poon@Sun.COM {
38211755SKacheong.Poon@Sun.COM 	vmem_t	*minor_arena;
38311755SKacheong.Poon@Sun.COM 	dev_t	conn_dev;
38411755SKacheong.Poon@Sun.COM 	extern struct qinit tcp_acceptor_winit;
38511755SKacheong.Poon@Sun.COM 
38611755SKacheong.Poon@Sun.COM 	ASSERT(WR(q)->q_qinfo == &tcp_acceptor_winit);
38711755SKacheong.Poon@Sun.COM 
38811755SKacheong.Poon@Sun.COM 	/*
38911755SKacheong.Poon@Sun.COM 	 * We had opened an acceptor STREAM for sockfs which is
39011755SKacheong.Poon@Sun.COM 	 * now being closed due to some error.
39111755SKacheong.Poon@Sun.COM 	 */
39211755SKacheong.Poon@Sun.COM 	qprocsoff(q);
39311755SKacheong.Poon@Sun.COM 
39411755SKacheong.Poon@Sun.COM 	minor_arena = (vmem_t *)WR(q)->q_ptr;
39511755SKacheong.Poon@Sun.COM 	conn_dev = (dev_t)RD(q)->q_ptr;
39611755SKacheong.Poon@Sun.COM 	ASSERT(minor_arena != NULL);
39711755SKacheong.Poon@Sun.COM 	ASSERT(conn_dev != 0);
39811755SKacheong.Poon@Sun.COM 	inet_minor_free(minor_arena, conn_dev);
39911755SKacheong.Poon@Sun.COM 	q->q_ptr = WR(q)->q_ptr = NULL;
40011755SKacheong.Poon@Sun.COM 	return (0);
40111755SKacheong.Poon@Sun.COM }
40211755SKacheong.Poon@Sun.COM 
40311755SKacheong.Poon@Sun.COM /*
40411755SKacheong.Poon@Sun.COM  * Put a connection confirmation message upstream built from the
40511755SKacheong.Poon@Sun.COM  * address/flowid information with the conn and iph. Report our success or
40611755SKacheong.Poon@Sun.COM  * failure.
40711755SKacheong.Poon@Sun.COM  */
40811755SKacheong.Poon@Sun.COM boolean_t
tcp_conn_con(tcp_t * tcp,uchar_t * iphdr,mblk_t * idmp,mblk_t ** defermp,ip_recv_attr_t * ira)40911755SKacheong.Poon@Sun.COM tcp_conn_con(tcp_t *tcp, uchar_t *iphdr, mblk_t *idmp,
41011755SKacheong.Poon@Sun.COM     mblk_t **defermp, ip_recv_attr_t *ira)
41111755SKacheong.Poon@Sun.COM {
41211755SKacheong.Poon@Sun.COM 	sin_t	sin;
41311755SKacheong.Poon@Sun.COM 	sin6_t	sin6;
41411755SKacheong.Poon@Sun.COM 	mblk_t	*mp;
41511755SKacheong.Poon@Sun.COM 	char	*optp = NULL;
41611755SKacheong.Poon@Sun.COM 	int	optlen = 0;
41711755SKacheong.Poon@Sun.COM 	conn_t	*connp = tcp->tcp_connp;
41811755SKacheong.Poon@Sun.COM 
41911755SKacheong.Poon@Sun.COM 	if (defermp != NULL)
42011755SKacheong.Poon@Sun.COM 		*defermp = NULL;
42111755SKacheong.Poon@Sun.COM 
42211755SKacheong.Poon@Sun.COM 	if (tcp->tcp_conn.tcp_opts_conn_req != NULL) {
42311755SKacheong.Poon@Sun.COM 		/*
42411755SKacheong.Poon@Sun.COM 		 * Return in T_CONN_CON results of option negotiation through
42511755SKacheong.Poon@Sun.COM 		 * the T_CONN_REQ. Note: If there is an real end-to-end option
42611755SKacheong.Poon@Sun.COM 		 * negotiation, then what is received from remote end needs
42711755SKacheong.Poon@Sun.COM 		 * to be taken into account but there is no such thing (yet?)
42811755SKacheong.Poon@Sun.COM 		 * in our TCP/IP.
42911755SKacheong.Poon@Sun.COM 		 * Note: We do not use mi_offset_param() here as
43011755SKacheong.Poon@Sun.COM 		 * tcp_opts_conn_req contents do not directly come from
43111755SKacheong.Poon@Sun.COM 		 * an application and are either generated in kernel or
43211755SKacheong.Poon@Sun.COM 		 * from user input that was already verified.
43311755SKacheong.Poon@Sun.COM 		 */
43411755SKacheong.Poon@Sun.COM 		mp = tcp->tcp_conn.tcp_opts_conn_req;
43511755SKacheong.Poon@Sun.COM 		optp = (char *)(mp->b_rptr +
43611755SKacheong.Poon@Sun.COM 		    ((struct T_conn_req *)mp->b_rptr)->OPT_offset);
43711755SKacheong.Poon@Sun.COM 		optlen = (int)
43811755SKacheong.Poon@Sun.COM 		    ((struct T_conn_req *)mp->b_rptr)->OPT_length;
43911755SKacheong.Poon@Sun.COM 	}
44011755SKacheong.Poon@Sun.COM 
44111755SKacheong.Poon@Sun.COM 	if (IPH_HDR_VERSION(iphdr) == IPV4_VERSION) {
44211755SKacheong.Poon@Sun.COM 
44311755SKacheong.Poon@Sun.COM 		/* packet is IPv4 */
44411755SKacheong.Poon@Sun.COM 		if (connp->conn_family == AF_INET) {
44511755SKacheong.Poon@Sun.COM 			sin = sin_null;
44611755SKacheong.Poon@Sun.COM 			sin.sin_addr.s_addr = connp->conn_faddr_v4;
44711755SKacheong.Poon@Sun.COM 			sin.sin_port = connp->conn_fport;
44811755SKacheong.Poon@Sun.COM 			sin.sin_family = AF_INET;
44911755SKacheong.Poon@Sun.COM 			mp = mi_tpi_conn_con(NULL, (char *)&sin,
45011755SKacheong.Poon@Sun.COM 			    (int)sizeof (sin_t), optp, optlen);
45111755SKacheong.Poon@Sun.COM 		} else {
45211755SKacheong.Poon@Sun.COM 			sin6 = sin6_null;
45311755SKacheong.Poon@Sun.COM 			sin6.sin6_addr = connp->conn_faddr_v6;
45411755SKacheong.Poon@Sun.COM 			sin6.sin6_port = connp->conn_fport;
45511755SKacheong.Poon@Sun.COM 			sin6.sin6_family = AF_INET6;
45611755SKacheong.Poon@Sun.COM 			mp = mi_tpi_conn_con(NULL, (char *)&sin6,
45711755SKacheong.Poon@Sun.COM 			    (int)sizeof (sin6_t), optp, optlen);
45811755SKacheong.Poon@Sun.COM 
45911755SKacheong.Poon@Sun.COM 		}
46011755SKacheong.Poon@Sun.COM 	} else {
46111755SKacheong.Poon@Sun.COM 		ip6_t	*ip6h = (ip6_t *)iphdr;
46211755SKacheong.Poon@Sun.COM 
46311755SKacheong.Poon@Sun.COM 		ASSERT(IPH_HDR_VERSION(iphdr) == IPV6_VERSION);
46411755SKacheong.Poon@Sun.COM 		ASSERT(connp->conn_family == AF_INET6);
46511755SKacheong.Poon@Sun.COM 		sin6 = sin6_null;
46611755SKacheong.Poon@Sun.COM 		sin6.sin6_addr = connp->conn_faddr_v6;
46711755SKacheong.Poon@Sun.COM 		sin6.sin6_port = connp->conn_fport;
46811755SKacheong.Poon@Sun.COM 		sin6.sin6_family = AF_INET6;
46911755SKacheong.Poon@Sun.COM 		sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
47011755SKacheong.Poon@Sun.COM 		mp = mi_tpi_conn_con(NULL, (char *)&sin6,
47111755SKacheong.Poon@Sun.COM 		    (int)sizeof (sin6_t), optp, optlen);
47211755SKacheong.Poon@Sun.COM 	}
47311755SKacheong.Poon@Sun.COM 
47411755SKacheong.Poon@Sun.COM 	if (!mp)
47511755SKacheong.Poon@Sun.COM 		return (B_FALSE);
47611755SKacheong.Poon@Sun.COM 
47711755SKacheong.Poon@Sun.COM 	mblk_copycred(mp, idmp);
47811755SKacheong.Poon@Sun.COM 
47911755SKacheong.Poon@Sun.COM 	if (defermp == NULL) {
48011755SKacheong.Poon@Sun.COM 		conn_t *connp = tcp->tcp_connp;
48111755SKacheong.Poon@Sun.COM 		if (IPCL_IS_NONSTR(connp)) {
48211755SKacheong.Poon@Sun.COM 			(*connp->conn_upcalls->su_connected)
48311755SKacheong.Poon@Sun.COM 			    (connp->conn_upper_handle, tcp->tcp_connid,
48411755SKacheong.Poon@Sun.COM 			    ira->ira_cred, ira->ira_cpid);
48511755SKacheong.Poon@Sun.COM 			freemsg(mp);
48611755SKacheong.Poon@Sun.COM 		} else {
48711755SKacheong.Poon@Sun.COM 			if (ira->ira_cred != NULL) {
48811755SKacheong.Poon@Sun.COM 				/* So that getpeerucred works for TPI sockfs */
48911755SKacheong.Poon@Sun.COM 				mblk_setcred(mp, ira->ira_cred, ira->ira_cpid);
49011755SKacheong.Poon@Sun.COM 			}
49111755SKacheong.Poon@Sun.COM 			putnext(connp->conn_rq, mp);
49211755SKacheong.Poon@Sun.COM 		}
49311755SKacheong.Poon@Sun.COM 	} else {
49411755SKacheong.Poon@Sun.COM 		*defermp = mp;
49511755SKacheong.Poon@Sun.COM 	}
49611755SKacheong.Poon@Sun.COM 
49711755SKacheong.Poon@Sun.COM 	if (tcp->tcp_conn.tcp_opts_conn_req != NULL)
49811755SKacheong.Poon@Sun.COM 		tcp_close_mpp(&tcp->tcp_conn.tcp_opts_conn_req);
49911755SKacheong.Poon@Sun.COM 	return (B_TRUE);
50011755SKacheong.Poon@Sun.COM }
50111755SKacheong.Poon@Sun.COM 
/*
 * Successful connect request processing begins when our client passes
 * a T_CONN_REQ message into tcp_wput(), which performs function calls into
 * IP and then passes a T_OK_ACK (or T_ERROR_ACK) upstream.
 *
 * After various error checks are completed, tcp_tpi_connect() lays
 * the target address and port into the composite header template.
 * Then we ask IP for information, including a source address if we didn't
 * already have one. Finally we prepare to send the SYN packet, and then
 * send up the T_OK_ACK reply message.
 */
51311755SKacheong.Poon@Sun.COM void
tcp_tpi_connect(tcp_t * tcp,mblk_t * mp)51411755SKacheong.Poon@Sun.COM tcp_tpi_connect(tcp_t *tcp, mblk_t *mp)
51511755SKacheong.Poon@Sun.COM {
51611755SKacheong.Poon@Sun.COM 	sin_t		*sin;
51711755SKacheong.Poon@Sun.COM 	struct T_conn_req	*tcr;
51811755SKacheong.Poon@Sun.COM 	struct sockaddr	*sa;
51911755SKacheong.Poon@Sun.COM 	socklen_t	len;
52011755SKacheong.Poon@Sun.COM 	int		error;
52111755SKacheong.Poon@Sun.COM 	cred_t		*cr;
52211755SKacheong.Poon@Sun.COM 	pid_t		cpid;
52311755SKacheong.Poon@Sun.COM 	conn_t		*connp = tcp->tcp_connp;
52411755SKacheong.Poon@Sun.COM 	queue_t		*q = connp->conn_wq;
52511755SKacheong.Poon@Sun.COM 
52611755SKacheong.Poon@Sun.COM 	/*
52711755SKacheong.Poon@Sun.COM 	 * All Solaris components should pass a db_credp
52811755SKacheong.Poon@Sun.COM 	 * for this TPI message, hence we ASSERT.
52911755SKacheong.Poon@Sun.COM 	 * But in case there is some other M_PROTO that looks
53011755SKacheong.Poon@Sun.COM 	 * like a TPI message sent by some other kernel
53111755SKacheong.Poon@Sun.COM 	 * component, we check and return an error.
53211755SKacheong.Poon@Sun.COM 	 */
53311755SKacheong.Poon@Sun.COM 	cr = msg_getcred(mp, &cpid);
53411755SKacheong.Poon@Sun.COM 	ASSERT(cr != NULL);
53511755SKacheong.Poon@Sun.COM 	if (cr == NULL) {
53611755SKacheong.Poon@Sun.COM 		tcp_err_ack(tcp, mp, TSYSERR, EINVAL);
53711755SKacheong.Poon@Sun.COM 		return;
53811755SKacheong.Poon@Sun.COM 	}
53911755SKacheong.Poon@Sun.COM 
54011755SKacheong.Poon@Sun.COM 	tcr = (struct T_conn_req *)mp->b_rptr;
54111755SKacheong.Poon@Sun.COM 
54211755SKacheong.Poon@Sun.COM 	ASSERT((uintptr_t)(mp->b_wptr - mp->b_rptr) <= (uintptr_t)INT_MAX);
54311755SKacheong.Poon@Sun.COM 	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tcr)) {
54411755SKacheong.Poon@Sun.COM 		tcp_err_ack(tcp, mp, TPROTO, 0);
54511755SKacheong.Poon@Sun.COM 		return;
54611755SKacheong.Poon@Sun.COM 	}
54711755SKacheong.Poon@Sun.COM 
54811755SKacheong.Poon@Sun.COM 	/*
54911755SKacheong.Poon@Sun.COM 	 * Pre-allocate the T_ordrel_ind mblk so that at close time, we
55011755SKacheong.Poon@Sun.COM 	 * will always have that to send up.  Otherwise, we need to do
55111755SKacheong.Poon@Sun.COM 	 * special handling in case the allocation fails at that time.
55211755SKacheong.Poon@Sun.COM 	 * If the end point is TPI, the tcp_t can be reused and the
55311755SKacheong.Poon@Sun.COM 	 * tcp_ordrel_mp may be allocated already.
55411755SKacheong.Poon@Sun.COM 	 */
55511755SKacheong.Poon@Sun.COM 	if (tcp->tcp_ordrel_mp == NULL) {
55611755SKacheong.Poon@Sun.COM 		if ((tcp->tcp_ordrel_mp = mi_tpi_ordrel_ind()) == NULL) {
55711755SKacheong.Poon@Sun.COM 			tcp_err_ack(tcp, mp, TSYSERR, ENOMEM);
55811755SKacheong.Poon@Sun.COM 			return;
55911755SKacheong.Poon@Sun.COM 		}
56011755SKacheong.Poon@Sun.COM 	}
56111755SKacheong.Poon@Sun.COM 
56211755SKacheong.Poon@Sun.COM 	/*
56311755SKacheong.Poon@Sun.COM 	 * Determine packet type based on type of address passed in
56411755SKacheong.Poon@Sun.COM 	 * the request should contain an IPv4 or IPv6 address.
56511755SKacheong.Poon@Sun.COM 	 * Make sure that address family matches the type of
56611755SKacheong.Poon@Sun.COM 	 * family of the address passed down.
56711755SKacheong.Poon@Sun.COM 	 */
56811755SKacheong.Poon@Sun.COM 	switch (tcr->DEST_length) {
56911755SKacheong.Poon@Sun.COM 	default:
57011755SKacheong.Poon@Sun.COM 		tcp_err_ack(tcp, mp, TBADADDR, 0);
57111755SKacheong.Poon@Sun.COM 		return;
57211755SKacheong.Poon@Sun.COM 
57311755SKacheong.Poon@Sun.COM 	case (sizeof (sin_t) - sizeof (sin->sin_zero)): {
57411755SKacheong.Poon@Sun.COM 		/*
57511755SKacheong.Poon@Sun.COM 		 * XXX: The check for valid DEST_length was not there
57611755SKacheong.Poon@Sun.COM 		 * in earlier releases and some buggy
57711755SKacheong.Poon@Sun.COM 		 * TLI apps (e.g Sybase) got away with not feeding
57811755SKacheong.Poon@Sun.COM 		 * in sin_zero part of address.
57911755SKacheong.Poon@Sun.COM 		 * We allow that bug to keep those buggy apps humming.
58011755SKacheong.Poon@Sun.COM 		 * Test suites require the check on DEST_length.
58111755SKacheong.Poon@Sun.COM 		 * We construct a new mblk with valid DEST_length
58211755SKacheong.Poon@Sun.COM 		 * free the original so the rest of the code does
58311755SKacheong.Poon@Sun.COM 		 * not have to keep track of this special shorter
58411755SKacheong.Poon@Sun.COM 		 * length address case.
58511755SKacheong.Poon@Sun.COM 		 */
58611755SKacheong.Poon@Sun.COM 		mblk_t *nmp;
58711755SKacheong.Poon@Sun.COM 		struct T_conn_req *ntcr;
58811755SKacheong.Poon@Sun.COM 		sin_t *nsin;
58911755SKacheong.Poon@Sun.COM 
59011755SKacheong.Poon@Sun.COM 		nmp = allocb(sizeof (struct T_conn_req) + sizeof (sin_t) +
59111755SKacheong.Poon@Sun.COM 		    tcr->OPT_length, BPRI_HI);
59211755SKacheong.Poon@Sun.COM 		if (nmp == NULL) {
59311755SKacheong.Poon@Sun.COM 			tcp_err_ack(tcp, mp, TSYSERR, ENOMEM);
59411755SKacheong.Poon@Sun.COM 			return;
59511755SKacheong.Poon@Sun.COM 		}
59611755SKacheong.Poon@Sun.COM 		ntcr = (struct T_conn_req *)nmp->b_rptr;
59711755SKacheong.Poon@Sun.COM 		bzero(ntcr, sizeof (struct T_conn_req)); /* zero fill */
59811755SKacheong.Poon@Sun.COM 		ntcr->PRIM_type = T_CONN_REQ;
59911755SKacheong.Poon@Sun.COM 		ntcr->DEST_length = sizeof (sin_t);
60011755SKacheong.Poon@Sun.COM 		ntcr->DEST_offset = sizeof (struct T_conn_req);
60111755SKacheong.Poon@Sun.COM 
60211755SKacheong.Poon@Sun.COM 		nsin = (sin_t *)((uchar_t *)ntcr + ntcr->DEST_offset);
60311755SKacheong.Poon@Sun.COM 		*nsin = sin_null;
60411755SKacheong.Poon@Sun.COM 		/* Get pointer to shorter address to copy from original mp */
60511755SKacheong.Poon@Sun.COM 		sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset,
60611755SKacheong.Poon@Sun.COM 		    tcr->DEST_length); /* extract DEST_length worth of sin_t */
60711755SKacheong.Poon@Sun.COM 		if (sin == NULL || !OK_32PTR((char *)sin)) {
60811755SKacheong.Poon@Sun.COM 			freemsg(nmp);
60911755SKacheong.Poon@Sun.COM 			tcp_err_ack(tcp, mp, TSYSERR, EINVAL);
61011755SKacheong.Poon@Sun.COM 			return;
61111755SKacheong.Poon@Sun.COM 		}
61211755SKacheong.Poon@Sun.COM 		nsin->sin_family = sin->sin_family;
61311755SKacheong.Poon@Sun.COM 		nsin->sin_port = sin->sin_port;
61411755SKacheong.Poon@Sun.COM 		nsin->sin_addr = sin->sin_addr;
61511755SKacheong.Poon@Sun.COM 		/* Note:nsin->sin_zero zero-fill with sin_null assign above */
61611755SKacheong.Poon@Sun.COM 		nmp->b_wptr = (uchar_t *)&nsin[1];
61711755SKacheong.Poon@Sun.COM 		if (tcr->OPT_length != 0) {
61811755SKacheong.Poon@Sun.COM 			ntcr->OPT_length = tcr->OPT_length;
61911755SKacheong.Poon@Sun.COM 			ntcr->OPT_offset = nmp->b_wptr - nmp->b_rptr;
62011755SKacheong.Poon@Sun.COM 			bcopy((uchar_t *)tcr + tcr->OPT_offset,
62111755SKacheong.Poon@Sun.COM 			    (uchar_t *)ntcr + ntcr->OPT_offset,
62211755SKacheong.Poon@Sun.COM 			    tcr->OPT_length);
62311755SKacheong.Poon@Sun.COM 			nmp->b_wptr += tcr->OPT_length;
62411755SKacheong.Poon@Sun.COM 		}
62511755SKacheong.Poon@Sun.COM 		freemsg(mp);	/* original mp freed */
62611755SKacheong.Poon@Sun.COM 		mp = nmp;	/* re-initialize original variables */
62711755SKacheong.Poon@Sun.COM 		tcr = ntcr;
62811755SKacheong.Poon@Sun.COM 	}
62911755SKacheong.Poon@Sun.COM 	/* FALLTHRU */
63011755SKacheong.Poon@Sun.COM 
63111755SKacheong.Poon@Sun.COM 	case sizeof (sin_t):
63211755SKacheong.Poon@Sun.COM 		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
63311755SKacheong.Poon@Sun.COM 		    sizeof (sin_t));
63411755SKacheong.Poon@Sun.COM 		len = sizeof (sin_t);
63511755SKacheong.Poon@Sun.COM 		break;
63611755SKacheong.Poon@Sun.COM 
63711755SKacheong.Poon@Sun.COM 	case sizeof (sin6_t):
63811755SKacheong.Poon@Sun.COM 		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
63911755SKacheong.Poon@Sun.COM 		    sizeof (sin6_t));
64011755SKacheong.Poon@Sun.COM 		len = sizeof (sin6_t);
64111755SKacheong.Poon@Sun.COM 		break;
64211755SKacheong.Poon@Sun.COM 	}
64311755SKacheong.Poon@Sun.COM 
64411755SKacheong.Poon@Sun.COM 	error = proto_verify_ip_addr(connp->conn_family, sa, len);
64511755SKacheong.Poon@Sun.COM 	if (error != 0) {
64611755SKacheong.Poon@Sun.COM 		tcp_err_ack(tcp, mp, TSYSERR, error);
64711755SKacheong.Poon@Sun.COM 		return;
64811755SKacheong.Poon@Sun.COM 	}
64911755SKacheong.Poon@Sun.COM 
65011755SKacheong.Poon@Sun.COM 	/*
65111755SKacheong.Poon@Sun.COM 	 * TODO: If someone in TCPS_TIME_WAIT has this dst/port we
65211755SKacheong.Poon@Sun.COM 	 * should key on their sequence number and cut them loose.
65311755SKacheong.Poon@Sun.COM 	 */
65411755SKacheong.Poon@Sun.COM 
65511755SKacheong.Poon@Sun.COM 	/*
65611755SKacheong.Poon@Sun.COM 	 * If options passed in, feed it for verification and handling
65711755SKacheong.Poon@Sun.COM 	 */
65811755SKacheong.Poon@Sun.COM 	if (tcr->OPT_length != 0) {
65911755SKacheong.Poon@Sun.COM 		mblk_t	*ok_mp;
66011755SKacheong.Poon@Sun.COM 		mblk_t	*discon_mp;
66111755SKacheong.Poon@Sun.COM 		mblk_t  *conn_opts_mp;
66211755SKacheong.Poon@Sun.COM 		int t_error, sys_error, do_disconnect;
66311755SKacheong.Poon@Sun.COM 
66411755SKacheong.Poon@Sun.COM 		conn_opts_mp = NULL;
66511755SKacheong.Poon@Sun.COM 
66611755SKacheong.Poon@Sun.COM 		if (tcp_conprim_opt_process(tcp, mp,
66711755SKacheong.Poon@Sun.COM 		    &do_disconnect, &t_error, &sys_error) < 0) {
66811755SKacheong.Poon@Sun.COM 			if (do_disconnect) {
66911755SKacheong.Poon@Sun.COM 				ASSERT(t_error == 0 && sys_error == 0);
67011755SKacheong.Poon@Sun.COM 				discon_mp = mi_tpi_discon_ind(NULL,
67111755SKacheong.Poon@Sun.COM 				    ECONNREFUSED, 0);
67211755SKacheong.Poon@Sun.COM 				if (!discon_mp) {
67311755SKacheong.Poon@Sun.COM 					tcp_err_ack_prim(tcp, mp, T_CONN_REQ,
67411755SKacheong.Poon@Sun.COM 					    TSYSERR, ENOMEM);
67511755SKacheong.Poon@Sun.COM 					return;
67611755SKacheong.Poon@Sun.COM 				}
67711755SKacheong.Poon@Sun.COM 				ok_mp = mi_tpi_ok_ack_alloc(mp);
67811755SKacheong.Poon@Sun.COM 				if (!ok_mp) {
67911755SKacheong.Poon@Sun.COM 					tcp_err_ack_prim(tcp, NULL, T_CONN_REQ,
68011755SKacheong.Poon@Sun.COM 					    TSYSERR, ENOMEM);
68111755SKacheong.Poon@Sun.COM 					return;
68211755SKacheong.Poon@Sun.COM 				}
68311755SKacheong.Poon@Sun.COM 				qreply(q, ok_mp);
68411755SKacheong.Poon@Sun.COM 				qreply(q, discon_mp); /* no flush! */
68511755SKacheong.Poon@Sun.COM 			} else {
68611755SKacheong.Poon@Sun.COM 				ASSERT(t_error != 0);
68711755SKacheong.Poon@Sun.COM 				tcp_err_ack_prim(tcp, mp, T_CONN_REQ, t_error,
68811755SKacheong.Poon@Sun.COM 				    sys_error);
68911755SKacheong.Poon@Sun.COM 			}
69011755SKacheong.Poon@Sun.COM 			return;
69111755SKacheong.Poon@Sun.COM 		}
69211755SKacheong.Poon@Sun.COM 		/*
69311755SKacheong.Poon@Sun.COM 		 * Success in setting options, the mp option buffer represented
69411755SKacheong.Poon@Sun.COM 		 * by OPT_length/offset has been potentially modified and
69511755SKacheong.Poon@Sun.COM 		 * contains results of option processing. We copy it in
69611755SKacheong.Poon@Sun.COM 		 * another mp to save it for potentially influencing returning
69711755SKacheong.Poon@Sun.COM 		 * it in T_CONN_CONN.
69811755SKacheong.Poon@Sun.COM 		 */
69911755SKacheong.Poon@Sun.COM 		if (tcr->OPT_length != 0) { /* there are resulting options */
70011755SKacheong.Poon@Sun.COM 			conn_opts_mp = copyb(mp);
70111755SKacheong.Poon@Sun.COM 			if (!conn_opts_mp) {
70211755SKacheong.Poon@Sun.COM 				tcp_err_ack_prim(tcp, mp, T_CONN_REQ,
70311755SKacheong.Poon@Sun.COM 				    TSYSERR, ENOMEM);
70411755SKacheong.Poon@Sun.COM 				return;
70511755SKacheong.Poon@Sun.COM 			}
70611755SKacheong.Poon@Sun.COM 			ASSERT(tcp->tcp_conn.tcp_opts_conn_req == NULL);
70711755SKacheong.Poon@Sun.COM 			tcp->tcp_conn.tcp_opts_conn_req = conn_opts_mp;
70811755SKacheong.Poon@Sun.COM 			/*
70911755SKacheong.Poon@Sun.COM 			 * Note:
71011755SKacheong.Poon@Sun.COM 			 * These resulting option negotiation can include any
71111755SKacheong.Poon@Sun.COM 			 * end-to-end negotiation options but there no such
71211755SKacheong.Poon@Sun.COM 			 * thing (yet?) in our TCP/IP.
71311755SKacheong.Poon@Sun.COM 			 */
71411755SKacheong.Poon@Sun.COM 		}
71511755SKacheong.Poon@Sun.COM 	}
71611755SKacheong.Poon@Sun.COM 
71711755SKacheong.Poon@Sun.COM 	/* call the non-TPI version */
71811755SKacheong.Poon@Sun.COM 	error = tcp_do_connect(tcp->tcp_connp, sa, len, cr, cpid);
71911755SKacheong.Poon@Sun.COM 	if (error < 0) {
72011755SKacheong.Poon@Sun.COM 		mp = mi_tpi_err_ack_alloc(mp, -error, 0);
72111755SKacheong.Poon@Sun.COM 	} else if (error > 0) {
72211755SKacheong.Poon@Sun.COM 		mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error);
72311755SKacheong.Poon@Sun.COM 	} else {
72411755SKacheong.Poon@Sun.COM 		mp = mi_tpi_ok_ack_alloc(mp);
72511755SKacheong.Poon@Sun.COM 	}
72611755SKacheong.Poon@Sun.COM 
72711755SKacheong.Poon@Sun.COM 	/*
72811755SKacheong.Poon@Sun.COM 	 * Note: Code below is the "failure" case
72911755SKacheong.Poon@Sun.COM 	 */
73011755SKacheong.Poon@Sun.COM 	/* return error ack and blow away saved option results if any */
73111755SKacheong.Poon@Sun.COM connect_failed:
73211755SKacheong.Poon@Sun.COM 	if (mp != NULL)
73311755SKacheong.Poon@Sun.COM 		putnext(connp->conn_rq, mp);
73411755SKacheong.Poon@Sun.COM 	else {
73511755SKacheong.Poon@Sun.COM 		tcp_err_ack_prim(tcp, NULL, T_CONN_REQ,
73611755SKacheong.Poon@Sun.COM 		    TSYSERR, ENOMEM);
73711755SKacheong.Poon@Sun.COM 	}
73811755SKacheong.Poon@Sun.COM }
73911755SKacheong.Poon@Sun.COM 
74011755SKacheong.Poon@Sun.COM /* Return the TPI/TLI equivalent of our current tcp_state */
74111755SKacheong.Poon@Sun.COM static int
tcp_tpistate(tcp_t * tcp)74211755SKacheong.Poon@Sun.COM tcp_tpistate(tcp_t *tcp)
74311755SKacheong.Poon@Sun.COM {
74411755SKacheong.Poon@Sun.COM 	switch (tcp->tcp_state) {
74511755SKacheong.Poon@Sun.COM 	case TCPS_IDLE:
74611755SKacheong.Poon@Sun.COM 		return (TS_UNBND);
74711755SKacheong.Poon@Sun.COM 	case TCPS_LISTEN:
74811755SKacheong.Poon@Sun.COM 		/*
74911755SKacheong.Poon@Sun.COM 		 * Return whether there are outstanding T_CONN_IND waiting
75011755SKacheong.Poon@Sun.COM 		 * for the matching T_CONN_RES. Therefore don't count q0.
75111755SKacheong.Poon@Sun.COM 		 */
75211755SKacheong.Poon@Sun.COM 		if (tcp->tcp_conn_req_cnt_q > 0)
75311755SKacheong.Poon@Sun.COM 			return (TS_WRES_CIND);
75411755SKacheong.Poon@Sun.COM 		else
75511755SKacheong.Poon@Sun.COM 			return (TS_IDLE);
75611755SKacheong.Poon@Sun.COM 	case TCPS_BOUND:
75711755SKacheong.Poon@Sun.COM 		return (TS_IDLE);
75811755SKacheong.Poon@Sun.COM 	case TCPS_SYN_SENT:
75911755SKacheong.Poon@Sun.COM 		return (TS_WCON_CREQ);
76011755SKacheong.Poon@Sun.COM 	case TCPS_SYN_RCVD:
76111755SKacheong.Poon@Sun.COM 		/*
76211755SKacheong.Poon@Sun.COM 		 * Note: assumption: this has to the active open SYN_RCVD.
76311755SKacheong.Poon@Sun.COM 		 * The passive instance is detached in SYN_RCVD stage of
76411755SKacheong.Poon@Sun.COM 		 * incoming connection processing so we cannot get request
76511755SKacheong.Poon@Sun.COM 		 * for T_info_ack on it.
76611755SKacheong.Poon@Sun.COM 		 */
76711755SKacheong.Poon@Sun.COM 		return (TS_WACK_CRES);
76811755SKacheong.Poon@Sun.COM 	case TCPS_ESTABLISHED:
76911755SKacheong.Poon@Sun.COM 		return (TS_DATA_XFER);
77011755SKacheong.Poon@Sun.COM 	case TCPS_CLOSE_WAIT:
77111755SKacheong.Poon@Sun.COM 		return (TS_WREQ_ORDREL);
77211755SKacheong.Poon@Sun.COM 	case TCPS_FIN_WAIT_1:
77311755SKacheong.Poon@Sun.COM 		return (TS_WIND_ORDREL);
77411755SKacheong.Poon@Sun.COM 	case TCPS_FIN_WAIT_2:
77511755SKacheong.Poon@Sun.COM 		return (TS_WIND_ORDREL);
77611755SKacheong.Poon@Sun.COM 
77711755SKacheong.Poon@Sun.COM 	case TCPS_CLOSING:
77811755SKacheong.Poon@Sun.COM 	case TCPS_LAST_ACK:
77911755SKacheong.Poon@Sun.COM 	case TCPS_TIME_WAIT:
78011755SKacheong.Poon@Sun.COM 	case TCPS_CLOSED:
78111755SKacheong.Poon@Sun.COM 		/*
78211755SKacheong.Poon@Sun.COM 		 * Following TS_WACK_DREQ7 is a rendition of "not
78311755SKacheong.Poon@Sun.COM 		 * yet TS_IDLE" TPI state. There is no best match to any
78411755SKacheong.Poon@Sun.COM 		 * TPI state for TCPS_{CLOSING, LAST_ACK, TIME_WAIT} but we
78511755SKacheong.Poon@Sun.COM 		 * choose a value chosen that will map to TLI/XTI level
78611755SKacheong.Poon@Sun.COM 		 * state of TSTATECHNG (state is process of changing) which
78711755SKacheong.Poon@Sun.COM 		 * captures what this dummy state represents.
78811755SKacheong.Poon@Sun.COM 		 */
78911755SKacheong.Poon@Sun.COM 		return (TS_WACK_DREQ7);
79011755SKacheong.Poon@Sun.COM 	default:
79111755SKacheong.Poon@Sun.COM 		cmn_err(CE_WARN, "tcp_tpistate: strange state (%d) %s",
79211755SKacheong.Poon@Sun.COM 		    tcp->tcp_state, tcp_display(tcp, NULL,
79311755SKacheong.Poon@Sun.COM 		    DISP_PORT_ONLY));
79411755SKacheong.Poon@Sun.COM 		return (TS_UNBND);
79511755SKacheong.Poon@Sun.COM 	}
79611755SKacheong.Poon@Sun.COM }
79711755SKacheong.Poon@Sun.COM 
79811755SKacheong.Poon@Sun.COM static void
tcp_copy_info(struct T_info_ack * tia,tcp_t * tcp)79911755SKacheong.Poon@Sun.COM tcp_copy_info(struct T_info_ack *tia, tcp_t *tcp)
80011755SKacheong.Poon@Sun.COM {
80111755SKacheong.Poon@Sun.COM 	tcp_stack_t	*tcps = tcp->tcp_tcps;
80211755SKacheong.Poon@Sun.COM 	conn_t		*connp = tcp->tcp_connp;
80311755SKacheong.Poon@Sun.COM 	extern struct T_info_ack tcp_g_t_info_ack;
80411755SKacheong.Poon@Sun.COM 	extern struct T_info_ack tcp_g_t_info_ack_v6;
80511755SKacheong.Poon@Sun.COM 
80611755SKacheong.Poon@Sun.COM 	if (connp->conn_family == AF_INET6)
80711755SKacheong.Poon@Sun.COM 		*tia = tcp_g_t_info_ack_v6;
80811755SKacheong.Poon@Sun.COM 	else
80911755SKacheong.Poon@Sun.COM 		*tia = tcp_g_t_info_ack;
81011755SKacheong.Poon@Sun.COM 	tia->CURRENT_state = tcp_tpistate(tcp);
81111755SKacheong.Poon@Sun.COM 	tia->OPT_size = tcp_max_optsize;
81211755SKacheong.Poon@Sun.COM 	if (tcp->tcp_mss == 0) {
81311755SKacheong.Poon@Sun.COM 		/* Not yet set - tcp_open does not set mss */
81411755SKacheong.Poon@Sun.COM 		if (connp->conn_ipversion == IPV4_VERSION)
81511755SKacheong.Poon@Sun.COM 			tia->TIDU_size = tcps->tcps_mss_def_ipv4;
81611755SKacheong.Poon@Sun.COM 		else
81711755SKacheong.Poon@Sun.COM 			tia->TIDU_size = tcps->tcps_mss_def_ipv6;
81811755SKacheong.Poon@Sun.COM 	} else {
81911755SKacheong.Poon@Sun.COM 		tia->TIDU_size = tcp->tcp_mss;
82011755SKacheong.Poon@Sun.COM 	}
82111755SKacheong.Poon@Sun.COM 	/* TODO: Default ETSDU is 1.  Is that correct for tcp? */
82211755SKacheong.Poon@Sun.COM }
82311755SKacheong.Poon@Sun.COM 
82412643SAnders.Persson@Sun.COM void
tcp_do_capability_ack(tcp_t * tcp,struct T_capability_ack * tcap,t_uscalar_t cap_bits1)82511755SKacheong.Poon@Sun.COM tcp_do_capability_ack(tcp_t *tcp, struct T_capability_ack *tcap,
82611755SKacheong.Poon@Sun.COM     t_uscalar_t cap_bits1)
82711755SKacheong.Poon@Sun.COM {
82811755SKacheong.Poon@Sun.COM 	tcap->CAP_bits1 = 0;
82911755SKacheong.Poon@Sun.COM 
83011755SKacheong.Poon@Sun.COM 	if (cap_bits1 & TC1_INFO) {
83111755SKacheong.Poon@Sun.COM 		tcp_copy_info(&tcap->INFO_ack, tcp);
83211755SKacheong.Poon@Sun.COM 		tcap->CAP_bits1 |= TC1_INFO;
83311755SKacheong.Poon@Sun.COM 	}
83411755SKacheong.Poon@Sun.COM 
83511755SKacheong.Poon@Sun.COM 	if (cap_bits1 & TC1_ACCEPTOR_ID) {
83611755SKacheong.Poon@Sun.COM 		tcap->ACCEPTOR_id = tcp->tcp_acceptor_id;
83711755SKacheong.Poon@Sun.COM 		tcap->CAP_bits1 |= TC1_ACCEPTOR_ID;
83811755SKacheong.Poon@Sun.COM 	}
83911755SKacheong.Poon@Sun.COM 
84011755SKacheong.Poon@Sun.COM }
84111755SKacheong.Poon@Sun.COM 
84211755SKacheong.Poon@Sun.COM /*
84311755SKacheong.Poon@Sun.COM  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
84411755SKacheong.Poon@Sun.COM  * tcp_wput.  Much of the T_CAPABILITY_ACK information is copied from
84511755SKacheong.Poon@Sun.COM  * tcp_g_t_info_ack.  The current state of the stream is copied from
84611755SKacheong.Poon@Sun.COM  * tcp_state.
84711755SKacheong.Poon@Sun.COM  */
84811755SKacheong.Poon@Sun.COM void
tcp_capability_req(tcp_t * tcp,mblk_t * mp)84911755SKacheong.Poon@Sun.COM tcp_capability_req(tcp_t *tcp, mblk_t *mp)
85011755SKacheong.Poon@Sun.COM {
85111755SKacheong.Poon@Sun.COM 	t_uscalar_t		cap_bits1;
85211755SKacheong.Poon@Sun.COM 	struct T_capability_ack	*tcap;
85311755SKacheong.Poon@Sun.COM 
85411755SKacheong.Poon@Sun.COM 	if (MBLKL(mp) < sizeof (struct T_capability_req)) {
85511755SKacheong.Poon@Sun.COM 		freemsg(mp);
85611755SKacheong.Poon@Sun.COM 		return;
85711755SKacheong.Poon@Sun.COM 	}
85811755SKacheong.Poon@Sun.COM 
85911755SKacheong.Poon@Sun.COM 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
86011755SKacheong.Poon@Sun.COM 
86111755SKacheong.Poon@Sun.COM 	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
86211755SKacheong.Poon@Sun.COM 	    mp->b_datap->db_type, T_CAPABILITY_ACK);
86311755SKacheong.Poon@Sun.COM 	if (mp == NULL)
86411755SKacheong.Poon@Sun.COM 		return;
86511755SKacheong.Poon@Sun.COM 
86611755SKacheong.Poon@Sun.COM 	tcap = (struct T_capability_ack *)mp->b_rptr;
86711755SKacheong.Poon@Sun.COM 	tcp_do_capability_ack(tcp, tcap, cap_bits1);
86811755SKacheong.Poon@Sun.COM 
86911755SKacheong.Poon@Sun.COM 	putnext(tcp->tcp_connp->conn_rq, mp);
87011755SKacheong.Poon@Sun.COM }
87111755SKacheong.Poon@Sun.COM 
87211755SKacheong.Poon@Sun.COM /*
87311755SKacheong.Poon@Sun.COM  * This routine responds to T_INFO_REQ messages.  It is called by tcp_wput.
87411755SKacheong.Poon@Sun.COM  * Most of the T_INFO_ACK information is copied from tcp_g_t_info_ack.
87511755SKacheong.Poon@Sun.COM  * The current state of the stream is copied from tcp_state.
87611755SKacheong.Poon@Sun.COM  */
87711755SKacheong.Poon@Sun.COM void
tcp_info_req(tcp_t * tcp,mblk_t * mp)87811755SKacheong.Poon@Sun.COM tcp_info_req(tcp_t *tcp, mblk_t *mp)
87911755SKacheong.Poon@Sun.COM {
88011755SKacheong.Poon@Sun.COM 	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
88111755SKacheong.Poon@Sun.COM 	    T_INFO_ACK);
88211755SKacheong.Poon@Sun.COM 	if (!mp) {
88311755SKacheong.Poon@Sun.COM 		tcp_err_ack(tcp, mp, TSYSERR, ENOMEM);
88411755SKacheong.Poon@Sun.COM 		return;
88511755SKacheong.Poon@Sun.COM 	}
88611755SKacheong.Poon@Sun.COM 	tcp_copy_info((struct T_info_ack *)mp->b_rptr, tcp);
88711755SKacheong.Poon@Sun.COM 	putnext(tcp->tcp_connp->conn_rq, mp);
88811755SKacheong.Poon@Sun.COM }
88911755SKacheong.Poon@Sun.COM 
89011755SKacheong.Poon@Sun.COM /* Respond to the TPI addr request */
89111755SKacheong.Poon@Sun.COM void
tcp_addr_req(tcp_t * tcp,mblk_t * mp)89211755SKacheong.Poon@Sun.COM tcp_addr_req(tcp_t *tcp, mblk_t *mp)
89311755SKacheong.Poon@Sun.COM {
89411755SKacheong.Poon@Sun.COM 	struct sockaddr *sa;
89511755SKacheong.Poon@Sun.COM 	mblk_t	*ackmp;
89611755SKacheong.Poon@Sun.COM 	struct T_addr_ack *taa;
89711755SKacheong.Poon@Sun.COM 	conn_t	*connp = tcp->tcp_connp;
89811755SKacheong.Poon@Sun.COM 	uint_t	addrlen;
89911755SKacheong.Poon@Sun.COM 
90011755SKacheong.Poon@Sun.COM 	/* Make it large enough for worst case */
90111755SKacheong.Poon@Sun.COM 	ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
90211755SKacheong.Poon@Sun.COM 	    2 * sizeof (sin6_t), 1);
90311755SKacheong.Poon@Sun.COM 	if (ackmp == NULL) {
90411755SKacheong.Poon@Sun.COM 		tcp_err_ack(tcp, mp, TSYSERR, ENOMEM);
90511755SKacheong.Poon@Sun.COM 		return;
90611755SKacheong.Poon@Sun.COM 	}
90711755SKacheong.Poon@Sun.COM 
90811755SKacheong.Poon@Sun.COM 	taa = (struct T_addr_ack *)ackmp->b_rptr;
90911755SKacheong.Poon@Sun.COM 
91011755SKacheong.Poon@Sun.COM 	bzero(taa, sizeof (struct T_addr_ack));
91111755SKacheong.Poon@Sun.COM 	ackmp->b_wptr = (uchar_t *)&taa[1];
91211755SKacheong.Poon@Sun.COM 
91311755SKacheong.Poon@Sun.COM 	taa->PRIM_type = T_ADDR_ACK;
91411755SKacheong.Poon@Sun.COM 	ackmp->b_datap->db_type = M_PCPROTO;
91511755SKacheong.Poon@Sun.COM 
91611755SKacheong.Poon@Sun.COM 	if (connp->conn_family == AF_INET)
91711755SKacheong.Poon@Sun.COM 		addrlen = sizeof (sin_t);
91811755SKacheong.Poon@Sun.COM 	else
91911755SKacheong.Poon@Sun.COM 		addrlen = sizeof (sin6_t);
92011755SKacheong.Poon@Sun.COM 
92111755SKacheong.Poon@Sun.COM 	/*
92211755SKacheong.Poon@Sun.COM 	 * Note: Following code assumes 32 bit alignment of basic
92311755SKacheong.Poon@Sun.COM 	 * data structures like sin_t and struct T_addr_ack.
92411755SKacheong.Poon@Sun.COM 	 */
92511755SKacheong.Poon@Sun.COM 	if (tcp->tcp_state >= TCPS_BOUND) {
92611755SKacheong.Poon@Sun.COM 		/*
92711755SKacheong.Poon@Sun.COM 		 * Fill in local address first
92811755SKacheong.Poon@Sun.COM 		 */
92911755SKacheong.Poon@Sun.COM 		taa->LOCADDR_offset = sizeof (*taa);
93011755SKacheong.Poon@Sun.COM 		taa->LOCADDR_length = addrlen;
93111755SKacheong.Poon@Sun.COM 		sa = (struct sockaddr *)&taa[1];
93211755SKacheong.Poon@Sun.COM 		(void) conn_getsockname(connp, sa, &addrlen);
93311755SKacheong.Poon@Sun.COM 		ackmp->b_wptr += addrlen;
93411755SKacheong.Poon@Sun.COM 	}
93511755SKacheong.Poon@Sun.COM 	if (tcp->tcp_state >= TCPS_SYN_RCVD) {
93611755SKacheong.Poon@Sun.COM 		/*
93711755SKacheong.Poon@Sun.COM 		 * Fill in Remote address
93811755SKacheong.Poon@Sun.COM 		 */
93911755SKacheong.Poon@Sun.COM 		taa->REMADDR_length = addrlen;
94011755SKacheong.Poon@Sun.COM 		/* assumed 32-bit alignment */
94111755SKacheong.Poon@Sun.COM 		taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length;
94211755SKacheong.Poon@Sun.COM 		sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset);
94311755SKacheong.Poon@Sun.COM 		(void) conn_getpeername(connp, sa, &addrlen);
94411755SKacheong.Poon@Sun.COM 		ackmp->b_wptr += addrlen;
94511755SKacheong.Poon@Sun.COM 	}
94611755SKacheong.Poon@Sun.COM 	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
94711755SKacheong.Poon@Sun.COM 	putnext(tcp->tcp_connp->conn_rq, ackmp);
94811755SKacheong.Poon@Sun.COM }
94911755SKacheong.Poon@Sun.COM 
95011755SKacheong.Poon@Sun.COM /*
95111755SKacheong.Poon@Sun.COM  * Swap information between the eager and acceptor for a TLI/XTI client.
95211755SKacheong.Poon@Sun.COM  * The sockfs accept is done on the acceptor stream and control goes
95311755SKacheong.Poon@Sun.COM  * through tcp_tli_accept() and tcp_accept()/tcp_accept_swap() is not
95411755SKacheong.Poon@Sun.COM  * called. In either case, both the eager and listener are in their own
95511755SKacheong.Poon@Sun.COM  * perimeter (squeue) and the code has to deal with potential race.
95611755SKacheong.Poon@Sun.COM  *
95711755SKacheong.Poon@Sun.COM  * See the block comment on top of tcp_accept() and tcp_tli_accept().
95811755SKacheong.Poon@Sun.COM  */
95911755SKacheong.Poon@Sun.COM static void
tcp_accept_swap(tcp_t * listener,tcp_t * acceptor,tcp_t * eager)96011755SKacheong.Poon@Sun.COM tcp_accept_swap(tcp_t *listener, tcp_t *acceptor, tcp_t *eager)
96111755SKacheong.Poon@Sun.COM {
96211755SKacheong.Poon@Sun.COM 	conn_t	*econnp, *aconnp;
96311755SKacheong.Poon@Sun.COM 
96411755SKacheong.Poon@Sun.COM 	ASSERT(eager->tcp_connp->conn_rq == listener->tcp_connp->conn_rq);
96511755SKacheong.Poon@Sun.COM 	ASSERT(eager->tcp_detached && !acceptor->tcp_detached);
96611755SKacheong.Poon@Sun.COM 	ASSERT(!TCP_IS_SOCKET(acceptor));
96711755SKacheong.Poon@Sun.COM 	ASSERT(!TCP_IS_SOCKET(eager));
96811755SKacheong.Poon@Sun.COM 	ASSERT(!TCP_IS_SOCKET(listener));
96911755SKacheong.Poon@Sun.COM 
97011755SKacheong.Poon@Sun.COM 	/*
97111755SKacheong.Poon@Sun.COM 	 * Trusted Extensions may need to use a security label that is
97211755SKacheong.Poon@Sun.COM 	 * different from the acceptor's label on MLP and MAC-Exempt
97311755SKacheong.Poon@Sun.COM 	 * sockets. If this is the case, the required security label
97411755SKacheong.Poon@Sun.COM 	 * already exists in econnp->conn_ixa->ixa_tsl. Since we make the
97511755SKacheong.Poon@Sun.COM 	 * acceptor stream refer to econnp we atomatically get that label.
97611755SKacheong.Poon@Sun.COM 	 */
97711755SKacheong.Poon@Sun.COM 
97811755SKacheong.Poon@Sun.COM 	acceptor->tcp_detached = B_TRUE;
97911755SKacheong.Poon@Sun.COM 	/*
98011755SKacheong.Poon@Sun.COM 	 * To permit stream re-use by TLI/XTI, the eager needs a copy of
98111755SKacheong.Poon@Sun.COM 	 * the acceptor id.
98211755SKacheong.Poon@Sun.COM 	 */
98311755SKacheong.Poon@Sun.COM 	eager->tcp_acceptor_id = acceptor->tcp_acceptor_id;
98411755SKacheong.Poon@Sun.COM 
98511755SKacheong.Poon@Sun.COM 	/* remove eager from listen list... */
98611755SKacheong.Poon@Sun.COM 	mutex_enter(&listener->tcp_eager_lock);
98711755SKacheong.Poon@Sun.COM 	tcp_eager_unlink(eager);
98811755SKacheong.Poon@Sun.COM 	ASSERT(eager->tcp_eager_next_q == NULL &&
98911755SKacheong.Poon@Sun.COM 	    eager->tcp_eager_last_q == NULL);
99011755SKacheong.Poon@Sun.COM 	ASSERT(eager->tcp_eager_next_q0 == NULL &&
99111755SKacheong.Poon@Sun.COM 	    eager->tcp_eager_prev_q0 == NULL);
99211755SKacheong.Poon@Sun.COM 	mutex_exit(&listener->tcp_eager_lock);
99311755SKacheong.Poon@Sun.COM 
99411755SKacheong.Poon@Sun.COM 	econnp = eager->tcp_connp;
99511755SKacheong.Poon@Sun.COM 	aconnp = acceptor->tcp_connp;
99611755SKacheong.Poon@Sun.COM 	econnp->conn_rq = aconnp->conn_rq;
99711755SKacheong.Poon@Sun.COM 	econnp->conn_wq = aconnp->conn_wq;
99811755SKacheong.Poon@Sun.COM 	econnp->conn_rq->q_ptr = econnp;
99911755SKacheong.Poon@Sun.COM 	econnp->conn_wq->q_ptr = econnp;
100011755SKacheong.Poon@Sun.COM 
100111755SKacheong.Poon@Sun.COM 	/*
100211755SKacheong.Poon@Sun.COM 	 * In the TLI/XTI loopback case, we are inside the listener's squeue,
100311755SKacheong.Poon@Sun.COM 	 * which might be a different squeue from our peer TCP instance.
100411755SKacheong.Poon@Sun.COM 	 * For TCP Fusion, the peer expects that whenever tcp_detached is
100511755SKacheong.Poon@Sun.COM 	 * clear, our TCP queues point to the acceptor's queues.  Thus, use
100611755SKacheong.Poon@Sun.COM 	 * membar_producer() to ensure that the assignments of conn_rq/conn_wq
100711755SKacheong.Poon@Sun.COM 	 * above reach global visibility prior to the clearing of tcp_detached.
100811755SKacheong.Poon@Sun.COM 	 */
100911755SKacheong.Poon@Sun.COM 	membar_producer();
101011755SKacheong.Poon@Sun.COM 	eager->tcp_detached = B_FALSE;
101111755SKacheong.Poon@Sun.COM 
101211755SKacheong.Poon@Sun.COM 	ASSERT(eager->tcp_ack_tid == 0);
101311755SKacheong.Poon@Sun.COM 
101411755SKacheong.Poon@Sun.COM 	econnp->conn_dev = aconnp->conn_dev;
101511755SKacheong.Poon@Sun.COM 	econnp->conn_minor_arena = aconnp->conn_minor_arena;
101611755SKacheong.Poon@Sun.COM 
101711755SKacheong.Poon@Sun.COM 	ASSERT(econnp->conn_minor_arena != NULL);
101811755SKacheong.Poon@Sun.COM 	if (econnp->conn_cred != NULL)
101911755SKacheong.Poon@Sun.COM 		crfree(econnp->conn_cred);
102011755SKacheong.Poon@Sun.COM 	econnp->conn_cred = aconnp->conn_cred;
102111849SErik.Nordmark@Sun.COM 	ASSERT(!(econnp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
102211755SKacheong.Poon@Sun.COM 	econnp->conn_ixa->ixa_cred = econnp->conn_cred;
102311755SKacheong.Poon@Sun.COM 	aconnp->conn_cred = NULL;
102411755SKacheong.Poon@Sun.COM 	econnp->conn_cpid = aconnp->conn_cpid;
102511755SKacheong.Poon@Sun.COM 	ASSERT(econnp->conn_netstack == aconnp->conn_netstack);
102611755SKacheong.Poon@Sun.COM 	ASSERT(eager->tcp_tcps == acceptor->tcp_tcps);
102711755SKacheong.Poon@Sun.COM 
102811755SKacheong.Poon@Sun.COM 	econnp->conn_zoneid = aconnp->conn_zoneid;
102911755SKacheong.Poon@Sun.COM 	econnp->conn_allzones = aconnp->conn_allzones;
103011755SKacheong.Poon@Sun.COM 	econnp->conn_ixa->ixa_zoneid = aconnp->conn_ixa->ixa_zoneid;
103111755SKacheong.Poon@Sun.COM 
103211755SKacheong.Poon@Sun.COM 	econnp->conn_mac_mode = aconnp->conn_mac_mode;
103311755SKacheong.Poon@Sun.COM 	econnp->conn_zone_is_global = aconnp->conn_zone_is_global;
103411755SKacheong.Poon@Sun.COM 	aconnp->conn_mac_mode = CONN_MAC_DEFAULT;
103511755SKacheong.Poon@Sun.COM 
103611755SKacheong.Poon@Sun.COM 	/* Do the IPC initialization */
103711755SKacheong.Poon@Sun.COM 	CONN_INC_REF(econnp);
103811755SKacheong.Poon@Sun.COM 
103911755SKacheong.Poon@Sun.COM 	/* Done with old IPC. Drop its ref on its connp */
104011755SKacheong.Poon@Sun.COM 	CONN_DEC_REF(aconnp);
104111755SKacheong.Poon@Sun.COM }
104211755SKacheong.Poon@Sun.COM 
104311755SKacheong.Poon@Sun.COM /*
104412643SAnders.Persson@Sun.COM  * This runs at the tail end of accept processing on the squeue of the
104512643SAnders.Persson@Sun.COM  * new connection.
104612643SAnders.Persson@Sun.COM  */
104712643SAnders.Persson@Sun.COM /* ARGSUSED */
104812643SAnders.Persson@Sun.COM static void
tcp_accept_finish(void * arg,mblk_t * mp,void * arg2,ip_recv_attr_t * dummy)104912643SAnders.Persson@Sun.COM tcp_accept_finish(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy)
105012643SAnders.Persson@Sun.COM {
105112643SAnders.Persson@Sun.COM 	conn_t			*connp = (conn_t *)arg;
105212643SAnders.Persson@Sun.COM 	tcp_t			*tcp = connp->conn_tcp;
105312643SAnders.Persson@Sun.COM 	queue_t			*q = connp->conn_rq;
105412643SAnders.Persson@Sun.COM 	tcp_stack_t		*tcps = tcp->tcp_tcps;
105512643SAnders.Persson@Sun.COM 	struct stroptions 	*stropt;
105612643SAnders.Persson@Sun.COM 	struct sock_proto_props sopp;
105712643SAnders.Persson@Sun.COM 
105812643SAnders.Persson@Sun.COM 	/* Should never be called for non-STREAMS sockets */
105912643SAnders.Persson@Sun.COM 	ASSERT(!IPCL_IS_NONSTR(connp));
106012643SAnders.Persson@Sun.COM 
106112643SAnders.Persson@Sun.COM 	/* We should just receive a single mblk that fits a T_discon_ind */
106212643SAnders.Persson@Sun.COM 	ASSERT(mp->b_cont == NULL);
106312643SAnders.Persson@Sun.COM 
106412643SAnders.Persson@Sun.COM 	/*
106512643SAnders.Persson@Sun.COM 	 * Drop the eager's ref on the listener, that was placed when
106612643SAnders.Persson@Sun.COM 	 * this eager began life in tcp_input_listener.
106712643SAnders.Persson@Sun.COM 	 */
106812643SAnders.Persson@Sun.COM 	CONN_DEC_REF(tcp->tcp_saved_listener->tcp_connp);
106912643SAnders.Persson@Sun.COM 
107012643SAnders.Persson@Sun.COM 	tcp->tcp_detached = B_FALSE;
107112643SAnders.Persson@Sun.COM 
107212643SAnders.Persson@Sun.COM 	if (tcp->tcp_state <= TCPS_BOUND || tcp->tcp_accept_error) {
107312643SAnders.Persson@Sun.COM 		/*
107412643SAnders.Persson@Sun.COM 		 * Someone blewoff the eager before we could finish
107512643SAnders.Persson@Sun.COM 		 * the accept.
107612643SAnders.Persson@Sun.COM 		 *
107712643SAnders.Persson@Sun.COM 		 * The only reason eager exists it because we put in
107812643SAnders.Persson@Sun.COM 		 * a ref on it when conn ind went up. We need to send
107912643SAnders.Persson@Sun.COM 		 * a disconnect indication up while the last reference
108012643SAnders.Persson@Sun.COM 		 * on the eager will be dropped by the squeue when we
108112643SAnders.Persson@Sun.COM 		 * return.
108212643SAnders.Persson@Sun.COM 		 */
108312643SAnders.Persson@Sun.COM 		ASSERT(tcp->tcp_listener == NULL);
108412643SAnders.Persson@Sun.COM 		if (tcp->tcp_issocket || tcp->tcp_send_discon_ind) {
108512643SAnders.Persson@Sun.COM 			struct	T_discon_ind	*tdi;
108612643SAnders.Persson@Sun.COM 
108712643SAnders.Persson@Sun.COM 			(void) putnextctl1(q, M_FLUSH, FLUSHRW);
108812643SAnders.Persson@Sun.COM 			/*
108912643SAnders.Persson@Sun.COM 			 * Let us reuse the incoming mblk to avoid
109012643SAnders.Persson@Sun.COM 			 * memory allocation failure problems. We know
109112643SAnders.Persson@Sun.COM 			 * that the size of the incoming mblk i.e.
109212643SAnders.Persson@Sun.COM 			 * stroptions is greater than sizeof
109312643SAnders.Persson@Sun.COM 			 * T_discon_ind.
109412643SAnders.Persson@Sun.COM 			 */
109512643SAnders.Persson@Sun.COM 			ASSERT(DB_REF(mp) == 1);
109612643SAnders.Persson@Sun.COM 			ASSERT(MBLKSIZE(mp) >=
109712643SAnders.Persson@Sun.COM 			    sizeof (struct T_discon_ind));
109812643SAnders.Persson@Sun.COM 
109912643SAnders.Persson@Sun.COM 			DB_TYPE(mp) = M_PROTO;
110012643SAnders.Persson@Sun.COM 			((union T_primitives *)mp->b_rptr)->type =
110112643SAnders.Persson@Sun.COM 			    T_DISCON_IND;
110212643SAnders.Persson@Sun.COM 			tdi = (struct T_discon_ind *)mp->b_rptr;
110312643SAnders.Persson@Sun.COM 			if (tcp->tcp_issocket) {
110412643SAnders.Persson@Sun.COM 				tdi->DISCON_reason = ECONNREFUSED;
110512643SAnders.Persson@Sun.COM 				tdi->SEQ_number = 0;
110612643SAnders.Persson@Sun.COM 			} else {
110712643SAnders.Persson@Sun.COM 				tdi->DISCON_reason = ENOPROTOOPT;
110812643SAnders.Persson@Sun.COM 				tdi->SEQ_number =
110912643SAnders.Persson@Sun.COM 				    tcp->tcp_conn_req_seqnum;
111012643SAnders.Persson@Sun.COM 			}
111112643SAnders.Persson@Sun.COM 			mp->b_wptr = mp->b_rptr +
111212643SAnders.Persson@Sun.COM 			    sizeof (struct T_discon_ind);
111312643SAnders.Persson@Sun.COM 			putnext(q, mp);
111412643SAnders.Persson@Sun.COM 		}
111512643SAnders.Persson@Sun.COM 		tcp->tcp_hard_binding = B_FALSE;
111612643SAnders.Persson@Sun.COM 		return;
111712643SAnders.Persson@Sun.COM 	}
111812643SAnders.Persson@Sun.COM 
111912643SAnders.Persson@Sun.COM 	/*
112012643SAnders.Persson@Sun.COM 	 * This is the first time we run on the correct
112112643SAnders.Persson@Sun.COM 	 * queue after tcp_accept. So fix all the q parameters
112212643SAnders.Persson@Sun.COM 	 * here.
112312643SAnders.Persson@Sun.COM 	 *
112412643SAnders.Persson@Sun.COM 	 * Let us reuse the incoming mblk to avoid
112512643SAnders.Persson@Sun.COM 	 * memory allocation failure problems. We know
112612643SAnders.Persson@Sun.COM 	 * that the size of the incoming mblk is at least
112712643SAnders.Persson@Sun.COM 	 * stroptions
112812643SAnders.Persson@Sun.COM 	 */
112912643SAnders.Persson@Sun.COM 	tcp_get_proto_props(tcp, &sopp);
113012643SAnders.Persson@Sun.COM 
113112643SAnders.Persson@Sun.COM 	ASSERT(DB_REF(mp) == 1);
113212643SAnders.Persson@Sun.COM 	ASSERT(MBLKSIZE(mp) >= sizeof (struct stroptions));
113312643SAnders.Persson@Sun.COM 
113412643SAnders.Persson@Sun.COM 	DB_TYPE(mp) = M_SETOPTS;
113512643SAnders.Persson@Sun.COM 	stropt = (struct stroptions *)mp->b_rptr;
113612643SAnders.Persson@Sun.COM 	mp->b_wptr = mp->b_rptr + sizeof (struct stroptions);
113712643SAnders.Persson@Sun.COM 	stropt = (struct stroptions *)mp->b_rptr;
113812643SAnders.Persson@Sun.COM 	ASSERT(sopp.sopp_flags & (SO_HIWAT|SO_WROFF|SO_MAXBLK));
113912643SAnders.Persson@Sun.COM 	stropt->so_flags = SO_HIWAT | SO_WROFF | SO_MAXBLK;
114012643SAnders.Persson@Sun.COM 	stropt->so_hiwat = sopp.sopp_rxhiwat;
114112643SAnders.Persson@Sun.COM 	stropt->so_wroff = sopp.sopp_wroff;
114212643SAnders.Persson@Sun.COM 	stropt->so_maxblk = sopp.sopp_maxblk;
114312643SAnders.Persson@Sun.COM 
114412643SAnders.Persson@Sun.COM 	/* Send the options up */
114512643SAnders.Persson@Sun.COM 	putnext(q, mp);
114612643SAnders.Persson@Sun.COM 
114712643SAnders.Persson@Sun.COM 	/*
114812643SAnders.Persson@Sun.COM 	 * Pass up any data and/or a fin that has been received.
114912643SAnders.Persson@Sun.COM 	 *
115012643SAnders.Persson@Sun.COM 	 * Adjust receive window in case it had decreased
115112643SAnders.Persson@Sun.COM 	 * (because there is data <=> tcp_rcv_list != NULL)
115212643SAnders.Persson@Sun.COM 	 * while the connection was detached. Note that
115312643SAnders.Persson@Sun.COM 	 * in case the eager was flow-controlled, w/o this
115412643SAnders.Persson@Sun.COM 	 * code, the rwnd may never open up again!
115512643SAnders.Persson@Sun.COM 	 */
115612643SAnders.Persson@Sun.COM 	if (tcp->tcp_rcv_list != NULL) {
115712643SAnders.Persson@Sun.COM 		/* We drain directly in case of fused tcp loopback */
115812643SAnders.Persson@Sun.COM 
115912643SAnders.Persson@Sun.COM 		if (!tcp->tcp_fused && canputnext(q)) {
116012643SAnders.Persson@Sun.COM 			tcp->tcp_rwnd = connp->conn_rcvbuf;
116112643SAnders.Persson@Sun.COM 			if (tcp->tcp_state >= TCPS_ESTABLISHED &&
116212643SAnders.Persson@Sun.COM 			    tcp_rwnd_reopen(tcp) == TH_ACK_NEEDED) {
116312643SAnders.Persson@Sun.COM 				tcp_xmit_ctl(NULL,
116412643SAnders.Persson@Sun.COM 				    tcp, (tcp->tcp_swnd == 0) ?
116512643SAnders.Persson@Sun.COM 				    tcp->tcp_suna : tcp->tcp_snxt,
116612643SAnders.Persson@Sun.COM 				    tcp->tcp_rnxt, TH_ACK);
116712643SAnders.Persson@Sun.COM 			}
116812643SAnders.Persson@Sun.COM 		}
116912643SAnders.Persson@Sun.COM 
117012643SAnders.Persson@Sun.COM 		(void) tcp_rcv_drain(tcp);
117112643SAnders.Persson@Sun.COM 
117212643SAnders.Persson@Sun.COM 		/*
117312643SAnders.Persson@Sun.COM 		 * For fused tcp loopback, back-enable peer endpoint
117412643SAnders.Persson@Sun.COM 		 * if it's currently flow-controlled.
117512643SAnders.Persson@Sun.COM 		 */
117612643SAnders.Persson@Sun.COM 		if (tcp->tcp_fused) {
117712643SAnders.Persson@Sun.COM 			tcp_t *peer_tcp = tcp->tcp_loopback_peer;
117812643SAnders.Persson@Sun.COM 
117912643SAnders.Persson@Sun.COM 			ASSERT(peer_tcp != NULL);
118012643SAnders.Persson@Sun.COM 			ASSERT(peer_tcp->tcp_fused);
118112643SAnders.Persson@Sun.COM 
118212643SAnders.Persson@Sun.COM 			mutex_enter(&peer_tcp->tcp_non_sq_lock);
118312643SAnders.Persson@Sun.COM 			if (peer_tcp->tcp_flow_stopped) {
118412643SAnders.Persson@Sun.COM 				tcp_clrqfull(peer_tcp);
118512643SAnders.Persson@Sun.COM 				TCP_STAT(tcps, tcp_fusion_backenabled);
118612643SAnders.Persson@Sun.COM 			}
118712643SAnders.Persson@Sun.COM 			mutex_exit(&peer_tcp->tcp_non_sq_lock);
118812643SAnders.Persson@Sun.COM 		}
118912643SAnders.Persson@Sun.COM 	}
119012643SAnders.Persson@Sun.COM 	ASSERT(tcp->tcp_rcv_list == NULL || tcp->tcp_fused_sigurg);
119112643SAnders.Persson@Sun.COM 	if (tcp->tcp_fin_rcvd && !tcp->tcp_ordrel_done) {
119212643SAnders.Persson@Sun.COM 		tcp->tcp_ordrel_done = B_TRUE;
119312643SAnders.Persson@Sun.COM 		mp = tcp->tcp_ordrel_mp;
119412643SAnders.Persson@Sun.COM 		tcp->tcp_ordrel_mp = NULL;
119512643SAnders.Persson@Sun.COM 		putnext(q, mp);
119612643SAnders.Persson@Sun.COM 	}
119712643SAnders.Persson@Sun.COM 	tcp->tcp_hard_binding = B_FALSE;
119812643SAnders.Persson@Sun.COM 
119912643SAnders.Persson@Sun.COM 	if (connp->conn_keepalive) {
120012643SAnders.Persson@Sun.COM 		tcp->tcp_ka_last_intrvl = 0;
120112643SAnders.Persson@Sun.COM 		tcp->tcp_ka_tid = TCP_TIMER(tcp, tcp_keepalive_timer,
120212643SAnders.Persson@Sun.COM 		    tcp->tcp_ka_interval);
120312643SAnders.Persson@Sun.COM 	}
120412643SAnders.Persson@Sun.COM 
120512643SAnders.Persson@Sun.COM 	/*
120612643SAnders.Persson@Sun.COM 	 * At this point, eager is fully established and will
120712643SAnders.Persson@Sun.COM 	 * have the following references -
120812643SAnders.Persson@Sun.COM 	 *
120912643SAnders.Persson@Sun.COM 	 * 2 references for connection to exist (1 for TCP and 1 for IP).
121012643SAnders.Persson@Sun.COM 	 * 1 reference for the squeue which will be dropped by the squeue as
121112643SAnders.Persson@Sun.COM 	 *	soon as this function returns.
121212643SAnders.Persson@Sun.COM 	 * There will be 1 additonal reference for being in classifier
121312643SAnders.Persson@Sun.COM 	 *	hash list provided something bad hasn't happened.
121412643SAnders.Persson@Sun.COM 	 */
121512643SAnders.Persson@Sun.COM 	ASSERT((connp->conn_fanout != NULL && connp->conn_ref >= 4) ||
121612643SAnders.Persson@Sun.COM 	    (connp->conn_fanout == NULL && connp->conn_ref >= 3));
121712643SAnders.Persson@Sun.COM }
121812643SAnders.Persson@Sun.COM 
1219*12644SAnders.Persson@Sun.COM /*
1220*12644SAnders.Persson@Sun.COM  * Pull a deferred connection indication off of the listener. The caller
1221*12644SAnders.Persson@Sun.COM  * must verify that there is a deferred conn ind under eager_lock before
1222*12644SAnders.Persson@Sun.COM  * calling this function.
1223*12644SAnders.Persson@Sun.COM  */
1224*12644SAnders.Persson@Sun.COM static mblk_t *
tcp_get_def_conn_ind(tcp_t * listener)1225*12644SAnders.Persson@Sun.COM tcp_get_def_conn_ind(tcp_t *listener)
1226*12644SAnders.Persson@Sun.COM {
1227*12644SAnders.Persson@Sun.COM 	tcp_t *tail;
1228*12644SAnders.Persson@Sun.COM 	tcp_t *tcp;
1229*12644SAnders.Persson@Sun.COM 	mblk_t *conn_ind;
1230*12644SAnders.Persson@Sun.COM 
1231*12644SAnders.Persson@Sun.COM 	ASSERT(MUTEX_HELD(&listener->tcp_eager_lock));
1232*12644SAnders.Persson@Sun.COM 	ASSERT(listener->tcp_eager_prev_q0->tcp_conn_def_q0);
1233*12644SAnders.Persson@Sun.COM 
1234*12644SAnders.Persson@Sun.COM 	tcp = listener->tcp_eager_prev_q0;
1235*12644SAnders.Persson@Sun.COM 	/*
1236*12644SAnders.Persson@Sun.COM 	 * listener->tcp_eager_prev_q0 points to the TAIL of the
1237*12644SAnders.Persson@Sun.COM 	 * deferred T_conn_ind queue. We need to get to the head
1238*12644SAnders.Persson@Sun.COM 	 * of the queue in order to send up T_conn_ind the same
1239*12644SAnders.Persson@Sun.COM 	 * order as how the 3WHS is completed.
1240*12644SAnders.Persson@Sun.COM 	 */
1241*12644SAnders.Persson@Sun.COM 	while (tcp != listener) {
1242*12644SAnders.Persson@Sun.COM 		if (!tcp->tcp_eager_prev_q0->tcp_conn_def_q0)
1243*12644SAnders.Persson@Sun.COM 			break;
1244*12644SAnders.Persson@Sun.COM 		else
1245*12644SAnders.Persson@Sun.COM 			tcp = tcp->tcp_eager_prev_q0;
1246*12644SAnders.Persson@Sun.COM 	}
1247*12644SAnders.Persson@Sun.COM 
1248*12644SAnders.Persson@Sun.COM 	conn_ind = tcp->tcp_conn.tcp_eager_conn_ind;
1249*12644SAnders.Persson@Sun.COM 	tcp->tcp_conn.tcp_eager_conn_ind = NULL;
1250*12644SAnders.Persson@Sun.COM 	/* Move from q0 to q */
1251*12644SAnders.Persson@Sun.COM 	ASSERT(listener->tcp_conn_req_cnt_q0 > 0);
1252*12644SAnders.Persson@Sun.COM 	listener->tcp_conn_req_cnt_q0--;
1253*12644SAnders.Persson@Sun.COM 	listener->tcp_conn_req_cnt_q++;
1254*12644SAnders.Persson@Sun.COM 	tcp->tcp_eager_next_q0->tcp_eager_prev_q0 =
1255*12644SAnders.Persson@Sun.COM 	    tcp->tcp_eager_prev_q0;
1256*12644SAnders.Persson@Sun.COM 	tcp->tcp_eager_prev_q0->tcp_eager_next_q0 =
1257*12644SAnders.Persson@Sun.COM 	    tcp->tcp_eager_next_q0;
1258*12644SAnders.Persson@Sun.COM 	tcp->tcp_eager_prev_q0 = NULL;
1259*12644SAnders.Persson@Sun.COM 	tcp->tcp_eager_next_q0 = NULL;
1260*12644SAnders.Persson@Sun.COM 	tcp->tcp_conn_def_q0 = B_FALSE;
1261*12644SAnders.Persson@Sun.COM 
1262*12644SAnders.Persson@Sun.COM 	/* Make sure the tcp isn't in the list of droppables */
1263*12644SAnders.Persson@Sun.COM 	ASSERT(tcp->tcp_eager_next_drop_q0 == NULL &&
1264*12644SAnders.Persson@Sun.COM 	    tcp->tcp_eager_prev_drop_q0 == NULL);
1265*12644SAnders.Persson@Sun.COM 
1266*12644SAnders.Persson@Sun.COM 	/*
1267*12644SAnders.Persson@Sun.COM 	 * Insert at end of the queue because sockfs sends
1268*12644SAnders.Persson@Sun.COM 	 * down T_CONN_RES in chronological order. Leaving
1269*12644SAnders.Persson@Sun.COM 	 * the older conn indications at front of the queue
1270*12644SAnders.Persson@Sun.COM 	 * helps reducing search time.
1271*12644SAnders.Persson@Sun.COM 	 */
1272*12644SAnders.Persson@Sun.COM 	tail = listener->tcp_eager_last_q;
1273*12644SAnders.Persson@Sun.COM 	if (tail != NULL) {
1274*12644SAnders.Persson@Sun.COM 		tail->tcp_eager_next_q = tcp;
1275*12644SAnders.Persson@Sun.COM 	} else {
1276*12644SAnders.Persson@Sun.COM 		listener->tcp_eager_next_q = tcp;
1277*12644SAnders.Persson@Sun.COM 	}
1278*12644SAnders.Persson@Sun.COM 	listener->tcp_eager_last_q = tcp;
1279*12644SAnders.Persson@Sun.COM 	tcp->tcp_eager_next_q = NULL;
1280*12644SAnders.Persson@Sun.COM 
1281*12644SAnders.Persson@Sun.COM 	return (conn_ind);
1282*12644SAnders.Persson@Sun.COM }
1283*12644SAnders.Persson@Sun.COM 
128412643SAnders.Persson@Sun.COM 
128512643SAnders.Persson@Sun.COM /*
128611755SKacheong.Poon@Sun.COM  * Reply to a client's T_CONN_RES TPI message. This function
128711755SKacheong.Poon@Sun.COM  * is used only for TLI/XTI listener. Sockfs sends T_CONN_RES
128811755SKacheong.Poon@Sun.COM  * on the acceptor STREAM and processed in tcp_accept_common().
128911755SKacheong.Poon@Sun.COM  * Read the block comment on top of tcp_input_listener().
129011755SKacheong.Poon@Sun.COM  */
129111755SKacheong.Poon@Sun.COM void
tcp_tli_accept(tcp_t * listener,mblk_t * mp)129211755SKacheong.Poon@Sun.COM tcp_tli_accept(tcp_t *listener, mblk_t *mp)
129311755SKacheong.Poon@Sun.COM {
129411755SKacheong.Poon@Sun.COM 	tcp_t		*acceptor;
129511755SKacheong.Poon@Sun.COM 	tcp_t		*eager;
129611755SKacheong.Poon@Sun.COM 	struct T_conn_res	*tcr;
129711755SKacheong.Poon@Sun.COM 	t_uscalar_t	acceptor_id;
129811755SKacheong.Poon@Sun.COM 	t_scalar_t	seqnum;
129911755SKacheong.Poon@Sun.COM 	mblk_t		*discon_mp = NULL;
130011755SKacheong.Poon@Sun.COM 	mblk_t		*ok_mp;
130111755SKacheong.Poon@Sun.COM 	mblk_t		*mp1;
130211755SKacheong.Poon@Sun.COM 	tcp_stack_t	*tcps = listener->tcp_tcps;
130311755SKacheong.Poon@Sun.COM 	conn_t		*econnp;
130411755SKacheong.Poon@Sun.COM 
130511755SKacheong.Poon@Sun.COM 	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tcr)) {
130611755SKacheong.Poon@Sun.COM 		tcp_err_ack(listener, mp, TPROTO, 0);
130711755SKacheong.Poon@Sun.COM 		return;
130811755SKacheong.Poon@Sun.COM 	}
130911755SKacheong.Poon@Sun.COM 	tcr = (struct T_conn_res *)mp->b_rptr;
131011755SKacheong.Poon@Sun.COM 
131111755SKacheong.Poon@Sun.COM 	/*
131211755SKacheong.Poon@Sun.COM 	 * Under ILP32 the stream head points tcr->ACCEPTOR_id at the
131311755SKacheong.Poon@Sun.COM 	 * read side queue of the streams device underneath us i.e. the
131411755SKacheong.Poon@Sun.COM 	 * read side queue of 'ip'. Since we can't dereference QUEUE_ptr we
131511755SKacheong.Poon@Sun.COM 	 * look it up in the queue_hash.  Under LP64 it sends down the
131611755SKacheong.Poon@Sun.COM 	 * minor_t of the accepting endpoint.
131711755SKacheong.Poon@Sun.COM 	 *
131811755SKacheong.Poon@Sun.COM 	 * Once the acceptor/eager are modified (in tcp_accept_swap) the
131911755SKacheong.Poon@Sun.COM 	 * fanout hash lock is held.
132011755SKacheong.Poon@Sun.COM 	 * This prevents any thread from entering the acceptor queue from
132111755SKacheong.Poon@Sun.COM 	 * below (since it has not been hard bound yet i.e. any inbound
132211755SKacheong.Poon@Sun.COM 	 * packets will arrive on the listener conn_t and
132311755SKacheong.Poon@Sun.COM 	 * go through the classifier).
132411755SKacheong.Poon@Sun.COM 	 * The CONN_INC_REF will prevent the acceptor from closing.
132511755SKacheong.Poon@Sun.COM 	 *
132611755SKacheong.Poon@Sun.COM 	 * XXX It is still possible for a tli application to send down data
132711755SKacheong.Poon@Sun.COM 	 * on the accepting stream while another thread calls t_accept.
132811755SKacheong.Poon@Sun.COM 	 * This should not be a problem for well-behaved applications since
132911755SKacheong.Poon@Sun.COM 	 * the T_OK_ACK is sent after the queue swapping is completed.
133011755SKacheong.Poon@Sun.COM 	 *
133111755SKacheong.Poon@Sun.COM 	 * If the accepting fd is the same as the listening fd, avoid
133211755SKacheong.Poon@Sun.COM 	 * queue hash lookup since that will return an eager listener in a
133311755SKacheong.Poon@Sun.COM 	 * already established state.
133411755SKacheong.Poon@Sun.COM 	 */
133511755SKacheong.Poon@Sun.COM 	acceptor_id = tcr->ACCEPTOR_id;
133611755SKacheong.Poon@Sun.COM 	mutex_enter(&listener->tcp_eager_lock);
133711755SKacheong.Poon@Sun.COM 	if (listener->tcp_acceptor_id == acceptor_id) {
133811755SKacheong.Poon@Sun.COM 		eager = listener->tcp_eager_next_q;
133911755SKacheong.Poon@Sun.COM 		/* only count how many T_CONN_INDs so don't count q0 */
134011755SKacheong.Poon@Sun.COM 		if ((listener->tcp_conn_req_cnt_q != 1) ||
134111755SKacheong.Poon@Sun.COM 		    (eager->tcp_conn_req_seqnum != tcr->SEQ_number)) {
134211755SKacheong.Poon@Sun.COM 			mutex_exit(&listener->tcp_eager_lock);
134311755SKacheong.Poon@Sun.COM 			tcp_err_ack(listener, mp, TBADF, 0);
134411755SKacheong.Poon@Sun.COM 			return;
134511755SKacheong.Poon@Sun.COM 		}
134611755SKacheong.Poon@Sun.COM 		if (listener->tcp_conn_req_cnt_q0 != 0) {
134711755SKacheong.Poon@Sun.COM 			/* Throw away all the eagers on q0. */
134811755SKacheong.Poon@Sun.COM 			tcp_eager_cleanup(listener, 1);
134911755SKacheong.Poon@Sun.COM 		}
135011755SKacheong.Poon@Sun.COM 		if (listener->tcp_syn_defense) {
135111755SKacheong.Poon@Sun.COM 			listener->tcp_syn_defense = B_FALSE;
135211755SKacheong.Poon@Sun.COM 			if (listener->tcp_ip_addr_cache != NULL) {
135311755SKacheong.Poon@Sun.COM 				kmem_free(listener->tcp_ip_addr_cache,
135411755SKacheong.Poon@Sun.COM 				    IP_ADDR_CACHE_SIZE * sizeof (ipaddr_t));
135511755SKacheong.Poon@Sun.COM 				listener->tcp_ip_addr_cache = NULL;
135611755SKacheong.Poon@Sun.COM 			}
135711755SKacheong.Poon@Sun.COM 		}
135811755SKacheong.Poon@Sun.COM 		/*
135911755SKacheong.Poon@Sun.COM 		 * Transfer tcp_conn_req_max to the eager so that when
136011755SKacheong.Poon@Sun.COM 		 * a disconnect occurs we can revert the endpoint to the
136111755SKacheong.Poon@Sun.COM 		 * listen state.
136211755SKacheong.Poon@Sun.COM 		 */
136311755SKacheong.Poon@Sun.COM 		eager->tcp_conn_req_max = listener->tcp_conn_req_max;
136411755SKacheong.Poon@Sun.COM 		ASSERT(listener->tcp_conn_req_cnt_q0 == 0);
136511755SKacheong.Poon@Sun.COM 		/*
136611755SKacheong.Poon@Sun.COM 		 * Get a reference on the acceptor just like the
136711755SKacheong.Poon@Sun.COM 		 * tcp_acceptor_hash_lookup below.
136811755SKacheong.Poon@Sun.COM 		 */
136911755SKacheong.Poon@Sun.COM 		acceptor = listener;
137011755SKacheong.Poon@Sun.COM 		CONN_INC_REF(acceptor->tcp_connp);
137111755SKacheong.Poon@Sun.COM 	} else {
137211755SKacheong.Poon@Sun.COM 		acceptor = tcp_acceptor_hash_lookup(acceptor_id, tcps);
137311755SKacheong.Poon@Sun.COM 		if (acceptor == NULL) {
137411755SKacheong.Poon@Sun.COM 			if (listener->tcp_connp->conn_debug) {
137511755SKacheong.Poon@Sun.COM 				(void) strlog(TCP_MOD_ID, 0, 1,
137611755SKacheong.Poon@Sun.COM 				    SL_ERROR|SL_TRACE,
137711755SKacheong.Poon@Sun.COM 				    "tcp_accept: did not find acceptor 0x%x\n",
137811755SKacheong.Poon@Sun.COM 				    acceptor_id);
137911755SKacheong.Poon@Sun.COM 			}
138011755SKacheong.Poon@Sun.COM 			mutex_exit(&listener->tcp_eager_lock);
138111755SKacheong.Poon@Sun.COM 			tcp_err_ack(listener, mp, TPROVMISMATCH, 0);
138211755SKacheong.Poon@Sun.COM 			return;
138311755SKacheong.Poon@Sun.COM 		}
138411755SKacheong.Poon@Sun.COM 		/*
138511755SKacheong.Poon@Sun.COM 		 * Verify acceptor state. The acceptable states for an acceptor
138611755SKacheong.Poon@Sun.COM 		 * include TCPS_IDLE and TCPS_BOUND.
138711755SKacheong.Poon@Sun.COM 		 */
138811755SKacheong.Poon@Sun.COM 		switch (acceptor->tcp_state) {
138911755SKacheong.Poon@Sun.COM 		case TCPS_IDLE:
139011755SKacheong.Poon@Sun.COM 			/* FALLTHRU */
139111755SKacheong.Poon@Sun.COM 		case TCPS_BOUND:
139211755SKacheong.Poon@Sun.COM 			break;
139311755SKacheong.Poon@Sun.COM 		default:
139411755SKacheong.Poon@Sun.COM 			CONN_DEC_REF(acceptor->tcp_connp);
139511755SKacheong.Poon@Sun.COM 			mutex_exit(&listener->tcp_eager_lock);
139611755SKacheong.Poon@Sun.COM 			tcp_err_ack(listener, mp, TOUTSTATE, 0);
139711755SKacheong.Poon@Sun.COM 			return;
139811755SKacheong.Poon@Sun.COM 		}
139911755SKacheong.Poon@Sun.COM 	}
140011755SKacheong.Poon@Sun.COM 
140111755SKacheong.Poon@Sun.COM 	/* The listener must be in TCPS_LISTEN */
140211755SKacheong.Poon@Sun.COM 	if (listener->tcp_state != TCPS_LISTEN) {
140311755SKacheong.Poon@Sun.COM 		CONN_DEC_REF(acceptor->tcp_connp);
140411755SKacheong.Poon@Sun.COM 		mutex_exit(&listener->tcp_eager_lock);
140511755SKacheong.Poon@Sun.COM 		tcp_err_ack(listener, mp, TOUTSTATE, 0);
140611755SKacheong.Poon@Sun.COM 		return;
140711755SKacheong.Poon@Sun.COM 	}
140811755SKacheong.Poon@Sun.COM 
140911755SKacheong.Poon@Sun.COM 	/*
141011755SKacheong.Poon@Sun.COM 	 * Rendezvous with an eager connection request packet hanging off
141111755SKacheong.Poon@Sun.COM 	 * 'tcp' that has the 'seqnum' tag.  We tagged the detached open
141211755SKacheong.Poon@Sun.COM 	 * tcp structure when the connection packet arrived in
141311755SKacheong.Poon@Sun.COM 	 * tcp_input_listener().
141411755SKacheong.Poon@Sun.COM 	 */
141511755SKacheong.Poon@Sun.COM 	seqnum = tcr->SEQ_number;
141611755SKacheong.Poon@Sun.COM 	eager = listener;
141711755SKacheong.Poon@Sun.COM 	do {
141811755SKacheong.Poon@Sun.COM 		eager = eager->tcp_eager_next_q;
141911755SKacheong.Poon@Sun.COM 		if (eager == NULL) {
142011755SKacheong.Poon@Sun.COM 			CONN_DEC_REF(acceptor->tcp_connp);
142111755SKacheong.Poon@Sun.COM 			mutex_exit(&listener->tcp_eager_lock);
142211755SKacheong.Poon@Sun.COM 			tcp_err_ack(listener, mp, TBADSEQ, 0);
142311755SKacheong.Poon@Sun.COM 			return;
142411755SKacheong.Poon@Sun.COM 		}
142511755SKacheong.Poon@Sun.COM 	} while (eager->tcp_conn_req_seqnum != seqnum);
142611755SKacheong.Poon@Sun.COM 	mutex_exit(&listener->tcp_eager_lock);
142711755SKacheong.Poon@Sun.COM 
142811755SKacheong.Poon@Sun.COM 	/*
142911755SKacheong.Poon@Sun.COM 	 * At this point, both acceptor and listener have 2 ref
143011755SKacheong.Poon@Sun.COM 	 * that they begin with. Acceptor has one additional ref
143111755SKacheong.Poon@Sun.COM 	 * we placed in lookup while listener has 3 additional
143211755SKacheong.Poon@Sun.COM 	 * ref for being behind the squeue (tcp_accept() is
143311755SKacheong.Poon@Sun.COM 	 * done on listener's squeue); being in classifier hash;
143411755SKacheong.Poon@Sun.COM 	 * and eager's ref on listener.
143511755SKacheong.Poon@Sun.COM 	 */
143611755SKacheong.Poon@Sun.COM 	ASSERT(listener->tcp_connp->conn_ref >= 5);
143711755SKacheong.Poon@Sun.COM 	ASSERT(acceptor->tcp_connp->conn_ref >= 3);
143811755SKacheong.Poon@Sun.COM 
143911755SKacheong.Poon@Sun.COM 	/*
144011755SKacheong.Poon@Sun.COM 	 * The eager at this point is set in its own squeue and
144111755SKacheong.Poon@Sun.COM 	 * could easily have been killed (tcp_accept_finish will
144211755SKacheong.Poon@Sun.COM 	 * deal with that) because of a TH_RST so we can only
144311755SKacheong.Poon@Sun.COM 	 * ASSERT for a single ref.
144411755SKacheong.Poon@Sun.COM 	 */
144511755SKacheong.Poon@Sun.COM 	ASSERT(eager->tcp_connp->conn_ref >= 1);
144611755SKacheong.Poon@Sun.COM 
144711755SKacheong.Poon@Sun.COM 	/*
144811755SKacheong.Poon@Sun.COM 	 * Pre allocate the discon_ind mblk also. tcp_accept_finish will
144911755SKacheong.Poon@Sun.COM 	 * use it if something failed.
145011755SKacheong.Poon@Sun.COM 	 */
145111755SKacheong.Poon@Sun.COM 	discon_mp = allocb(MAX(sizeof (struct T_discon_ind),
145211755SKacheong.Poon@Sun.COM 	    sizeof (struct stroptions)), BPRI_HI);
145311755SKacheong.Poon@Sun.COM 	if (discon_mp == NULL) {
145411755SKacheong.Poon@Sun.COM 		CONN_DEC_REF(acceptor->tcp_connp);
145511755SKacheong.Poon@Sun.COM 		CONN_DEC_REF(eager->tcp_connp);
145611755SKacheong.Poon@Sun.COM 		tcp_err_ack(listener, mp, TSYSERR, ENOMEM);
145711755SKacheong.Poon@Sun.COM 		return;
145811755SKacheong.Poon@Sun.COM 	}
145911755SKacheong.Poon@Sun.COM 
146011755SKacheong.Poon@Sun.COM 	econnp = eager->tcp_connp;
146111755SKacheong.Poon@Sun.COM 
146211755SKacheong.Poon@Sun.COM 	/* Hold a copy of mp, in case reallocb fails */
146311755SKacheong.Poon@Sun.COM 	if ((mp1 = copymsg(mp)) == NULL) {
146411755SKacheong.Poon@Sun.COM 		CONN_DEC_REF(acceptor->tcp_connp);
146511755SKacheong.Poon@Sun.COM 		CONN_DEC_REF(eager->tcp_connp);
146611755SKacheong.Poon@Sun.COM 		freemsg(discon_mp);
146711755SKacheong.Poon@Sun.COM 		tcp_err_ack(listener, mp, TSYSERR, ENOMEM);
146811755SKacheong.Poon@Sun.COM 		return;
146911755SKacheong.Poon@Sun.COM 	}
147011755SKacheong.Poon@Sun.COM 
147111755SKacheong.Poon@Sun.COM 	tcr = (struct T_conn_res *)mp1->b_rptr;
147211755SKacheong.Poon@Sun.COM 
147311755SKacheong.Poon@Sun.COM 	/*
147411755SKacheong.Poon@Sun.COM 	 * This is an expanded version of mi_tpi_ok_ack_alloc()
147511755SKacheong.Poon@Sun.COM 	 * which allocates a larger mblk and appends the new
147611755SKacheong.Poon@Sun.COM 	 * local address to the ok_ack.  The address is copied by
147711755SKacheong.Poon@Sun.COM 	 * soaccept() for getsockname().
147811755SKacheong.Poon@Sun.COM 	 */
147911755SKacheong.Poon@Sun.COM 	{
148011755SKacheong.Poon@Sun.COM 		int extra;
148111755SKacheong.Poon@Sun.COM 
148211755SKacheong.Poon@Sun.COM 		extra = (econnp->conn_family == AF_INET) ?
148311755SKacheong.Poon@Sun.COM 		    sizeof (sin_t) : sizeof (sin6_t);
148411755SKacheong.Poon@Sun.COM 
148511755SKacheong.Poon@Sun.COM 		/*
148611755SKacheong.Poon@Sun.COM 		 * Try to re-use mp, if possible.  Otherwise, allocate
148711755SKacheong.Poon@Sun.COM 		 * an mblk and return it as ok_mp.  In any case, mp
148811755SKacheong.Poon@Sun.COM 		 * is no longer usable upon return.
148911755SKacheong.Poon@Sun.COM 		 */
149011755SKacheong.Poon@Sun.COM 		if ((ok_mp = mi_tpi_ok_ack_alloc_extra(mp, extra)) == NULL) {
149111755SKacheong.Poon@Sun.COM 			CONN_DEC_REF(acceptor->tcp_connp);
149211755SKacheong.Poon@Sun.COM 			CONN_DEC_REF(eager->tcp_connp);
149311755SKacheong.Poon@Sun.COM 			freemsg(discon_mp);
149411755SKacheong.Poon@Sun.COM 			/* Original mp has been freed by now, so use mp1 */
149511755SKacheong.Poon@Sun.COM 			tcp_err_ack(listener, mp1, TSYSERR, ENOMEM);
149611755SKacheong.Poon@Sun.COM 			return;
149711755SKacheong.Poon@Sun.COM 		}
149811755SKacheong.Poon@Sun.COM 
149911755SKacheong.Poon@Sun.COM 		mp = NULL;	/* We should never use mp after this point */
150011755SKacheong.Poon@Sun.COM 
150111755SKacheong.Poon@Sun.COM 		switch (extra) {
150211755SKacheong.Poon@Sun.COM 		case sizeof (sin_t): {
150311755SKacheong.Poon@Sun.COM 			sin_t *sin = (sin_t *)ok_mp->b_wptr;
150411755SKacheong.Poon@Sun.COM 
150511755SKacheong.Poon@Sun.COM 			ok_mp->b_wptr += extra;
150611755SKacheong.Poon@Sun.COM 			sin->sin_family = AF_INET;
150711755SKacheong.Poon@Sun.COM 			sin->sin_port = econnp->conn_lport;
150811755SKacheong.Poon@Sun.COM 			sin->sin_addr.s_addr = econnp->conn_laddr_v4;
150911755SKacheong.Poon@Sun.COM 			break;
151011755SKacheong.Poon@Sun.COM 		}
151111755SKacheong.Poon@Sun.COM 		case sizeof (sin6_t): {
151211755SKacheong.Poon@Sun.COM 			sin6_t *sin6 = (sin6_t *)ok_mp->b_wptr;
151311755SKacheong.Poon@Sun.COM 
151411755SKacheong.Poon@Sun.COM 			ok_mp->b_wptr += extra;
151511755SKacheong.Poon@Sun.COM 			sin6->sin6_family = AF_INET6;
151611755SKacheong.Poon@Sun.COM 			sin6->sin6_port = econnp->conn_lport;
151711755SKacheong.Poon@Sun.COM 			sin6->sin6_addr = econnp->conn_laddr_v6;
151811755SKacheong.Poon@Sun.COM 			sin6->sin6_flowinfo = econnp->conn_flowinfo;
151911755SKacheong.Poon@Sun.COM 			if (IN6_IS_ADDR_LINKSCOPE(&econnp->conn_laddr_v6) &&
152011755SKacheong.Poon@Sun.COM 			    (econnp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) {
152111755SKacheong.Poon@Sun.COM 				sin6->sin6_scope_id =
152211755SKacheong.Poon@Sun.COM 				    econnp->conn_ixa->ixa_scopeid;
152311755SKacheong.Poon@Sun.COM 			} else {
152411755SKacheong.Poon@Sun.COM 				sin6->sin6_scope_id = 0;
152511755SKacheong.Poon@Sun.COM 			}
152611755SKacheong.Poon@Sun.COM 			sin6->__sin6_src_id = 0;
152711755SKacheong.Poon@Sun.COM 			break;
152811755SKacheong.Poon@Sun.COM 		}
152911755SKacheong.Poon@Sun.COM 		default:
153011755SKacheong.Poon@Sun.COM 			break;
153111755SKacheong.Poon@Sun.COM 		}
153211755SKacheong.Poon@Sun.COM 		ASSERT(ok_mp->b_wptr <= ok_mp->b_datap->db_lim);
153311755SKacheong.Poon@Sun.COM 	}
153411755SKacheong.Poon@Sun.COM 
153511755SKacheong.Poon@Sun.COM 	/*
153611755SKacheong.Poon@Sun.COM 	 * If there are no options we know that the T_CONN_RES will
153711755SKacheong.Poon@Sun.COM 	 * succeed. However, we can't send the T_OK_ACK upstream until
153811755SKacheong.Poon@Sun.COM 	 * the tcp_accept_swap is done since it would be dangerous to
153911755SKacheong.Poon@Sun.COM 	 * let the application start using the new fd prior to the swap.
154011755SKacheong.Poon@Sun.COM 	 */
154111755SKacheong.Poon@Sun.COM 	tcp_accept_swap(listener, acceptor, eager);
154211755SKacheong.Poon@Sun.COM 
154311755SKacheong.Poon@Sun.COM 	/*
154411755SKacheong.Poon@Sun.COM 	 * tcp_accept_swap unlinks eager from listener but does not drop
154511755SKacheong.Poon@Sun.COM 	 * the eager's reference on the listener.
154611755SKacheong.Poon@Sun.COM 	 */
154711755SKacheong.Poon@Sun.COM 	ASSERT(eager->tcp_listener == NULL);
154811755SKacheong.Poon@Sun.COM 	ASSERT(listener->tcp_connp->conn_ref >= 5);
154911755SKacheong.Poon@Sun.COM 
155011755SKacheong.Poon@Sun.COM 	/*
155111755SKacheong.Poon@Sun.COM 	 * The eager is now associated with its own queue. Insert in
155211755SKacheong.Poon@Sun.COM 	 * the hash so that the connection can be reused for a future
155311755SKacheong.Poon@Sun.COM 	 * T_CONN_RES.
155411755SKacheong.Poon@Sun.COM 	 */
155511755SKacheong.Poon@Sun.COM 	tcp_acceptor_hash_insert(acceptor_id, eager);
155611755SKacheong.Poon@Sun.COM 
155711755SKacheong.Poon@Sun.COM 	/*
155811755SKacheong.Poon@Sun.COM 	 * We now do the processing of options with T_CONN_RES.
155911755SKacheong.Poon@Sun.COM 	 * We delay till now since we wanted to have queue to pass to
156011755SKacheong.Poon@Sun.COM 	 * option processing routines that points back to the right
156111755SKacheong.Poon@Sun.COM 	 * instance structure which does not happen until after
156211755SKacheong.Poon@Sun.COM 	 * tcp_accept_swap().
156311755SKacheong.Poon@Sun.COM 	 *
156411755SKacheong.Poon@Sun.COM 	 * Note:
156511755SKacheong.Poon@Sun.COM 	 * The sanity of the logic here assumes that whatever options
156611755SKacheong.Poon@Sun.COM 	 * are appropriate to inherit from listener=>eager are done
156711755SKacheong.Poon@Sun.COM 	 * before this point, and whatever were to be overridden (or not)
156811755SKacheong.Poon@Sun.COM 	 * in transfer logic from eager=>acceptor in tcp_accept_swap().
156911755SKacheong.Poon@Sun.COM 	 * [ Warning: acceptor endpoint can have T_OPTMGMT_REQ done to it
157011755SKacheong.Poon@Sun.COM 	 *   before its ACCEPTOR_id comes down in T_CONN_RES ]
157111755SKacheong.Poon@Sun.COM 	 * This may not be true at this point in time but can be fixed
157211755SKacheong.Poon@Sun.COM 	 * independently. This option processing code starts with
157311755SKacheong.Poon@Sun.COM 	 * the instantiated acceptor instance and the final queue at
157411755SKacheong.Poon@Sun.COM 	 * this point.
157511755SKacheong.Poon@Sun.COM 	 */
157611755SKacheong.Poon@Sun.COM 
157711755SKacheong.Poon@Sun.COM 	if (tcr->OPT_length != 0) {
157811755SKacheong.Poon@Sun.COM 		/* Options to process */
157911755SKacheong.Poon@Sun.COM 		int t_error = 0;
158011755SKacheong.Poon@Sun.COM 		int sys_error = 0;
158111755SKacheong.Poon@Sun.COM 		int do_disconnect = 0;
158211755SKacheong.Poon@Sun.COM 
158311755SKacheong.Poon@Sun.COM 		if (tcp_conprim_opt_process(eager, mp1,
158411755SKacheong.Poon@Sun.COM 		    &do_disconnect, &t_error, &sys_error) < 0) {
158511755SKacheong.Poon@Sun.COM 			eager->tcp_accept_error = 1;
158611755SKacheong.Poon@Sun.COM 			if (do_disconnect) {
158711755SKacheong.Poon@Sun.COM 				/*
158811755SKacheong.Poon@Sun.COM 				 * An option failed which does not allow
158911755SKacheong.Poon@Sun.COM 				 * connection to be accepted.
159011755SKacheong.Poon@Sun.COM 				 *
159111755SKacheong.Poon@Sun.COM 				 * We allow T_CONN_RES to succeed and
159211755SKacheong.Poon@Sun.COM 				 * put a T_DISCON_IND on the eager queue.
159311755SKacheong.Poon@Sun.COM 				 */
159411755SKacheong.Poon@Sun.COM 				ASSERT(t_error == 0 && sys_error == 0);
159511755SKacheong.Poon@Sun.COM 				eager->tcp_send_discon_ind = 1;
159611755SKacheong.Poon@Sun.COM 			} else {
159711755SKacheong.Poon@Sun.COM 				ASSERT(t_error != 0);
159811755SKacheong.Poon@Sun.COM 				freemsg(ok_mp);
159911755SKacheong.Poon@Sun.COM 				/*
160011755SKacheong.Poon@Sun.COM 				 * Original mp was either freed or set
160111755SKacheong.Poon@Sun.COM 				 * to ok_mp above, so use mp1 instead.
160211755SKacheong.Poon@Sun.COM 				 */
160311755SKacheong.Poon@Sun.COM 				tcp_err_ack(listener, mp1, t_error, sys_error);
160411755SKacheong.Poon@Sun.COM 				goto finish;
160511755SKacheong.Poon@Sun.COM 			}
160611755SKacheong.Poon@Sun.COM 		}
160711755SKacheong.Poon@Sun.COM 		/*
160811755SKacheong.Poon@Sun.COM 		 * Most likely success in setting options (except if
160911755SKacheong.Poon@Sun.COM 		 * eager->tcp_send_discon_ind set).
161011755SKacheong.Poon@Sun.COM 		 * mp1 option buffer represented by OPT_length/offset
161111755SKacheong.Poon@Sun.COM 		 * potentially modified and contains results of setting
161211755SKacheong.Poon@Sun.COM 		 * options at this point
161311755SKacheong.Poon@Sun.COM 		 */
161411755SKacheong.Poon@Sun.COM 	}
161511755SKacheong.Poon@Sun.COM 
161611755SKacheong.Poon@Sun.COM 	/* We no longer need mp1, since all options processing has passed */
161711755SKacheong.Poon@Sun.COM 	freemsg(mp1);
161811755SKacheong.Poon@Sun.COM 
161911755SKacheong.Poon@Sun.COM 	putnext(listener->tcp_connp->conn_rq, ok_mp);
162011755SKacheong.Poon@Sun.COM 
162111755SKacheong.Poon@Sun.COM 	mutex_enter(&listener->tcp_eager_lock);
162211755SKacheong.Poon@Sun.COM 	if (listener->tcp_eager_prev_q0->tcp_conn_def_q0) {
162311755SKacheong.Poon@Sun.COM 		mblk_t	*conn_ind;
162411755SKacheong.Poon@Sun.COM 
162511755SKacheong.Poon@Sun.COM 		/*
162611755SKacheong.Poon@Sun.COM 		 * This path should not be executed if listener and
162711755SKacheong.Poon@Sun.COM 		 * acceptor streams are the same.
162811755SKacheong.Poon@Sun.COM 		 */
162911755SKacheong.Poon@Sun.COM 		ASSERT(listener != acceptor);
1630*12644SAnders.Persson@Sun.COM 		conn_ind = tcp_get_def_conn_ind(listener);
163111755SKacheong.Poon@Sun.COM 		mutex_exit(&listener->tcp_eager_lock);
1632*12644SAnders.Persson@Sun.COM 		putnext(listener->tcp_connp->conn_rq, conn_ind);
163311755SKacheong.Poon@Sun.COM 	} else {
163411755SKacheong.Poon@Sun.COM 		mutex_exit(&listener->tcp_eager_lock);
163511755SKacheong.Poon@Sun.COM 	}
163611755SKacheong.Poon@Sun.COM 
163711755SKacheong.Poon@Sun.COM 	/*
163811755SKacheong.Poon@Sun.COM 	 * Done with the acceptor - free it
163911755SKacheong.Poon@Sun.COM 	 *
164011755SKacheong.Poon@Sun.COM 	 * Note: from this point on, no access to listener should be made
164111755SKacheong.Poon@Sun.COM 	 * as listener can be equal to acceptor.
164211755SKacheong.Poon@Sun.COM 	 */
164311755SKacheong.Poon@Sun.COM finish:
164411755SKacheong.Poon@Sun.COM 	ASSERT(acceptor->tcp_detached);
164511755SKacheong.Poon@Sun.COM 	acceptor->tcp_connp->conn_rq = NULL;
164611755SKacheong.Poon@Sun.COM 	ASSERT(!IPCL_IS_NONSTR(acceptor->tcp_connp));
164711755SKacheong.Poon@Sun.COM 	acceptor->tcp_connp->conn_wq = NULL;
164811755SKacheong.Poon@Sun.COM 	(void) tcp_clean_death(acceptor, 0);
164911755SKacheong.Poon@Sun.COM 	CONN_DEC_REF(acceptor->tcp_connp);
165011755SKacheong.Poon@Sun.COM 
165111755SKacheong.Poon@Sun.COM 	/*
165211755SKacheong.Poon@Sun.COM 	 * We pass discon_mp to tcp_accept_finish to get on the right squeue.
165311755SKacheong.Poon@Sun.COM 	 *
165411755SKacheong.Poon@Sun.COM 	 * It will update the setting for sockfs/stream head and also take
165511755SKacheong.Poon@Sun.COM 	 * care of any data that arrived before accept() was called.
165611755SKacheong.Poon@Sun.COM 	 * In case we already received a FIN then tcp_accept_finish will send up
165711755SKacheong.Poon@Sun.COM 	 * the ordrel. It will also send up a window update if the window
165811755SKacheong.Poon@Sun.COM 	 * has opened up.
165911755SKacheong.Poon@Sun.COM 	 */
166011755SKacheong.Poon@Sun.COM 
166111755SKacheong.Poon@Sun.COM 	/*
166211755SKacheong.Poon@Sun.COM 	 * XXX: we currently have a problem if XTI application closes the
166311755SKacheong.Poon@Sun.COM 	 * acceptor stream in between. This problem exists in on10-gate also
166411755SKacheong.Poon@Sun.COM 	 * and is well known but nothing can be done short of major rewrite
166511755SKacheong.Poon@Sun.COM 	 * to fix it. Now it is possible to take care of it by assigning TLI/XTI
166611755SKacheong.Poon@Sun.COM 	 * eager same squeue as listener (we can distinguish non socket
166711755SKacheong.Poon@Sun.COM 	 * listeners at the time of handling a SYN in tcp_input_listener)
166811755SKacheong.Poon@Sun.COM 	 * and do most of the work that tcp_accept_finish does here itself
166911755SKacheong.Poon@Sun.COM 	 * and then get behind the acceptor squeue to access the acceptor
167011755SKacheong.Poon@Sun.COM 	 * queue.
167111755SKacheong.Poon@Sun.COM 	 */
167211755SKacheong.Poon@Sun.COM 	/*
167311755SKacheong.Poon@Sun.COM 	 * We already have a ref on tcp so no need to do one before squeue_enter
167411755SKacheong.Poon@Sun.COM 	 */
167511755SKacheong.Poon@Sun.COM 	SQUEUE_ENTER_ONE(eager->tcp_connp->conn_sqp, discon_mp,
167611755SKacheong.Poon@Sun.COM 	    tcp_accept_finish, eager->tcp_connp, NULL, SQ_FILL,
167711755SKacheong.Poon@Sun.COM 	    SQTAG_TCP_ACCEPT_FINISH);
167811755SKacheong.Poon@Sun.COM }
167911755SKacheong.Poon@Sun.COM 
168011755SKacheong.Poon@Sun.COM 
168111755SKacheong.Poon@Sun.COM /*
168211755SKacheong.Poon@Sun.COM  * This is the STREAMS entry point for T_CONN_RES coming down on
168311755SKacheong.Poon@Sun.COM  * Acceptor STREAM when  sockfs listener does accept processing.
168411755SKacheong.Poon@Sun.COM  * Read the block comment on top of tcp_input_listener().
168511755SKacheong.Poon@Sun.COM  */
168611755SKacheong.Poon@Sun.COM void
tcp_tpi_accept(queue_t * q,mblk_t * mp)168711755SKacheong.Poon@Sun.COM tcp_tpi_accept(queue_t *q, mblk_t *mp)
168811755SKacheong.Poon@Sun.COM {
168911755SKacheong.Poon@Sun.COM 	queue_t *rq = RD(q);
169011755SKacheong.Poon@Sun.COM 	struct T_conn_res *conn_res;
169111755SKacheong.Poon@Sun.COM 	tcp_t *eager;
169211755SKacheong.Poon@Sun.COM 	tcp_t *listener;
169311755SKacheong.Poon@Sun.COM 	struct T_ok_ack *ok;
169411755SKacheong.Poon@Sun.COM 	t_scalar_t PRIM_type;
169512643SAnders.Persson@Sun.COM 	mblk_t *discon_mp;
169611755SKacheong.Poon@Sun.COM 	conn_t *econnp;
169711755SKacheong.Poon@Sun.COM 	cred_t *cr;
169811755SKacheong.Poon@Sun.COM 
169911755SKacheong.Poon@Sun.COM 	ASSERT(DB_TYPE(mp) == M_PROTO);
170011755SKacheong.Poon@Sun.COM 
170111755SKacheong.Poon@Sun.COM 	/*
170211755SKacheong.Poon@Sun.COM 	 * All Solaris components should pass a db_credp
170311755SKacheong.Poon@Sun.COM 	 * for this TPI message, hence we ASSERT.
170411755SKacheong.Poon@Sun.COM 	 * But in case there is some other M_PROTO that looks
170511755SKacheong.Poon@Sun.COM 	 * like a TPI message sent by some other kernel
170611755SKacheong.Poon@Sun.COM 	 * component, we check and return an error.
170711755SKacheong.Poon@Sun.COM 	 */
170811755SKacheong.Poon@Sun.COM 	cr = msg_getcred(mp, NULL);
170911755SKacheong.Poon@Sun.COM 	ASSERT(cr != NULL);
171011755SKacheong.Poon@Sun.COM 	if (cr == NULL) {
171111755SKacheong.Poon@Sun.COM 		mp = mi_tpi_err_ack_alloc(mp, TSYSERR, EINVAL);
171211755SKacheong.Poon@Sun.COM 		if (mp != NULL)
171311755SKacheong.Poon@Sun.COM 			putnext(rq, mp);
171411755SKacheong.Poon@Sun.COM 		return;
171511755SKacheong.Poon@Sun.COM 	}
171611755SKacheong.Poon@Sun.COM 	conn_res = (struct T_conn_res *)mp->b_rptr;
171711755SKacheong.Poon@Sun.COM 	ASSERT((uintptr_t)(mp->b_wptr - mp->b_rptr) <= (uintptr_t)INT_MAX);
171811755SKacheong.Poon@Sun.COM 	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_res)) {
171911755SKacheong.Poon@Sun.COM 		mp = mi_tpi_err_ack_alloc(mp, TPROTO, 0);
172011755SKacheong.Poon@Sun.COM 		if (mp != NULL)
172111755SKacheong.Poon@Sun.COM 			putnext(rq, mp);
172211755SKacheong.Poon@Sun.COM 		return;
172311755SKacheong.Poon@Sun.COM 	}
172411755SKacheong.Poon@Sun.COM 	switch (conn_res->PRIM_type) {
172511755SKacheong.Poon@Sun.COM 	case O_T_CONN_RES:
172611755SKacheong.Poon@Sun.COM 	case T_CONN_RES:
172711755SKacheong.Poon@Sun.COM 		/*
172811755SKacheong.Poon@Sun.COM 		 * We pass up an err ack if allocb fails. This will
172911755SKacheong.Poon@Sun.COM 		 * cause sockfs to issue a T_DISCON_REQ which will cause
173011755SKacheong.Poon@Sun.COM 		 * tcp_eager_blowoff to be called. sockfs will then call
173111755SKacheong.Poon@Sun.COM 		 * rq->q_qinfo->qi_qclose to cleanup the acceptor stream.
173211755SKacheong.Poon@Sun.COM 		 * we need to do the allocb up here because we have to
173311755SKacheong.Poon@Sun.COM 		 * make sure rq->q_qinfo->qi_qclose still points to the
173411755SKacheong.Poon@Sun.COM 		 * correct function (tcp_tpi_close_accept) in case allocb
173511755SKacheong.Poon@Sun.COM 		 * fails.
173611755SKacheong.Poon@Sun.COM 		 */
173711755SKacheong.Poon@Sun.COM 		bcopy(mp->b_rptr + conn_res->OPT_offset,
173811755SKacheong.Poon@Sun.COM 		    &eager, conn_res->OPT_length);
173911755SKacheong.Poon@Sun.COM 		PRIM_type = conn_res->PRIM_type;
174011755SKacheong.Poon@Sun.COM 		mp->b_datap->db_type = M_PCPROTO;
174111755SKacheong.Poon@Sun.COM 		mp->b_wptr = mp->b_rptr + sizeof (struct T_ok_ack);
174211755SKacheong.Poon@Sun.COM 		ok = (struct T_ok_ack *)mp->b_rptr;
174311755SKacheong.Poon@Sun.COM 		ok->PRIM_type = T_OK_ACK;
174411755SKacheong.Poon@Sun.COM 		ok->CORRECT_prim = PRIM_type;
174511755SKacheong.Poon@Sun.COM 		econnp = eager->tcp_connp;
174611755SKacheong.Poon@Sun.COM 		econnp->conn_dev = (dev_t)RD(q)->q_ptr;
174711755SKacheong.Poon@Sun.COM 		econnp->conn_minor_arena = (vmem_t *)(WR(q)->q_ptr);
174811755SKacheong.Poon@Sun.COM 		econnp->conn_rq = rq;
174911755SKacheong.Poon@Sun.COM 		econnp->conn_wq = q;
175011755SKacheong.Poon@Sun.COM 		rq->q_ptr = econnp;
175111755SKacheong.Poon@Sun.COM 		rq->q_qinfo = &tcp_rinitv4;	/* No open - same as rinitv6 */
175211755SKacheong.Poon@Sun.COM 		q->q_ptr = econnp;
175311755SKacheong.Poon@Sun.COM 		q->q_qinfo = &tcp_winit;
175411755SKacheong.Poon@Sun.COM 		listener = eager->tcp_listener;
175511755SKacheong.Poon@Sun.COM 
175612643SAnders.Persson@Sun.COM 		/*
175712643SAnders.Persson@Sun.COM 		 * Pre allocate the discon_ind mblk also. tcp_accept_finish will
175812643SAnders.Persson@Sun.COM 		 * use it if something failed.
175912643SAnders.Persson@Sun.COM 		 */
176012643SAnders.Persson@Sun.COM 		discon_mp = allocb(MAX(sizeof (struct T_discon_ind),
176112643SAnders.Persson@Sun.COM 		    sizeof (struct stroptions)), BPRI_HI);
176212643SAnders.Persson@Sun.COM 
176312643SAnders.Persson@Sun.COM 		if (discon_mp == NULL) {
176411755SKacheong.Poon@Sun.COM 			mp = mi_tpi_err_ack_alloc(mp, TPROTO, 0);
176511755SKacheong.Poon@Sun.COM 			if (mp != NULL)
176611755SKacheong.Poon@Sun.COM 				putnext(rq, mp);
176711755SKacheong.Poon@Sun.COM 			return;
176811755SKacheong.Poon@Sun.COM 		}
176911755SKacheong.Poon@Sun.COM 
177012643SAnders.Persson@Sun.COM 		eager->tcp_issocket = B_TRUE;
177112643SAnders.Persson@Sun.COM 
177212643SAnders.Persson@Sun.COM 		ASSERT(econnp->conn_netstack ==
177312643SAnders.Persson@Sun.COM 		    listener->tcp_connp->conn_netstack);
177412643SAnders.Persson@Sun.COM 		ASSERT(eager->tcp_tcps == listener->tcp_tcps);
177512643SAnders.Persson@Sun.COM 
177612643SAnders.Persson@Sun.COM 		/* Put the ref for IP */
177712643SAnders.Persson@Sun.COM 		CONN_INC_REF(econnp);
177812643SAnders.Persson@Sun.COM 
177912643SAnders.Persson@Sun.COM 		/*
178012643SAnders.Persson@Sun.COM 		 * We should have minimum of 3 references on the conn
178112643SAnders.Persson@Sun.COM 		 * at this point. One each for TCP and IP and one for
178212643SAnders.Persson@Sun.COM 		 * the T_conn_ind that was sent up when the 3-way handshake
178312643SAnders.Persson@Sun.COM 		 * completed. In the normal case we would also have another
178412643SAnders.Persson@Sun.COM 		 * reference (making a total of 4) for the conn being in the
178512643SAnders.Persson@Sun.COM 		 * classifier hash list. However the eager could have received
178612643SAnders.Persson@Sun.COM 		 * an RST subsequently and tcp_closei_local could have removed
178712643SAnders.Persson@Sun.COM 		 * the eager from the classifier hash list, hence we can't
178812643SAnders.Persson@Sun.COM 		 * assert that reference.
178912643SAnders.Persson@Sun.COM 		 */
179012643SAnders.Persson@Sun.COM 		ASSERT(econnp->conn_ref >= 3);
179112643SAnders.Persson@Sun.COM 
179212643SAnders.Persson@Sun.COM 		mutex_enter(&listener->tcp_eager_lock);
179312643SAnders.Persson@Sun.COM 		if (listener->tcp_eager_prev_q0->tcp_conn_def_q0) {
1794*12644SAnders.Persson@Sun.COM 			mblk_t *conn_ind = tcp_get_def_conn_ind(listener);
179512643SAnders.Persson@Sun.COM 
179612643SAnders.Persson@Sun.COM 			/* Need to get inside the listener perimeter */
179712643SAnders.Persson@Sun.COM 			CONN_INC_REF(listener->tcp_connp);
1798*12644SAnders.Persson@Sun.COM 			SQUEUE_ENTER_ONE(listener->tcp_connp->conn_sqp,
1799*12644SAnders.Persson@Sun.COM 			    conn_ind, tcp_send_pending, listener->tcp_connp,
1800*12644SAnders.Persson@Sun.COM 			    NULL, SQ_FILL, SQTAG_TCP_SEND_PENDING);
180112643SAnders.Persson@Sun.COM 		}
180212643SAnders.Persson@Sun.COM 		tcp_eager_unlink(eager);
180312643SAnders.Persson@Sun.COM 		mutex_exit(&listener->tcp_eager_lock);
180412643SAnders.Persson@Sun.COM 
180512643SAnders.Persson@Sun.COM 		/*
180612643SAnders.Persson@Sun.COM 		 * At this point, the eager is detached from the listener
180712643SAnders.Persson@Sun.COM 		 * but we still have an extra refs on eager (apart from the
180812643SAnders.Persson@Sun.COM 		 * usual tcp references). The ref was placed in tcp_input_data
180912643SAnders.Persson@Sun.COM 		 * before sending the conn_ind in tcp_send_conn_ind.
181012643SAnders.Persson@Sun.COM 		 * The ref will be dropped in tcp_accept_finish().
181112643SAnders.Persson@Sun.COM 		 */
181212643SAnders.Persson@Sun.COM 		SQUEUE_ENTER_ONE(econnp->conn_sqp, discon_mp, tcp_accept_finish,
181312643SAnders.Persson@Sun.COM 		    econnp, NULL, SQ_NODRAIN, SQTAG_TCP_ACCEPT_FINISH_Q0);
181412643SAnders.Persson@Sun.COM 
181511755SKacheong.Poon@Sun.COM 		/*
181611755SKacheong.Poon@Sun.COM 		 * Send the new local address also up to sockfs. There
181711755SKacheong.Poon@Sun.COM 		 * should already be enough space in the mp that came
181811755SKacheong.Poon@Sun.COM 		 * down from soaccept().
181911755SKacheong.Poon@Sun.COM 		 */
182011755SKacheong.Poon@Sun.COM 		if (econnp->conn_family == AF_INET) {
182111755SKacheong.Poon@Sun.COM 			sin_t *sin;
182211755SKacheong.Poon@Sun.COM 
182311755SKacheong.Poon@Sun.COM 			ASSERT((mp->b_datap->db_lim - mp->b_datap->db_base) >=
182411755SKacheong.Poon@Sun.COM 			    (sizeof (struct T_ok_ack) + sizeof (sin_t)));
182511755SKacheong.Poon@Sun.COM 			sin = (sin_t *)mp->b_wptr;
182611755SKacheong.Poon@Sun.COM 			mp->b_wptr += sizeof (sin_t);
182711755SKacheong.Poon@Sun.COM 			sin->sin_family = AF_INET;
182811755SKacheong.Poon@Sun.COM 			sin->sin_port = econnp->conn_lport;
182911755SKacheong.Poon@Sun.COM 			sin->sin_addr.s_addr = econnp->conn_laddr_v4;
183011755SKacheong.Poon@Sun.COM 		} else {
183111755SKacheong.Poon@Sun.COM 			sin6_t *sin6;
183211755SKacheong.Poon@Sun.COM 
183311755SKacheong.Poon@Sun.COM 			ASSERT((mp->b_datap->db_lim - mp->b_datap->db_base) >=
183411755SKacheong.Poon@Sun.COM 			    sizeof (struct T_ok_ack) + sizeof (sin6_t));
183511755SKacheong.Poon@Sun.COM 			sin6 = (sin6_t *)mp->b_wptr;
183611755SKacheong.Poon@Sun.COM 			mp->b_wptr += sizeof (sin6_t);
183711755SKacheong.Poon@Sun.COM 			sin6->sin6_family = AF_INET6;
183811755SKacheong.Poon@Sun.COM 			sin6->sin6_port = econnp->conn_lport;
183911755SKacheong.Poon@Sun.COM 			sin6->sin6_addr = econnp->conn_laddr_v6;
184011755SKacheong.Poon@Sun.COM 			if (econnp->conn_ipversion == IPV4_VERSION)
184111755SKacheong.Poon@Sun.COM 				sin6->sin6_flowinfo = 0;
184211755SKacheong.Poon@Sun.COM 			else
184311755SKacheong.Poon@Sun.COM 				sin6->sin6_flowinfo = econnp->conn_flowinfo;
184411755SKacheong.Poon@Sun.COM 			if (IN6_IS_ADDR_LINKSCOPE(&econnp->conn_laddr_v6) &&
184511755SKacheong.Poon@Sun.COM 			    (econnp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) {
184611755SKacheong.Poon@Sun.COM 				sin6->sin6_scope_id =
184711755SKacheong.Poon@Sun.COM 				    econnp->conn_ixa->ixa_scopeid;
184811755SKacheong.Poon@Sun.COM 			} else {
184911755SKacheong.Poon@Sun.COM 				sin6->sin6_scope_id = 0;
185011755SKacheong.Poon@Sun.COM 			}
185111755SKacheong.Poon@Sun.COM 			sin6->__sin6_src_id = 0;
185211755SKacheong.Poon@Sun.COM 		}
185311755SKacheong.Poon@Sun.COM 
185411755SKacheong.Poon@Sun.COM 		putnext(rq, mp);
185511755SKacheong.Poon@Sun.COM 		return;
185611755SKacheong.Poon@Sun.COM 	default:
185711755SKacheong.Poon@Sun.COM 		mp = mi_tpi_err_ack_alloc(mp, TNOTSUPPORT, 0);
185811755SKacheong.Poon@Sun.COM 		if (mp != NULL)
185911755SKacheong.Poon@Sun.COM 			putnext(rq, mp);
186011755SKacheong.Poon@Sun.COM 		return;
186111755SKacheong.Poon@Sun.COM 	}
186211755SKacheong.Poon@Sun.COM }
186311755SKacheong.Poon@Sun.COM 
186411755SKacheong.Poon@Sun.COM /*
186511755SKacheong.Poon@Sun.COM  * The function called through squeue to get behind listener's perimeter to
186611755SKacheong.Poon@Sun.COM  * send a deferred conn_ind.
186711755SKacheong.Poon@Sun.COM  */
186811755SKacheong.Poon@Sun.COM /* ARGSUSED */
186911755SKacheong.Poon@Sun.COM void
tcp_send_pending(void * arg,mblk_t * mp,void * arg2,ip_recv_attr_t * dummy)187011755SKacheong.Poon@Sun.COM tcp_send_pending(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy)
187111755SKacheong.Poon@Sun.COM {
187211755SKacheong.Poon@Sun.COM 	conn_t	*lconnp = (conn_t *)arg;
187311755SKacheong.Poon@Sun.COM 	tcp_t *listener = lconnp->conn_tcp;
187411755SKacheong.Poon@Sun.COM 	struct T_conn_ind *conn_ind;
187511755SKacheong.Poon@Sun.COM 	tcp_t *tcp;
187611755SKacheong.Poon@Sun.COM 
187711755SKacheong.Poon@Sun.COM 	conn_ind = (struct T_conn_ind *)mp->b_rptr;
187811755SKacheong.Poon@Sun.COM 	bcopy(mp->b_rptr + conn_ind->OPT_offset, &tcp,
187911755SKacheong.Poon@Sun.COM 	    conn_ind->OPT_length);
188011755SKacheong.Poon@Sun.COM 
188111755SKacheong.Poon@Sun.COM 	if (listener->tcp_state != TCPS_LISTEN) {
188211755SKacheong.Poon@Sun.COM 		/*
188311755SKacheong.Poon@Sun.COM 		 * If listener has closed, it would have caused a
188411755SKacheong.Poon@Sun.COM 		 * a cleanup/blowoff to happen for the eager, so
188511755SKacheong.Poon@Sun.COM 		 * we don't need to do anything more.
188611755SKacheong.Poon@Sun.COM 		 */
188711755SKacheong.Poon@Sun.COM 		freemsg(mp);
188811755SKacheong.Poon@Sun.COM 		return;
188911755SKacheong.Poon@Sun.COM 	}
189011755SKacheong.Poon@Sun.COM 
189112643SAnders.Persson@Sun.COM 	putnext(lconnp->conn_rq, mp);
189211755SKacheong.Poon@Sun.COM }
189311755SKacheong.Poon@Sun.COM 
189411755SKacheong.Poon@Sun.COM /*
189511755SKacheong.Poon@Sun.COM  * Sends the T_CONN_IND to the listener. The caller calls this
189611755SKacheong.Poon@Sun.COM  * functions via squeue to get inside the listener's perimeter
189711755SKacheong.Poon@Sun.COM  * once the 3 way hand shake is done a T_CONN_IND needs to be
189811755SKacheong.Poon@Sun.COM  * sent. As an optimization, the caller can call this directly
189911755SKacheong.Poon@Sun.COM  * if listener's perimeter is same as eager's.
190011755SKacheong.Poon@Sun.COM  */
190111755SKacheong.Poon@Sun.COM /* ARGSUSED */
190211755SKacheong.Poon@Sun.COM void
tcp_send_conn_ind(void * arg,mblk_t * mp,void * arg2)190311755SKacheong.Poon@Sun.COM tcp_send_conn_ind(void *arg, mblk_t *mp, void *arg2)
190411755SKacheong.Poon@Sun.COM {
190511755SKacheong.Poon@Sun.COM 	conn_t			*lconnp = (conn_t *)arg;
190611755SKacheong.Poon@Sun.COM 	tcp_t			*listener = lconnp->conn_tcp;
190711755SKacheong.Poon@Sun.COM 	tcp_t			*tcp;
190811755SKacheong.Poon@Sun.COM 	struct T_conn_ind	*conn_ind;
190911755SKacheong.Poon@Sun.COM 	ipaddr_t 		*addr_cache;
191011755SKacheong.Poon@Sun.COM 	boolean_t		need_send_conn_ind = B_FALSE;
191111755SKacheong.Poon@Sun.COM 	tcp_stack_t		*tcps = listener->tcp_tcps;
191211755SKacheong.Poon@Sun.COM 
191311755SKacheong.Poon@Sun.COM 	/* retrieve the eager */
191411755SKacheong.Poon@Sun.COM 	conn_ind = (struct T_conn_ind *)mp->b_rptr;
191511755SKacheong.Poon@Sun.COM 	ASSERT(conn_ind->OPT_offset != 0 &&
191611755SKacheong.Poon@Sun.COM 	    conn_ind->OPT_length == sizeof (intptr_t));
191711755SKacheong.Poon@Sun.COM 	bcopy(mp->b_rptr + conn_ind->OPT_offset, &tcp,
191811755SKacheong.Poon@Sun.COM 	    conn_ind->OPT_length);
191911755SKacheong.Poon@Sun.COM 
192011755SKacheong.Poon@Sun.COM 	/*
192111755SKacheong.Poon@Sun.COM 	 * TLI/XTI applications will get confused by
192211755SKacheong.Poon@Sun.COM 	 * sending eager as an option since it violates
192311755SKacheong.Poon@Sun.COM 	 * the option semantics. So remove the eager as
192411755SKacheong.Poon@Sun.COM 	 * option since TLI/XTI app doesn't need it anyway.
192511755SKacheong.Poon@Sun.COM 	 */
192611755SKacheong.Poon@Sun.COM 	if (!TCP_IS_SOCKET(listener)) {
192711755SKacheong.Poon@Sun.COM 		conn_ind->OPT_length = 0;
192811755SKacheong.Poon@Sun.COM 		conn_ind->OPT_offset = 0;
192911755SKacheong.Poon@Sun.COM 	}
193011755SKacheong.Poon@Sun.COM 	if (listener->tcp_state != TCPS_LISTEN) {
193111755SKacheong.Poon@Sun.COM 		/*
193211755SKacheong.Poon@Sun.COM 		 * If listener has closed, it would have caused a
193311755SKacheong.Poon@Sun.COM 		 * a cleanup/blowoff to happen for the eager. We
193411755SKacheong.Poon@Sun.COM 		 * just need to return.
193511755SKacheong.Poon@Sun.COM 		 */
193611755SKacheong.Poon@Sun.COM 		freemsg(mp);
193711755SKacheong.Poon@Sun.COM 		return;
193811755SKacheong.Poon@Sun.COM 	}
193911755SKacheong.Poon@Sun.COM 
194011755SKacheong.Poon@Sun.COM 
194111755SKacheong.Poon@Sun.COM 	/*
194211755SKacheong.Poon@Sun.COM 	 * if the conn_req_q is full defer passing up the
194311755SKacheong.Poon@Sun.COM 	 * T_CONN_IND until space is availabe after t_accept()
194411755SKacheong.Poon@Sun.COM 	 * processing
194511755SKacheong.Poon@Sun.COM 	 */
194611755SKacheong.Poon@Sun.COM 	mutex_enter(&listener->tcp_eager_lock);
194711755SKacheong.Poon@Sun.COM 
194811755SKacheong.Poon@Sun.COM 	/*
194911755SKacheong.Poon@Sun.COM 	 * Take the eager out, if it is in the list of droppable eagers
195011755SKacheong.Poon@Sun.COM 	 * as we are here because the 3W handshake is over.
195111755SKacheong.Poon@Sun.COM 	 */
195211755SKacheong.Poon@Sun.COM 	MAKE_UNDROPPABLE(tcp);
195311755SKacheong.Poon@Sun.COM 
195411755SKacheong.Poon@Sun.COM 	if (listener->tcp_conn_req_cnt_q < listener->tcp_conn_req_max) {
195511755SKacheong.Poon@Sun.COM 		tcp_t *tail;
195611755SKacheong.Poon@Sun.COM 
195711755SKacheong.Poon@Sun.COM 		/*
195811755SKacheong.Poon@Sun.COM 		 * The eager already has an extra ref put in tcp_input_data
195911755SKacheong.Poon@Sun.COM 		 * so that it stays till accept comes back even though it
196011755SKacheong.Poon@Sun.COM 		 * might get into TCPS_CLOSED as a result of a TH_RST etc.
196111755SKacheong.Poon@Sun.COM 		 */
196211755SKacheong.Poon@Sun.COM 		ASSERT(listener->tcp_conn_req_cnt_q0 > 0);
196311755SKacheong.Poon@Sun.COM 		listener->tcp_conn_req_cnt_q0--;
196411755SKacheong.Poon@Sun.COM 		listener->tcp_conn_req_cnt_q++;
196511755SKacheong.Poon@Sun.COM 
196611755SKacheong.Poon@Sun.COM 		/* Move from SYN_RCVD to ESTABLISHED list  */
196711755SKacheong.Poon@Sun.COM 		tcp->tcp_eager_next_q0->tcp_eager_prev_q0 =
196811755SKacheong.Poon@Sun.COM 		    tcp->tcp_eager_prev_q0;
196911755SKacheong.Poon@Sun.COM 		tcp->tcp_eager_prev_q0->tcp_eager_next_q0 =
197011755SKacheong.Poon@Sun.COM 		    tcp->tcp_eager_next_q0;
197111755SKacheong.Poon@Sun.COM 		tcp->tcp_eager_prev_q0 = NULL;
197211755SKacheong.Poon@Sun.COM 		tcp->tcp_eager_next_q0 = NULL;
197311755SKacheong.Poon@Sun.COM 
197411755SKacheong.Poon@Sun.COM 		/*
197511755SKacheong.Poon@Sun.COM 		 * Insert at end of the queue because sockfs
197611755SKacheong.Poon@Sun.COM 		 * sends down T_CONN_RES in chronological
197711755SKacheong.Poon@Sun.COM 		 * order. Leaving the older conn indications
197811755SKacheong.Poon@Sun.COM 		 * at front of the queue helps reducing search
197911755SKacheong.Poon@Sun.COM 		 * time.
198011755SKacheong.Poon@Sun.COM 		 */
198111755SKacheong.Poon@Sun.COM 		tail = listener->tcp_eager_last_q;
198211755SKacheong.Poon@Sun.COM 		if (tail != NULL)
198311755SKacheong.Poon@Sun.COM 			tail->tcp_eager_next_q = tcp;
198411755SKacheong.Poon@Sun.COM 		else
198511755SKacheong.Poon@Sun.COM 			listener->tcp_eager_next_q = tcp;
198611755SKacheong.Poon@Sun.COM 		listener->tcp_eager_last_q = tcp;
198711755SKacheong.Poon@Sun.COM 		tcp->tcp_eager_next_q = NULL;
198811755SKacheong.Poon@Sun.COM 		/*
198911755SKacheong.Poon@Sun.COM 		 * Delay sending up the T_conn_ind until we are
199011755SKacheong.Poon@Sun.COM 		 * done with the eager. Once we have have sent up
199111755SKacheong.Poon@Sun.COM 		 * the T_conn_ind, the accept can potentially complete
199211755SKacheong.Poon@Sun.COM 		 * any time and release the refhold we have on the eager.
199311755SKacheong.Poon@Sun.COM 		 */
199411755SKacheong.Poon@Sun.COM 		need_send_conn_ind = B_TRUE;
199511755SKacheong.Poon@Sun.COM 	} else {
199611755SKacheong.Poon@Sun.COM 		/*
199711755SKacheong.Poon@Sun.COM 		 * Defer connection on q0 and set deferred
199811755SKacheong.Poon@Sun.COM 		 * connection bit true
199911755SKacheong.Poon@Sun.COM 		 */
200011755SKacheong.Poon@Sun.COM 		tcp->tcp_conn_def_q0 = B_TRUE;
200111755SKacheong.Poon@Sun.COM 
200211755SKacheong.Poon@Sun.COM 		/* take tcp out of q0 ... */
200311755SKacheong.Poon@Sun.COM 		tcp->tcp_eager_prev_q0->tcp_eager_next_q0 =
200411755SKacheong.Poon@Sun.COM 		    tcp->tcp_eager_next_q0;
200511755SKacheong.Poon@Sun.COM 		tcp->tcp_eager_next_q0->tcp_eager_prev_q0 =
200611755SKacheong.Poon@Sun.COM 		    tcp->tcp_eager_prev_q0;
200711755SKacheong.Poon@Sun.COM 
200811755SKacheong.Poon@Sun.COM 		/* ... and place it at the end of q0 */
200911755SKacheong.Poon@Sun.COM 		tcp->tcp_eager_prev_q0 = listener->tcp_eager_prev_q0;
201011755SKacheong.Poon@Sun.COM 		tcp->tcp_eager_next_q0 = listener;
201111755SKacheong.Poon@Sun.COM 		listener->tcp_eager_prev_q0->tcp_eager_next_q0 = tcp;
201211755SKacheong.Poon@Sun.COM 		listener->tcp_eager_prev_q0 = tcp;
201311755SKacheong.Poon@Sun.COM 		tcp->tcp_conn.tcp_eager_conn_ind = mp;
201411755SKacheong.Poon@Sun.COM 	}
201511755SKacheong.Poon@Sun.COM 
201611755SKacheong.Poon@Sun.COM 	/* we have timed out before */
201711755SKacheong.Poon@Sun.COM 	if (tcp->tcp_syn_rcvd_timeout != 0) {
201811755SKacheong.Poon@Sun.COM 		tcp->tcp_syn_rcvd_timeout = 0;
201911755SKacheong.Poon@Sun.COM 		listener->tcp_syn_rcvd_timeout--;
202011755SKacheong.Poon@Sun.COM 		if (listener->tcp_syn_defense &&
202111755SKacheong.Poon@Sun.COM 		    listener->tcp_syn_rcvd_timeout <=
202211755SKacheong.Poon@Sun.COM 		    (tcps->tcps_conn_req_max_q0 >> 5) &&
202311755SKacheong.Poon@Sun.COM 		    10*MINUTES < TICK_TO_MSEC(ddi_get_lbolt64() -
202411755SKacheong.Poon@Sun.COM 		    listener->tcp_last_rcv_lbolt)) {
202511755SKacheong.Poon@Sun.COM 			/*
202611755SKacheong.Poon@Sun.COM 			 * Turn off the defense mode if we
202711755SKacheong.Poon@Sun.COM 			 * believe the SYN attack is over.
202811755SKacheong.Poon@Sun.COM 			 */
202911755SKacheong.Poon@Sun.COM 			listener->tcp_syn_defense = B_FALSE;
203011755SKacheong.Poon@Sun.COM 			if (listener->tcp_ip_addr_cache) {
203111755SKacheong.Poon@Sun.COM 				kmem_free((void *)listener->tcp_ip_addr_cache,
203211755SKacheong.Poon@Sun.COM 				    IP_ADDR_CACHE_SIZE * sizeof (ipaddr_t));
203311755SKacheong.Poon@Sun.COM 				listener->tcp_ip_addr_cache = NULL;
203411755SKacheong.Poon@Sun.COM 			}
203511755SKacheong.Poon@Sun.COM 		}
203611755SKacheong.Poon@Sun.COM 	}
203711755SKacheong.Poon@Sun.COM 	addr_cache = (ipaddr_t *)(listener->tcp_ip_addr_cache);
203811755SKacheong.Poon@Sun.COM 	if (addr_cache != NULL) {
203911755SKacheong.Poon@Sun.COM 		/*
204011755SKacheong.Poon@Sun.COM 		 * We have finished a 3-way handshake with this
204111755SKacheong.Poon@Sun.COM 		 * remote host. This proves the IP addr is good.
204211755SKacheong.Poon@Sun.COM 		 * Cache it!
204311755SKacheong.Poon@Sun.COM 		 */
204411755SKacheong.Poon@Sun.COM 		addr_cache[IP_ADDR_CACHE_HASH(tcp->tcp_connp->conn_faddr_v4)] =
204511755SKacheong.Poon@Sun.COM 		    tcp->tcp_connp->conn_faddr_v4;
204611755SKacheong.Poon@Sun.COM 	}
204711755SKacheong.Poon@Sun.COM 	mutex_exit(&listener->tcp_eager_lock);
204811755SKacheong.Poon@Sun.COM 	if (need_send_conn_ind)
204912643SAnders.Persson@Sun.COM 		putnext(lconnp->conn_rq, mp);
205011755SKacheong.Poon@Sun.COM }
2051