xref: /onnv-gate/usr/src/uts/common/fs/sockfs/sockstr.c (revision 8348:4137e18bfaf0)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51548Srshoaib  * Common Development and Distribution License (the "License").
61548Srshoaib  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
211548Srshoaib 
220Sstevel@tonic-gate /*
236707Sbrutus  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #include <sys/types.h>
280Sstevel@tonic-gate #include <sys/inttypes.h>
290Sstevel@tonic-gate #include <sys/t_lock.h>
300Sstevel@tonic-gate #include <sys/param.h>
310Sstevel@tonic-gate #include <sys/systm.h>
320Sstevel@tonic-gate #include <sys/buf.h>
330Sstevel@tonic-gate #include <sys/conf.h>
340Sstevel@tonic-gate #include <sys/cred.h>
350Sstevel@tonic-gate #include <sys/kmem.h>
360Sstevel@tonic-gate #include <sys/sysmacros.h>
370Sstevel@tonic-gate #include <sys/vfs.h>
380Sstevel@tonic-gate #include <sys/vnode.h>
390Sstevel@tonic-gate #include <sys/debug.h>
400Sstevel@tonic-gate #include <sys/errno.h>
410Sstevel@tonic-gate #include <sys/time.h>
420Sstevel@tonic-gate #include <sys/file.h>
430Sstevel@tonic-gate #include <sys/user.h>
440Sstevel@tonic-gate #include <sys/stream.h>
450Sstevel@tonic-gate #include <sys/strsubr.h>
460Sstevel@tonic-gate #include <sys/esunddi.h>
470Sstevel@tonic-gate #include <sys/flock.h>
480Sstevel@tonic-gate #include <sys/modctl.h>
490Sstevel@tonic-gate #include <sys/vtrace.h>
500Sstevel@tonic-gate #include <sys/strsun.h>
510Sstevel@tonic-gate #include <sys/cmn_err.h>
520Sstevel@tonic-gate #include <sys/proc.h>
530Sstevel@tonic-gate #include <sys/ddi.h>
540Sstevel@tonic-gate 
550Sstevel@tonic-gate #include <sys/suntpi.h>
560Sstevel@tonic-gate #include <sys/socket.h>
570Sstevel@tonic-gate #include <sys/sockio.h>
580Sstevel@tonic-gate #include <sys/socketvar.h>
59*8348SEric.Yu@Sun.COM #include <sys/sodirect.h>
600Sstevel@tonic-gate #include <netinet/in.h>
61*8348SEric.Yu@Sun.COM #include <inet/common.h>
62*8348SEric.Yu@Sun.COM #include <inet/proto_set.h>
630Sstevel@tonic-gate 
640Sstevel@tonic-gate #include <sys/tiuser.h>
650Sstevel@tonic-gate #define	_SUN_TPI_VERSION	2
660Sstevel@tonic-gate #include <sys/tihdr.h>
670Sstevel@tonic-gate 
68898Skais #include <inet/kssl/ksslapi.h>
69898Skais 
700Sstevel@tonic-gate #include <c2/audit.h>
710Sstevel@tonic-gate 
72*8348SEric.Yu@Sun.COM #include <fs/sockfs/socktpi.h>
73*8348SEric.Yu@Sun.COM #include <fs/sockfs/socktpi_impl.h>
746707Sbrutus #include <sys/dcopy.h>
756707Sbrutus 
/*
 * Default socket version; initialised to SOV_SOCKSTREAM.
 * NOTE(review): presumably applied to newly created sockets - confirm
 * against the users of this variable outside this file.
 */
760Sstevel@tonic-gate int so_default_version = SOV_SOCKSTREAM;
770Sstevel@tonic-gate 
780Sstevel@tonic-gate #ifdef DEBUG
790Sstevel@tonic-gate /* Set sockdebug to print debug messages when SO_DEBUG is set */
800Sstevel@tonic-gate int sockdebug = 0;
810Sstevel@tonic-gate 
820Sstevel@tonic-gate /* Set sockprinterr to print error messages when SO_DEBUG is set */
830Sstevel@tonic-gate int sockprinterr = 0;
840Sstevel@tonic-gate 
850Sstevel@tonic-gate /*
860Sstevel@tonic-gate  * Set so_default_options to SO_DEBUG if all sockets should be created
870Sstevel@tonic-gate  * with SO_DEBUG set. This is needed to get debug printouts from the
880Sstevel@tonic-gate  * socket() call itself.
 * so_basic_strinit() copies this value into so_options (DEBUG kernels
 * only).
890Sstevel@tonic-gate  */
900Sstevel@tonic-gate int so_default_options = 0;
910Sstevel@tonic-gate #endif /* DEBUG */
920Sstevel@tonic-gate 
930Sstevel@tonic-gate #ifdef SOCK_TEST
940Sstevel@tonic-gate /*
950Sstevel@tonic-gate  * Set to number of ticks to limit cv_waits for code coverage testing.
960Sstevel@tonic-gate  * Set to 1000 when SO_DEBUG is set to 2.
970Sstevel@tonic-gate  */
980Sstevel@tonic-gate clock_t sock_test_timelimit = 0;
990Sstevel@tonic-gate #endif /* SOCK_TEST */
1000Sstevel@tonic-gate 
1010Sstevel@tonic-gate /*
1020Sstevel@tonic-gate  * For concurrency testing of e.g. opening /dev/ip which does not
1030Sstevel@tonic-gate  * handle T_INFO_REQ messages.
 * When non-zero, do_tinfo() sends no T_INFO_REQ at all: it sets
 * sti_addr_size to 0 and returns success.
1040Sstevel@tonic-gate  */
1050Sstevel@tonic-gate int so_no_tinfo = 0;
1060Sstevel@tonic-gate 
1070Sstevel@tonic-gate /*
1080Sstevel@tonic-gate  * Timeout for getting a T_CAPABILITY_ACK - it is possible for a provider
1090Sstevel@tonic-gate  * to simply ignore the T_CAPABILITY_REQ.
1100Sstevel@tonic-gate  */
1110Sstevel@tonic-gate clock_t	sock_capability_timeout	= 2;	/* seconds */
1120Sstevel@tonic-gate 
/*
 * Forward declarations of file-local helpers that are used before their
 * definitions appear.
 */
1130Sstevel@tonic-gate static int	do_tcapability(struct sonode *so, t_uscalar_t cap_bits1);
1140Sstevel@tonic-gate static void	so_removehooks(struct sonode *so);
1150Sstevel@tonic-gate 
/*
 * Read-side message hooks; so_installhooks() hands both of them to
 * strsetrputhooks().
 */
1160Sstevel@tonic-gate static mblk_t *strsock_proto(vnode_t *vp, mblk_t *mp,
1170Sstevel@tonic-gate 		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
1180Sstevel@tonic-gate 		strsigset_t *allmsgsigs, strpollset_t *pollwakeups);
1190Sstevel@tonic-gate static mblk_t *strsock_misc(vnode_t *vp, mblk_t *mp,
1200Sstevel@tonic-gate 		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
1210Sstevel@tonic-gate 		strsigset_t *allmsgsigs, strpollset_t *pollwakeups);
1220Sstevel@tonic-gate /*
123*8348SEric.Yu@Sun.COM  * STREAMS based sodirect put/wakeup functions.
 * so_basic_strinit() passes both of them to sod_sock_init().
1246707Sbrutus  */
1256707Sbrutus static int sodput(sodirect_t *, mblk_t *);
1266707Sbrutus static void sodwakeup(sodirect_t *);
1276707Sbrutus 
/*
 * Called by sockinit() when sockfs is loaded.
 *
 * Runs the sodirect initialization (sod_init()) and always returns 0.
 * The parameter list is spelled (void) so that the definition is a real
 * prototype and a call with arguments is diagnosed by the compiler.
 */
int
sostr_init(void)
{
	sod_init();
	return (0);
}
1376707Sbrutus 
1386707Sbrutus /*
1390Sstevel@tonic-gate  * Convert a socket to a stream. Invoked when the illusory sockmod
1400Sstevel@tonic-gate  * is popped from the stream.
1410Sstevel@tonic-gate  * Change the stream head back to default operation without losing
1420Sstevel@tonic-gate  * any messages (T_conn_ind's are moved to the stream head queue).
1430Sstevel@tonic-gate  */
1440Sstevel@tonic-gate int
1450Sstevel@tonic-gate so_sock2stream(struct sonode *so)
1460Sstevel@tonic-gate {
1470Sstevel@tonic-gate 	struct vnode		*vp = SOTOV(so);
1480Sstevel@tonic-gate 	queue_t			*rq;
1490Sstevel@tonic-gate 	mblk_t			*mp;
1500Sstevel@tonic-gate 	int			error = 0;
151*8348SEric.Yu@Sun.COM 	sotpi_info_t		*sti = SOTOTPI(so);
1520Sstevel@tonic-gate 
153*8348SEric.Yu@Sun.COM 	ASSERT(MUTEX_HELD(&sti->sti_plumb_lock));
1540Sstevel@tonic-gate 
1550Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
1560Sstevel@tonic-gate 	so_lock_single(so);
1570Sstevel@tonic-gate 
1580Sstevel@tonic-gate 	ASSERT(so->so_version != SOV_STREAM);
1590Sstevel@tonic-gate 
160*8348SEric.Yu@Sun.COM 	if (sti->sti_direct) {
161741Smasputra 		mblk_t **mpp;
162741Smasputra 		int rval;
1630Sstevel@tonic-gate 
164741Smasputra 		/*
165741Smasputra 		 * Tell the transport below that sockmod is being popped
166741Smasputra 		 */
1670Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
168741Smasputra 		error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, CRED(),
1690Sstevel@tonic-gate 		    &rval);
1700Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
1710Sstevel@tonic-gate 		if (error != 0) {
172741Smasputra 			dprintso(so, 0, ("so_sock2stream(%p): "
1737240Srh87107 			    "_SIOCSOCKFALLBACK failed\n", (void *)so));
1740Sstevel@tonic-gate 			goto exit;
1750Sstevel@tonic-gate 		}
176*8348SEric.Yu@Sun.COM 		sti->sti_direct = 0;
1770Sstevel@tonic-gate 
178*8348SEric.Yu@Sun.COM 		for (mpp = &sti->sti_conn_ind_head; (mp = *mpp) != NULL;
1790Sstevel@tonic-gate 		    mpp = &mp->b_next) {
1800Sstevel@tonic-gate 			struct T_conn_ind	*conn_ind;
1810Sstevel@tonic-gate 
1820Sstevel@tonic-gate 			/*
1830Sstevel@tonic-gate 			 * strsock_proto() has already verified the length of
1840Sstevel@tonic-gate 			 * this message block.
1850Sstevel@tonic-gate 			 */
1860Sstevel@tonic-gate 			ASSERT(MBLKL(mp) >= sizeof (struct T_conn_ind));
1870Sstevel@tonic-gate 
1880Sstevel@tonic-gate 			conn_ind = (struct T_conn_ind *)mp->b_rptr;
1890Sstevel@tonic-gate 			if (conn_ind->OPT_length == 0 &&
1900Sstevel@tonic-gate 			    conn_ind->OPT_offset == 0)
1910Sstevel@tonic-gate 				continue;
1920Sstevel@tonic-gate 
1930Sstevel@tonic-gate 			if (DB_REF(mp) > 1) {
1940Sstevel@tonic-gate 				mblk_t	*newmp;
1950Sstevel@tonic-gate 				size_t	length;
1960Sstevel@tonic-gate 				cred_t	*cr;
1970Sstevel@tonic-gate 
1980Sstevel@tonic-gate 				/*
1990Sstevel@tonic-gate 				 * Copy the message block because it is used
2000Sstevel@tonic-gate 				 * elsewhere, too.
2010Sstevel@tonic-gate 				 */
2020Sstevel@tonic-gate 				length = MBLKL(mp);
2030Sstevel@tonic-gate 				newmp = soallocproto(length, _ALLOC_INTR);
2040Sstevel@tonic-gate 				if (newmp == NULL) {
2050Sstevel@tonic-gate 					error = EINTR;
2060Sstevel@tonic-gate 					goto exit;
2070Sstevel@tonic-gate 				}
2080Sstevel@tonic-gate 				bcopy(mp->b_rptr, newmp->b_wptr, length);
2090Sstevel@tonic-gate 				newmp->b_wptr += length;
2100Sstevel@tonic-gate 				newmp->b_next = mp->b_next;
2110Sstevel@tonic-gate 				cr = DB_CRED(mp);
2120Sstevel@tonic-gate 				if (cr != NULL)
2130Sstevel@tonic-gate 					mblk_setcred(newmp, cr);
2140Sstevel@tonic-gate 				DB_CPID(newmp) = DB_CPID(mp);
2150Sstevel@tonic-gate 
2160Sstevel@tonic-gate 				/*
2170Sstevel@tonic-gate 				 * Link the new message block into the queue
2180Sstevel@tonic-gate 				 * and free the old one.
2190Sstevel@tonic-gate 				 */
2200Sstevel@tonic-gate 				*mpp = newmp;
2210Sstevel@tonic-gate 				mp->b_next = NULL;
2220Sstevel@tonic-gate 				freemsg(mp);
2230Sstevel@tonic-gate 
2240Sstevel@tonic-gate 				mp = newmp;
2250Sstevel@tonic-gate 				conn_ind = (struct T_conn_ind *)mp->b_rptr;
2260Sstevel@tonic-gate 			}
2270Sstevel@tonic-gate 
2280Sstevel@tonic-gate 			/*
2290Sstevel@tonic-gate 			 * Remove options added by TCP for accept fast-path.
2300Sstevel@tonic-gate 			 */
2310Sstevel@tonic-gate 			conn_ind->OPT_length = 0;
2320Sstevel@tonic-gate 			conn_ind->OPT_offset = 0;
2330Sstevel@tonic-gate 		}
2340Sstevel@tonic-gate 	}
2350Sstevel@tonic-gate 
2360Sstevel@tonic-gate 	so->so_version = SOV_STREAM;
237*8348SEric.Yu@Sun.COM 	so->so_proto_handle = NULL;
2380Sstevel@tonic-gate 
2390Sstevel@tonic-gate 	/*
2400Sstevel@tonic-gate 	 * Remove the hooks in the stream head to avoid queuing more
2410Sstevel@tonic-gate 	 * packets in sockfs.
2420Sstevel@tonic-gate 	 */
2430Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
2440Sstevel@tonic-gate 	so_removehooks(so);
2450Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
2460Sstevel@tonic-gate 
2470Sstevel@tonic-gate 	/*
2480Sstevel@tonic-gate 	 * Clear any state related to urgent data. Leave any T_EXDATA_IND
2490Sstevel@tonic-gate 	 * on the queue - the behavior of urgent data after a switch is
2500Sstevel@tonic-gate 	 * left undefined.
2510Sstevel@tonic-gate 	 */
252*8348SEric.Yu@Sun.COM 	so->so_error = sti->sti_delayed_error = 0;
2530Sstevel@tonic-gate 	freemsg(so->so_oobmsg);
2540Sstevel@tonic-gate 	so->so_oobmsg = NULL;
255*8348SEric.Yu@Sun.COM 	sti->sti_oobsigcnt = sti->sti_oobcnt = 0;
2560Sstevel@tonic-gate 
2570Sstevel@tonic-gate 	so->so_state &= ~(SS_RCVATMARK|SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA|
258*8348SEric.Yu@Sun.COM 	    SS_SAVEDEOR);
2590Sstevel@tonic-gate 	ASSERT(so_verify_oobstate(so));
2600Sstevel@tonic-gate 
261*8348SEric.Yu@Sun.COM 	freemsg(sti->sti_ack_mp);
262*8348SEric.Yu@Sun.COM 	sti->sti_ack_mp = NULL;
2630Sstevel@tonic-gate 
2640Sstevel@tonic-gate 	/*
265*8348SEric.Yu@Sun.COM 	 * Flush the T_DISCON_IND on sti_discon_ind_mp.
2660Sstevel@tonic-gate 	 */
2670Sstevel@tonic-gate 	so_flush_discon_ind(so);
2680Sstevel@tonic-gate 
2690Sstevel@tonic-gate 	/*
2700Sstevel@tonic-gate 	 * Move any queued T_CONN_IND messages to stream head queue.
2710Sstevel@tonic-gate 	 */
2720Sstevel@tonic-gate 	rq = RD(strvp2wq(vp));
273*8348SEric.Yu@Sun.COM 	while ((mp = sti->sti_conn_ind_head) != NULL) {
274*8348SEric.Yu@Sun.COM 		sti->sti_conn_ind_head = mp->b_next;
2750Sstevel@tonic-gate 		mp->b_next = NULL;
276*8348SEric.Yu@Sun.COM 		if (sti->sti_conn_ind_head == NULL) {
277*8348SEric.Yu@Sun.COM 			ASSERT(sti->sti_conn_ind_tail == mp);
278*8348SEric.Yu@Sun.COM 			sti->sti_conn_ind_tail = NULL;
2790Sstevel@tonic-gate 		}
2800Sstevel@tonic-gate 		dprintso(so, 0,
281*8348SEric.Yu@Sun.COM 		    ("so_sock2stream(%p): moving T_CONN_IND\n", (void *)so));
2820Sstevel@tonic-gate 
2830Sstevel@tonic-gate 		/* Drop lock across put() */
2840Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
2850Sstevel@tonic-gate 		put(rq, mp);
2860Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
2870Sstevel@tonic-gate 	}
2880Sstevel@tonic-gate 
2890Sstevel@tonic-gate exit:
2900Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
2910Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
2920Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
2930Sstevel@tonic-gate 	return (error);
2940Sstevel@tonic-gate }
2950Sstevel@tonic-gate 
2960Sstevel@tonic-gate /*
2970Sstevel@tonic-gate  * Covert a stream back to a socket. This is invoked when the illusory
2980Sstevel@tonic-gate  * sockmod is pushed on a stream (where the stream was "created" by
2990Sstevel@tonic-gate  * popping the illusory sockmod).
3000Sstevel@tonic-gate  * This routine can not recreate the socket state (certain aspects of
3010Sstevel@tonic-gate  * it like urgent data state and the bound/connected addresses for AF_UNIX
3020Sstevel@tonic-gate  * sockets can not be recreated by asking the transport for information).
3030Sstevel@tonic-gate  * Thus this routine implicitly assumes that the socket is in an initial
3040Sstevel@tonic-gate  * state (as if it was just created). It flushes any messages queued on the
3050Sstevel@tonic-gate  * read queue to avoid dealing with e.g. TPI acks or T_exdata_ind messages.
3060Sstevel@tonic-gate  */
3070Sstevel@tonic-gate void
3080Sstevel@tonic-gate so_stream2sock(struct sonode *so)
3090Sstevel@tonic-gate {
3100Sstevel@tonic-gate 	struct vnode *vp = SOTOV(so);
311*8348SEric.Yu@Sun.COM 	sotpi_info_t *sti = SOTOTPI(so);
3120Sstevel@tonic-gate 
313*8348SEric.Yu@Sun.COM 	ASSERT(MUTEX_HELD(&sti->sti_plumb_lock));
3140Sstevel@tonic-gate 
3150Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
3160Sstevel@tonic-gate 	so_lock_single(so);
3170Sstevel@tonic-gate 	ASSERT(so->so_version == SOV_STREAM);
3180Sstevel@tonic-gate 	so->so_version = SOV_SOCKSTREAM;
319*8348SEric.Yu@Sun.COM 	sti->sti_pushcnt = 0;
3200Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
3210Sstevel@tonic-gate 
3220Sstevel@tonic-gate 	/*
3230Sstevel@tonic-gate 	 * Set a permenent error to force any thread in sorecvmsg to
3240Sstevel@tonic-gate 	 * return (and drop SOREADLOCKED). Clear the error once
3250Sstevel@tonic-gate 	 * we have SOREADLOCKED.
3260Sstevel@tonic-gate 	 * This makes a read sleeping during the I_PUSH of sockmod return
3270Sstevel@tonic-gate 	 * EIO.
3280Sstevel@tonic-gate 	 */
3290Sstevel@tonic-gate 	strsetrerror(SOTOV(so), EIO, 1, NULL);
3300Sstevel@tonic-gate 
3310Sstevel@tonic-gate 	/*
3320Sstevel@tonic-gate 	 * Get the read lock before flushing data to avoid
3330Sstevel@tonic-gate 	 * problems with the T_EXDATA_IND MSG_PEEK code in sorecvmsg.
3340Sstevel@tonic-gate 	 */
3350Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
3360Sstevel@tonic-gate 	(void) so_lock_read(so, 0);	/* Set SOREADLOCKED */
3370Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
3380Sstevel@tonic-gate 
3390Sstevel@tonic-gate 	strsetrerror(SOTOV(so), 0, 0, NULL);
3400Sstevel@tonic-gate 	so_installhooks(so);
3410Sstevel@tonic-gate 
3420Sstevel@tonic-gate 	/*
3430Sstevel@tonic-gate 	 * Flush everything on the read queue.
3440Sstevel@tonic-gate 	 * This ensures that no T_CONN_IND remain and that no T_EXDATA_IND
3450Sstevel@tonic-gate 	 * remain; those types of messages would confuse sockfs.
3460Sstevel@tonic-gate 	 */
3470Sstevel@tonic-gate 	strflushrq(vp, FLUSHALL);
3480Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
3490Sstevel@tonic-gate 
3500Sstevel@tonic-gate 	/*
351*8348SEric.Yu@Sun.COM 	 * Flush the T_DISCON_IND on sti_discon_ind_mp.
3520Sstevel@tonic-gate 	 */
3530Sstevel@tonic-gate 	so_flush_discon_ind(so);
3540Sstevel@tonic-gate 	so_unlock_read(so);	/* Clear SOREADLOCKED */
3550Sstevel@tonic-gate 
3560Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
3570Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
3580Sstevel@tonic-gate }
3590Sstevel@tonic-gate 
3600Sstevel@tonic-gate /*
3610Sstevel@tonic-gate  * Install the hooks in the stream head.
3620Sstevel@tonic-gate  */
3630Sstevel@tonic-gate void
3640Sstevel@tonic-gate so_installhooks(struct sonode *so)
3650Sstevel@tonic-gate {
3660Sstevel@tonic-gate 	struct vnode *vp = SOTOV(so);
3670Sstevel@tonic-gate 
3680Sstevel@tonic-gate 	strsetrputhooks(vp, SH_SIGALLDATA | SH_IGN_ZEROLEN | SH_CONSOL_DATA,
3690Sstevel@tonic-gate 	    strsock_proto, strsock_misc);
3700Sstevel@tonic-gate 	strsetwputhooks(vp, SH_SIGPIPE | SH_RECHECK_ERR, 0);
3710Sstevel@tonic-gate }
3720Sstevel@tonic-gate 
3730Sstevel@tonic-gate /*
3740Sstevel@tonic-gate  * Remove the hooks in the stream head.
3750Sstevel@tonic-gate  */
3760Sstevel@tonic-gate static void
3770Sstevel@tonic-gate so_removehooks(struct sonode *so)
3780Sstevel@tonic-gate {
3790Sstevel@tonic-gate 	struct vnode *vp = SOTOV(so);
3800Sstevel@tonic-gate 
3810Sstevel@tonic-gate 	strsetrputhooks(vp, 0, NULL, NULL);
3820Sstevel@tonic-gate 	strsetwputhooks(vp, 0, STRTIMOUT);
3830Sstevel@tonic-gate 	/*
3840Sstevel@tonic-gate 	 * Leave read behavior as it would have been for a normal
3850Sstevel@tonic-gate 	 * stream i.e. a read of an M_PROTO will fail.
3860Sstevel@tonic-gate 	 */
3870Sstevel@tonic-gate }
3880Sstevel@tonic-gate 
389*8348SEric.Yu@Sun.COM void
390*8348SEric.Yu@Sun.COM so_basic_strinit(struct sonode *so)
3910Sstevel@tonic-gate {
3920Sstevel@tonic-gate 	struct vnode *vp = SOTOV(so);
3930Sstevel@tonic-gate 	struct stdata *stp;
3940Sstevel@tonic-gate 	mblk_t *mp;
395*8348SEric.Yu@Sun.COM 	sotpi_info_t *sti = SOTOTPI(so);
3960Sstevel@tonic-gate 
3970Sstevel@tonic-gate 	/* Preallocate an unbind_req message */
3980Sstevel@tonic-gate 	mp = soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP);
3990Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
400*8348SEric.Yu@Sun.COM 	sti->sti_unbind_mp = mp;
4010Sstevel@tonic-gate #ifdef DEBUG
4020Sstevel@tonic-gate 	so->so_options = so_default_options;
4030Sstevel@tonic-gate #endif /* DEBUG */
4040Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
4050Sstevel@tonic-gate 
4060Sstevel@tonic-gate 	so_installhooks(so);
4070Sstevel@tonic-gate 
4080Sstevel@tonic-gate 	stp = vp->v_stream;
4090Sstevel@tonic-gate 	/*
4100Sstevel@tonic-gate 	 * Have to keep minpsz at zero in order to allow write/send of zero
4110Sstevel@tonic-gate 	 * bytes.
4120Sstevel@tonic-gate 	 */
4130Sstevel@tonic-gate 	mutex_enter(&stp->sd_lock);
4140Sstevel@tonic-gate 	if (stp->sd_qn_minpsz == 1)
4150Sstevel@tonic-gate 		stp->sd_qn_minpsz = 0;
4160Sstevel@tonic-gate 	mutex_exit(&stp->sd_lock);
4170Sstevel@tonic-gate 
4186707Sbrutus 	/*
4196707Sbrutus 	 * If sodirect capable allocate and initialize sodirect_t.
4206707Sbrutus 	 * Note, SS_SODIRECT is set in socktpi_open().
4216707Sbrutus 	 */
422*8348SEric.Yu@Sun.COM 	if ((so->so_state & SS_SODIRECT) &&
423*8348SEric.Yu@Sun.COM 	    !(so->so_state & SS_FALLBACK_PENDING)) {
424*8348SEric.Yu@Sun.COM 		sod_sock_init(so, stp, sodput, sodwakeup, &stp->sd_lock);
425*8348SEric.Yu@Sun.COM 	}
426*8348SEric.Yu@Sun.COM }
4276707Sbrutus 
428*8348SEric.Yu@Sun.COM /*
429*8348SEric.Yu@Sun.COM  * Initialize the streams side of a socket including
430*8348SEric.Yu@Sun.COM  * T_info_req/ack processing. If tso is not NULL its values are used thereby
431*8348SEric.Yu@Sun.COM  * avoiding the T_INFO_REQ.
432*8348SEric.Yu@Sun.COM  */
433*8348SEric.Yu@Sun.COM int
434*8348SEric.Yu@Sun.COM so_strinit(struct sonode *so, struct sonode *tso)
435*8348SEric.Yu@Sun.COM {
436*8348SEric.Yu@Sun.COM 	sotpi_info_t *sti = SOTOTPI(so);
437*8348SEric.Yu@Sun.COM 	sotpi_info_t *tsti;
438*8348SEric.Yu@Sun.COM 	int error;
439*8348SEric.Yu@Sun.COM 
440*8348SEric.Yu@Sun.COM 	so_basic_strinit(so);
441*8348SEric.Yu@Sun.COM 
442*8348SEric.Yu@Sun.COM 	/*
443*8348SEric.Yu@Sun.COM 	 * The T_CAPABILITY_REQ should be the first message sent down because
444*8348SEric.Yu@Sun.COM 	 * at least TCP has a fast-path for this which avoids timeouts while
445*8348SEric.Yu@Sun.COM 	 * waiting for the T_CAPABILITY_ACK under high system load.
446*8348SEric.Yu@Sun.COM 	 */
447*8348SEric.Yu@Sun.COM 	if (tso == NULL) {
448*8348SEric.Yu@Sun.COM 		error = do_tcapability(so, TC1_ACCEPTOR_ID | TC1_INFO);
449*8348SEric.Yu@Sun.COM 		if (error)
450*8348SEric.Yu@Sun.COM 			return (error);
451*8348SEric.Yu@Sun.COM 	} else {
452*8348SEric.Yu@Sun.COM 		tsti = SOTOTPI(tso);
4536707Sbrutus 
454*8348SEric.Yu@Sun.COM 		mutex_enter(&so->so_lock);
455*8348SEric.Yu@Sun.COM 		sti->sti_tsdu_size = tsti->sti_tsdu_size;
456*8348SEric.Yu@Sun.COM 		sti->sti_etsdu_size = tsti->sti_etsdu_size;
457*8348SEric.Yu@Sun.COM 		sti->sti_addr_size = tsti->sti_addr_size;
458*8348SEric.Yu@Sun.COM 		sti->sti_opt_size = tsti->sti_opt_size;
459*8348SEric.Yu@Sun.COM 		sti->sti_tidu_size = tsti->sti_tidu_size;
460*8348SEric.Yu@Sun.COM 		sti->sti_serv_type = tsti->sti_serv_type;
461*8348SEric.Yu@Sun.COM 		so->so_mode = tso->so_mode & ~SM_ACCEPTOR_ID;
462*8348SEric.Yu@Sun.COM 		mutex_exit(&so->so_lock);
463*8348SEric.Yu@Sun.COM 
464*8348SEric.Yu@Sun.COM 		/* the following do_tcapability may update so->so_mode */
465*8348SEric.Yu@Sun.COM 		if ((tsti->sti_serv_type != T_CLTS) &&
466*8348SEric.Yu@Sun.COM 		    (sti->sti_direct == 0)) {
467*8348SEric.Yu@Sun.COM 			error = do_tcapability(so, TC1_ACCEPTOR_ID);
468*8348SEric.Yu@Sun.COM 			if (error)
469*8348SEric.Yu@Sun.COM 				return (error);
470*8348SEric.Yu@Sun.COM 		}
4716707Sbrutus 	}
472*8348SEric.Yu@Sun.COM 	/*
473*8348SEric.Yu@Sun.COM 	 * If the addr_size is 0 we treat it as already bound
474*8348SEric.Yu@Sun.COM 	 * and connected. This is used by the routing socket.
475*8348SEric.Yu@Sun.COM 	 * We set the addr_size to something to allocate a the address
476*8348SEric.Yu@Sun.COM 	 * structures.
477*8348SEric.Yu@Sun.COM 	 */
478*8348SEric.Yu@Sun.COM 	if (sti->sti_addr_size == 0) {
479*8348SEric.Yu@Sun.COM 		so->so_state |= SS_ISBOUND | SS_ISCONNECTED;
480*8348SEric.Yu@Sun.COM 		/* Address size can vary with address families. */
481*8348SEric.Yu@Sun.COM 		if (so->so_family == AF_INET6)
482*8348SEric.Yu@Sun.COM 			sti->sti_addr_size =
483*8348SEric.Yu@Sun.COM 			    (t_scalar_t)sizeof (struct sockaddr_in6);
484*8348SEric.Yu@Sun.COM 		else
485*8348SEric.Yu@Sun.COM 			sti->sti_addr_size =
486*8348SEric.Yu@Sun.COM 			    (t_scalar_t)sizeof (struct sockaddr_in);
487*8348SEric.Yu@Sun.COM 		ASSERT(sti->sti_unbind_mp);
488*8348SEric.Yu@Sun.COM 	}
489*8348SEric.Yu@Sun.COM 
490*8348SEric.Yu@Sun.COM 	so_alloc_addr(so, sti->sti_addr_size);
4916707Sbrutus 
4920Sstevel@tonic-gate 	return (0);
4930Sstevel@tonic-gate }
4940Sstevel@tonic-gate 
4950Sstevel@tonic-gate static void
4960Sstevel@tonic-gate copy_tinfo(struct sonode *so, struct T_info_ack *tia)
4970Sstevel@tonic-gate {
498*8348SEric.Yu@Sun.COM 	sotpi_info_t *sti = SOTOTPI(so);
499*8348SEric.Yu@Sun.COM 
500*8348SEric.Yu@Sun.COM 	sti->sti_tsdu_size = tia->TSDU_size;
501*8348SEric.Yu@Sun.COM 	sti->sti_etsdu_size = tia->ETSDU_size;
502*8348SEric.Yu@Sun.COM 	sti->sti_addr_size = tia->ADDR_size;
503*8348SEric.Yu@Sun.COM 	sti->sti_opt_size = tia->OPT_size;
504*8348SEric.Yu@Sun.COM 	sti->sti_tidu_size = tia->TIDU_size;
505*8348SEric.Yu@Sun.COM 	sti->sti_serv_type = tia->SERV_type;
5060Sstevel@tonic-gate 	switch (tia->CURRENT_state) {
5070Sstevel@tonic-gate 	case TS_UNBND:
5080Sstevel@tonic-gate 		break;
5090Sstevel@tonic-gate 	case TS_IDLE:
5100Sstevel@tonic-gate 		so->so_state |= SS_ISBOUND;
511*8348SEric.Yu@Sun.COM 		sti->sti_laddr_len = 0;
512*8348SEric.Yu@Sun.COM 		sti->sti_laddr_valid = 0;
5130Sstevel@tonic-gate 		break;
5140Sstevel@tonic-gate 	case TS_DATA_XFER:
5150Sstevel@tonic-gate 		so->so_state |= SS_ISBOUND|SS_ISCONNECTED;
516*8348SEric.Yu@Sun.COM 		sti->sti_laddr_len = 0;
517*8348SEric.Yu@Sun.COM 		sti->sti_faddr_len = 0;
518*8348SEric.Yu@Sun.COM 		sti->sti_laddr_valid = 0;
519*8348SEric.Yu@Sun.COM 		sti->sti_faddr_valid = 0;
5200Sstevel@tonic-gate 		break;
5210Sstevel@tonic-gate 	}
5220Sstevel@tonic-gate 
5230Sstevel@tonic-gate 	/*
5240Sstevel@tonic-gate 	 * Heuristics for determining the socket mode flags
5250Sstevel@tonic-gate 	 * (SM_ATOMIC, SM_CONNREQUIRED, SM_ADDR, SM_FDPASSING,
5260Sstevel@tonic-gate 	 * and SM_EXDATA, SM_OPTDATA, and SM_BYTESTREAM)
5270Sstevel@tonic-gate 	 * from the info ack.
5280Sstevel@tonic-gate 	 */
529*8348SEric.Yu@Sun.COM 	if (sti->sti_serv_type == T_CLTS) {
5300Sstevel@tonic-gate 		so->so_mode |= SM_ATOMIC | SM_ADDR;
5310Sstevel@tonic-gate 	} else {
5320Sstevel@tonic-gate 		so->so_mode |= SM_CONNREQUIRED;
533*8348SEric.Yu@Sun.COM 		if (sti->sti_etsdu_size != 0 && sti->sti_etsdu_size != -2)
5340Sstevel@tonic-gate 			so->so_mode |= SM_EXDATA;
5350Sstevel@tonic-gate 	}
5360Sstevel@tonic-gate 	if (so->so_type == SOCK_SEQPACKET || so->so_type == SOCK_RAW) {
5370Sstevel@tonic-gate 		/* Semantics are to discard tail end of messages */
5380Sstevel@tonic-gate 		so->so_mode |= SM_ATOMIC;
5390Sstevel@tonic-gate 	}
5400Sstevel@tonic-gate 	if (so->so_family == AF_UNIX) {
5410Sstevel@tonic-gate 		so->so_mode |= SM_FDPASSING | SM_OPTDATA;
542*8348SEric.Yu@Sun.COM 		if (sti->sti_addr_size == -1) {
5430Sstevel@tonic-gate 			/* MAXPATHLEN + soun_family + nul termination */
544*8348SEric.Yu@Sun.COM 			sti->sti_addr_size = (t_scalar_t)(MAXPATHLEN +
5455753Sgww 			    sizeof (short) + 1);
5460Sstevel@tonic-gate 		}
5470Sstevel@tonic-gate 		if (so->so_type == SOCK_STREAM) {
5480Sstevel@tonic-gate 			/*
5490Sstevel@tonic-gate 			 * Make it into a byte-stream transport.
5500Sstevel@tonic-gate 			 * SOCK_SEQPACKET sockets are unchanged.
5510Sstevel@tonic-gate 			 */
552*8348SEric.Yu@Sun.COM 			sti->sti_tsdu_size = 0;
5530Sstevel@tonic-gate 		}
554*8348SEric.Yu@Sun.COM 	} else if (sti->sti_addr_size == -1) {
5550Sstevel@tonic-gate 		/*
5560Sstevel@tonic-gate 		 * Logic extracted from sockmod - have to pick some max address
5570Sstevel@tonic-gate 		 * length in order to preallocate the addresses.
5580Sstevel@tonic-gate 		 */
559*8348SEric.Yu@Sun.COM 		sti->sti_addr_size = SOA_DEFSIZE;
5600Sstevel@tonic-gate 	}
561*8348SEric.Yu@Sun.COM 	if (sti->sti_tsdu_size == 0)
5620Sstevel@tonic-gate 		so->so_mode |= SM_BYTESTREAM;
5630Sstevel@tonic-gate }
5640Sstevel@tonic-gate 
5650Sstevel@tonic-gate static int
5660Sstevel@tonic-gate check_tinfo(struct sonode *so)
5670Sstevel@tonic-gate {
568*8348SEric.Yu@Sun.COM 	sotpi_info_t *sti = SOTOTPI(so);
569*8348SEric.Yu@Sun.COM 
5700Sstevel@tonic-gate 	/* Consistency checks */
571*8348SEric.Yu@Sun.COM 	if (so->so_type == SOCK_DGRAM && sti->sti_serv_type != T_CLTS) {
5720Sstevel@tonic-gate 		eprintso(so, ("service type and socket type mismatch\n"));
5730Sstevel@tonic-gate 		eprintsoline(so, EPROTO);
5740Sstevel@tonic-gate 		return (EPROTO);
5750Sstevel@tonic-gate 	}
576*8348SEric.Yu@Sun.COM 	if (so->so_type == SOCK_STREAM && sti->sti_serv_type == T_CLTS) {
5770Sstevel@tonic-gate 		eprintso(so, ("service type and socket type mismatch\n"));
5780Sstevel@tonic-gate 		eprintsoline(so, EPROTO);
5790Sstevel@tonic-gate 		return (EPROTO);
5800Sstevel@tonic-gate 	}
581*8348SEric.Yu@Sun.COM 	if (so->so_type == SOCK_SEQPACKET && sti->sti_serv_type == T_CLTS) {
5820Sstevel@tonic-gate 		eprintso(so, ("service type and socket type mismatch\n"));
5830Sstevel@tonic-gate 		eprintsoline(so, EPROTO);
5840Sstevel@tonic-gate 		return (EPROTO);
5850Sstevel@tonic-gate 	}
5860Sstevel@tonic-gate 	if (so->so_family == AF_INET &&
587*8348SEric.Yu@Sun.COM 	    sti->sti_addr_size != (t_scalar_t)sizeof (struct sockaddr_in)) {
5880Sstevel@tonic-gate 		eprintso(so,
5890Sstevel@tonic-gate 		    ("AF_INET must have sockaddr_in address length. Got %d\n",
590*8348SEric.Yu@Sun.COM 		    sti->sti_addr_size));
5910Sstevel@tonic-gate 		eprintsoline(so, EMSGSIZE);
5920Sstevel@tonic-gate 		return (EMSGSIZE);
5930Sstevel@tonic-gate 	}
5940Sstevel@tonic-gate 	if (so->so_family == AF_INET6 &&
595*8348SEric.Yu@Sun.COM 	    sti->sti_addr_size != (t_scalar_t)sizeof (struct sockaddr_in6)) {
5960Sstevel@tonic-gate 		eprintso(so,
5970Sstevel@tonic-gate 		    ("AF_INET6 must have sockaddr_in6 address length. Got %d\n",
598*8348SEric.Yu@Sun.COM 		    sti->sti_addr_size));
5990Sstevel@tonic-gate 		eprintsoline(so, EMSGSIZE);
6000Sstevel@tonic-gate 		return (EMSGSIZE);
6010Sstevel@tonic-gate 	}
6020Sstevel@tonic-gate 
6030Sstevel@tonic-gate 	dprintso(so, 1, (
6040Sstevel@tonic-gate 	    "tinfo: serv %d tsdu %d, etsdu %d, addr %d, opt %d, tidu %d\n",
605*8348SEric.Yu@Sun.COM 	    sti->sti_serv_type, sti->sti_tsdu_size, sti->sti_etsdu_size,
606*8348SEric.Yu@Sun.COM 	    sti->sti_addr_size, sti->sti_opt_size,
607*8348SEric.Yu@Sun.COM 	    sti->sti_tidu_size));
6080Sstevel@tonic-gate 	dprintso(so, 1, ("tinfo: so_state %s\n",
6095753Sgww 	    pr_state(so->so_state, so->so_mode)));
6100Sstevel@tonic-gate 	return (0);
6110Sstevel@tonic-gate }
6120Sstevel@tonic-gate 
6130Sstevel@tonic-gate /*
6140Sstevel@tonic-gate  * Send down T_info_req and wait for the ack.
6150Sstevel@tonic-gate  * Record interesting T_info_ack values in the sonode.
6160Sstevel@tonic-gate  */
6170Sstevel@tonic-gate static int
6180Sstevel@tonic-gate do_tinfo(struct sonode *so)
6190Sstevel@tonic-gate {
6200Sstevel@tonic-gate 	struct T_info_req tir;
6210Sstevel@tonic-gate 	mblk_t *mp;
6220Sstevel@tonic-gate 	int error;
6230Sstevel@tonic-gate 
6240Sstevel@tonic-gate 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
6250Sstevel@tonic-gate 
6260Sstevel@tonic-gate 	if (so_no_tinfo) {
627*8348SEric.Yu@Sun.COM 		SOTOTPI(so)->sti_addr_size = 0;
6280Sstevel@tonic-gate 		return (0);
6290Sstevel@tonic-gate 	}
6300Sstevel@tonic-gate 
6317240Srh87107 	dprintso(so, 1, ("do_tinfo(%p)\n", (void *)so));
6320Sstevel@tonic-gate 
6330Sstevel@tonic-gate 	/* Send T_INFO_REQ */
6340Sstevel@tonic-gate 	tir.PRIM_type = T_INFO_REQ;
6350Sstevel@tonic-gate 	mp = soallocproto1(&tir, sizeof (tir),
6360Sstevel@tonic-gate 	    sizeof (struct T_info_req) + sizeof (struct T_info_ack),
6370Sstevel@tonic-gate 	    _ALLOC_INTR);
6380Sstevel@tonic-gate 	if (mp == NULL) {
6390Sstevel@tonic-gate 		eprintsoline(so, ENOBUFS);
6400Sstevel@tonic-gate 		return (ENOBUFS);
6410Sstevel@tonic-gate 	}
6420Sstevel@tonic-gate 	/* T_INFO_REQ has to be M_PCPROTO */
6430Sstevel@tonic-gate 	DB_TYPE(mp) = M_PCPROTO;
6440Sstevel@tonic-gate 
6450Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
6465753Sgww 	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
6470Sstevel@tonic-gate 	if (error) {
6480Sstevel@tonic-gate 		eprintsoline(so, error);
6490Sstevel@tonic-gate 		return (error);
6500Sstevel@tonic-gate 	}
6510Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
6520Sstevel@tonic-gate 	/* Wait for T_INFO_ACK */
6530Sstevel@tonic-gate 	if ((error = sowaitprim(so, T_INFO_REQ, T_INFO_ACK,
6540Sstevel@tonic-gate 	    (t_uscalar_t)sizeof (struct T_info_ack), &mp, 0))) {
6550Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
6560Sstevel@tonic-gate 		eprintsoline(so, error);
6570Sstevel@tonic-gate 		return (error);
6580Sstevel@tonic-gate 	}
6590Sstevel@tonic-gate 
6600Sstevel@tonic-gate 	ASSERT(mp);
6610Sstevel@tonic-gate 	copy_tinfo(so, (struct T_info_ack *)mp->b_rptr);
6620Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
6630Sstevel@tonic-gate 	freemsg(mp);
6640Sstevel@tonic-gate 	return (check_tinfo(so));
6650Sstevel@tonic-gate }
6660Sstevel@tonic-gate 
6670Sstevel@tonic-gate /*
6680Sstevel@tonic-gate  * Send down T_capability_req and wait for the ack.
6690Sstevel@tonic-gate  * Record interesting T_capability_ack values in the sonode.
6700Sstevel@tonic-gate  */
6710Sstevel@tonic-gate static int
6720Sstevel@tonic-gate do_tcapability(struct sonode *so, t_uscalar_t cap_bits1)
6730Sstevel@tonic-gate {
6740Sstevel@tonic-gate 	struct T_capability_req tcr;
6750Sstevel@tonic-gate 	struct T_capability_ack *tca;
6760Sstevel@tonic-gate 	mblk_t *mp;
6770Sstevel@tonic-gate 	int error;
678*8348SEric.Yu@Sun.COM 	sotpi_info_t *sti = SOTOTPI(so);
6790Sstevel@tonic-gate 
6800Sstevel@tonic-gate 	ASSERT(cap_bits1 != 0);
6810Sstevel@tonic-gate 	ASSERT((cap_bits1 & ~(TC1_ACCEPTOR_ID | TC1_INFO)) == 0);
6820Sstevel@tonic-gate 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
6830Sstevel@tonic-gate 
684*8348SEric.Yu@Sun.COM 	if (sti->sti_provinfo->tpi_capability == PI_NO)
6850Sstevel@tonic-gate 		return (do_tinfo(so));
6860Sstevel@tonic-gate 
6870Sstevel@tonic-gate 	if (so_no_tinfo) {
688*8348SEric.Yu@Sun.COM 		sti->sti_addr_size = 0;
6890Sstevel@tonic-gate 		if ((cap_bits1 &= ~TC1_INFO) == 0)
6900Sstevel@tonic-gate 			return (0);
6910Sstevel@tonic-gate 	}
6920Sstevel@tonic-gate 
6937240Srh87107 	dprintso(so, 1, ("do_tcapability(%p)\n", (void *)so));
6940Sstevel@tonic-gate 
6950Sstevel@tonic-gate 	/* Send T_CAPABILITY_REQ */
6960Sstevel@tonic-gate 	tcr.PRIM_type = T_CAPABILITY_REQ;
6970Sstevel@tonic-gate 	tcr.CAP_bits1 = cap_bits1;
6980Sstevel@tonic-gate 	mp = soallocproto1(&tcr, sizeof (tcr),
6990Sstevel@tonic-gate 	    sizeof (struct T_capability_req) + sizeof (struct T_capability_ack),
7000Sstevel@tonic-gate 	    _ALLOC_INTR);
7010Sstevel@tonic-gate 	if (mp == NULL) {
7020Sstevel@tonic-gate 		eprintsoline(so, ENOBUFS);
7030Sstevel@tonic-gate 		return (ENOBUFS);
7040Sstevel@tonic-gate 	}
7050Sstevel@tonic-gate 	/* T_CAPABILITY_REQ should be M_PCPROTO here */
7060Sstevel@tonic-gate 	DB_TYPE(mp) = M_PCPROTO;
7070Sstevel@tonic-gate 
7080Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
7090Sstevel@tonic-gate 	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
7100Sstevel@tonic-gate 	if (error) {
7110Sstevel@tonic-gate 		eprintsoline(so, error);
7120Sstevel@tonic-gate 		return (error);
7130Sstevel@tonic-gate 	}
7140Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
7150Sstevel@tonic-gate 	/* Wait for T_CAPABILITY_ACK */
7160Sstevel@tonic-gate 	if ((error = sowaitprim(so, T_CAPABILITY_REQ, T_CAPABILITY_ACK,
7170Sstevel@tonic-gate 	    (t_uscalar_t)sizeof (*tca), &mp, sock_capability_timeout * hz))) {
7180Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
719*8348SEric.Yu@Sun.COM 		PI_PROVLOCK(sti->sti_provinfo);
720*8348SEric.Yu@Sun.COM 		if (sti->sti_provinfo->tpi_capability == PI_DONTKNOW)
721*8348SEric.Yu@Sun.COM 			sti->sti_provinfo->tpi_capability = PI_NO;
722*8348SEric.Yu@Sun.COM 		PI_PROVUNLOCK(sti->sti_provinfo);
7230Sstevel@tonic-gate 		ASSERT((so->so_mode & SM_ACCEPTOR_ID) == 0);
7240Sstevel@tonic-gate 		if (cap_bits1 & TC1_INFO) {
7250Sstevel@tonic-gate 			/*
7260Sstevel@tonic-gate 			 * If the T_CAPABILITY_REQ timed out and then a
7270Sstevel@tonic-gate 			 * T_INFO_REQ gets a protocol error, most likely
7280Sstevel@tonic-gate 			 * the capability was slow (vs. unsupported). Return
7290Sstevel@tonic-gate 			 * ENOSR for this case as a best guess.
7300Sstevel@tonic-gate 			 */
7310Sstevel@tonic-gate 			if (error == ETIME) {
7320Sstevel@tonic-gate 				return ((error = do_tinfo(so)) == EPROTO ?
7330Sstevel@tonic-gate 				    ENOSR : error);
7340Sstevel@tonic-gate 			}
7350Sstevel@tonic-gate 			return (do_tinfo(so));
7360Sstevel@tonic-gate 		}
7370Sstevel@tonic-gate 		return (0);
7380Sstevel@tonic-gate 	}
7390Sstevel@tonic-gate 
7400Sstevel@tonic-gate 	ASSERT(mp);
7410Sstevel@tonic-gate 	tca = (struct T_capability_ack *)mp->b_rptr;
7420Sstevel@tonic-gate 
7430Sstevel@tonic-gate 	ASSERT((cap_bits1 & TC1_INFO) == (tca->CAP_bits1 & TC1_INFO));
744*8348SEric.Yu@Sun.COM 	so_proc_tcapability_ack(so, tca);
7450Sstevel@tonic-gate 
7460Sstevel@tonic-gate 	cap_bits1 = tca->CAP_bits1;
7470Sstevel@tonic-gate 
7480Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
7490Sstevel@tonic-gate 	freemsg(mp);
7500Sstevel@tonic-gate 
7510Sstevel@tonic-gate 	if (cap_bits1 & TC1_INFO)
7520Sstevel@tonic-gate 		return (check_tinfo(so));
7530Sstevel@tonic-gate 
7540Sstevel@tonic-gate 	return (0);
7550Sstevel@tonic-gate }
7560Sstevel@tonic-gate 
7570Sstevel@tonic-gate /*
758*8348SEric.Yu@Sun.COM  * Process a T_CAPABILITY_ACK
759*8348SEric.Yu@Sun.COM  */
760*8348SEric.Yu@Sun.COM void
761*8348SEric.Yu@Sun.COM so_proc_tcapability_ack(struct sonode *so, struct T_capability_ack *tca)
762*8348SEric.Yu@Sun.COM {
763*8348SEric.Yu@Sun.COM 	sotpi_info_t *sti = SOTOTPI(so);
764*8348SEric.Yu@Sun.COM 
765*8348SEric.Yu@Sun.COM 	if (sti->sti_provinfo->tpi_capability == PI_DONTKNOW) {
766*8348SEric.Yu@Sun.COM 		PI_PROVLOCK(sti->sti_provinfo);
767*8348SEric.Yu@Sun.COM 		sti->sti_provinfo->tpi_capability = PI_YES;
768*8348SEric.Yu@Sun.COM 		PI_PROVUNLOCK(sti->sti_provinfo);
769*8348SEric.Yu@Sun.COM 	}
770*8348SEric.Yu@Sun.COM 
771*8348SEric.Yu@Sun.COM 	if (tca->CAP_bits1 & TC1_ACCEPTOR_ID) {
772*8348SEric.Yu@Sun.COM 		sti->sti_acceptor_id = tca->ACCEPTOR_id;
773*8348SEric.Yu@Sun.COM 		so->so_mode |= SM_ACCEPTOR_ID;
774*8348SEric.Yu@Sun.COM 	}
775*8348SEric.Yu@Sun.COM 
776*8348SEric.Yu@Sun.COM 	if (tca->CAP_bits1 & TC1_INFO)
777*8348SEric.Yu@Sun.COM 		copy_tinfo(so, &tca->INFO_ack);
778*8348SEric.Yu@Sun.COM }
779*8348SEric.Yu@Sun.COM 
780*8348SEric.Yu@Sun.COM /*
781*8348SEric.Yu@Sun.COM  * Retrieve socket error, clear error if not peek.
7820Sstevel@tonic-gate  */
7830Sstevel@tonic-gate int
784*8348SEric.Yu@Sun.COM sogeterr(struct sonode *so, boolean_t clear_err)
7850Sstevel@tonic-gate {
7860Sstevel@tonic-gate 	int error;
7870Sstevel@tonic-gate 
7880Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
7890Sstevel@tonic-gate 
7900Sstevel@tonic-gate 	error = so->so_error;
791*8348SEric.Yu@Sun.COM 	if (clear_err)
792*8348SEric.Yu@Sun.COM 		so->so_error = 0;
7930Sstevel@tonic-gate 
7940Sstevel@tonic-gate 	return (error);
7950Sstevel@tonic-gate }
7960Sstevel@tonic-gate 
7970Sstevel@tonic-gate /*
7980Sstevel@tonic-gate  * This routine is registered with the stream head to retrieve read
7990Sstevel@tonic-gate  * side errors.
8000Sstevel@tonic-gate  * It does not clear the socket error for a peeking read side operation.
8010Sstevel@tonic-gate  * It the error is to be cleared it sets *clearerr.
8020Sstevel@tonic-gate  */
8030Sstevel@tonic-gate int
8040Sstevel@tonic-gate sogetrderr(vnode_t *vp, int ispeek, int *clearerr)
8050Sstevel@tonic-gate {
8060Sstevel@tonic-gate 	struct sonode *so = VTOSO(vp);
8070Sstevel@tonic-gate 	int error;
8080Sstevel@tonic-gate 
8090Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
8100Sstevel@tonic-gate 	if (ispeek) {
8110Sstevel@tonic-gate 		error = so->so_error;
8120Sstevel@tonic-gate 		*clearerr = 0;
8130Sstevel@tonic-gate 	} else {
8140Sstevel@tonic-gate 		error = so->so_error;
8150Sstevel@tonic-gate 		so->so_error = 0;
8160Sstevel@tonic-gate 		*clearerr = 1;
8170Sstevel@tonic-gate 	}
8180Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
8190Sstevel@tonic-gate 	return (error);
8200Sstevel@tonic-gate }
8210Sstevel@tonic-gate 
8220Sstevel@tonic-gate /*
8230Sstevel@tonic-gate  * This routine is registered with the stream head to retrieve write
8240Sstevel@tonic-gate  * side errors.
8250Sstevel@tonic-gate  * It does not clear the socket error for a peeking read side operation.
8260Sstevel@tonic-gate  * It the error is to be cleared it sets *clearerr.
8270Sstevel@tonic-gate  */
8280Sstevel@tonic-gate int
8290Sstevel@tonic-gate sogetwrerr(vnode_t *vp, int ispeek, int *clearerr)
8300Sstevel@tonic-gate {
8310Sstevel@tonic-gate 	struct sonode *so = VTOSO(vp);
8320Sstevel@tonic-gate 	int error;
8330Sstevel@tonic-gate 
8340Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
8350Sstevel@tonic-gate 	if (so->so_state & SS_CANTSENDMORE) {
8360Sstevel@tonic-gate 		error = EPIPE;
8370Sstevel@tonic-gate 		*clearerr = 0;
8380Sstevel@tonic-gate 	} else {
8390Sstevel@tonic-gate 		error = so->so_error;
8400Sstevel@tonic-gate 		if (ispeek) {
8410Sstevel@tonic-gate 			*clearerr = 0;
8420Sstevel@tonic-gate 		} else {
8430Sstevel@tonic-gate 			so->so_error = 0;
8440Sstevel@tonic-gate 			*clearerr = 1;
8450Sstevel@tonic-gate 		}
8460Sstevel@tonic-gate 	}
8470Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
8480Sstevel@tonic-gate 	return (error);
8490Sstevel@tonic-gate }
8500Sstevel@tonic-gate 
8510Sstevel@tonic-gate /*
8520Sstevel@tonic-gate  * Set a nonpersistent read and write error on the socket.
8530Sstevel@tonic-gate  * Used when there is a T_uderror_ind for a connected socket.
8540Sstevel@tonic-gate  * The caller also needs to call strsetrerror and strsetwerror
8550Sstevel@tonic-gate  * after dropping the lock.
8560Sstevel@tonic-gate  */
8570Sstevel@tonic-gate void
8580Sstevel@tonic-gate soseterror(struct sonode *so, int error)
8590Sstevel@tonic-gate {
8600Sstevel@tonic-gate 	ASSERT(error != 0);
8610Sstevel@tonic-gate 
8620Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
8630Sstevel@tonic-gate 	so->so_error = (ushort_t)error;
8640Sstevel@tonic-gate }
8650Sstevel@tonic-gate 
8660Sstevel@tonic-gate void
8670Sstevel@tonic-gate soisconnecting(struct sonode *so)
8680Sstevel@tonic-gate {
8690Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
8700Sstevel@tonic-gate 	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
8710Sstevel@tonic-gate 	so->so_state |= SS_ISCONNECTING;
8720Sstevel@tonic-gate 	cv_broadcast(&so->so_state_cv);
8730Sstevel@tonic-gate }
8740Sstevel@tonic-gate 
8750Sstevel@tonic-gate void
8760Sstevel@tonic-gate soisconnected(struct sonode *so)
8770Sstevel@tonic-gate {
8780Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
8790Sstevel@tonic-gate 	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING);
8800Sstevel@tonic-gate 	so->so_state |= SS_ISCONNECTED;
8810Sstevel@tonic-gate 	cv_broadcast(&so->so_state_cv);
8820Sstevel@tonic-gate }
8830Sstevel@tonic-gate 
8840Sstevel@tonic-gate /*
8850Sstevel@tonic-gate  * The caller also needs to call strsetrerror, strsetwerror and strseteof.
8860Sstevel@tonic-gate  */
8870Sstevel@tonic-gate void
8880Sstevel@tonic-gate soisdisconnected(struct sonode *so, int error)
8890Sstevel@tonic-gate {
8900Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
891*8348SEric.Yu@Sun.COM 	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
8920Sstevel@tonic-gate 	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
8930Sstevel@tonic-gate 	so->so_error = (ushort_t)error;
8940Sstevel@tonic-gate 	if (so->so_peercred != NULL) {
8950Sstevel@tonic-gate 		crfree(so->so_peercred);
8960Sstevel@tonic-gate 		so->so_peercred = NULL;
8970Sstevel@tonic-gate 	}
8980Sstevel@tonic-gate 	cv_broadcast(&so->so_state_cv);
8990Sstevel@tonic-gate }
9000Sstevel@tonic-gate 
9010Sstevel@tonic-gate /*
9020Sstevel@tonic-gate  * For connected AF_UNIX SOCK_DGRAM sockets when the peer closes.
9030Sstevel@tonic-gate  * Does not affect write side.
9040Sstevel@tonic-gate  * The caller also has to call strsetrerror.
9050Sstevel@tonic-gate  */
9060Sstevel@tonic-gate static void
9070Sstevel@tonic-gate sobreakconn(struct sonode *so, int error)
9080Sstevel@tonic-gate {
9090Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
9100Sstevel@tonic-gate 	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
9110Sstevel@tonic-gate 	so->so_error = (ushort_t)error;
9120Sstevel@tonic-gate 	cv_broadcast(&so->so_state_cv);
9130Sstevel@tonic-gate }
9140Sstevel@tonic-gate 
9150Sstevel@tonic-gate /*
9160Sstevel@tonic-gate  * Can no longer send.
9170Sstevel@tonic-gate  * Caller must also call strsetwerror.
9180Sstevel@tonic-gate  *
9190Sstevel@tonic-gate  * We mark the peer address as no longer valid for getpeername, but
9200Sstevel@tonic-gate  * leave it around for so_unix_close to notify the peer (that
9210Sstevel@tonic-gate  * transport has no addressing held at that layer).
9220Sstevel@tonic-gate  */
9230Sstevel@tonic-gate void
9240Sstevel@tonic-gate socantsendmore(struct sonode *so)
9250Sstevel@tonic-gate {
9260Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
927*8348SEric.Yu@Sun.COM 	so->so_state |= SS_CANTSENDMORE;
9280Sstevel@tonic-gate 	cv_broadcast(&so->so_state_cv);
9290Sstevel@tonic-gate }
9300Sstevel@tonic-gate 
9310Sstevel@tonic-gate /*
9320Sstevel@tonic-gate  * The caller must call strseteof(,1) as well as this routine
9330Sstevel@tonic-gate  * to change the socket state.
9340Sstevel@tonic-gate  */
9350Sstevel@tonic-gate void
9360Sstevel@tonic-gate socantrcvmore(struct sonode *so)
9370Sstevel@tonic-gate {
9380Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
9390Sstevel@tonic-gate 	so->so_state |= SS_CANTRCVMORE;
9400Sstevel@tonic-gate 	cv_broadcast(&so->so_state_cv);
9410Sstevel@tonic-gate }
9420Sstevel@tonic-gate 
9430Sstevel@tonic-gate /*
9440Sstevel@tonic-gate  * The caller has sent down a "request_prim" primitive and wants to wait for
9450Sstevel@tonic-gate  * an ack ("ack_prim") or an T_ERROR_ACK for it.
9460Sstevel@tonic-gate  * The specified "ack_prim" can be a T_OK_ACK.
9470Sstevel@tonic-gate  *
9480Sstevel@tonic-gate  * Assumes that all the TPI acks are M_PCPROTO messages.
9490Sstevel@tonic-gate  *
9500Sstevel@tonic-gate  * Note that the socket is single-threaded (using so_lock_single)
9510Sstevel@tonic-gate  * for all operations that generate TPI ack messages. Since
9520Sstevel@tonic-gate  * only TPI ack messages are M_PCPROTO we should never receive
9530Sstevel@tonic-gate  * anything except either the ack we are expecting or a T_ERROR_ACK
9540Sstevel@tonic-gate  * for the same primitive.
9550Sstevel@tonic-gate  */
9560Sstevel@tonic-gate int
9570Sstevel@tonic-gate sowaitprim(struct sonode *so, t_scalar_t request_prim, t_scalar_t ack_prim,
9580Sstevel@tonic-gate 	    t_uscalar_t min_size, mblk_t **mpp, clock_t wait)
9590Sstevel@tonic-gate {
9600Sstevel@tonic-gate 	mblk_t *mp;
9610Sstevel@tonic-gate 	union T_primitives *tpr;
9620Sstevel@tonic-gate 	int error;
9630Sstevel@tonic-gate 
9640Sstevel@tonic-gate 	dprintso(so, 1, ("sowaitprim(%p, %d, %d, %d, %p, %lu)\n",
9657240Srh87107 	    (void *)so, request_prim, ack_prim, min_size, (void *)mpp, wait));
9660Sstevel@tonic-gate 
9670Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
9680Sstevel@tonic-gate 
9690Sstevel@tonic-gate 	error = sowaitack(so, &mp, wait);
9700Sstevel@tonic-gate 	if (error)
9710Sstevel@tonic-gate 		return (error);
9720Sstevel@tonic-gate 
9737240Srh87107 	dprintso(so, 1, ("got msg %p\n", (void *)mp));
9740Sstevel@tonic-gate 	if (DB_TYPE(mp) != M_PCPROTO ||
9750Sstevel@tonic-gate 	    MBLKL(mp) < sizeof (tpr->type)) {
9760Sstevel@tonic-gate 		freemsg(mp);
9770Sstevel@tonic-gate 		eprintsoline(so, EPROTO);
9780Sstevel@tonic-gate 		return (EPROTO);
9790Sstevel@tonic-gate 	}
9800Sstevel@tonic-gate 	tpr = (union T_primitives *)mp->b_rptr;
9810Sstevel@tonic-gate 	/*
9820Sstevel@tonic-gate 	 * Did we get the primitive that we were asking for?
9830Sstevel@tonic-gate 	 * For T_OK_ACK we also check that it matches the request primitive.
9840Sstevel@tonic-gate 	 */
9850Sstevel@tonic-gate 	if (tpr->type == ack_prim &&
9860Sstevel@tonic-gate 	    (ack_prim != T_OK_ACK ||
9870Sstevel@tonic-gate 	    tpr->ok_ack.CORRECT_prim == request_prim)) {
9880Sstevel@tonic-gate 		if (MBLKL(mp) >= (ssize_t)min_size) {
9890Sstevel@tonic-gate 			/* Found what we are looking for */
9900Sstevel@tonic-gate 			*mpp = mp;
9910Sstevel@tonic-gate 			return (0);
9920Sstevel@tonic-gate 		}
9930Sstevel@tonic-gate 		/* Too short */
9940Sstevel@tonic-gate 		freemsg(mp);
9950Sstevel@tonic-gate 		eprintsoline(so, EPROTO);
9960Sstevel@tonic-gate 		return (EPROTO);
9970Sstevel@tonic-gate 	}
9980Sstevel@tonic-gate 
9990Sstevel@tonic-gate 	if (tpr->type == T_ERROR_ACK &&
10000Sstevel@tonic-gate 	    tpr->error_ack.ERROR_prim == request_prim) {
10010Sstevel@tonic-gate 		/* Error to the primitive we were looking for */
10020Sstevel@tonic-gate 		if (tpr->error_ack.TLI_error == TSYSERR) {
10030Sstevel@tonic-gate 			error = tpr->error_ack.UNIX_error;
10040Sstevel@tonic-gate 		} else {
1005*8348SEric.Yu@Sun.COM 			error = proto_tlitosyserr(tpr->error_ack.TLI_error);
10060Sstevel@tonic-gate 		}
10070Sstevel@tonic-gate 		dprintso(so, 0, ("error_ack for %d: %d/%d ->%d\n",
1008*8348SEric.Yu@Sun.COM 		    tpr->error_ack.ERROR_prim, tpr->error_ack.TLI_error,
1009*8348SEric.Yu@Sun.COM 		    tpr->error_ack.UNIX_error, error));
10100Sstevel@tonic-gate 		freemsg(mp);
10110Sstevel@tonic-gate 		return (error);
10120Sstevel@tonic-gate 	}
10130Sstevel@tonic-gate 	/*
10140Sstevel@tonic-gate 	 * Wrong primitive or T_ERROR_ACK for the wrong primitive
10150Sstevel@tonic-gate 	 */
10160Sstevel@tonic-gate #ifdef DEBUG
10170Sstevel@tonic-gate 	if (tpr->type == T_ERROR_ACK) {
10180Sstevel@tonic-gate 		dprintso(so, 0, ("error_ack for %d: %d/%d\n",
1019*8348SEric.Yu@Sun.COM 		    tpr->error_ack.ERROR_prim, tpr->error_ack.TLI_error,
10205753Sgww 		    tpr->error_ack.UNIX_error));
10210Sstevel@tonic-gate 	} else if (tpr->type == T_OK_ACK) {
10220Sstevel@tonic-gate 		dprintso(so, 0, ("ok_ack for %d, expected %d for %d\n",
1023*8348SEric.Yu@Sun.COM 		    tpr->ok_ack.CORRECT_prim, ack_prim, request_prim));
10240Sstevel@tonic-gate 	} else {
10250Sstevel@tonic-gate 		dprintso(so, 0,
10265753Sgww 		    ("unexpected primitive %d, expected %d for %d\n",
10275753Sgww 		    tpr->type, ack_prim, request_prim));
10280Sstevel@tonic-gate 	}
10290Sstevel@tonic-gate #endif /* DEBUG */
10300Sstevel@tonic-gate 
10310Sstevel@tonic-gate 	freemsg(mp);
10320Sstevel@tonic-gate 	eprintsoline(so, EPROTO);
10330Sstevel@tonic-gate 	return (EPROTO);
10340Sstevel@tonic-gate }
10350Sstevel@tonic-gate 
10360Sstevel@tonic-gate /*
10370Sstevel@tonic-gate  * Wait for a T_OK_ACK for the specified primitive.
10380Sstevel@tonic-gate  */
10390Sstevel@tonic-gate int
10400Sstevel@tonic-gate sowaitokack(struct sonode *so, t_scalar_t request_prim)
10410Sstevel@tonic-gate {
10420Sstevel@tonic-gate 	mblk_t *mp;
10430Sstevel@tonic-gate 	int error;
10440Sstevel@tonic-gate 
10450Sstevel@tonic-gate 	error = sowaitprim(so, request_prim, T_OK_ACK,
10460Sstevel@tonic-gate 	    (t_uscalar_t)sizeof (struct T_ok_ack), &mp, 0);
10470Sstevel@tonic-gate 	if (error)
10480Sstevel@tonic-gate 		return (error);
10490Sstevel@tonic-gate 	freemsg(mp);
10500Sstevel@tonic-gate 	return (0);
10510Sstevel@tonic-gate }
10520Sstevel@tonic-gate 
10530Sstevel@tonic-gate /*
1054*8348SEric.Yu@Sun.COM  * Queue a received TPI ack message on sti_ack_mp.
10550Sstevel@tonic-gate  */
10560Sstevel@tonic-gate void
10570Sstevel@tonic-gate soqueueack(struct sonode *so, mblk_t *mp)
10580Sstevel@tonic-gate {
1059*8348SEric.Yu@Sun.COM 	sotpi_info_t *sti = SOTOTPI(so);
1060*8348SEric.Yu@Sun.COM 
10610Sstevel@tonic-gate 	if (DB_TYPE(mp) != M_PCPROTO) {
10621548Srshoaib 		zcmn_err(getzoneid(), CE_WARN,
10630Sstevel@tonic-gate 		    "sockfs: received unexpected M_PROTO TPI ack. Prim %d\n",
10640Sstevel@tonic-gate 		    *(t_scalar_t *)mp->b_rptr);
10650Sstevel@tonic-gate 		freemsg(mp);
10660Sstevel@tonic-gate 		return;
10670Sstevel@tonic-gate 	}
10680Sstevel@tonic-gate 
10690Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
1070*8348SEric.Yu@Sun.COM 	if (sti->sti_ack_mp != NULL) {
1071*8348SEric.Yu@Sun.COM 		dprintso(so, 1, ("sti_ack_mp already set\n"));
1072*8348SEric.Yu@Sun.COM 		freemsg(sti->sti_ack_mp);
1073*8348SEric.Yu@Sun.COM 		sti->sti_ack_mp = NULL;
10740Sstevel@tonic-gate 	}
1075*8348SEric.Yu@Sun.COM 	sti->sti_ack_mp = mp;
1076*8348SEric.Yu@Sun.COM 	cv_broadcast(&sti->sti_ack_cv);
10770Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
10780Sstevel@tonic-gate }
10790Sstevel@tonic-gate 
10800Sstevel@tonic-gate /*
10810Sstevel@tonic-gate  * Wait for a TPI ack ignoring signals and errors.
10820Sstevel@tonic-gate  */
10830Sstevel@tonic-gate int
10840Sstevel@tonic-gate sowaitack(struct sonode *so, mblk_t **mpp, clock_t wait)
10850Sstevel@tonic-gate {
1086*8348SEric.Yu@Sun.COM 	sotpi_info_t *sti = SOTOTPI(so);
1087*8348SEric.Yu@Sun.COM 
10880Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
10890Sstevel@tonic-gate 
1090*8348SEric.Yu@Sun.COM 	while (sti->sti_ack_mp == NULL) {
10910Sstevel@tonic-gate #ifdef SOCK_TEST
10920Sstevel@tonic-gate 		if (wait == 0 && sock_test_timelimit != 0)
10930Sstevel@tonic-gate 			wait = sock_test_timelimit;
10940Sstevel@tonic-gate #endif
10950Sstevel@tonic-gate 		if (wait != 0) {
10960Sstevel@tonic-gate 			/*
10970Sstevel@tonic-gate 			 * Only wait for the time limit.
10980Sstevel@tonic-gate 			 */
10990Sstevel@tonic-gate 			clock_t now;
11000Sstevel@tonic-gate 
11010Sstevel@tonic-gate 			time_to_wait(&now, wait);
1102*8348SEric.Yu@Sun.COM 			if (cv_timedwait(&sti->sti_ack_cv, &so->so_lock,
11030Sstevel@tonic-gate 			    now) == -1) {
11040Sstevel@tonic-gate 				eprintsoline(so, ETIME);
11050Sstevel@tonic-gate 				return (ETIME);
11060Sstevel@tonic-gate 			}
11070Sstevel@tonic-gate 		}
11080Sstevel@tonic-gate 		else
1109*8348SEric.Yu@Sun.COM 			cv_wait(&sti->sti_ack_cv, &so->so_lock);
11100Sstevel@tonic-gate 	}
1111*8348SEric.Yu@Sun.COM 	*mpp = sti->sti_ack_mp;
11120Sstevel@tonic-gate #ifdef DEBUG
11130Sstevel@tonic-gate 	{
11140Sstevel@tonic-gate 		union T_primitives *tpr;
11150Sstevel@tonic-gate 		mblk_t *mp = *mpp;
11160Sstevel@tonic-gate 
11170Sstevel@tonic-gate 		tpr = (union T_primitives *)mp->b_rptr;
11180Sstevel@tonic-gate 		ASSERT(DB_TYPE(mp) == M_PCPROTO);
11190Sstevel@tonic-gate 		ASSERT(tpr->type == T_OK_ACK ||
11205753Sgww 		    tpr->type == T_ERROR_ACK ||
11215753Sgww 		    tpr->type == T_BIND_ACK ||
11225753Sgww 		    tpr->type == T_CAPABILITY_ACK ||
11235753Sgww 		    tpr->type == T_INFO_ACK ||
11245753Sgww 		    tpr->type == T_OPTMGMT_ACK);
11250Sstevel@tonic-gate 	}
11260Sstevel@tonic-gate #endif /* DEBUG */
1127*8348SEric.Yu@Sun.COM 	sti->sti_ack_mp = NULL;
11280Sstevel@tonic-gate 	return (0);
11290Sstevel@tonic-gate }
11300Sstevel@tonic-gate 
11310Sstevel@tonic-gate /*
1132*8348SEric.Yu@Sun.COM  * Queue a received T_CONN_IND message on sti_conn_ind_head/tail.
11330Sstevel@tonic-gate  */
11340Sstevel@tonic-gate void
11350Sstevel@tonic-gate soqueueconnind(struct sonode *so, mblk_t *mp)
11360Sstevel@tonic-gate {
1137*8348SEric.Yu@Sun.COM 	sotpi_info_t *sti = SOTOTPI(so);
1138*8348SEric.Yu@Sun.COM 
11390Sstevel@tonic-gate 	if (DB_TYPE(mp) != M_PROTO) {
11401548Srshoaib 		zcmn_err(getzoneid(), CE_WARN,
11410Sstevel@tonic-gate 		    "sockfs: received unexpected M_PCPROTO T_CONN_IND\n");
11420Sstevel@tonic-gate 		freemsg(mp);
11430Sstevel@tonic-gate 		return;
11440Sstevel@tonic-gate 	}
11450Sstevel@tonic-gate 
11460Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
11470Sstevel@tonic-gate 	ASSERT(mp->b_next == NULL);
1148*8348SEric.Yu@Sun.COM 	if (sti->sti_conn_ind_head == NULL) {
1149*8348SEric.Yu@Sun.COM 		sti->sti_conn_ind_head = mp;
11500Sstevel@tonic-gate 	} else {
1151*8348SEric.Yu@Sun.COM 		ASSERT(sti->sti_conn_ind_tail->b_next == NULL);
1152*8348SEric.Yu@Sun.COM 		sti->sti_conn_ind_tail->b_next = mp;
11530Sstevel@tonic-gate 	}
1154*8348SEric.Yu@Sun.COM 	sti->sti_conn_ind_tail = mp;
11550Sstevel@tonic-gate 	/* Wakeup a single consumer of the T_CONN_IND */
1156*8348SEric.Yu@Sun.COM 	cv_signal(&so->so_acceptq_cv);
11570Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
11580Sstevel@tonic-gate }
11590Sstevel@tonic-gate 
11600Sstevel@tonic-gate /*
11610Sstevel@tonic-gate  * Wait for a T_CONN_IND.
11620Sstevel@tonic-gate  * Don't wait if nonblocking.
11630Sstevel@tonic-gate  * Accept signals and socket errors.
11640Sstevel@tonic-gate  */
11650Sstevel@tonic-gate int
11660Sstevel@tonic-gate sowaitconnind(struct sonode *so, int fmode, mblk_t **mpp)
11670Sstevel@tonic-gate {
11680Sstevel@tonic-gate 	mblk_t *mp;
1169*8348SEric.Yu@Sun.COM 	sotpi_info_t *sti = SOTOTPI(so);
11700Sstevel@tonic-gate 	int error = 0;
11710Sstevel@tonic-gate 
11720Sstevel@tonic-gate 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
11730Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
11740Sstevel@tonic-gate check_error:
11750Sstevel@tonic-gate 	if (so->so_error) {
1176*8348SEric.Yu@Sun.COM 		error = sogeterr(so, B_TRUE);
11770Sstevel@tonic-gate 		if (error) {
11780Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
11790Sstevel@tonic-gate 			return (error);
11800Sstevel@tonic-gate 		}
11810Sstevel@tonic-gate 	}
11820Sstevel@tonic-gate 
1183*8348SEric.Yu@Sun.COM 	if (sti->sti_conn_ind_head == NULL) {
11840Sstevel@tonic-gate 		if (fmode & (FNDELAY|FNONBLOCK)) {
11850Sstevel@tonic-gate 			error = EWOULDBLOCK;
11860Sstevel@tonic-gate 			goto done;
11870Sstevel@tonic-gate 		}
1188*8348SEric.Yu@Sun.COM 
1189*8348SEric.Yu@Sun.COM 		if (so->so_state & SS_CLOSING) {
1190*8348SEric.Yu@Sun.COM 			error = EINTR;
1191*8348SEric.Yu@Sun.COM 			goto done;
1192*8348SEric.Yu@Sun.COM 		}
1193*8348SEric.Yu@Sun.COM 
1194*8348SEric.Yu@Sun.COM 		if (!cv_wait_sig_swap(&so->so_acceptq_cv, &so->so_lock)) {
11950Sstevel@tonic-gate 			error = EINTR;
11960Sstevel@tonic-gate 			goto done;
11970Sstevel@tonic-gate 		}
11980Sstevel@tonic-gate 		goto check_error;
11990Sstevel@tonic-gate 	}
1200*8348SEric.Yu@Sun.COM 	mp = sti->sti_conn_ind_head;
1201*8348SEric.Yu@Sun.COM 	sti->sti_conn_ind_head = mp->b_next;
12020Sstevel@tonic-gate 	mp->b_next = NULL;
1203*8348SEric.Yu@Sun.COM 	if (sti->sti_conn_ind_head == NULL) {
1204*8348SEric.Yu@Sun.COM 		ASSERT(sti->sti_conn_ind_tail == mp);
1205*8348SEric.Yu@Sun.COM 		sti->sti_conn_ind_tail = NULL;
12060Sstevel@tonic-gate 	}
12070Sstevel@tonic-gate 	*mpp = mp;
12080Sstevel@tonic-gate done:
12090Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
12100Sstevel@tonic-gate 	return (error);
12110Sstevel@tonic-gate }
12120Sstevel@tonic-gate 
12130Sstevel@tonic-gate /*
12140Sstevel@tonic-gate  * Flush a T_CONN_IND matching the sequence number from the list.
12150Sstevel@tonic-gate  * Return zero if found; non-zero otherwise.
12160Sstevel@tonic-gate  * This is called very infrequently thus it is ok to do a linear search.
12170Sstevel@tonic-gate  */
12180Sstevel@tonic-gate int
12190Sstevel@tonic-gate soflushconnind(struct sonode *so, t_scalar_t seqno)
12200Sstevel@tonic-gate {
12210Sstevel@tonic-gate 	mblk_t *prevmp, *mp;
12220Sstevel@tonic-gate 	struct T_conn_ind *tci;
1223*8348SEric.Yu@Sun.COM 	sotpi_info_t *sti = SOTOTPI(so);
12240Sstevel@tonic-gate 
12250Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
1226*8348SEric.Yu@Sun.COM 	for (prevmp = NULL, mp = sti->sti_conn_ind_head; mp != NULL;
12270Sstevel@tonic-gate 	    prevmp = mp, mp = mp->b_next) {
12280Sstevel@tonic-gate 		tci = (struct T_conn_ind *)mp->b_rptr;
12290Sstevel@tonic-gate 		if (tci->SEQ_number == seqno) {
12300Sstevel@tonic-gate 			dprintso(so, 1,
12315753Sgww 			    ("t_discon_ind: found T_CONN_IND %d\n", seqno));
12320Sstevel@tonic-gate 			/* Deleting last? */
1233*8348SEric.Yu@Sun.COM 			if (sti->sti_conn_ind_tail == mp) {
1234*8348SEric.Yu@Sun.COM 				sti->sti_conn_ind_tail = prevmp;
12350Sstevel@tonic-gate 			}
12360Sstevel@tonic-gate 			if (prevmp == NULL) {
12370Sstevel@tonic-gate 				/* Deleting first */
1238*8348SEric.Yu@Sun.COM 				sti->sti_conn_ind_head = mp->b_next;
12390Sstevel@tonic-gate 			} else {
12400Sstevel@tonic-gate 				prevmp->b_next = mp->b_next;
12410Sstevel@tonic-gate 			}
12420Sstevel@tonic-gate 			mp->b_next = NULL;
1243*8348SEric.Yu@Sun.COM 
1244*8348SEric.Yu@Sun.COM 			ASSERT((sti->sti_conn_ind_head == NULL &&
1245*8348SEric.Yu@Sun.COM 			    sti->sti_conn_ind_tail == NULL) ||
1246*8348SEric.Yu@Sun.COM 			    (sti->sti_conn_ind_head != NULL &&
1247*8348SEric.Yu@Sun.COM 			    sti->sti_conn_ind_tail != NULL));
1248*8348SEric.Yu@Sun.COM 
12490Sstevel@tonic-gate 			so->so_error = ECONNABORTED;
12500Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
1251898Skais 
1252898Skais 			/*
1253898Skais 			 * T_KSSL_PROXY_CONN_IND may carry a handle for
1254898Skais 			 * an SSL context, and needs to be released.
1255898Skais 			 */
1256898Skais 			if ((tci->PRIM_type == T_SSL_PROXY_CONN_IND) &&
1257898Skais 			    (mp->b_cont != NULL)) {
1258898Skais 				kssl_ctx_t kssl_ctx;
1259898Skais 
1260898Skais 				ASSERT(MBLKL(mp->b_cont) ==
1261898Skais 				    sizeof (kssl_ctx_t));
1262898Skais 				kssl_ctx = *((kssl_ctx_t *)mp->b_cont->b_rptr);
1263898Skais 				kssl_release_ctx(kssl_ctx);
1264898Skais 			}
12650Sstevel@tonic-gate 			freemsg(mp);
12660Sstevel@tonic-gate 			return (0);
12670Sstevel@tonic-gate 		}
12680Sstevel@tonic-gate 	}
12690Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
12700Sstevel@tonic-gate 	dprintso(so, 1,	("t_discon_ind: NOT found T_CONN_IND %d\n", seqno));
12710Sstevel@tonic-gate 	return (-1);
12720Sstevel@tonic-gate }
12730Sstevel@tonic-gate 
/*
 * Wait until the socket is connected or there is an error.
 * fmode should contain any nonblocking flags. nosig should be
 * set if the caller does not want the wait to be interrupted by a signal.
 *
 * Returns 0 once connected. While the connection is still in progress
 * it returns EINPROGRESS for a nonblocking fmode, and EINTR if the
 * socket is closing or the wait was interrupted by a signal. Otherwise
 * it returns the pending socket error (which is cleared), or
 * ECONNREFUSED if the socket ended up not connected without an error.
 *
 * The caller must hold so_lock.
 */
int
sowaitconnected(struct sonode *so, int fmode, int nosig)
{
	int error;

	ASSERT(MUTEX_HELD(&so->so_lock));

	/* Sleep only while connecting, not yet connected and error free */
	while ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) ==
	    SS_ISCONNECTING && so->so_error == 0) {

		dprintso(so, 1, ("waiting for SS_ISCONNECTED on %p\n",
		    (void *)so));
		if (fmode & (FNDELAY|FNONBLOCK))
			return (EINPROGRESS);

		if (so->so_state & SS_CLOSING)
			return (EINTR);

		if (nosig) {
			cv_wait(&so->so_state_cv, &so->so_lock);
		} else if (cv_wait_sig_swap(&so->so_state_cv,
		    &so->so_lock) == 0) {
			/*
			 * Return EINTR and let the application use
			 * nonblocking techniques for detecting when
			 * the connection has been established.
			 */
			return (EINTR);
		}
		dprintso(so, 1, ("awoken on %p\n", (void *)so));
	}

	if (so->so_error != 0) {
		error = sogeterr(so, B_TRUE);
		ASSERT(error != 0);
	} else if (so->so_state & SS_ISCONNECTED) {
		return (0);
	} else {
		/*
		 * Could have received a T_ORDREL_IND or a T_DISCON_IND with
		 * zero errno. Or another thread could have consumed so_error
		 * e.g. by calling read.
		 */
		error = ECONNREFUSED;
	}

	dprintso(so, 1, ("sowaitconnected: error %d\n", error));
	return (error);
}
13280Sstevel@tonic-gate 
13290Sstevel@tonic-gate 
13300Sstevel@tonic-gate /*
13310Sstevel@tonic-gate  * Handle the signal generation aspect of urgent data.
13320Sstevel@tonic-gate  */
13330Sstevel@tonic-gate static void
13340Sstevel@tonic-gate so_oob_sig(struct sonode *so, int extrasig,
13350Sstevel@tonic-gate     strsigset_t *signals, strpollset_t *pollwakeups)
13360Sstevel@tonic-gate {
1337*8348SEric.Yu@Sun.COM 	sotpi_info_t *sti = SOTOTPI(so);
1338*8348SEric.Yu@Sun.COM 
13390Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
13400Sstevel@tonic-gate 
13410Sstevel@tonic-gate 	ASSERT(so_verify_oobstate(so));
1342*8348SEric.Yu@Sun.COM 	ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt);
1343*8348SEric.Yu@Sun.COM 	if (sti->sti_oobsigcnt > sti->sti_oobcnt) {
13440Sstevel@tonic-gate 		/*
13450Sstevel@tonic-gate 		 * Signal has already been generated once for this
13460Sstevel@tonic-gate 		 * urgent "event". However, since TCP can receive updated
13470Sstevel@tonic-gate 		 * urgent pointers we still generate a signal.
13480Sstevel@tonic-gate 		 */
13490Sstevel@tonic-gate 		ASSERT(so->so_state & SS_OOBPEND);
13500Sstevel@tonic-gate 		if (extrasig) {
13510Sstevel@tonic-gate 			*signals |= S_RDBAND;
13520Sstevel@tonic-gate 			*pollwakeups |= POLLRDBAND;
13530Sstevel@tonic-gate 		}
13540Sstevel@tonic-gate 		return;
13550Sstevel@tonic-gate 	}
13560Sstevel@tonic-gate 
1357*8348SEric.Yu@Sun.COM 	sti->sti_oobsigcnt++;
1358*8348SEric.Yu@Sun.COM 	ASSERT(sti->sti_oobsigcnt > 0);	/* Wraparound */
1359*8348SEric.Yu@Sun.COM 	ASSERT(sti->sti_oobsigcnt > sti->sti_oobcnt);
13600Sstevel@tonic-gate 
13610Sstevel@tonic-gate 	/*
13620Sstevel@tonic-gate 	 * Record (for select/poll) that urgent data is pending.
13630Sstevel@tonic-gate 	 */
13640Sstevel@tonic-gate 	so->so_state |= SS_OOBPEND;
13650Sstevel@tonic-gate 	/*
13660Sstevel@tonic-gate 	 * New urgent data on the way so forget about any old
13670Sstevel@tonic-gate 	 * urgent data.
13680Sstevel@tonic-gate 	 */
13690Sstevel@tonic-gate 	so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);
13700Sstevel@tonic-gate 	if (so->so_oobmsg != NULL) {
13710Sstevel@tonic-gate 		dprintso(so, 1, ("sock: discarding old oob\n"));
13720Sstevel@tonic-gate 		freemsg(so->so_oobmsg);
13730Sstevel@tonic-gate 		so->so_oobmsg = NULL;
13740Sstevel@tonic-gate 	}
13750Sstevel@tonic-gate 	*signals |= S_RDBAND;
13760Sstevel@tonic-gate 	*pollwakeups |= POLLRDBAND;
13770Sstevel@tonic-gate 	ASSERT(so_verify_oobstate(so));
13780Sstevel@tonic-gate }
13790Sstevel@tonic-gate 
13800Sstevel@tonic-gate /*
13810Sstevel@tonic-gate  * Handle the processing of the T_EXDATA_IND with urgent data.
13820Sstevel@tonic-gate  * Returns the T_EXDATA_IND if it should be queued on the read queue.
13830Sstevel@tonic-gate  */
13840Sstevel@tonic-gate /* ARGSUSED2 */
13850Sstevel@tonic-gate static mblk_t *
13860Sstevel@tonic-gate so_oob_exdata(struct sonode *so, mblk_t *mp,
13870Sstevel@tonic-gate 	strsigset_t *signals, strpollset_t *pollwakeups)
13880Sstevel@tonic-gate {
1389*8348SEric.Yu@Sun.COM 	sotpi_info_t *sti = SOTOTPI(so);
1390*8348SEric.Yu@Sun.COM 
13910Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
13920Sstevel@tonic-gate 
13930Sstevel@tonic-gate 	ASSERT(so_verify_oobstate(so));
13940Sstevel@tonic-gate 
1395*8348SEric.Yu@Sun.COM 	ASSERT(sti->sti_oobsigcnt > sti->sti_oobcnt);
13960Sstevel@tonic-gate 
1397*8348SEric.Yu@Sun.COM 	sti->sti_oobcnt++;
1398*8348SEric.Yu@Sun.COM 	ASSERT(sti->sti_oobcnt > 0);	/* wraparound? */
1399*8348SEric.Yu@Sun.COM 	ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt);
14000Sstevel@tonic-gate 
14010Sstevel@tonic-gate 	/*
14020Sstevel@tonic-gate 	 * Set MSGMARK for SIOCATMARK.
14030Sstevel@tonic-gate 	 */
14040Sstevel@tonic-gate 	mp->b_flag |= MSGMARK;
14050Sstevel@tonic-gate 
14060Sstevel@tonic-gate 	ASSERT(so_verify_oobstate(so));
14070Sstevel@tonic-gate 	return (mp);
14080Sstevel@tonic-gate }
14090Sstevel@tonic-gate 
14100Sstevel@tonic-gate /*
14110Sstevel@tonic-gate  * Handle the processing of the actual urgent data.
14120Sstevel@tonic-gate  * Returns the data mblk if it should be queued on the read queue.
14130Sstevel@tonic-gate  */
14140Sstevel@tonic-gate static mblk_t *
14150Sstevel@tonic-gate so_oob_data(struct sonode *so, mblk_t *mp,
14160Sstevel@tonic-gate 	strsigset_t *signals, strpollset_t *pollwakeups)
14170Sstevel@tonic-gate {
1418*8348SEric.Yu@Sun.COM 	sotpi_info_t *sti = SOTOTPI(so);
1419*8348SEric.Yu@Sun.COM 
14200Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
14210Sstevel@tonic-gate 
14220Sstevel@tonic-gate 	ASSERT(so_verify_oobstate(so));
14230Sstevel@tonic-gate 
1424*8348SEric.Yu@Sun.COM 	ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt);
14250Sstevel@tonic-gate 	ASSERT(mp != NULL);
14260Sstevel@tonic-gate 	/*
14270Sstevel@tonic-gate 	 * For OOBINLINE we keep the data in the T_EXDATA_IND.
14280Sstevel@tonic-gate 	 * Otherwise we store it in so_oobmsg.
14290Sstevel@tonic-gate 	 */
14300Sstevel@tonic-gate 	ASSERT(so->so_oobmsg == NULL);
14310Sstevel@tonic-gate 	if (so->so_options & SO_OOBINLINE) {
14320Sstevel@tonic-gate 		*pollwakeups |= POLLIN | POLLRDNORM | POLLRDBAND;
14330Sstevel@tonic-gate 		*signals |= S_INPUT | S_RDNORM;
14340Sstevel@tonic-gate 	} else {
14350Sstevel@tonic-gate 		*pollwakeups |= POLLRDBAND;
14360Sstevel@tonic-gate 		so->so_state |= SS_HAVEOOBDATA;
14370Sstevel@tonic-gate 		so->so_oobmsg = mp;
14380Sstevel@tonic-gate 		mp = NULL;
14390Sstevel@tonic-gate 	}
14400Sstevel@tonic-gate 	ASSERT(so_verify_oobstate(so));
14410Sstevel@tonic-gate 	return (mp);
14420Sstevel@tonic-gate }
14430Sstevel@tonic-gate 
14440Sstevel@tonic-gate /*
14450Sstevel@tonic-gate  * Caller must hold the mutex.
14460Sstevel@tonic-gate  * For delayed processing, save the T_DISCON_IND received
1447*8348SEric.Yu@Sun.COM  * from below on sti_discon_ind_mp.
14480Sstevel@tonic-gate  * When the message is processed the framework will call:
14490Sstevel@tonic-gate  *      (*func)(so, mp);
14500Sstevel@tonic-gate  */
14510Sstevel@tonic-gate static void
14520Sstevel@tonic-gate so_save_discon_ind(struct sonode *so,
14530Sstevel@tonic-gate 	mblk_t *mp,
14540Sstevel@tonic-gate 	void (*func)(struct sonode *so, mblk_t *))
14550Sstevel@tonic-gate {
1456*8348SEric.Yu@Sun.COM 	sotpi_info_t *sti = SOTOTPI(so);
1457*8348SEric.Yu@Sun.COM 
14580Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
14590Sstevel@tonic-gate 
14600Sstevel@tonic-gate 	/*
14610Sstevel@tonic-gate 	 * Discard new T_DISCON_IND if we have already received another.
1462*8348SEric.Yu@Sun.COM 	 * Currently the earlier message can either be on sti_discon_ind_mp
14630Sstevel@tonic-gate 	 * or being processed.
14640Sstevel@tonic-gate 	 */
1465*8348SEric.Yu@Sun.COM 	if (sti->sti_discon_ind_mp != NULL || (so->so_flag & SOASYNC_UNBIND)) {
14661548Srshoaib 		zcmn_err(getzoneid(), CE_WARN,
14670Sstevel@tonic-gate 		    "sockfs: received unexpected additional T_DISCON_IND\n");
14680Sstevel@tonic-gate 		freemsg(mp);
14690Sstevel@tonic-gate 		return;
14700Sstevel@tonic-gate 	}
14710Sstevel@tonic-gate 	mp->b_prev = (mblk_t *)func;
14720Sstevel@tonic-gate 	mp->b_next = NULL;
1473*8348SEric.Yu@Sun.COM 	sti->sti_discon_ind_mp = mp;
14740Sstevel@tonic-gate }
14750Sstevel@tonic-gate 
14760Sstevel@tonic-gate /*
14770Sstevel@tonic-gate  * Caller must hold the mutex and make sure that either SOLOCKED
14780Sstevel@tonic-gate  * or SOASYNC_UNBIND is set. Called from so_unlock_single().
1479*8348SEric.Yu@Sun.COM  * Perform delayed processing of T_DISCON_IND message on sti_discon_ind_mp.
14800Sstevel@tonic-gate  * Need to ensure that strsock_proto() will not end up sleeping for
14810Sstevel@tonic-gate  * SOASYNC_UNBIND, while executing this function.
14820Sstevel@tonic-gate  */
14830Sstevel@tonic-gate void
14840Sstevel@tonic-gate so_drain_discon_ind(struct sonode *so)
14850Sstevel@tonic-gate {
14860Sstevel@tonic-gate 	mblk_t	*bp;
14870Sstevel@tonic-gate 	void (*func)(struct sonode *so, mblk_t *);
1488*8348SEric.Yu@Sun.COM 	sotpi_info_t *sti = SOTOTPI(so);
14890Sstevel@tonic-gate 
14900Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
14910Sstevel@tonic-gate 	ASSERT(so->so_flag & (SOLOCKED|SOASYNC_UNBIND));
14920Sstevel@tonic-gate 
1493*8348SEric.Yu@Sun.COM 	/* Process T_DISCON_IND on sti_discon_ind_mp */
1494*8348SEric.Yu@Sun.COM 	if ((bp = sti->sti_discon_ind_mp) != NULL) {
1495*8348SEric.Yu@Sun.COM 		sti->sti_discon_ind_mp = NULL;
14960Sstevel@tonic-gate 		func = (void (*)())bp->b_prev;
14970Sstevel@tonic-gate 		bp->b_prev = NULL;
14980Sstevel@tonic-gate 
14990Sstevel@tonic-gate 		/*
15000Sstevel@tonic-gate 		 * This (*func) is supposed to generate a message downstream
15010Sstevel@tonic-gate 		 * and we need to have a flag set until the corresponding
15020Sstevel@tonic-gate 		 * upstream message reaches stream head.
15030Sstevel@tonic-gate 		 * When processing T_DISCON_IND in strsock_discon_ind
15040Sstevel@tonic-gate 		 * we hold SOASYN_UNBIND when sending T_UNBIND_REQ down and
15050Sstevel@tonic-gate 		 * drop the flag after we get the ACK in strsock_proto.
15060Sstevel@tonic-gate 		 */
15070Sstevel@tonic-gate 		(void) (*func)(so, bp);
15080Sstevel@tonic-gate 	}
15090Sstevel@tonic-gate }
15100Sstevel@tonic-gate 
15110Sstevel@tonic-gate /*
15120Sstevel@tonic-gate  * Caller must hold the mutex.
1513*8348SEric.Yu@Sun.COM  * Remove the T_DISCON_IND on sti_discon_ind_mp.
15140Sstevel@tonic-gate  */
15150Sstevel@tonic-gate void
15160Sstevel@tonic-gate so_flush_discon_ind(struct sonode *so)
15170Sstevel@tonic-gate {
15180Sstevel@tonic-gate 	mblk_t	*bp;
1519*8348SEric.Yu@Sun.COM 	sotpi_info_t *sti = SOTOTPI(so);
15200Sstevel@tonic-gate 
15210Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
15220Sstevel@tonic-gate 
15230Sstevel@tonic-gate 	/*
1524*8348SEric.Yu@Sun.COM 	 * Remove T_DISCON_IND mblk at sti_discon_ind_mp.
15250Sstevel@tonic-gate 	 */
1526*8348SEric.Yu@Sun.COM 	if ((bp = sti->sti_discon_ind_mp) != NULL) {
1527*8348SEric.Yu@Sun.COM 		sti->sti_discon_ind_mp = NULL;
15280Sstevel@tonic-gate 		bp->b_prev = NULL;
15290Sstevel@tonic-gate 		freemsg(bp);
15300Sstevel@tonic-gate 	}
15310Sstevel@tonic-gate }
15320Sstevel@tonic-gate 
15330Sstevel@tonic-gate /*
15340Sstevel@tonic-gate  * Caller must hold the mutex.
15350Sstevel@tonic-gate  *
15360Sstevel@tonic-gate  * This function is used to process the T_DISCON_IND message. It does
15370Sstevel@tonic-gate  * immediate processing when called from strsock_proto and delayed
1538*8348SEric.Yu@Sun.COM  * processing of discon_ind saved on sti_discon_ind_mp when called from
15390Sstevel@tonic-gate  * so_drain_discon_ind. When a T_DISCON_IND message is saved in
1540*8348SEric.Yu@Sun.COM  * sti_discon_ind_mp for delayed processing, this function is registered
15410Sstevel@tonic-gate  * as the callback function to process the message.
15420Sstevel@tonic-gate  *
15430Sstevel@tonic-gate  * SOASYNC_UNBIND should be held in this function, during the non-blocking
15440Sstevel@tonic-gate  * unbind operation, and should be released only after we receive the ACK
15450Sstevel@tonic-gate  * in strsock_proto, for the T_UNBIND_REQ sent here. Since SOLOCKED is not set,
15460Sstevel@tonic-gate  * no TPI messages would be sent down at this time. This is to prevent M_FLUSH
15470Sstevel@tonic-gate  * sent from either this function or tcp_unbind(), flushing away any TPI
15480Sstevel@tonic-gate  * message that is being sent down and stays in a lower module's queue.
15490Sstevel@tonic-gate  *
15500Sstevel@tonic-gate  * This function drops so_lock and grabs it again.
15510Sstevel@tonic-gate  */
15520Sstevel@tonic-gate static void
15530Sstevel@tonic-gate strsock_discon_ind(struct sonode *so, mblk_t *discon_mp)
15540Sstevel@tonic-gate {
15550Sstevel@tonic-gate 	struct vnode *vp;
15560Sstevel@tonic-gate 	struct stdata *stp;
15570Sstevel@tonic-gate 	union T_primitives *tpr;
15580Sstevel@tonic-gate 	struct T_unbind_req *ubr;
15590Sstevel@tonic-gate 	mblk_t *mp;
15600Sstevel@tonic-gate 	int error;
1561*8348SEric.Yu@Sun.COM 	sotpi_info_t *sti = SOTOTPI(so);
15620Sstevel@tonic-gate 
15630Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
15640Sstevel@tonic-gate 	ASSERT(discon_mp);
15650Sstevel@tonic-gate 	ASSERT(discon_mp->b_rptr);
15660Sstevel@tonic-gate 
15670Sstevel@tonic-gate 	tpr = (union T_primitives *)discon_mp->b_rptr;
15680Sstevel@tonic-gate 	ASSERT(tpr->type == T_DISCON_IND);
15690Sstevel@tonic-gate 
15700Sstevel@tonic-gate 	vp = SOTOV(so);
15710Sstevel@tonic-gate 	stp = vp->v_stream;
15720Sstevel@tonic-gate 	ASSERT(stp);
15730Sstevel@tonic-gate 
15740Sstevel@tonic-gate 	/*
15750Sstevel@tonic-gate 	 * Not a listener
15760Sstevel@tonic-gate 	 */
15770Sstevel@tonic-gate 	ASSERT((so->so_state & SS_ACCEPTCONN) == 0);
15780Sstevel@tonic-gate 
15790Sstevel@tonic-gate 	/*
15800Sstevel@tonic-gate 	 * This assumes that the name space for DISCON_reason
15810Sstevel@tonic-gate 	 * is the errno name space.
15820Sstevel@tonic-gate 	 */
15830Sstevel@tonic-gate 	soisdisconnected(so, tpr->discon_ind.DISCON_reason);
1584*8348SEric.Yu@Sun.COM 	sti->sti_laddr_valid = 0;
1585*8348SEric.Yu@Sun.COM 	sti->sti_faddr_valid = 0;
15860Sstevel@tonic-gate 
15870Sstevel@tonic-gate 	/*
15880Sstevel@tonic-gate 	 * Unbind with the transport without blocking.
15890Sstevel@tonic-gate 	 * If we've already received a T_DISCON_IND do not unbind.
15900Sstevel@tonic-gate 	 *
15910Sstevel@tonic-gate 	 * If there is no preallocated unbind message, we have already
15920Sstevel@tonic-gate 	 * unbound with the transport
15930Sstevel@tonic-gate 	 *
15940Sstevel@tonic-gate 	 * If the socket is not bound, no need to unbind.
15950Sstevel@tonic-gate 	 */
1596*8348SEric.Yu@Sun.COM 	mp = sti->sti_unbind_mp;
15970Sstevel@tonic-gate 	if (mp == NULL) {
15980Sstevel@tonic-gate 		ASSERT(!(so->so_state & SS_ISBOUND));
15990Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
16000Sstevel@tonic-gate 	} else if (!(so->so_state & SS_ISBOUND))  {
16010Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
16020Sstevel@tonic-gate 	} else {
1603*8348SEric.Yu@Sun.COM 		sti->sti_unbind_mp = NULL;
16040Sstevel@tonic-gate 
16050Sstevel@tonic-gate 		/*
16060Sstevel@tonic-gate 		 * Is another T_DISCON_IND being processed.
16070Sstevel@tonic-gate 		 */
16080Sstevel@tonic-gate 		ASSERT((so->so_flag & SOASYNC_UNBIND) == 0);
16090Sstevel@tonic-gate 
16100Sstevel@tonic-gate 		/*
16110Sstevel@tonic-gate 		 * Make strsock_proto ignore T_OK_ACK and T_ERROR_ACK for
16120Sstevel@tonic-gate 		 * this unbind. Set SOASYNC_UNBIND. This should be cleared
16130Sstevel@tonic-gate 		 * only after we receive the ACK in strsock_proto.
16140Sstevel@tonic-gate 		 */
16150Sstevel@tonic-gate 		so->so_flag |= SOASYNC_UNBIND;
16160Sstevel@tonic-gate 		ASSERT(!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)));
1617*8348SEric.Yu@Sun.COM 		so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN);
1618*8348SEric.Yu@Sun.COM 		sti->sti_laddr_valid = 0;
16190Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
16200Sstevel@tonic-gate 
16210Sstevel@tonic-gate 		/*
16220Sstevel@tonic-gate 		 * Send down T_UNBIND_REQ ignoring flow control.
16230Sstevel@tonic-gate 		 * XXX Assumes that MSG_IGNFLOW implies that this thread
16240Sstevel@tonic-gate 		 * does not run service procedures.
16250Sstevel@tonic-gate 		 */
16260Sstevel@tonic-gate 		ASSERT(DB_TYPE(mp) == M_PROTO);
16270Sstevel@tonic-gate 		ubr = (struct T_unbind_req *)mp->b_rptr;
16280Sstevel@tonic-gate 		mp->b_wptr += sizeof (*ubr);
16290Sstevel@tonic-gate 		ubr->PRIM_type = T_UNBIND_REQ;
16300Sstevel@tonic-gate 
16310Sstevel@tonic-gate 		/*
16320Sstevel@tonic-gate 		 * Flush the read and write side (except stream head read queue)
16330Sstevel@tonic-gate 		 * and send down T_UNBIND_REQ.
16340Sstevel@tonic-gate 		 */
16350Sstevel@tonic-gate 		(void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW);
16360Sstevel@tonic-gate 		error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
16375753Sgww 		    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
16380Sstevel@tonic-gate 		/* LINTED - warning: statement has no consequent: if */
16390Sstevel@tonic-gate 		if (error) {
16400Sstevel@tonic-gate 			eprintsoline(so, error);
16410Sstevel@tonic-gate 		}
16420Sstevel@tonic-gate 	}
16430Sstevel@tonic-gate 
16440Sstevel@tonic-gate 	if (tpr->discon_ind.DISCON_reason != 0)
16450Sstevel@tonic-gate 		strsetrerror(SOTOV(so), 0, 0, sogetrderr);
16460Sstevel@tonic-gate 	strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
16470Sstevel@tonic-gate 	strseteof(SOTOV(so), 1);
16480Sstevel@tonic-gate 	/*
16490Sstevel@tonic-gate 	 * strseteof takes care of read side wakeups,
16500Sstevel@tonic-gate 	 * pollwakeups, and signals.
16510Sstevel@tonic-gate 	 */
16520Sstevel@tonic-gate 	dprintso(so, 1, ("T_DISCON_IND: error %d\n", so->so_error));
16530Sstevel@tonic-gate 	freemsg(discon_mp);
16540Sstevel@tonic-gate 
16550Sstevel@tonic-gate 
16560Sstevel@tonic-gate 	pollwakeup(&stp->sd_pollist, POLLOUT);
16570Sstevel@tonic-gate 	mutex_enter(&stp->sd_lock);
16580Sstevel@tonic-gate 
16590Sstevel@tonic-gate 	/*
16600Sstevel@tonic-gate 	 * Wake sleeping write
16610Sstevel@tonic-gate 	 */
16620Sstevel@tonic-gate 	if (stp->sd_flag & WSLEEP) {
16630Sstevel@tonic-gate 		stp->sd_flag &= ~WSLEEP;
16640Sstevel@tonic-gate 		cv_broadcast(&stp->sd_wrq->q_wait);
16650Sstevel@tonic-gate 	}
16660Sstevel@tonic-gate 
16670Sstevel@tonic-gate 	/*
16680Sstevel@tonic-gate 	 * strsendsig can handle multiple signals with a
16690Sstevel@tonic-gate 	 * single call.  Send SIGPOLL for S_OUTPUT event.
16700Sstevel@tonic-gate 	 */
16710Sstevel@tonic-gate 	if (stp->sd_sigflags & S_OUTPUT)
16720Sstevel@tonic-gate 		strsendsig(stp->sd_siglist, S_OUTPUT, 0, 0);
16730Sstevel@tonic-gate 
16740Sstevel@tonic-gate 	mutex_exit(&stp->sd_lock);
16750Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
16760Sstevel@tonic-gate }
16770Sstevel@tonic-gate 
16780Sstevel@tonic-gate /*
16790Sstevel@tonic-gate  * This routine is registered with the stream head to receive M_PROTO
16800Sstevel@tonic-gate  * and M_PCPROTO messages.
16810Sstevel@tonic-gate  *
16820Sstevel@tonic-gate  * Returns NULL if the message was consumed.
16830Sstevel@tonic-gate  * Returns an mblk to make that mblk be processed (and queued) by the stream
16840Sstevel@tonic-gate  * head.
16850Sstevel@tonic-gate  *
16860Sstevel@tonic-gate  * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and
16870Sstevel@tonic-gate  * *pollwakeups) for the stream head to take action on. Note that since
16880Sstevel@tonic-gate  * sockets always deliver SIGIO for every new piece of data this routine
16890Sstevel@tonic-gate  * never sets *firstmsgsigs; any signals are returned in *allmsgsigs.
16900Sstevel@tonic-gate  *
16910Sstevel@tonic-gate  * This routine handles all data related TPI messages independent of
16920Sstevel@tonic-gate  * the type of the socket i.e. it doesn't care if a T_UNITDATA_IND message
16930Sstevel@tonic-gate  * arrives on a SOCK_STREAM.
16940Sstevel@tonic-gate  */
16950Sstevel@tonic-gate static mblk_t *
16960Sstevel@tonic-gate strsock_proto(vnode_t *vp, mblk_t *mp,
16970Sstevel@tonic-gate 		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
16980Sstevel@tonic-gate 		strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
16990Sstevel@tonic-gate {
17000Sstevel@tonic-gate 	union T_primitives *tpr;
17010Sstevel@tonic-gate 	struct sonode *so;
1702*8348SEric.Yu@Sun.COM 	sotpi_info_t *sti;
17030Sstevel@tonic-gate 
17040Sstevel@tonic-gate 	so = VTOSO(vp);
1705*8348SEric.Yu@Sun.COM 	sti = SOTOTPI(so);
17060Sstevel@tonic-gate 
17077240Srh87107 	dprintso(so, 1, ("strsock_proto(%p, %p)\n", (void *)vp, (void *)mp));
17080Sstevel@tonic-gate 
17090Sstevel@tonic-gate 	/* Set default return values */
17100Sstevel@tonic-gate 	*firstmsgsigs = *wakeups = *allmsgsigs = *pollwakeups = 0;
17110Sstevel@tonic-gate 
17120Sstevel@tonic-gate 	ASSERT(DB_TYPE(mp) == M_PROTO ||
17130Sstevel@tonic-gate 	    DB_TYPE(mp) == M_PCPROTO);
17140Sstevel@tonic-gate 
17150Sstevel@tonic-gate 	if (MBLKL(mp) < sizeof (tpr->type)) {
17160Sstevel@tonic-gate 		/* The message is too short to even contain the primitive */
17171548Srshoaib 		zcmn_err(getzoneid(), CE_WARN,
17180Sstevel@tonic-gate 		    "sockfs: Too short TPI message received. Len = %ld\n",
17190Sstevel@tonic-gate 		    (ptrdiff_t)(MBLKL(mp)));
17200Sstevel@tonic-gate 		freemsg(mp);
17210Sstevel@tonic-gate 		return (NULL);
17220Sstevel@tonic-gate 	}
17230Sstevel@tonic-gate 	if (!__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
17240Sstevel@tonic-gate 		/* The read pointer is not aligned correctly for TPI */
17251548Srshoaib 		zcmn_err(getzoneid(), CE_WARN,
17260Sstevel@tonic-gate 		    "sockfs: Unaligned TPI message received. rptr = %p\n",
17270Sstevel@tonic-gate 		    (void *)mp->b_rptr);
17280Sstevel@tonic-gate 		freemsg(mp);
17290Sstevel@tonic-gate 		return (NULL);
17300Sstevel@tonic-gate 	}
17310Sstevel@tonic-gate 	tpr = (union T_primitives *)mp->b_rptr;
17320Sstevel@tonic-gate 	dprintso(so, 1, ("strsock_proto: primitive %d\n", tpr->type));
17330Sstevel@tonic-gate 
17340Sstevel@tonic-gate 	switch (tpr->type) {
17350Sstevel@tonic-gate 
17360Sstevel@tonic-gate 	case T_DATA_IND:
17370Sstevel@tonic-gate 		if (MBLKL(mp) < sizeof (struct T_data_ind)) {
17381548Srshoaib 			zcmn_err(getzoneid(), CE_WARN,
17390Sstevel@tonic-gate 			    "sockfs: Too short T_DATA_IND. Len = %ld\n",
17400Sstevel@tonic-gate 			    (ptrdiff_t)(MBLKL(mp)));
17410Sstevel@tonic-gate 			freemsg(mp);
17420Sstevel@tonic-gate 			return (NULL);
17430Sstevel@tonic-gate 		}
17440Sstevel@tonic-gate 		/*
17450Sstevel@tonic-gate 		 * Ignore zero-length T_DATA_IND messages. These might be
17460Sstevel@tonic-gate 		 * generated by some transports.
17470Sstevel@tonic-gate 		 * This is needed to prevent read (which skips the M_PROTO
17480Sstevel@tonic-gate 		 * part) to unexpectedly return 0 (or return EWOULDBLOCK
17490Sstevel@tonic-gate 		 * on a non-blocking socket after select/poll has indicated
17500Sstevel@tonic-gate 		 * that data is available).
17510Sstevel@tonic-gate 		 */
17520Sstevel@tonic-gate 		if (msgdsize(mp->b_cont) == 0) {
17530Sstevel@tonic-gate 			dprintso(so, 0,
17540Sstevel@tonic-gate 			    ("strsock_proto: zero length T_DATA_IND\n"));
17550Sstevel@tonic-gate 			freemsg(mp);
17560Sstevel@tonic-gate 			return (NULL);
17570Sstevel@tonic-gate 		}
17580Sstevel@tonic-gate 		*allmsgsigs = S_INPUT | S_RDNORM;
17590Sstevel@tonic-gate 		*pollwakeups = POLLIN | POLLRDNORM;
17600Sstevel@tonic-gate 		*wakeups = RSLEEP;
17610Sstevel@tonic-gate 		return (mp);
17620Sstevel@tonic-gate 
17630Sstevel@tonic-gate 	case T_UNITDATA_IND: {
17640Sstevel@tonic-gate 		struct T_unitdata_ind	*tudi = &tpr->unitdata_ind;
17650Sstevel@tonic-gate 		void			*addr;
17660Sstevel@tonic-gate 		t_uscalar_t		addrlen;
17670Sstevel@tonic-gate 
17680Sstevel@tonic-gate 		if (MBLKL(mp) < sizeof (struct T_unitdata_ind)) {
17691548Srshoaib 			zcmn_err(getzoneid(), CE_WARN,
17700Sstevel@tonic-gate 			    "sockfs: Too short T_UNITDATA_IND. Len = %ld\n",
17710Sstevel@tonic-gate 			    (ptrdiff_t)(MBLKL(mp)));
17720Sstevel@tonic-gate 			freemsg(mp);
17730Sstevel@tonic-gate 			return (NULL);
17740Sstevel@tonic-gate 		}
17750Sstevel@tonic-gate 
17760Sstevel@tonic-gate 		/* Is this not a connected datagram socket? */
17770Sstevel@tonic-gate 		if ((so->so_mode & SM_CONNREQUIRED) ||
17780Sstevel@tonic-gate 		    !(so->so_state & SS_ISCONNECTED)) {
17790Sstevel@tonic-gate 			/*
17800Sstevel@tonic-gate 			 * Not a connected datagram socket. Look for
17810Sstevel@tonic-gate 			 * the SO_UNIX_CLOSE option. If such an option is found
17820Sstevel@tonic-gate 			 * discard the message (since it has no meaning
17830Sstevel@tonic-gate 			 * unless connected).
17840Sstevel@tonic-gate 			 */
17850Sstevel@tonic-gate 			if (so->so_family == AF_UNIX && msgdsize(mp) == 0 &&
17860Sstevel@tonic-gate 			    tudi->OPT_length != 0) {
17870Sstevel@tonic-gate 				void *opt;
17880Sstevel@tonic-gate 				t_uscalar_t optlen = tudi->OPT_length;
17890Sstevel@tonic-gate 
17900Sstevel@tonic-gate 				opt = sogetoff(mp, tudi->OPT_offset,
17915753Sgww 				    optlen, __TPI_ALIGN_SIZE);
17920Sstevel@tonic-gate 				if (opt == NULL) {
17930Sstevel@tonic-gate 					/* The len/off falls outside mp */
17940Sstevel@tonic-gate 					freemsg(mp);
17950Sstevel@tonic-gate 					mutex_enter(&so->so_lock);
17960Sstevel@tonic-gate 					soseterror(so, EPROTO);
17970Sstevel@tonic-gate 					mutex_exit(&so->so_lock);
17981548Srshoaib 					zcmn_err(getzoneid(), CE_WARN,
17990Sstevel@tonic-gate 					    "sockfs: T_unidata_ind with "
18000Sstevel@tonic-gate 					    "invalid optlen/offset %u/%d\n",
18010Sstevel@tonic-gate 					    optlen, tudi->OPT_offset);
18020Sstevel@tonic-gate 					return (NULL);
18030Sstevel@tonic-gate 				}
18040Sstevel@tonic-gate 				if (so_getopt_unix_close(opt, optlen)) {
18050Sstevel@tonic-gate 					freemsg(mp);
18060Sstevel@tonic-gate 					return (NULL);
18070Sstevel@tonic-gate 				}
18080Sstevel@tonic-gate 			}
18090Sstevel@tonic-gate 			*allmsgsigs = S_INPUT | S_RDNORM;
18100Sstevel@tonic-gate 			*pollwakeups = POLLIN | POLLRDNORM;
18110Sstevel@tonic-gate 			*wakeups = RSLEEP;
18120Sstevel@tonic-gate 			if (audit_active)
18130Sstevel@tonic-gate 				audit_sock(T_UNITDATA_IND, strvp2wq(vp),
18145753Sgww 				    mp, 0);
18150Sstevel@tonic-gate 			return (mp);
18160Sstevel@tonic-gate 		}
18170Sstevel@tonic-gate 
18180Sstevel@tonic-gate 		/*
18190Sstevel@tonic-gate 		 * A connect datagram socket. For AF_INET{,6} we verify that
18200Sstevel@tonic-gate 		 * the source address matches the "connected to" address.
18210Sstevel@tonic-gate 		 * The semantics of AF_UNIX sockets is to not verify
18220Sstevel@tonic-gate 		 * the source address.
18230Sstevel@tonic-gate 		 * Note that this source address verification is transport
18240Sstevel@tonic-gate 		 * specific. Thus the real fix would be to extend TPI
18250Sstevel@tonic-gate 		 * to allow T_CONN_REQ messages to be sent to connectionless
18260Sstevel@tonic-gate 		 * transport providers and always let the transport provider
18270Sstevel@tonic-gate 		 * do whatever filtering is needed.
18280Sstevel@tonic-gate 		 *
18290Sstevel@tonic-gate 		 * The verification/filtering semantics for transports
18300Sstevel@tonic-gate 		 * other than AF_INET and AF_UNIX are unknown. The choice
18310Sstevel@tonic-gate 		 * would be to either filter using bcmp or let all messages
18320Sstevel@tonic-gate 		 * get through. This code does not filter other address
18330Sstevel@tonic-gate 		 * families since this at least allows the application to
18340Sstevel@tonic-gate 		 * work around any missing filtering.
18350Sstevel@tonic-gate 		 *
18360Sstevel@tonic-gate 		 * XXX Should we move filtering to UDP/ICMP???
18370Sstevel@tonic-gate 		 * That would require passing e.g. a T_DISCON_REQ to UDP
18380Sstevel@tonic-gate 		 * when the socket becomes unconnected.
18390Sstevel@tonic-gate 		 */
18400Sstevel@tonic-gate 		addrlen = tudi->SRC_length;
18410Sstevel@tonic-gate 		/*
18420Sstevel@tonic-gate 		 * The alignment restriction is really too strict but
18430Sstevel@tonic-gate 		 * we want enough alignment to inspect the fields of
18440Sstevel@tonic-gate 		 * a sockaddr_in.
18450Sstevel@tonic-gate 		 */
18460Sstevel@tonic-gate 		addr = sogetoff(mp, tudi->SRC_offset, addrlen,
18475753Sgww 		    __TPI_ALIGN_SIZE);
18480Sstevel@tonic-gate 		if (addr == NULL) {
18490Sstevel@tonic-gate 			freemsg(mp);
18500Sstevel@tonic-gate 			mutex_enter(&so->so_lock);
18510Sstevel@tonic-gate 			soseterror(so, EPROTO);
18520Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
18531548Srshoaib 			zcmn_err(getzoneid(), CE_WARN,
18540Sstevel@tonic-gate 			    "sockfs: T_unidata_ind with invalid "
18550Sstevel@tonic-gate 			    "addrlen/offset %u/%d\n",
18560Sstevel@tonic-gate 			    addrlen, tudi->SRC_offset);
18570Sstevel@tonic-gate 			return (NULL);
18580Sstevel@tonic-gate 		}
18590Sstevel@tonic-gate 
18600Sstevel@tonic-gate 		if (so->so_family == AF_INET) {
18610Sstevel@tonic-gate 			/*
18620Sstevel@tonic-gate 			 * For AF_INET we allow wildcarding both sin_addr
18630Sstevel@tonic-gate 			 * and sin_port.
18640Sstevel@tonic-gate 			 */
18650Sstevel@tonic-gate 			struct sockaddr_in *faddr, *sin;
18660Sstevel@tonic-gate 
1867*8348SEric.Yu@Sun.COM 			/* Prevent sti_faddr_sa from changing while accessed */
18680Sstevel@tonic-gate 			mutex_enter(&so->so_lock);
1869*8348SEric.Yu@Sun.COM 			ASSERT(sti->sti_faddr_len ==
18705753Sgww 			    (socklen_t)sizeof (struct sockaddr_in));
1871*8348SEric.Yu@Sun.COM 			faddr = (struct sockaddr_in *)sti->sti_faddr_sa;
18720Sstevel@tonic-gate 			sin = (struct sockaddr_in *)addr;
18730Sstevel@tonic-gate 			if (addrlen !=
18745753Sgww 			    (t_uscalar_t)sizeof (struct sockaddr_in) ||
18750Sstevel@tonic-gate 			    (sin->sin_addr.s_addr != faddr->sin_addr.s_addr &&
18760Sstevel@tonic-gate 			    faddr->sin_addr.s_addr != INADDR_ANY) ||
18770Sstevel@tonic-gate 			    (so->so_type != SOCK_RAW &&
18780Sstevel@tonic-gate 			    sin->sin_port != faddr->sin_port &&
18790Sstevel@tonic-gate 			    faddr->sin_port != 0)) {
18800Sstevel@tonic-gate #ifdef DEBUG
18810Sstevel@tonic-gate 				dprintso(so, 0,
18825753Sgww 				    ("sockfs: T_UNITDATA_IND mismatch: %s",
18835753Sgww 				    pr_addr(so->so_family,
1884*8348SEric.Yu@Sun.COM 				    (struct sockaddr *)addr, addrlen)));
18850Sstevel@tonic-gate 				dprintso(so, 0, (" - %s\n",
1886*8348SEric.Yu@Sun.COM 				    pr_addr(so->so_family, sti->sti_faddr_sa,
1887*8348SEric.Yu@Sun.COM 				    (t_uscalar_t)sti->sti_faddr_len)));
18880Sstevel@tonic-gate #endif /* DEBUG */
18890Sstevel@tonic-gate 				mutex_exit(&so->so_lock);
18900Sstevel@tonic-gate 				freemsg(mp);
18910Sstevel@tonic-gate 				return (NULL);
18920Sstevel@tonic-gate 			}
18930Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
18940Sstevel@tonic-gate 		} else if (so->so_family == AF_INET6) {
18950Sstevel@tonic-gate 			/*
18960Sstevel@tonic-gate 			 * For AF_INET6 we allow wildcarding both sin6_addr
18970Sstevel@tonic-gate 			 * and sin6_port.
18980Sstevel@tonic-gate 			 */
18990Sstevel@tonic-gate 			struct sockaddr_in6 *faddr6, *sin6;
19000Sstevel@tonic-gate 			static struct in6_addr zeroes; /* inits to all zeros */
19010Sstevel@tonic-gate 
1902*8348SEric.Yu@Sun.COM 			/* Prevent sti_faddr_sa from changing while accessed */
19030Sstevel@tonic-gate 			mutex_enter(&so->so_lock);
1904*8348SEric.Yu@Sun.COM 			ASSERT(sti->sti_faddr_len ==
19050Sstevel@tonic-gate 			    (socklen_t)sizeof (struct sockaddr_in6));
1906*8348SEric.Yu@Sun.COM 			faddr6 = (struct sockaddr_in6 *)sti->sti_faddr_sa;
19070Sstevel@tonic-gate 			sin6 = (struct sockaddr_in6 *)addr;
19080Sstevel@tonic-gate 			/* XXX could we get a mapped address ::ffff:0.0.0.0 ? */
19090Sstevel@tonic-gate 			if (addrlen !=
19100Sstevel@tonic-gate 			    (t_uscalar_t)sizeof (struct sockaddr_in6) ||
19110Sstevel@tonic-gate 			    (!IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
19125753Sgww 			    &faddr6->sin6_addr) &&
19130Sstevel@tonic-gate 			    !IN6_ARE_ADDR_EQUAL(&faddr6->sin6_addr, &zeroes)) ||
19140Sstevel@tonic-gate 			    (so->so_type != SOCK_RAW &&
19150Sstevel@tonic-gate 			    sin6->sin6_port != faddr6->sin6_port &&
19160Sstevel@tonic-gate 			    faddr6->sin6_port != 0)) {
19170Sstevel@tonic-gate #ifdef DEBUG
19180Sstevel@tonic-gate 				dprintso(so, 0,
19190Sstevel@tonic-gate 				    ("sockfs: T_UNITDATA_IND mismatch: %s",
19205753Sgww 				    pr_addr(so->so_family,
1921*8348SEric.Yu@Sun.COM 				    (struct sockaddr *)addr, addrlen)));
19220Sstevel@tonic-gate 				dprintso(so, 0, (" - %s\n",
1923*8348SEric.Yu@Sun.COM 				    pr_addr(so->so_family, sti->sti_faddr_sa,
1924*8348SEric.Yu@Sun.COM 				    (t_uscalar_t)sti->sti_faddr_len)));
19250Sstevel@tonic-gate #endif /* DEBUG */
19260Sstevel@tonic-gate 				mutex_exit(&so->so_lock);
19270Sstevel@tonic-gate 				freemsg(mp);
19280Sstevel@tonic-gate 				return (NULL);
19290Sstevel@tonic-gate 			}
19300Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
19310Sstevel@tonic-gate 		} else if (so->so_family == AF_UNIX &&
19320Sstevel@tonic-gate 		    msgdsize(mp->b_cont) == 0 &&
19330Sstevel@tonic-gate 		    tudi->OPT_length != 0) {
19340Sstevel@tonic-gate 			/*
19350Sstevel@tonic-gate 			 * Attempt to extract AF_UNIX
19360Sstevel@tonic-gate 			 * SO_UNIX_CLOSE indication from options.
19370Sstevel@tonic-gate 			 */
19380Sstevel@tonic-gate 			void *opt;
19390Sstevel@tonic-gate 			t_uscalar_t optlen = tudi->OPT_length;
19400Sstevel@tonic-gate 
19410Sstevel@tonic-gate 			opt = sogetoff(mp, tudi->OPT_offset,
19425753Sgww 			    optlen, __TPI_ALIGN_SIZE);
19430Sstevel@tonic-gate 			if (opt == NULL) {
19440Sstevel@tonic-gate 				/* The len/off falls outside mp */
19450Sstevel@tonic-gate 				freemsg(mp);
19460Sstevel@tonic-gate 				mutex_enter(&so->so_lock);
19470Sstevel@tonic-gate 				soseterror(so, EPROTO);
19480Sstevel@tonic-gate 				mutex_exit(&so->so_lock);
19491548Srshoaib 				zcmn_err(getzoneid(), CE_WARN,
19500Sstevel@tonic-gate 				    "sockfs: T_unidata_ind with invalid "
19510Sstevel@tonic-gate 				    "optlen/offset %u/%d\n",
19520Sstevel@tonic-gate 				    optlen, tudi->OPT_offset);
19530Sstevel@tonic-gate 				return (NULL);
19540Sstevel@tonic-gate 			}
19550Sstevel@tonic-gate 			/*
19560Sstevel@tonic-gate 			 * If we received a unix close indication mark the
19570Sstevel@tonic-gate 			 * socket and discard this message.
19580Sstevel@tonic-gate 			 */
19590Sstevel@tonic-gate 			if (so_getopt_unix_close(opt, optlen)) {
19600Sstevel@tonic-gate 				mutex_enter(&so->so_lock);
19610Sstevel@tonic-gate 				sobreakconn(so, ECONNRESET);
19620Sstevel@tonic-gate 				mutex_exit(&so->so_lock);
19630Sstevel@tonic-gate 				strsetrerror(SOTOV(so), 0, 0, sogetrderr);
19640Sstevel@tonic-gate 				freemsg(mp);
19650Sstevel@tonic-gate 				*pollwakeups = POLLIN | POLLRDNORM;
19660Sstevel@tonic-gate 				*allmsgsigs = S_INPUT | S_RDNORM;
19670Sstevel@tonic-gate 				*wakeups = RSLEEP;
19680Sstevel@tonic-gate 				return (NULL);
19690Sstevel@tonic-gate 			}
19700Sstevel@tonic-gate 		}
19710Sstevel@tonic-gate 		*allmsgsigs = S_INPUT | S_RDNORM;
19720Sstevel@tonic-gate 		*pollwakeups = POLLIN | POLLRDNORM;
19730Sstevel@tonic-gate 		*wakeups = RSLEEP;
19740Sstevel@tonic-gate 		return (mp);
19750Sstevel@tonic-gate 	}
19760Sstevel@tonic-gate 
19770Sstevel@tonic-gate 	case T_OPTDATA_IND: {
19780Sstevel@tonic-gate 		struct T_optdata_ind	*tdi = &tpr->optdata_ind;
19790Sstevel@tonic-gate 
19800Sstevel@tonic-gate 		if (MBLKL(mp) < sizeof (struct T_optdata_ind)) {
19811548Srshoaib 			zcmn_err(getzoneid(), CE_WARN,
19820Sstevel@tonic-gate 			    "sockfs: Too short T_OPTDATA_IND. Len = %ld\n",
19830Sstevel@tonic-gate 			    (ptrdiff_t)(MBLKL(mp)));
19840Sstevel@tonic-gate 			freemsg(mp);
19850Sstevel@tonic-gate 			return (NULL);
19860Sstevel@tonic-gate 		}
19870Sstevel@tonic-gate 		/*
19880Sstevel@tonic-gate 		 * Allow zero-length messages carrying options.
19890Sstevel@tonic-gate 		 * This is used when carrying the SO_UNIX_CLOSE option.
19900Sstevel@tonic-gate 		 */
19910Sstevel@tonic-gate 		if (so->so_family == AF_UNIX && msgdsize(mp->b_cont) == 0 &&
19920Sstevel@tonic-gate 		    tdi->OPT_length != 0) {
19930Sstevel@tonic-gate 			/*
19940Sstevel@tonic-gate 			 * Attempt to extract AF_UNIX close indication
19950Sstevel@tonic-gate 			 * from the options. Ignore any other options -
19960Sstevel@tonic-gate 			 * those are handled once the message is removed
19970Sstevel@tonic-gate 			 * from the queue.
19980Sstevel@tonic-gate 			 * The close indication message should not carry data.
19990Sstevel@tonic-gate 			 */
20000Sstevel@tonic-gate 			void *opt;
20010Sstevel@tonic-gate 			t_uscalar_t optlen = tdi->OPT_length;
20020Sstevel@tonic-gate 
20030Sstevel@tonic-gate 			opt = sogetoff(mp, tdi->OPT_offset,
20045753Sgww 			    optlen, __TPI_ALIGN_SIZE);
20050Sstevel@tonic-gate 			if (opt == NULL) {
20060Sstevel@tonic-gate 				/* The len/off falls outside mp */
20070Sstevel@tonic-gate 				freemsg(mp);
20080Sstevel@tonic-gate 				mutex_enter(&so->so_lock);
20090Sstevel@tonic-gate 				soseterror(so, EPROTO);
20100Sstevel@tonic-gate 				mutex_exit(&so->so_lock);
20111548Srshoaib 				zcmn_err(getzoneid(), CE_WARN,
20120Sstevel@tonic-gate 				    "sockfs: T_optdata_ind with invalid "
20130Sstevel@tonic-gate 				    "optlen/offset %u/%d\n",
20140Sstevel@tonic-gate 				    optlen, tdi->OPT_offset);
20150Sstevel@tonic-gate 				return (NULL);
20160Sstevel@tonic-gate 			}
20170Sstevel@tonic-gate 			/*
20180Sstevel@tonic-gate 			 * If we received a close indication mark the
20190Sstevel@tonic-gate 			 * socket and discard this message.
20200Sstevel@tonic-gate 			 */
20210Sstevel@tonic-gate 			if (so_getopt_unix_close(opt, optlen)) {
20220Sstevel@tonic-gate 				mutex_enter(&so->so_lock);
20230Sstevel@tonic-gate 				socantsendmore(so);
2024*8348SEric.Yu@Sun.COM 				sti->sti_faddr_valid = 0;
20250Sstevel@tonic-gate 				mutex_exit(&so->so_lock);
20260Sstevel@tonic-gate 				strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
20270Sstevel@tonic-gate 				freemsg(mp);
20280Sstevel@tonic-gate 				return (NULL);
20290Sstevel@tonic-gate 			}
20300Sstevel@tonic-gate 		}
20310Sstevel@tonic-gate 		*allmsgsigs = S_INPUT | S_RDNORM;
20320Sstevel@tonic-gate 		*pollwakeups = POLLIN | POLLRDNORM;
20330Sstevel@tonic-gate 		*wakeups = RSLEEP;
20340Sstevel@tonic-gate 		return (mp);
20350Sstevel@tonic-gate 	}
20360Sstevel@tonic-gate 
20370Sstevel@tonic-gate 	case T_EXDATA_IND: {
20380Sstevel@tonic-gate 		mblk_t		*mctl, *mdata;
20392933Sss146032 		mblk_t *lbp;
20402933Sss146032 		union T_primitives *tprp;
20412933Sss146032 		struct stdata   *stp;
20422933Sss146032 		queue_t *qp;
20430Sstevel@tonic-gate 
20440Sstevel@tonic-gate 		if (MBLKL(mp) < sizeof (struct T_exdata_ind)) {
20451548Srshoaib 			zcmn_err(getzoneid(), CE_WARN,
20460Sstevel@tonic-gate 			    "sockfs: Too short T_EXDATA_IND. Len = %ld\n",
20470Sstevel@tonic-gate 			    (ptrdiff_t)(MBLKL(mp)));
20480Sstevel@tonic-gate 			freemsg(mp);
20490Sstevel@tonic-gate 			return (NULL);
20500Sstevel@tonic-gate 		}
20510Sstevel@tonic-gate 		/*
20520Sstevel@tonic-gate 		 * Ignore zero-length T_EXDATA_IND messages. These might be
20530Sstevel@tonic-gate 		 * generated by some transports.
20540Sstevel@tonic-gate 		 *
20550Sstevel@tonic-gate 		 * This is needed to prevent read (which skips the M_PROTO
20560Sstevel@tonic-gate 		 * part) to unexpectedly return 0 (or return EWOULDBLOCK
20570Sstevel@tonic-gate 		 * on a non-blocking socket after select/poll has indicated
20580Sstevel@tonic-gate 		 * that data is available).
20590Sstevel@tonic-gate 		 */
20600Sstevel@tonic-gate 		dprintso(so, 1,
20615753Sgww 		    ("T_EXDATA_IND(%p): counts %d/%d state %s\n",
2062*8348SEric.Yu@Sun.COM 		    (void *)vp, sti->sti_oobsigcnt, sti->sti_oobcnt,
20635753Sgww 		    pr_state(so->so_state, so->so_mode)));
20640Sstevel@tonic-gate 
20650Sstevel@tonic-gate 		if (msgdsize(mp->b_cont) == 0) {
20660Sstevel@tonic-gate 			dprintso(so, 0,
20675753Sgww 			    ("strsock_proto: zero length T_EXDATA_IND\n"));
20680Sstevel@tonic-gate 			freemsg(mp);
20690Sstevel@tonic-gate 			return (NULL);
20700Sstevel@tonic-gate 		}
20710Sstevel@tonic-gate 
20720Sstevel@tonic-gate 		/*
20730Sstevel@tonic-gate 		 * Split into the T_EXDATA_IND and the M_DATA part.
20740Sstevel@tonic-gate 		 * We process these three pieces separately:
20750Sstevel@tonic-gate 		 *	signal generation
20760Sstevel@tonic-gate 		 *	handling T_EXDATA_IND
20770Sstevel@tonic-gate 		 *	handling M_DATA component
20780Sstevel@tonic-gate 		 */
20790Sstevel@tonic-gate 		mctl = mp;
20800Sstevel@tonic-gate 		mdata = mctl->b_cont;
20810Sstevel@tonic-gate 		mctl->b_cont = NULL;
20820Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
20830Sstevel@tonic-gate 		so_oob_sig(so, 0, allmsgsigs, pollwakeups);
20840Sstevel@tonic-gate 		mctl = so_oob_exdata(so, mctl, allmsgsigs, pollwakeups);
20850Sstevel@tonic-gate 		mdata = so_oob_data(so, mdata, allmsgsigs, pollwakeups);
20860Sstevel@tonic-gate 
20872933Sss146032 		stp = vp->v_stream;
20882933Sss146032 		ASSERT(stp != NULL);
20892933Sss146032 		qp = _RD(stp->sd_wrq);
20902933Sss146032 
20912933Sss146032 		mutex_enter(QLOCK(qp));
20922933Sss146032 		lbp = qp->q_last;
20932933Sss146032 
20942933Sss146032 		/*
20952933Sss146032 		 * We want to avoid queueing up a string of T_EXDATA_IND
20962933Sss146032 		 * messages with no intervening data messages at the stream
20972933Sss146032 		 * head. These messages contribute to the total message
20982933Sss146032 		 * count. Eventually this can lead to STREAMS flow control
20992933Sss146032 		 * and also cause TCP to advertise a zero window condition
21002933Sss146032 		 * to the peer. This can happen in the degenerate case where
21012933Sss146032 		 * the sender and receiver exchange only OOB data. The sender
21022933Sss146032 		 * only sends messages with MSG_OOB flag and the receiver
21032933Sss146032 		 * receives only MSG_OOB messages and does not use SO_OOBINLINE.
21042933Sss146032 		 * An example of this scenario has been reported in applications
21052933Sss146032 		 * that use OOB data to exchange heart beats. Flow control
21062933Sss146032 		 * relief will never happen if the application only reads OOB
21072933Sss146032 		 * data which is done directly by sorecvoob() and the
21082933Sss146032 		 * T_EXDATA_IND messages at the streamhead won't be consumed.
21092933Sss146032 		 * Note that there is no correctness issue in compressing the
21102933Sss146032 		 * string of T_EXDATA_IND messages into a single T_EXDATA_IND
21112933Sss146032 		 * message. A single read that does not specify MSG_OOB will
21122933Sss146032 		 * read across all the marks in a loop in sotpi_recvmsg().
21132933Sss146032 		 * Each mark is individually distinguishable only if the
21142933Sss146032 		 * T_EXDATA_IND messages are separated by data messages.
21152933Sss146032 		 */
21162933Sss146032 		if ((qp->q_first != NULL) && (DB_TYPE(lbp) == M_PROTO)) {
21172933Sss146032 			tprp = (union T_primitives *)lbp->b_rptr;
21182933Sss146032 			if ((tprp->type == T_EXDATA_IND) &&
21192933Sss146032 			    !(so->so_options & SO_OOBINLINE)) {
21202933Sss146032 
21212933Sss146032 				/*
21222933Sss146032 				 * free the new M_PROTO message
21232933Sss146032 				 */
21242933Sss146032 				freemsg(mctl);
21252933Sss146032 
21262933Sss146032 				/*
21272933Sss146032 				 * adjust the OOB count and OOB signal count
21282933Sss146032 				 * just incremented for the new OOB data.
21292933Sss146032 				 */
2130*8348SEric.Yu@Sun.COM 				sti->sti_oobcnt--;
2131*8348SEric.Yu@Sun.COM 				sti->sti_oobsigcnt--;
21322933Sss146032 				mutex_exit(QLOCK(qp));
21332933Sss146032 				mutex_exit(&so->so_lock);
21342933Sss146032 				return (NULL);
21352933Sss146032 			}
21362933Sss146032 		}
21372933Sss146032 		mutex_exit(QLOCK(qp));
21382933Sss146032 
21390Sstevel@tonic-gate 		/*
21400Sstevel@tonic-gate 		 * Pass the T_EXDATA_IND and the M_DATA back separately
21410Sstevel@tonic-gate 		 * by using b_next linkage. (The stream head will queue any
21420Sstevel@tonic-gate 		 * b_next linked messages separately.) This is needed
21430Sstevel@tonic-gate 		 * since MSGMARK applies to the last byte of the message
21440Sstevel@tonic-gate 		 * hence we can not have any M_DATA component attached
21450Sstevel@tonic-gate 		 * to the marked T_EXDATA_IND. Note that the stream head
21460Sstevel@tonic-gate 		 * will not consolidate M_DATA messages onto an MSGMARK'ed
21470Sstevel@tonic-gate 		 * message in order to preserve the constraint that
21480Sstevel@tonic-gate 		 * the T_EXDATA_IND always is a separate message.
21490Sstevel@tonic-gate 		 */
21500Sstevel@tonic-gate 		ASSERT(mctl != NULL);
21510Sstevel@tonic-gate 		mctl->b_next = mdata;
21520Sstevel@tonic-gate 		mp = mctl;
21530Sstevel@tonic-gate #ifdef DEBUG
21540Sstevel@tonic-gate 		if (mdata == NULL) {
21550Sstevel@tonic-gate 			dprintso(so, 1,
21565753Sgww 			    ("after outofline T_EXDATA_IND(%p): "
21575753Sgww 			    "counts %d/%d  poll 0x%x sig 0x%x state %s\n",
2158*8348SEric.Yu@Sun.COM 			    (void *)vp, sti->sti_oobsigcnt,
2159*8348SEric.Yu@Sun.COM 			    sti->sti_oobcnt, *pollwakeups, *allmsgsigs,
21605753Sgww 			    pr_state(so->so_state, so->so_mode)));
21610Sstevel@tonic-gate 		} else {
21620Sstevel@tonic-gate 			dprintso(so, 1,
21635753Sgww 			    ("after inline T_EXDATA_IND(%p): "
21645753Sgww 			    "counts %d/%d  poll 0x%x sig 0x%x state %s\n",
2165*8348SEric.Yu@Sun.COM 			    (void *)vp, sti->sti_oobsigcnt,
2166*8348SEric.Yu@Sun.COM 			    sti->sti_oobcnt, *pollwakeups, *allmsgsigs,
21675753Sgww 			    pr_state(so->so_state, so->so_mode)));
21680Sstevel@tonic-gate 		}
21690Sstevel@tonic-gate #endif /* DEBUG */
21700Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
21710Sstevel@tonic-gate 		*wakeups = RSLEEP;
21720Sstevel@tonic-gate 		return (mp);
21730Sstevel@tonic-gate 	}
21740Sstevel@tonic-gate 
21750Sstevel@tonic-gate 	case T_CONN_CON: {
21760Sstevel@tonic-gate 		struct T_conn_con	*conn_con;
21770Sstevel@tonic-gate 		void			*addr;
21780Sstevel@tonic-gate 		t_uscalar_t		addrlen;
21790Sstevel@tonic-gate 
21800Sstevel@tonic-gate 		/*
21810Sstevel@tonic-gate 		 * Verify the state, update the state to ISCONNECTED,
21820Sstevel@tonic-gate 		 * record the potentially new address in the message,
21830Sstevel@tonic-gate 		 * and drop the message.
21840Sstevel@tonic-gate 		 */
21850Sstevel@tonic-gate 		if (MBLKL(mp) < sizeof (struct T_conn_con)) {
21861548Srshoaib 			zcmn_err(getzoneid(), CE_WARN,
21870Sstevel@tonic-gate 			    "sockfs: Too short T_CONN_CON. Len = %ld\n",
21880Sstevel@tonic-gate 			    (ptrdiff_t)(MBLKL(mp)));
21890Sstevel@tonic-gate 			freemsg(mp);
21900Sstevel@tonic-gate 			return (NULL);
21910Sstevel@tonic-gate 		}
21920Sstevel@tonic-gate 
21930Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
21940Sstevel@tonic-gate 		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) !=
21950Sstevel@tonic-gate 		    SS_ISCONNECTING) {
21960Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
21970Sstevel@tonic-gate 			dprintso(so, 1,
21985753Sgww 			    ("T_CONN_CON: state %x\n", so->so_state));
21990Sstevel@tonic-gate 			freemsg(mp);
22000Sstevel@tonic-gate 			return (NULL);
22010Sstevel@tonic-gate 		}
22020Sstevel@tonic-gate 
22030Sstevel@tonic-gate 		conn_con = &tpr->conn_con;
22040Sstevel@tonic-gate 		addrlen = conn_con->RES_length;
22050Sstevel@tonic-gate 		/*
22060Sstevel@tonic-gate 		 * Allow the address to be of different size than sent down
22070Sstevel@tonic-gate 		 * in the T_CONN_REQ as long as it doesn't exceed the maxlen.
22080Sstevel@tonic-gate 		 * For AF_UNIX require the identical length.
22090Sstevel@tonic-gate 		 */
22100Sstevel@tonic-gate 		if (so->so_family == AF_UNIX ?
2211*8348SEric.Yu@Sun.COM 		    addrlen != (t_uscalar_t)sizeof (sti->sti_ux_laddr) :
2212*8348SEric.Yu@Sun.COM 		    addrlen > (t_uscalar_t)sti->sti_faddr_maxlen) {
22131548Srshoaib 			zcmn_err(getzoneid(), CE_WARN,
22140Sstevel@tonic-gate 			    "sockfs: T_conn_con with different "
22150Sstevel@tonic-gate 			    "length %u/%d\n",
22160Sstevel@tonic-gate 			    addrlen, conn_con->RES_length);
22170Sstevel@tonic-gate 			soisdisconnected(so, EPROTO);
2218*8348SEric.Yu@Sun.COM 			sti->sti_laddr_valid = 0;
2219*8348SEric.Yu@Sun.COM 			sti->sti_faddr_valid = 0;
22200Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
22210Sstevel@tonic-gate 			strsetrerror(SOTOV(so), 0, 0, sogetrderr);
22220Sstevel@tonic-gate 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
22230Sstevel@tonic-gate 			strseteof(SOTOV(so), 1);
22240Sstevel@tonic-gate 			freemsg(mp);
22250Sstevel@tonic-gate 			/*
22260Sstevel@tonic-gate 			 * strseteof takes care of read side wakeups,
22270Sstevel@tonic-gate 			 * pollwakeups, and signals.
22280Sstevel@tonic-gate 			 */
22290Sstevel@tonic-gate 			*wakeups = WSLEEP;
22300Sstevel@tonic-gate 			*allmsgsigs = S_OUTPUT;
22310Sstevel@tonic-gate 			*pollwakeups = POLLOUT;
22320Sstevel@tonic-gate 			return (NULL);
22330Sstevel@tonic-gate 		}
22340Sstevel@tonic-gate 		addr = sogetoff(mp, conn_con->RES_offset, addrlen, 1);
22350Sstevel@tonic-gate 		if (addr == NULL) {
22361548Srshoaib 			zcmn_err(getzoneid(), CE_WARN,
22370Sstevel@tonic-gate 			    "sockfs: T_conn_con with invalid "
22380Sstevel@tonic-gate 			    "addrlen/offset %u/%d\n",
22390Sstevel@tonic-gate 			    addrlen, conn_con->RES_offset);
22400Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
22410Sstevel@tonic-gate 			strsetrerror(SOTOV(so), 0, 0, sogetrderr);
22420Sstevel@tonic-gate 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
22430Sstevel@tonic-gate 			strseteof(SOTOV(so), 1);
22440Sstevel@tonic-gate 			freemsg(mp);
22450Sstevel@tonic-gate 			/*
22460Sstevel@tonic-gate 			 * strseteof takes care of read side wakeups,
22470Sstevel@tonic-gate 			 * pollwakeups, and signals.
22480Sstevel@tonic-gate 			 */
22490Sstevel@tonic-gate 			*wakeups = WSLEEP;
22500Sstevel@tonic-gate 			*allmsgsigs = S_OUTPUT;
22510Sstevel@tonic-gate 			*pollwakeups = POLLOUT;
22520Sstevel@tonic-gate 			return (NULL);
22530Sstevel@tonic-gate 		}
22540Sstevel@tonic-gate 
22550Sstevel@tonic-gate 		/*
22560Sstevel@tonic-gate 		 * Save for getpeername.
22570Sstevel@tonic-gate 		 */
22580Sstevel@tonic-gate 		if (so->so_family != AF_UNIX) {
2259*8348SEric.Yu@Sun.COM 			sti->sti_faddr_len = (socklen_t)addrlen;
2260*8348SEric.Yu@Sun.COM 			ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen);
2261*8348SEric.Yu@Sun.COM 			bcopy(addr, sti->sti_faddr_sa, addrlen);
2262*8348SEric.Yu@Sun.COM 			sti->sti_faddr_valid = 1;
22630Sstevel@tonic-gate 		}
22640Sstevel@tonic-gate 
22650Sstevel@tonic-gate 		if (so->so_peercred != NULL)
22660Sstevel@tonic-gate 			crfree(so->so_peercred);
22670Sstevel@tonic-gate 		so->so_peercred = DB_CRED(mp);
22680Sstevel@tonic-gate 		so->so_cpid = DB_CPID(mp);
22690Sstevel@tonic-gate 		if (so->so_peercred != NULL)
22700Sstevel@tonic-gate 			crhold(so->so_peercred);
22710Sstevel@tonic-gate 
22720Sstevel@tonic-gate 		/* Wakeup anybody sleeping in sowaitconnected */
22730Sstevel@tonic-gate 		soisconnected(so);
22740Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
22750Sstevel@tonic-gate 
22760Sstevel@tonic-gate 		/*
22770Sstevel@tonic-gate 		 * The socket is now available for sending data.
22780Sstevel@tonic-gate 		 */
22790Sstevel@tonic-gate 		*wakeups = WSLEEP;
22800Sstevel@tonic-gate 		*allmsgsigs = S_OUTPUT;
22810Sstevel@tonic-gate 		*pollwakeups = POLLOUT;
22820Sstevel@tonic-gate 		freemsg(mp);
22830Sstevel@tonic-gate 		return (NULL);
22840Sstevel@tonic-gate 	}
22850Sstevel@tonic-gate 
2286898Skais 	/*
2287898Skais 	 * Extra processing in case of an SSL proxy, before queuing or
2288898Skais 	 * forwarding to the fallback endpoint
2289898Skais 	 */
2290898Skais 	case T_SSL_PROXY_CONN_IND:
22910Sstevel@tonic-gate 	case T_CONN_IND:
22920Sstevel@tonic-gate 		/*
22930Sstevel@tonic-gate 		 * Verify the min size and queue the message on
2294*8348SEric.Yu@Sun.COM 		 * the sti_conn_ind_head/tail list.
22950Sstevel@tonic-gate 		 */
22960Sstevel@tonic-gate 		if (MBLKL(mp) < sizeof (struct T_conn_ind)) {
22971548Srshoaib 			zcmn_err(getzoneid(), CE_WARN,
22980Sstevel@tonic-gate 			    "sockfs: Too short T_CONN_IND. Len = %ld\n",
22990Sstevel@tonic-gate 			    (ptrdiff_t)(MBLKL(mp)));
23000Sstevel@tonic-gate 			freemsg(mp);
23010Sstevel@tonic-gate 			return (NULL);
23020Sstevel@tonic-gate 		}
23030Sstevel@tonic-gate 
23040Sstevel@tonic-gate 		if (audit_active)
23050Sstevel@tonic-gate 			audit_sock(T_CONN_IND, strvp2wq(vp), mp, 0);
23060Sstevel@tonic-gate 		if (!(so->so_state & SS_ACCEPTCONN)) {
23071548Srshoaib 			zcmn_err(getzoneid(), CE_WARN,
23080Sstevel@tonic-gate 			    "sockfs: T_conn_ind on non-listening socket\n");
23090Sstevel@tonic-gate 			freemsg(mp);
23100Sstevel@tonic-gate 			return (NULL);
23110Sstevel@tonic-gate 		}
2312898Skais 
2313898Skais 		if (tpr->type == T_SSL_PROXY_CONN_IND && mp->b_cont == NULL) {
2314898Skais 			/* No context: need to fall back */
2315898Skais 			struct sonode *fbso;
2316898Skais 			stdata_t *fbstp;
2317898Skais 
2318898Skais 			tpr->type = T_CONN_IND;
2319898Skais 
2320*8348SEric.Yu@Sun.COM 			fbso = kssl_find_fallback(sti->sti_kssl_ent);
2321898Skais 
2322898Skais 			/*
2323898Skais 			 * No fallback: the remote will timeout and
2324898Skais 			 * disconnect.
2325898Skais 			 */
2326898Skais 			if (fbso == NULL) {
2327898Skais 				freemsg(mp);
2328898Skais 				return (NULL);
2329898Skais 			}
2330898Skais 			fbstp = SOTOV(fbso)->v_stream;
2331898Skais 			qreply(fbstp->sd_wrq->q_next, mp);
2332898Skais 			return (NULL);
2333898Skais 		}
23340Sstevel@tonic-gate 		soqueueconnind(so, mp);
23350Sstevel@tonic-gate 		*allmsgsigs = S_INPUT | S_RDNORM;
23360Sstevel@tonic-gate 		*pollwakeups = POLLIN | POLLRDNORM;
23370Sstevel@tonic-gate 		*wakeups = RSLEEP;
23380Sstevel@tonic-gate 		return (NULL);
23390Sstevel@tonic-gate 
23400Sstevel@tonic-gate 	case T_ORDREL_IND:
23410Sstevel@tonic-gate 		if (MBLKL(mp) < sizeof (struct T_ordrel_ind)) {
23421548Srshoaib 			zcmn_err(getzoneid(), CE_WARN,
23430Sstevel@tonic-gate 			    "sockfs: Too short T_ORDREL_IND. Len = %ld\n",
23440Sstevel@tonic-gate 			    (ptrdiff_t)(MBLKL(mp)));
23450Sstevel@tonic-gate 			freemsg(mp);
23460Sstevel@tonic-gate 			return (NULL);
23470Sstevel@tonic-gate 		}
23480Sstevel@tonic-gate 
23490Sstevel@tonic-gate 		/*
23500Sstevel@tonic-gate 		 * Some providers send this when not fully connected.
23510Sstevel@tonic-gate 		 * SunLink X.25 needs to retrieve disconnect reason after
23520Sstevel@tonic-gate 		 * disconnect for compatibility. It uses T_ORDREL_IND
23530Sstevel@tonic-gate 		 * instead of T_DISCON_IND so that it may use the
23540Sstevel@tonic-gate 		 * endpoint after a connect failure to retrieve the
23550Sstevel@tonic-gate 		 * reason using an ioctl. Thus we explicitly clear
23560Sstevel@tonic-gate 		 * SS_ISCONNECTING here for SunLink X.25.
23570Sstevel@tonic-gate 		 * This is a needed TPI violation.
23580Sstevel@tonic-gate 		 */
23590Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
23600Sstevel@tonic-gate 		so->so_state &= ~SS_ISCONNECTING;
23610Sstevel@tonic-gate 		socantrcvmore(so);
23620Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
23630Sstevel@tonic-gate 		strseteof(SOTOV(so), 1);
23640Sstevel@tonic-gate 		/*
23650Sstevel@tonic-gate 		 * strseteof takes care of read side wakeups,
23660Sstevel@tonic-gate 		 * pollwakeups, and signals.
23670Sstevel@tonic-gate 		 */
23680Sstevel@tonic-gate 		freemsg(mp);
23690Sstevel@tonic-gate 		return (NULL);
23700Sstevel@tonic-gate 
23710Sstevel@tonic-gate 	case T_DISCON_IND:
23720Sstevel@tonic-gate 		if (MBLKL(mp) < sizeof (struct T_discon_ind)) {
23731548Srshoaib 			zcmn_err(getzoneid(), CE_WARN,
23740Sstevel@tonic-gate 			    "sockfs: Too short T_DISCON_IND. Len = %ld\n",
23750Sstevel@tonic-gate 			    (ptrdiff_t)(MBLKL(mp)));
23760Sstevel@tonic-gate 			freemsg(mp);
23770Sstevel@tonic-gate 			return (NULL);
23780Sstevel@tonic-gate 		}
23790Sstevel@tonic-gate 		if (so->so_state & SS_ACCEPTCONN) {
23800Sstevel@tonic-gate 			/*
23810Sstevel@tonic-gate 			 * This is a listener. Look for a queued T_CONN_IND
23820Sstevel@tonic-gate 			 * with a matching sequence number and remove it
23830Sstevel@tonic-gate 			 * from the list.
23840Sstevel@tonic-gate 			 * It is normal to not find the sequence number since
23850Sstevel@tonic-gate 			 * the soaccept might have already dequeued it
23860Sstevel@tonic-gate 			 * (in which case the T_CONN_RES will fail with
23870Sstevel@tonic-gate 			 * TBADSEQ).
23880Sstevel@tonic-gate 			 */
23890Sstevel@tonic-gate 			(void) soflushconnind(so, tpr->discon_ind.SEQ_number);
23900Sstevel@tonic-gate 			freemsg(mp);
23910Sstevel@tonic-gate 			return (0);
23920Sstevel@tonic-gate 		}
23930Sstevel@tonic-gate 
23940Sstevel@tonic-gate 		/*
23950Sstevel@tonic-gate 		 * Not a listener
23960Sstevel@tonic-gate 		 *
23970Sstevel@tonic-gate 		 * If SS_CANTRCVMORE for AF_UNIX ignore the discon_reason.
23980Sstevel@tonic-gate 		 * Such a discon_ind appears when the peer has first done
23990Sstevel@tonic-gate 		 * a shutdown() followed by a close() in which case we just
24000Sstevel@tonic-gate 		 * want to record socantsendmore.
24010Sstevel@tonic-gate 		 * In this case sockfs first receives a T_ORDREL_IND followed
24020Sstevel@tonic-gate 		 * by a T_DISCON_IND.
24030Sstevel@tonic-gate 		 * Note that for other transports (e.g. TCP) we need to handle
24040Sstevel@tonic-gate 		 * the discon_ind in this case since it signals an error.
24050Sstevel@tonic-gate 		 */
24060Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
24070Sstevel@tonic-gate 		if ((so->so_state & SS_CANTRCVMORE) &&
24080Sstevel@tonic-gate 		    (so->so_family == AF_UNIX)) {
24090Sstevel@tonic-gate 			socantsendmore(so);
2410*8348SEric.Yu@Sun.COM 			sti->sti_faddr_valid = 0;
24110Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
24120Sstevel@tonic-gate 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
24130Sstevel@tonic-gate 			dprintso(so, 1,
24145753Sgww 			    ("T_DISCON_IND: error %d\n", so->so_error));
24150Sstevel@tonic-gate 			freemsg(mp);
24160Sstevel@tonic-gate 			/*
24170Sstevel@tonic-gate 			 * Set these variables for caller to process them.
24180Sstevel@tonic-gate 			 * For the else part where T_DISCON_IND is processed,
24190Sstevel@tonic-gate 			 * this will be done in the function being called
24200Sstevel@tonic-gate 			 * (strsock_discon_ind())
24210Sstevel@tonic-gate 			 */
24220Sstevel@tonic-gate 			*wakeups = WSLEEP;
24230Sstevel@tonic-gate 			*allmsgsigs = S_OUTPUT;
24240Sstevel@tonic-gate 			*pollwakeups = POLLOUT;
24250Sstevel@tonic-gate 		} else if (so->so_flag & (SOASYNC_UNBIND | SOLOCKED)) {
24260Sstevel@tonic-gate 			/*
24270Sstevel@tonic-gate 			 * Deferred processing of T_DISCON_IND
24280Sstevel@tonic-gate 			 */
24290Sstevel@tonic-gate 			so_save_discon_ind(so, mp, strsock_discon_ind);
24300Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
24310Sstevel@tonic-gate 		} else {
24320Sstevel@tonic-gate 			/*
24330Sstevel@tonic-gate 			 * Process T_DISCON_IND now
24340Sstevel@tonic-gate 			 */
24350Sstevel@tonic-gate 			(void) strsock_discon_ind(so, mp);
24360Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
24370Sstevel@tonic-gate 		}
24380Sstevel@tonic-gate 		return (NULL);
24390Sstevel@tonic-gate 
24400Sstevel@tonic-gate 	case T_UDERROR_IND: {
24410Sstevel@tonic-gate 		struct T_uderror_ind	*tudi = &tpr->uderror_ind;
24420Sstevel@tonic-gate 		void			*addr;
24430Sstevel@tonic-gate 		t_uscalar_t		addrlen;
24440Sstevel@tonic-gate 		int			error;
24450Sstevel@tonic-gate 
24460Sstevel@tonic-gate 		dprintso(so, 0,
24475753Sgww 		    ("T_UDERROR_IND: error %d\n", tudi->ERROR_type));
24480Sstevel@tonic-gate 
24490Sstevel@tonic-gate 		if (MBLKL(mp) < sizeof (struct T_uderror_ind)) {
24501548Srshoaib 			zcmn_err(getzoneid(), CE_WARN,
24510Sstevel@tonic-gate 			    "sockfs: Too short T_UDERROR_IND. Len = %ld\n",
24520Sstevel@tonic-gate 			    (ptrdiff_t)(MBLKL(mp)));
24530Sstevel@tonic-gate 			freemsg(mp);
24540Sstevel@tonic-gate 			return (NULL);
24550Sstevel@tonic-gate 		}
24560Sstevel@tonic-gate 		/* Ignore on connection-oriented transports */
24570Sstevel@tonic-gate 		if (so->so_mode & SM_CONNREQUIRED) {
24580Sstevel@tonic-gate 			freemsg(mp);
24590Sstevel@tonic-gate 			eprintsoline(so, 0);
24601548Srshoaib 			zcmn_err(getzoneid(), CE_WARN,
24610Sstevel@tonic-gate 			    "sockfs: T_uderror_ind on connection-oriented "
24620Sstevel@tonic-gate 			    "transport\n");
24630Sstevel@tonic-gate 			return (NULL);
24640Sstevel@tonic-gate 		}
24650Sstevel@tonic-gate 		addrlen = tudi->DEST_length;
24660Sstevel@tonic-gate 		addr = sogetoff(mp, tudi->DEST_offset, addrlen, 1);
24670Sstevel@tonic-gate 		if (addr == NULL) {
24681548Srshoaib 			zcmn_err(getzoneid(), CE_WARN,
24690Sstevel@tonic-gate 			    "sockfs: T_uderror_ind with invalid "
24700Sstevel@tonic-gate 			    "addrlen/offset %u/%d\n",
24710Sstevel@tonic-gate 			    addrlen, tudi->DEST_offset);
24720Sstevel@tonic-gate 			freemsg(mp);
24730Sstevel@tonic-gate 			return (NULL);
24740Sstevel@tonic-gate 		}
24750Sstevel@tonic-gate 
24760Sstevel@tonic-gate 		/* Verify source address for connected socket. */
24770Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
24780Sstevel@tonic-gate 		if (so->so_state & SS_ISCONNECTED) {
24790Sstevel@tonic-gate 			void *faddr;
24800Sstevel@tonic-gate 			t_uscalar_t faddr_len;
24810Sstevel@tonic-gate 			boolean_t match = B_FALSE;
24820Sstevel@tonic-gate 
24830Sstevel@tonic-gate 			switch (so->so_family) {
24840Sstevel@tonic-gate 			case AF_INET: {
24850Sstevel@tonic-gate 				/* Compare just IP address and port */
24860Sstevel@tonic-gate 				struct sockaddr_in *sin1, *sin2;
24870Sstevel@tonic-gate 
2488*8348SEric.Yu@Sun.COM 				sin1 = (struct sockaddr_in *)sti->sti_faddr_sa;
24890Sstevel@tonic-gate 				sin2 = (struct sockaddr_in *)addr;
24900Sstevel@tonic-gate 				if (addrlen == sizeof (struct sockaddr_in) &&
24910Sstevel@tonic-gate 				    sin1->sin_port == sin2->sin_port &&
24920Sstevel@tonic-gate 				    sin1->sin_addr.s_addr ==
24930Sstevel@tonic-gate 				    sin2->sin_addr.s_addr)
24940Sstevel@tonic-gate 					match = B_TRUE;
24950Sstevel@tonic-gate 				break;
24960Sstevel@tonic-gate 			}
24970Sstevel@tonic-gate 			case AF_INET6: {
24980Sstevel@tonic-gate 				/* Compare just IP address and port. Not flow */
24990Sstevel@tonic-gate 				struct sockaddr_in6 *sin1, *sin2;
25000Sstevel@tonic-gate 
2501*8348SEric.Yu@Sun.COM 				sin1 = (struct sockaddr_in6 *)sti->sti_faddr_sa;
25020Sstevel@tonic-gate 				sin2 = (struct sockaddr_in6 *)addr;
25030Sstevel@tonic-gate 				if (addrlen == sizeof (struct sockaddr_in6) &&
25040Sstevel@tonic-gate 				    sin1->sin6_port == sin2->sin6_port &&
25050Sstevel@tonic-gate 				    IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
25065753Sgww 				    &sin2->sin6_addr))
25070Sstevel@tonic-gate 					match = B_TRUE;
25080Sstevel@tonic-gate 				break;
25090Sstevel@tonic-gate 			}
25100Sstevel@tonic-gate 			case AF_UNIX:
2511*8348SEric.Yu@Sun.COM 				faddr = &sti->sti_ux_faddr;
25120Sstevel@tonic-gate 				faddr_len =
2513*8348SEric.Yu@Sun.COM 				    (t_uscalar_t)sizeof (sti->sti_ux_faddr);
25140Sstevel@tonic-gate 				if (faddr_len == addrlen &&
25150Sstevel@tonic-gate 				    bcmp(addr, faddr, addrlen) == 0)
25160Sstevel@tonic-gate 					match = B_TRUE;
25170Sstevel@tonic-gate 				break;
25180Sstevel@tonic-gate 			default:
2519*8348SEric.Yu@Sun.COM 				faddr = sti->sti_faddr_sa;
2520*8348SEric.Yu@Sun.COM 				faddr_len = (t_uscalar_t)sti->sti_faddr_len;
25210Sstevel@tonic-gate 				if (faddr_len == addrlen &&
25220Sstevel@tonic-gate 				    bcmp(addr, faddr, addrlen) == 0)
25230Sstevel@tonic-gate 					match = B_TRUE;
25240Sstevel@tonic-gate 				break;
25250Sstevel@tonic-gate 			}
25260Sstevel@tonic-gate 
25270Sstevel@tonic-gate 			if (!match) {
25280Sstevel@tonic-gate #ifdef DEBUG
25290Sstevel@tonic-gate 				dprintso(so, 0,
25305753Sgww 				    ("sockfs: T_UDERR_IND mismatch: %s - ",
25315753Sgww 				    pr_addr(so->so_family,
2532*8348SEric.Yu@Sun.COM 				    (struct sockaddr *)addr, addrlen)));
25330Sstevel@tonic-gate 				dprintso(so, 0, ("%s\n",
2534*8348SEric.Yu@Sun.COM 				    pr_addr(so->so_family, sti->sti_faddr_sa,
2535*8348SEric.Yu@Sun.COM 				    sti->sti_faddr_len)));
25360Sstevel@tonic-gate #endif /* DEBUG */
25370Sstevel@tonic-gate 				mutex_exit(&so->so_lock);
25380Sstevel@tonic-gate 				freemsg(mp);
25390Sstevel@tonic-gate 				return (NULL);
25400Sstevel@tonic-gate 			}
25410Sstevel@tonic-gate 			/*
25420Sstevel@tonic-gate 			 * Make the write error nonpersistent. If the error
25430Sstevel@tonic-gate 			 * is zero we use ECONNRESET.
25440Sstevel@tonic-gate 			 * This assumes that the name space for ERROR_type
25450Sstevel@tonic-gate 			 * is the errno name space.
25460Sstevel@tonic-gate 			 */
25470Sstevel@tonic-gate 			if (tudi->ERROR_type != 0)
25480Sstevel@tonic-gate 				error = tudi->ERROR_type;
25490Sstevel@tonic-gate 			else
25500Sstevel@tonic-gate 				error = ECONNRESET;
25510Sstevel@tonic-gate 
25520Sstevel@tonic-gate 			soseterror(so, error);
25530Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
25540Sstevel@tonic-gate 			strsetrerror(SOTOV(so), 0, 0, sogetrderr);
25550Sstevel@tonic-gate 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
25560Sstevel@tonic-gate 			*wakeups = RSLEEP | WSLEEP;
25570Sstevel@tonic-gate 			*allmsgsigs = S_INPUT | S_RDNORM | S_OUTPUT;
25580Sstevel@tonic-gate 			*pollwakeups = POLLIN | POLLRDNORM | POLLOUT;
25590Sstevel@tonic-gate 			freemsg(mp);
25600Sstevel@tonic-gate 			return (NULL);
25610Sstevel@tonic-gate 		}
25620Sstevel@tonic-gate 		/*
25630Sstevel@tonic-gate 		 * If the application asked for delayed errors
2564*8348SEric.Yu@Sun.COM 		 * record the T_UDERROR_IND sti_eaddr_mp and the reason in
2565*8348SEric.Yu@Sun.COM 		 * sti_delayed_error for delayed error posting. If the reason
25660Sstevel@tonic-gate 		 * is zero use ECONNRESET.
25670Sstevel@tonic-gate 		 * Note that delayed error indications do not make sense for
25680Sstevel@tonic-gate 		 * AF_UNIX sockets since sendto checks that the destination
25690Sstevel@tonic-gate 		 * address is valid at the time of the sendto.
25700Sstevel@tonic-gate 		 */
25710Sstevel@tonic-gate 		if (!(so->so_options & SO_DGRAM_ERRIND)) {
25720Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
25730Sstevel@tonic-gate 			freemsg(mp);
25740Sstevel@tonic-gate 			return (NULL);
25750Sstevel@tonic-gate 		}
2576*8348SEric.Yu@Sun.COM 		if (sti->sti_eaddr_mp != NULL)
2577*8348SEric.Yu@Sun.COM 			freemsg(sti->sti_eaddr_mp);
25780Sstevel@tonic-gate 
2579*8348SEric.Yu@Sun.COM 		sti->sti_eaddr_mp = mp;
25800Sstevel@tonic-gate 		if (tudi->ERROR_type != 0)
25810Sstevel@tonic-gate 			error = tudi->ERROR_type;
25820Sstevel@tonic-gate 		else
25830Sstevel@tonic-gate 			error = ECONNRESET;
2584*8348SEric.Yu@Sun.COM 		sti->sti_delayed_error = (ushort_t)error;
25850Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
25860Sstevel@tonic-gate 		return (NULL);
25870Sstevel@tonic-gate 	}
25880Sstevel@tonic-gate 
25890Sstevel@tonic-gate 	case T_ERROR_ACK:
25900Sstevel@tonic-gate 		dprintso(so, 0,
25915753Sgww 		    ("strsock_proto: T_ERROR_ACK for %d, error %d/%d\n",
25925753Sgww 		    tpr->error_ack.ERROR_prim,
25935753Sgww 		    tpr->error_ack.TLI_error,
25945753Sgww 		    tpr->error_ack.UNIX_error));
25950Sstevel@tonic-gate 
25960Sstevel@tonic-gate 		if (MBLKL(mp) < sizeof (struct T_error_ack)) {
25971548Srshoaib 			zcmn_err(getzoneid(), CE_WARN,
25980Sstevel@tonic-gate 			    "sockfs: Too short T_ERROR_ACK. Len = %ld\n",
25990Sstevel@tonic-gate 			    (ptrdiff_t)(MBLKL(mp)));
26000Sstevel@tonic-gate 			freemsg(mp);
26010Sstevel@tonic-gate 			return (NULL);
26020Sstevel@tonic-gate 		}
26030Sstevel@tonic-gate 		/*
26040Sstevel@tonic-gate 		 * Check if we were waiting for the async message
26050Sstevel@tonic-gate 		 */
26060Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
26070Sstevel@tonic-gate 		if ((so->so_flag & SOASYNC_UNBIND) &&
26080Sstevel@tonic-gate 		    tpr->error_ack.ERROR_prim == T_UNBIND_REQ) {
26090Sstevel@tonic-gate 			so_unlock_single(so, SOASYNC_UNBIND);
26100Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
26110Sstevel@tonic-gate 			freemsg(mp);
26120Sstevel@tonic-gate 			return (NULL);
26130Sstevel@tonic-gate 		}
26140Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
26150Sstevel@tonic-gate 		soqueueack(so, mp);
26160Sstevel@tonic-gate 		return (NULL);
26170Sstevel@tonic-gate 
26180Sstevel@tonic-gate 	case T_OK_ACK:
26190Sstevel@tonic-gate 		if (MBLKL(mp) < sizeof (struct T_ok_ack)) {
26201548Srshoaib 			zcmn_err(getzoneid(), CE_WARN,
26210Sstevel@tonic-gate 			    "sockfs: Too short T_OK_ACK. Len = %ld\n",
26220Sstevel@tonic-gate 			    (ptrdiff_t)(MBLKL(mp)));
26230Sstevel@tonic-gate 			freemsg(mp);
26240Sstevel@tonic-gate 			return (NULL);
26250Sstevel@tonic-gate 		}
26260Sstevel@tonic-gate 		/*
26270Sstevel@tonic-gate 		 * Check if we were waiting for the async message
26280Sstevel@tonic-gate 		 */
26290Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
26300Sstevel@tonic-gate 		if ((so->so_flag & SOASYNC_UNBIND) &&
26310Sstevel@tonic-gate 		    tpr->ok_ack.CORRECT_prim == T_UNBIND_REQ) {
26320Sstevel@tonic-gate 			dprintso(so, 1,
26335753Sgww 			    ("strsock_proto: T_OK_ACK async unbind\n"));
26340Sstevel@tonic-gate 			so_unlock_single(so, SOASYNC_UNBIND);
26350Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
26360Sstevel@tonic-gate 			freemsg(mp);
26370Sstevel@tonic-gate 			return (NULL);
26380Sstevel@tonic-gate 		}
26390Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
26400Sstevel@tonic-gate 		soqueueack(so, mp);
26410Sstevel@tonic-gate 		return (NULL);
26420Sstevel@tonic-gate 
26430Sstevel@tonic-gate 	case T_INFO_ACK:
26440Sstevel@tonic-gate 		if (MBLKL(mp) < sizeof (struct T_info_ack)) {
26451548Srshoaib 			zcmn_err(getzoneid(), CE_WARN,
26460Sstevel@tonic-gate 			    "sockfs: Too short T_INFO_ACK. Len = %ld\n",
26470Sstevel@tonic-gate 			    (ptrdiff_t)(MBLKL(mp)));
26480Sstevel@tonic-gate 			freemsg(mp);
26490Sstevel@tonic-gate 			return (NULL);
26500Sstevel@tonic-gate 		}
26510Sstevel@tonic-gate 		soqueueack(so, mp);
26520Sstevel@tonic-gate 		return (NULL);
26530Sstevel@tonic-gate 
26540Sstevel@tonic-gate 	case T_CAPABILITY_ACK:
26550Sstevel@tonic-gate 		/*
26560Sstevel@tonic-gate 		 * A T_capability_ack need only be large enough to hold
26570Sstevel@tonic-gate 		 * the PRIM_type and CAP_bits1 fields; checking for anything
26580Sstevel@tonic-gate 		 * larger might reject a correct response from an older
26590Sstevel@tonic-gate 		 * provider.
26600Sstevel@tonic-gate 		 */
26610Sstevel@tonic-gate 		if (MBLKL(mp) < 2 * sizeof (t_uscalar_t)) {
26621548Srshoaib 			zcmn_err(getzoneid(), CE_WARN,
26630Sstevel@tonic-gate 			    "sockfs: Too short T_CAPABILITY_ACK. Len = %ld\n",
26640Sstevel@tonic-gate 			    (ptrdiff_t)(MBLKL(mp)));
26650Sstevel@tonic-gate 			freemsg(mp);
26660Sstevel@tonic-gate 			return (NULL);
26670Sstevel@tonic-gate 		}
26680Sstevel@tonic-gate 		soqueueack(so, mp);
26690Sstevel@tonic-gate 		return (NULL);
26700Sstevel@tonic-gate 
26710Sstevel@tonic-gate 	case T_BIND_ACK:
26720Sstevel@tonic-gate 		if (MBLKL(mp) < sizeof (struct T_bind_ack)) {
26731548Srshoaib 			zcmn_err(getzoneid(), CE_WARN,
26740Sstevel@tonic-gate 			    "sockfs: Too short T_BIND_ACK. Len = %ld\n",
26750Sstevel@tonic-gate 			    (ptrdiff_t)(MBLKL(mp)));
26760Sstevel@tonic-gate 			freemsg(mp);
26770Sstevel@tonic-gate 			return (NULL);
26780Sstevel@tonic-gate 		}
26790Sstevel@tonic-gate 		soqueueack(so, mp);
26800Sstevel@tonic-gate 		return (NULL);
26810Sstevel@tonic-gate 
26820Sstevel@tonic-gate 	case T_OPTMGMT_ACK:
26830Sstevel@tonic-gate 		if (MBLKL(mp) < sizeof (struct T_optmgmt_ack)) {
26841548Srshoaib 			zcmn_err(getzoneid(), CE_WARN,
26850Sstevel@tonic-gate 			    "sockfs: Too short T_OPTMGMT_ACK. Len = %ld\n",
26860Sstevel@tonic-gate 			    (ptrdiff_t)(MBLKL(mp)));
26870Sstevel@tonic-gate 			freemsg(mp);
26880Sstevel@tonic-gate 			return (NULL);
26890Sstevel@tonic-gate 		}
26900Sstevel@tonic-gate 		soqueueack(so, mp);
26910Sstevel@tonic-gate 		return (NULL);
26920Sstevel@tonic-gate 	default:
26930Sstevel@tonic-gate #ifdef DEBUG
26941548Srshoaib 		zcmn_err(getzoneid(), CE_WARN,
26955753Sgww 		    "sockfs: unknown TPI primitive %d received\n",
26965753Sgww 		    tpr->type);
26970Sstevel@tonic-gate #endif /* DEBUG */
26980Sstevel@tonic-gate 		freemsg(mp);
26990Sstevel@tonic-gate 		return (NULL);
27000Sstevel@tonic-gate 	}
27010Sstevel@tonic-gate }
27020Sstevel@tonic-gate 
27030Sstevel@tonic-gate /*
27040Sstevel@tonic-gate  * This routine is registered with the stream head to receive other
27050Sstevel@tonic-gate  * (non-data, and non-proto) messages.
27060Sstevel@tonic-gate  *
27070Sstevel@tonic-gate  * Returns NULL if the message was consumed.
27080Sstevel@tonic-gate  * Returns an mblk to make that mblk be processed by the stream head.
27090Sstevel@tonic-gate  *
27100Sstevel@tonic-gate  * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and
27110Sstevel@tonic-gate  * *pollwakeups) for the stream head to take action on.
27120Sstevel@tonic-gate  */
27130Sstevel@tonic-gate static mblk_t *
27140Sstevel@tonic-gate strsock_misc(vnode_t *vp, mblk_t *mp,
27150Sstevel@tonic-gate 		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
27160Sstevel@tonic-gate 		strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
27170Sstevel@tonic-gate {
27180Sstevel@tonic-gate 	struct sonode *so;
2719*8348SEric.Yu@Sun.COM 	sotpi_info_t *sti;
27200Sstevel@tonic-gate 
27210Sstevel@tonic-gate 	so = VTOSO(vp);
2722*8348SEric.Yu@Sun.COM 	sti = SOTOTPI(so);
27230Sstevel@tonic-gate 
27240Sstevel@tonic-gate 	dprintso(so, 1, ("strsock_misc(%p, %p, 0x%x)\n",
27257240Srh87107 	    (void *)vp, (void *)mp, DB_TYPE(mp)));
27260Sstevel@tonic-gate 
27270Sstevel@tonic-gate 	/* Set default return values */
27280Sstevel@tonic-gate 	*wakeups = *allmsgsigs = *firstmsgsigs = *pollwakeups = 0;
27290Sstevel@tonic-gate 
27300Sstevel@tonic-gate 	switch (DB_TYPE(mp)) {
27310Sstevel@tonic-gate 	case M_PCSIG:
27320Sstevel@tonic-gate 		/*
27330Sstevel@tonic-gate 		 * This assumes that an M_PCSIG for the urgent data arrives
27340Sstevel@tonic-gate 		 * before the corresponding T_EXDATA_IND.
27350Sstevel@tonic-gate 		 *
27360Sstevel@tonic-gate 		 * Note: Just like in SunOS 4.X and 4.4BSD a poll will be
27370Sstevel@tonic-gate 		 * awoken before the urgent data shows up.
27380Sstevel@tonic-gate 		 * For OOBINLINE this can result in select returning
27390Sstevel@tonic-gate 		 * only exceptions as opposed to except|read.
27400Sstevel@tonic-gate 		 */
27410Sstevel@tonic-gate 		if (*mp->b_rptr == SIGURG) {
27420Sstevel@tonic-gate 			mutex_enter(&so->so_lock);
27430Sstevel@tonic-gate 			dprintso(so, 1,
27445753Sgww 			    ("SIGURG(%p): counts %d/%d state %s\n",
2745*8348SEric.Yu@Sun.COM 			    (void *)vp, sti->sti_oobsigcnt, sti->sti_oobcnt,
27465753Sgww 			    pr_state(so->so_state, so->so_mode)));
27470Sstevel@tonic-gate 			so_oob_sig(so, 1, allmsgsigs, pollwakeups);
27480Sstevel@tonic-gate 			dprintso(so, 1,
27495753Sgww 			    ("after SIGURG(%p): counts %d/%d "
27505753Sgww 			    " poll 0x%x sig 0x%x state %s\n",
2751*8348SEric.Yu@Sun.COM 			    (void *)vp, sti->sti_oobsigcnt, sti->sti_oobcnt,
2752*8348SEric.Yu@Sun.COM 			    *pollwakeups, *allmsgsigs,
27535753Sgww 			    pr_state(so->so_state, so->so_mode)));
27540Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
27550Sstevel@tonic-gate 		}
27560Sstevel@tonic-gate 		freemsg(mp);
27570Sstevel@tonic-gate 		return (NULL);
27580Sstevel@tonic-gate 
27590Sstevel@tonic-gate 	case M_SIG:
27600Sstevel@tonic-gate 	case M_HANGUP:
27610Sstevel@tonic-gate 	case M_UNHANGUP:
27620Sstevel@tonic-gate 	case M_ERROR:
27630Sstevel@tonic-gate 		/* M_ERRORs etc are ignored */
27640Sstevel@tonic-gate 		freemsg(mp);
27650Sstevel@tonic-gate 		return (NULL);
27660Sstevel@tonic-gate 
27670Sstevel@tonic-gate 	case M_FLUSH:
27680Sstevel@tonic-gate 		/*
27690Sstevel@tonic-gate 		 * Do not flush read queue. If the M_FLUSH
27700Sstevel@tonic-gate 		 * arrives because of an impending T_discon_ind
27710Sstevel@tonic-gate 		 * we still have to keep any queued data - this is part of
27720Sstevel@tonic-gate 		 * socket semantics.
27730Sstevel@tonic-gate 		 */
27740Sstevel@tonic-gate 		if (*mp->b_rptr & FLUSHW) {
27750Sstevel@tonic-gate 			*mp->b_rptr &= ~FLUSHR;
27760Sstevel@tonic-gate 			return (mp);
27770Sstevel@tonic-gate 		}
27780Sstevel@tonic-gate 		freemsg(mp);
27790Sstevel@tonic-gate 		return (NULL);
27800Sstevel@tonic-gate 
27810Sstevel@tonic-gate 	default:
27820Sstevel@tonic-gate 		return (mp);
27830Sstevel@tonic-gate 	}
27840Sstevel@tonic-gate }
27850Sstevel@tonic-gate 
27860Sstevel@tonic-gate 
27870Sstevel@tonic-gate /* Register to receive signals for certain events */
27880Sstevel@tonic-gate int
27890Sstevel@tonic-gate so_set_asyncsigs(vnode_t *vp, pid_t pgrp, int events, int mode, cred_t *cr)
27900Sstevel@tonic-gate {
27910Sstevel@tonic-gate 	struct strsigset ss;
27920Sstevel@tonic-gate 	int32_t rval;
27930Sstevel@tonic-gate 
27940Sstevel@tonic-gate 	/*
27950Sstevel@tonic-gate 	 * Note that SOLOCKED will be set except for the call from soaccept().
27960Sstevel@tonic-gate 	 */
27970Sstevel@tonic-gate 	ASSERT(!mutex_owned(&VTOSO(vp)->so_lock));
27980Sstevel@tonic-gate 	ss.ss_pid = pgrp;
27990Sstevel@tonic-gate 	ss.ss_events = events;
28000Sstevel@tonic-gate 	return (strioctl(vp, I_ESETSIG, (intptr_t)&ss, mode, K_TO_K, cr,
28010Sstevel@tonic-gate 	    &rval));
28020Sstevel@tonic-gate }
28030Sstevel@tonic-gate 
28040Sstevel@tonic-gate 
28050Sstevel@tonic-gate /* Register for events matching the SS_ASYNC flag */
28060Sstevel@tonic-gate int
28070Sstevel@tonic-gate so_set_events(struct sonode *so, vnode_t *vp, cred_t *cr)
28080Sstevel@tonic-gate {
28090Sstevel@tonic-gate 	int events = so->so_state & SS_ASYNC ?
28100Sstevel@tonic-gate 	    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT :
28110Sstevel@tonic-gate 	    S_RDBAND | S_BANDURG;
28120Sstevel@tonic-gate 
28130Sstevel@tonic-gate 	return (so_set_asyncsigs(vp, so->so_pgrp, events, 0, cr));
28140Sstevel@tonic-gate }
28150Sstevel@tonic-gate 
28160Sstevel@tonic-gate 
28170Sstevel@tonic-gate /* Change the SS_ASYNC flag, and update signal delivery if needed */
28180Sstevel@tonic-gate int
28190Sstevel@tonic-gate so_flip_async(struct sonode *so, vnode_t *vp, int mode, cred_t *cr)
28200Sstevel@tonic-gate {
28210Sstevel@tonic-gate 	ASSERT(mutex_owned(&so->so_lock));
28220Sstevel@tonic-gate 	if (so->so_pgrp != 0) {
28230Sstevel@tonic-gate 		int error;
28240Sstevel@tonic-gate 		int events = so->so_state & SS_ASYNC ?		/* Old flag */
28250Sstevel@tonic-gate 		    S_RDBAND | S_BANDURG :			/* New sigs */
28260Sstevel@tonic-gate 		    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT;
28270Sstevel@tonic-gate 
28280Sstevel@tonic-gate 		so_lock_single(so);
28290Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
28300Sstevel@tonic-gate 
28310Sstevel@tonic-gate 		error = so_set_asyncsigs(vp, so->so_pgrp, events, mode, cr);
28320Sstevel@tonic-gate 
28330Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
28340Sstevel@tonic-gate 		so_unlock_single(so, SOLOCKED);
28350Sstevel@tonic-gate 		if (error)
28360Sstevel@tonic-gate 			return (error);
28370Sstevel@tonic-gate 	}
28380Sstevel@tonic-gate 	so->so_state ^= SS_ASYNC;
28390Sstevel@tonic-gate 	return (0);
28400Sstevel@tonic-gate }
28410Sstevel@tonic-gate 
28420Sstevel@tonic-gate /*
28430Sstevel@tonic-gate  * Set new pid/pgrp for SIGPOLL (or SIGIO for FIOASYNC mode), replacing
28440Sstevel@tonic-gate  * any existing one.  If passed zero, just clear the existing one.
28450Sstevel@tonic-gate  */
28460Sstevel@tonic-gate int
28470Sstevel@tonic-gate so_set_siggrp(struct sonode *so, vnode_t *vp, pid_t pgrp, int mode, cred_t *cr)
28480Sstevel@tonic-gate {
28490Sstevel@tonic-gate 	int events = so->so_state & SS_ASYNC ?
28500Sstevel@tonic-gate 	    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT :
28510Sstevel@tonic-gate 	    S_RDBAND | S_BANDURG;
28520Sstevel@tonic-gate 	int error;
28530Sstevel@tonic-gate 
28540Sstevel@tonic-gate 	ASSERT(mutex_owned(&so->so_lock));
28550Sstevel@tonic-gate 
28560Sstevel@tonic-gate 	/*
28570Sstevel@tonic-gate 	 * Change socket process (group).
28580Sstevel@tonic-gate 	 *
28590Sstevel@tonic-gate 	 * strioctl (via so_set_asyncsigs) will perform permission check and
28600Sstevel@tonic-gate 	 * also keep a PID_HOLD to prevent the pid from being reused.
28610Sstevel@tonic-gate 	 */
28620Sstevel@tonic-gate 	so_lock_single(so);
28630Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
28640Sstevel@tonic-gate 
28650Sstevel@tonic-gate 	if (pgrp != 0) {
28660Sstevel@tonic-gate 		dprintso(so, 1, ("setown: adding pgrp %d ev 0x%x\n",
28670Sstevel@tonic-gate 		    pgrp, events));
28680Sstevel@tonic-gate 		error = so_set_asyncsigs(vp, pgrp, events, mode, cr);
28690Sstevel@tonic-gate 		if (error != 0) {
28700Sstevel@tonic-gate 			eprintsoline(so, error);
28710Sstevel@tonic-gate 			goto bad;
28720Sstevel@tonic-gate 		}
28730Sstevel@tonic-gate 	}
28740Sstevel@tonic-gate 	/* Remove the previously registered process/group */
28750Sstevel@tonic-gate 	if (so->so_pgrp != 0) {
28760Sstevel@tonic-gate 		dprintso(so, 1, ("setown: removing pgrp %d\n", so->so_pgrp));
28770Sstevel@tonic-gate 		error = so_set_asyncsigs(vp, so->so_pgrp, 0, mode, cr);
28780Sstevel@tonic-gate 		if (error != 0) {
28790Sstevel@tonic-gate 			eprintsoline(so, error);
28800Sstevel@tonic-gate 			error = 0;
28810Sstevel@tonic-gate 		}
28820Sstevel@tonic-gate 	}
28830Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
28840Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
28850Sstevel@tonic-gate 	so->so_pgrp = pgrp;
28860Sstevel@tonic-gate 	return (0);
28870Sstevel@tonic-gate bad:
28880Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
28890Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
28900Sstevel@tonic-gate 	return (error);
28910Sstevel@tonic-gate }
28920Sstevel@tonic-gate 
2893*8348SEric.Yu@Sun.COM /*
2894*8348SEric.Yu@Sun.COM  * Wrapper for getmsg. If the socket has been converted to a stream
2895*8348SEric.Yu@Sun.COM  * pass the request to the stream head.
2896*8348SEric.Yu@Sun.COM  */
2897*8348SEric.Yu@Sun.COM int
2898*8348SEric.Yu@Sun.COM sock_getmsg(
2899*8348SEric.Yu@Sun.COM 	struct vnode *vp,
2900*8348SEric.Yu@Sun.COM 	struct strbuf *mctl,
2901*8348SEric.Yu@Sun.COM 	struct strbuf *mdata,
2902*8348SEric.Yu@Sun.COM 	uchar_t *prip,
2903*8348SEric.Yu@Sun.COM 	int *flagsp,
2904*8348SEric.Yu@Sun.COM 	int fmode,
2905*8348SEric.Yu@Sun.COM 	rval_t *rvp
2906*8348SEric.Yu@Sun.COM )
2907*8348SEric.Yu@Sun.COM {
2908*8348SEric.Yu@Sun.COM 	struct sonode *so;
29090Sstevel@tonic-gate 
2910*8348SEric.Yu@Sun.COM 	ASSERT(vp->v_type == VSOCK);
2911*8348SEric.Yu@Sun.COM 	/*
2912*8348SEric.Yu@Sun.COM 	 * Use the stream head to find the real socket vnode.
2913*8348SEric.Yu@Sun.COM 	 * This is needed when namefs sits above sockfs.  Some
2914*8348SEric.Yu@Sun.COM 	 * sockets (like SCTP) are not streams.
2915*8348SEric.Yu@Sun.COM 	 */
2916*8348SEric.Yu@Sun.COM 	if (!vp->v_stream) {
2917*8348SEric.Yu@Sun.COM 		return (ENOSTR);
2918*8348SEric.Yu@Sun.COM 	}
2919*8348SEric.Yu@Sun.COM 	ASSERT(vp->v_stream->sd_vnode);
2920*8348SEric.Yu@Sun.COM 	vp = vp->v_stream->sd_vnode;
2921*8348SEric.Yu@Sun.COM 	ASSERT(vn_matchops(vp, socket_vnodeops));
2922*8348SEric.Yu@Sun.COM 	so = VTOSO(vp);
2923*8348SEric.Yu@Sun.COM 
2924*8348SEric.Yu@Sun.COM 	dprintso(so, 1, ("sock_getmsg(%p) %s\n",
2925*8348SEric.Yu@Sun.COM 	    (void *)so, pr_state(so->so_state, so->so_mode)));
2926*8348SEric.Yu@Sun.COM 
2927*8348SEric.Yu@Sun.COM 	if (so->so_version == SOV_STREAM) {
2928*8348SEric.Yu@Sun.COM 		/* The imaginary "sockmod" has been popped - act as a stream */
2929*8348SEric.Yu@Sun.COM 		return (strgetmsg(vp, mctl, mdata, prip, flagsp, fmode, rvp));
2930*8348SEric.Yu@Sun.COM 	}
2931*8348SEric.Yu@Sun.COM 	eprintsoline(so, ENOSTR);
2932*8348SEric.Yu@Sun.COM 	return (ENOSTR);
2933*8348SEric.Yu@Sun.COM }
29340Sstevel@tonic-gate 
29350Sstevel@tonic-gate /*
2936*8348SEric.Yu@Sun.COM  * Wrapper for putmsg. If the socket has been converted to a stream
2937*8348SEric.Yu@Sun.COM  * pass the request to the stream head.
2938*8348SEric.Yu@Sun.COM  *
2939*8348SEric.Yu@Sun.COM  * Note that a while a regular socket (SOV_SOCKSTREAM) does support the
2940*8348SEric.Yu@Sun.COM  * streams ioctl set it does not support putmsg and getmsg.
2941*8348SEric.Yu@Sun.COM  * Allowing putmsg would prevent sockfs from tracking the state of
2942*8348SEric.Yu@Sun.COM  * the socket/transport and would also invalidate the locking in sockfs.
29430Sstevel@tonic-gate  */
2944*8348SEric.Yu@Sun.COM int
2945*8348SEric.Yu@Sun.COM sock_putmsg(
2946*8348SEric.Yu@Sun.COM 	struct vnode *vp,
2947*8348SEric.Yu@Sun.COM 	struct strbuf *mctl,
2948*8348SEric.Yu@Sun.COM 	struct strbuf *mdata,
2949*8348SEric.Yu@Sun.COM 	uchar_t pri,
2950*8348SEric.Yu@Sun.COM 	int flag,
2951*8348SEric.Yu@Sun.COM 	int fmode
2952*8348SEric.Yu@Sun.COM )
2953*8348SEric.Yu@Sun.COM {
2954*8348SEric.Yu@Sun.COM 	struct sonode *so;
2955*8348SEric.Yu@Sun.COM 
2956*8348SEric.Yu@Sun.COM 	ASSERT(vp->v_type == VSOCK);
2957*8348SEric.Yu@Sun.COM 	/*
2958*8348SEric.Yu@Sun.COM 	 * Use the stream head to find the real socket vnode.
2959*8348SEric.Yu@Sun.COM 	 * This is needed when namefs sits above sockfs.
2960*8348SEric.Yu@Sun.COM 	 */
2961*8348SEric.Yu@Sun.COM 	if (!vp->v_stream) {
2962*8348SEric.Yu@Sun.COM 		return (ENOSTR);
2963*8348SEric.Yu@Sun.COM 	}
2964*8348SEric.Yu@Sun.COM 	ASSERT(vp->v_stream->sd_vnode);
2965*8348SEric.Yu@Sun.COM 	vp = vp->v_stream->sd_vnode;
2966*8348SEric.Yu@Sun.COM 	ASSERT(vn_matchops(vp, socket_vnodeops));
2967*8348SEric.Yu@Sun.COM 	so = VTOSO(vp);
2968*8348SEric.Yu@Sun.COM 
2969*8348SEric.Yu@Sun.COM 	dprintso(so, 1, ("sock_putmsg(%p) %s\n",
2970*8348SEric.Yu@Sun.COM 	    (void *)so, pr_state(so->so_state, so->so_mode)));
29710Sstevel@tonic-gate 
2972*8348SEric.Yu@Sun.COM 	if (so->so_version == SOV_STREAM) {
2973*8348SEric.Yu@Sun.COM 		/* The imaginary "sockmod" has been popped - act as a stream */
2974*8348SEric.Yu@Sun.COM 		return (strputmsg(vp, mctl, mdata, pri, flag, fmode));
2975*8348SEric.Yu@Sun.COM 	}
2976*8348SEric.Yu@Sun.COM 	eprintsoline(so, ENOSTR);
2977*8348SEric.Yu@Sun.COM 	return (ENOSTR);
2978*8348SEric.Yu@Sun.COM }
2979*8348SEric.Yu@Sun.COM 
2980*8348SEric.Yu@Sun.COM /*
2981*8348SEric.Yu@Sun.COM  * Special function called only from f_getfl().
2982*8348SEric.Yu@Sun.COM  * Returns FASYNC if the SS_ASYNC flag is set on a socket, else 0.
2983*8348SEric.Yu@Sun.COM  * No locks are acquired here, so it is safe to use while uf_lock is held.
2984*8348SEric.Yu@Sun.COM  * This exists solely for BSD fcntl() FASYNC compatibility.
2985*8348SEric.Yu@Sun.COM  */
2986*8348SEric.Yu@Sun.COM int
2987*8348SEric.Yu@Sun.COM sock_getfasync(vnode_t *vp)
29880Sstevel@tonic-gate {
2989*8348SEric.Yu@Sun.COM 	struct sonode *so;
2990*8348SEric.Yu@Sun.COM 
2991*8348SEric.Yu@Sun.COM 	ASSERT(vp->v_type == VSOCK);
2992*8348SEric.Yu@Sun.COM 	/*
2993*8348SEric.Yu@Sun.COM 	 * For stream model, v_stream is used; For non-stream, v_stream always
2994*8348SEric.Yu@Sun.COM 	 * equals NULL
2995*8348SEric.Yu@Sun.COM 	 */
2996*8348SEric.Yu@Sun.COM 	if (vp->v_stream != NULL)
2997*8348SEric.Yu@Sun.COM 		so = VTOSO(vp->v_stream->sd_vnode);
29980Sstevel@tonic-gate 	else
2999*8348SEric.Yu@Sun.COM 		so = VTOSO(vp);
3000*8348SEric.Yu@Sun.COM 
3001*8348SEric.Yu@Sun.COM 	if (so->so_version == SOV_STREAM || !(so->so_state & SS_ASYNC))
3002*8348SEric.Yu@Sun.COM 		return (0);
3003*8348SEric.Yu@Sun.COM 
3004*8348SEric.Yu@Sun.COM 	return (FASYNC);
30050Sstevel@tonic-gate }
30066707Sbrutus 
30076707Sbrutus /*
30086707Sbrutus  * Sockfs sodirect STREAMS read put procedure. Called from sodirect enable
30096707Sbrutus  * transport driver/module with an mblk_t chain.
30106707Sbrutus  *
30116707Sbrutus  * Note, we in-line putq() for the fast-path cases of q is empty, q_last and
30126707Sbrutus  * bp are of type M_DATA. All other cases we call putq().
30136707Sbrutus  *
30146707Sbrutus  * On success a zero will be return, else an errno will be returned.
30156707Sbrutus  */
30166707Sbrutus int
30176707Sbrutus sodput(sodirect_t *sodp, mblk_t *bp)
30186707Sbrutus {
30196707Sbrutus 	queue_t		*q = sodp->sod_q;
30206707Sbrutus 	struct stdata	*stp = (struct stdata *)q->q_ptr;
30216707Sbrutus 	mblk_t		*nbp;
30226707Sbrutus 	mblk_t		*last = q->q_last;
30236707Sbrutus 	int		bytecnt = 0;
30246707Sbrutus 	int		mblkcnt = 0;
30256707Sbrutus 
30266707Sbrutus 
30277660SEric.Yu@Sun.COM 	ASSERT(MUTEX_HELD(sodp->sod_lockp));
30286707Sbrutus 
30296707Sbrutus 	if (stp->sd_flag == STREOF) {
30307660SEric.Yu@Sun.COM 		do {
30317660SEric.Yu@Sun.COM 			if ((nbp = bp->b_next) != NULL)
30327660SEric.Yu@Sun.COM 				bp->b_next = NULL;
30337660SEric.Yu@Sun.COM 			freemsg(bp);
30347660SEric.Yu@Sun.COM 		} while ((bp = nbp) != NULL);
30357660SEric.Yu@Sun.COM 
30367660SEric.Yu@Sun.COM 		return (0);
30376707Sbrutus 	}
30386707Sbrutus 
30397660SEric.Yu@Sun.COM 	mutex_enter(QLOCK(q));
30406707Sbrutus 	if (q->q_first == NULL) {
30416707Sbrutus 		/* Q empty, really fast fast-path */
30426707Sbrutus 		bp->b_prev = NULL;
30436707Sbrutus 		bp->b_next = NULL;
30446707Sbrutus 		q->q_first = bp;
30456707Sbrutus 		q->q_last = bp;
30466707Sbrutus 
30476707Sbrutus 	} else if (last->b_datap->db_type == M_DATA &&
30486707Sbrutus 	    bp->b_datap->db_type == M_DATA) {
30496707Sbrutus 		/*
30506707Sbrutus 		 * Last mblk_t chain and bp are both type M_DATA so
30516707Sbrutus 		 * in-line putq() here, if the DBLK_UIOA state match
30526707Sbrutus 		 * add bp to the end of the current last chain, else
30536707Sbrutus 		 * start a new last chain with bp.
30546707Sbrutus 		 */
30556707Sbrutus 		if ((last->b_datap->db_flags & DBLK_UIOA) ==
30566707Sbrutus 		    (bp->b_datap->db_flags & DBLK_UIOA)) {
30576707Sbrutus 			/* Added to end */
30586707Sbrutus 			while ((nbp = last->b_cont) != NULL)
30596707Sbrutus 				last = nbp;
30606707Sbrutus 			last->b_cont = bp;
30616707Sbrutus 		} else {
30626707Sbrutus 			/* New last */
30637660SEric.Yu@Sun.COM 			ASSERT((bp->b_datap->db_flags & DBLK_UIOA) == 0 ||
30647660SEric.Yu@Sun.COM 			    msgdsize(bp) == sodp->sod_uioa.uioa_mbytes);
30656707Sbrutus 			last->b_next = bp;
30666707Sbrutus 			bp->b_next = NULL;
30676707Sbrutus 			bp->b_prev = last;
30686707Sbrutus 			q->q_last = bp;
30696707Sbrutus 		}
30706707Sbrutus 	} else {
30716707Sbrutus 		/*
30726707Sbrutus 		 * Can't use q_last so just call putq().
30736707Sbrutus 		 */
30747660SEric.Yu@Sun.COM 		mutex_exit(QLOCK(q));
30757660SEric.Yu@Sun.COM 
30767660SEric.Yu@Sun.COM 		ASSERT((bp->b_datap->db_flags & DBLK_UIOA) == 0 ||
30777660SEric.Yu@Sun.COM 		    msgdsize(bp) == sodp->sod_uioa.uioa_mbytes);
30786707Sbrutus 		(void) putq(q, bp);
30796707Sbrutus 		return (0);
30806707Sbrutus 	}
30816707Sbrutus 
30826707Sbrutus 	/* Count bytes and mblk_t's */
30836707Sbrutus 	do {
30846707Sbrutus 		bytecnt += MBLKL(bp);
30856707Sbrutus 		mblkcnt++;
30866707Sbrutus 	} while ((bp = bp->b_cont) != NULL);
30876707Sbrutus 	q->q_count += bytecnt;
30886707Sbrutus 	q->q_mblkcnt += mblkcnt;
30896707Sbrutus 
30906707Sbrutus 	/* Check for QFULL */
30916707Sbrutus 	if (q->q_count >= q->q_hiwat + sodp->sod_want ||
30926707Sbrutus 	    q->q_mblkcnt >= q->q_hiwat) {
30936707Sbrutus 		q->q_flag |= QFULL;
30946707Sbrutus 	}
30956707Sbrutus 
30967660SEric.Yu@Sun.COM 	mutex_exit(QLOCK(q));
30976707Sbrutus 	return (0);
30986707Sbrutus }
30996707Sbrutus 
31006707Sbrutus /*
31016707Sbrutus  * Sockfs sodirect read wakeup. Called from a sodirect enabled transport
31026707Sbrutus  * driver/module to indicate that read-side data is available.
31036707Sbrutus  *
31046707Sbrutus  * On return the sodirect_t.lock mutex will be exited so this must be the
31056707Sbrutus  * last sodirect_t call to guarantee atomic access of *sodp.
31066707Sbrutus  */
31076707Sbrutus void
31086707Sbrutus sodwakeup(sodirect_t *sodp)
31096707Sbrutus {
31106707Sbrutus 	queue_t		*q = sodp->sod_q;
31116707Sbrutus 	struct stdata	*stp = (struct stdata *)q->q_ptr;
31126707Sbrutus 
31137660SEric.Yu@Sun.COM 	ASSERT(MUTEX_HELD(sodp->sod_lockp));
31146707Sbrutus 
31156707Sbrutus 	if (stp->sd_flag & RSLEEP) {
31166707Sbrutus 		stp->sd_flag &= ~RSLEEP;
31176707Sbrutus 		cv_broadcast(&q->q_wait);
31186707Sbrutus 	}
31196707Sbrutus 
31206707Sbrutus 	if (stp->sd_rput_opt & SR_POLLIN) {
31216707Sbrutus 		stp->sd_rput_opt &= ~SR_POLLIN;
31227660SEric.Yu@Sun.COM 		mutex_exit(sodp->sod_lockp);
31236707Sbrutus 		pollwakeup(&stp->sd_pollist, POLLIN | POLLRDNORM);
31246707Sbrutus 	} else
31257660SEric.Yu@Sun.COM 		mutex_exit(sodp->sod_lockp);
31266707Sbrutus }
3127