xref: /onnv-gate/usr/src/uts/common/fs/sockfs/socktpi.c (revision 7660:1f3541e68e06)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51548Srshoaib  * Common Development and Distribution License (the "License").
61548Srshoaib  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
211548Srshoaib 
220Sstevel@tonic-gate /*
236707Sbrutus  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #include <sys/types.h>
280Sstevel@tonic-gate #include <sys/t_lock.h>
290Sstevel@tonic-gate #include <sys/param.h>
300Sstevel@tonic-gate #include <sys/systm.h>
310Sstevel@tonic-gate #include <sys/buf.h>
320Sstevel@tonic-gate #include <sys/conf.h>
330Sstevel@tonic-gate #include <sys/cred.h>
340Sstevel@tonic-gate #include <sys/kmem.h>
350Sstevel@tonic-gate #include <sys/sysmacros.h>
360Sstevel@tonic-gate #include <sys/vfs.h>
370Sstevel@tonic-gate #include <sys/vnode.h>
380Sstevel@tonic-gate #include <sys/debug.h>
390Sstevel@tonic-gate #include <sys/errno.h>
400Sstevel@tonic-gate #include <sys/time.h>
410Sstevel@tonic-gate #include <sys/file.h>
420Sstevel@tonic-gate #include <sys/open.h>
430Sstevel@tonic-gate #include <sys/user.h>
440Sstevel@tonic-gate #include <sys/termios.h>
450Sstevel@tonic-gate #include <sys/stream.h>
460Sstevel@tonic-gate #include <sys/strsubr.h>
470Sstevel@tonic-gate #include <sys/strsun.h>
480Sstevel@tonic-gate #include <sys/ddi.h>
490Sstevel@tonic-gate #include <sys/esunddi.h>
500Sstevel@tonic-gate #include <sys/flock.h>
510Sstevel@tonic-gate #include <sys/modctl.h>
520Sstevel@tonic-gate #include <sys/vtrace.h>
530Sstevel@tonic-gate #include <sys/cmn_err.h>
540Sstevel@tonic-gate #include <sys/pathname.h>
550Sstevel@tonic-gate 
560Sstevel@tonic-gate #include <sys/socket.h>
570Sstevel@tonic-gate #include <sys/socketvar.h>
58741Smasputra #include <sys/sockio.h>
596707Sbrutus #include <sys/sodirect.h>
600Sstevel@tonic-gate #include <netinet/in.h>
610Sstevel@tonic-gate #include <sys/un.h>
620Sstevel@tonic-gate #include <sys/strsun.h>
630Sstevel@tonic-gate 
640Sstevel@tonic-gate #include <sys/tiuser.h>
650Sstevel@tonic-gate #define	_SUN_TPI_VERSION	2
660Sstevel@tonic-gate #include <sys/tihdr.h>
670Sstevel@tonic-gate #include <sys/timod.h>		/* TI_GETMYNAME, TI_GETPEERNAME */
680Sstevel@tonic-gate 
690Sstevel@tonic-gate #include <c2/audit.h>
700Sstevel@tonic-gate 
710Sstevel@tonic-gate #include <inet/common.h>
720Sstevel@tonic-gate #include <inet/ip.h>
730Sstevel@tonic-gate #include <inet/ip6.h>
740Sstevel@tonic-gate #include <inet/tcp.h>
75741Smasputra #include <inet/udp_impl.h>
760Sstevel@tonic-gate 
771974Sbrutus #include <sys/zone.h>
781974Sbrutus 
790Sstevel@tonic-gate #include <fs/sockfs/nl7c.h>
801974Sbrutus #include <fs/sockfs/nl7curi.h>
810Sstevel@tonic-gate 
82898Skais #include <inet/kssl/ksslapi.h>
83898Skais 
840Sstevel@tonic-gate /*
850Sstevel@tonic-gate  * Possible failures when memory can't be allocated. The documented behavior:
860Sstevel@tonic-gate  *
870Sstevel@tonic-gate  * 		5.5:			4.X:		XNET:
880Sstevel@tonic-gate  * accept:	ENOMEM/ENOSR/EINTR	- (EINTR)	ENOMEM/ENOBUFS/ENOSR/
890Sstevel@tonic-gate  *							EINTR
900Sstevel@tonic-gate  *	(4.X does not document EINTR but returns it)
910Sstevel@tonic-gate  * bind:	ENOSR			-		ENOBUFS/ENOSR
920Sstevel@tonic-gate  * connect: 	EINTR			EINTR		ENOBUFS/ENOSR/EINTR
930Sstevel@tonic-gate  * getpeername:	ENOMEM/ENOSR		ENOBUFS (-)	ENOBUFS/ENOSR
940Sstevel@tonic-gate  * getsockname:	ENOMEM/ENOSR		ENOBUFS (-)	ENOBUFS/ENOSR
950Sstevel@tonic-gate  *	(4.X getpeername and getsockname do not fail in practice)
960Sstevel@tonic-gate  * getsockopt:	ENOMEM/ENOSR		-		ENOBUFS/ENOSR
970Sstevel@tonic-gate  * listen:	-			-		ENOBUFS
980Sstevel@tonic-gate  * recv:	ENOMEM/ENOSR/EINTR	EINTR		ENOBUFS/ENOMEM/ENOSR/
990Sstevel@tonic-gate  *							EINTR
1000Sstevel@tonic-gate  * send:	ENOMEM/ENOSR/EINTR	ENOBUFS/EINTR	ENOBUFS/ENOMEM/ENOSR/
1010Sstevel@tonic-gate  *							EINTR
1020Sstevel@tonic-gate  * setsockopt:	ENOMEM/ENOSR		-		ENOBUFS/ENOMEM/ENOSR
1030Sstevel@tonic-gate  * shutdown:	ENOMEM/ENOSR		-		ENOBUFS/ENOSR
1040Sstevel@tonic-gate  * socket:	ENOMEM/ENOSR		ENOBUFS		ENOBUFS/ENOMEM/ENOSR
1050Sstevel@tonic-gate  * socketpair:	ENOMEM/ENOSR		-		ENOBUFS/ENOMEM/ENOSR
1060Sstevel@tonic-gate  *
1070Sstevel@tonic-gate  * Resolution. When allocation fails:
1080Sstevel@tonic-gate  *	recv: return EINTR
1090Sstevel@tonic-gate  *	send: return EINTR
1100Sstevel@tonic-gate  *	connect, accept: EINTR
1110Sstevel@tonic-gate  *	bind, listen, shutdown (unbind, unix_close, disconnect): sleep
1120Sstevel@tonic-gate  *	socket, socketpair: ENOBUFS
1130Sstevel@tonic-gate  *	getpeername, getsockname: sleep
1140Sstevel@tonic-gate  *	getsockopt, setsockopt: sleep
1150Sstevel@tonic-gate  */
1160Sstevel@tonic-gate 
#ifdef SOCK_TEST
/*
 * Variables that make sockfs do something other than the standard TPI
 * for the AF_INET transports.
 *
 * When SOCK_TEST is defined these are real, patchable variables; otherwise
 * they are compile-time constants with the same default values (see the
 * #else branch below).
 *
 * solisten_tpi_tcp:
 *	TCP can handle a O_T_BIND_REQ with an increased backlog even though
 *	the transport is already bound. This is needed to avoid losing the
 *	port number should listen() do a T_UNBIND_REQ followed by a
 *	O_T_BIND_REQ.
 *
 * soconnect_tpi_udp:
 *	UDP and ICMP can handle a T_CONN_REQ.
 *	This is needed to make the sequence of connect(), getsockname()
 *	return the local IP address used to send packets to the connected to
 *	destination.
 *
 * soconnect_tpi_tcp:
 *	TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ.
 *	Set this to non-zero to send TPI conformant messages to TCP in this
 *	respect. This is a performance optimization.
 *
 * soaccept_tpi_tcp:
 *	TCP can handle a T_CONN_REQ without the acceptor being bound.
 *	This is a performance optimization that has been picked up in XTI.
 *
 * soaccept_tpi_multioptions:
 *	When inheriting SOL_SOCKET options from the listener to the accepting
 *	socket send them as a single message for AF_INET{,6}.
 */
int solisten_tpi_tcp = 0;
int soconnect_tpi_udp = 0;
int soconnect_tpi_tcp = 0;
int soaccept_tpi_tcp = 0;
int soaccept_tpi_multioptions = 1;
#else /* SOCK_TEST */
/*
 * Non-test build: the same names expand to constants, using the same
 * defaults as the SOCK_TEST variables above.
 */
#define	soconnect_tpi_tcp	0
#define	soconnect_tpi_udp	0
#define	solisten_tpi_tcp	0
#define	soaccept_tpi_tcp	0
#define	soaccept_tpi_multioptions	1
#endif /* SOCK_TEST */
1590Sstevel@tonic-gate 
#ifdef SOCK_TEST
/*
 * Test-only variables defined outside this file; they are only declared
 * when SOCK_TEST is defined.
 */
extern int do_useracc;
extern clock_t sock_test_timelimit;
#endif /* SOCK_TEST */
1640Sstevel@tonic-gate 
/*
 * Some X/Open added checks might have to be backed out to keep SunOS 4.X
 * applications working. Turn on this flag to disable these checks.
 */
int xnet_skip_checks = 0;
/*
 * When non-zero, a message is printed when an X/Open check causes a call
 * to fail (see the bind state check in sotpi_bindlisten()).
 */
int xnet_check_print = 0;
/*
 * NOTE(review): not referenced in the part of the file reviewed here;
 * presumably enables a diagnostic print on truncation -- confirm at the
 * point of use.
 */
int xnet_truncate_print = 0;
1720Sstevel@tonic-gate 
/*
 * Routines defined outside this file that operate on a k_sigset_t.
 */
extern	void sigintr(k_sigset_t *, int);
extern	void sigunintr(k_sigset_t *);

/*
 * NL7C routines declared here for use by this file.
 */
extern	void *nl7c_lookup_addr(void *, t_uscalar_t);
extern	void *nl7c_add_addr(void *, t_uscalar_t);
extern	void nl7c_listener_addr(void *, struct sonode *);

/* Sockets acting as an in-kernel SSL proxy */
extern mblk_t	*strsock_kssl_input(vnode_t *, mblk_t *, strwakeup_t *,
		    strsigset_t *, strsigset_t *, strpollset_t *);
extern mblk_t	*strsock_kssl_output(vnode_t *, mblk_t *, strwakeup_t *,
		    strsigset_t *, strsigset_t *, strpollset_t *);

/* Declared early because sotpi_bindlisten() calls it before its definition. */
static int	sotpi_unbind(struct sonode *, int);

/* Routines operating on a sodirect_t, defined outside this file. */
extern int	sodput(sodirect_t *, mblk_t *);
extern void	sodwakeup(sodirect_t *);

/* TPI sockfs sonode operations */
static int	sotpi_accept(struct sonode *, int, struct sonode **);
static int	sotpi_bind(struct sonode *, struct sockaddr *, socklen_t,
		    int);
static int	sotpi_connect(struct sonode *, const struct sockaddr *,
		    socklen_t, int, int);
static int	sotpi_listen(struct sonode *, int);
static int	sotpi_sendmsg(struct sonode *, struct nmsghdr *,
		    struct uio *);
static int	sotpi_shutdown(struct sonode *, int);
static int	sotpi_getsockname(struct sonode *);
/* Datagram send helpers private to this file. */
static int	sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t,
		    struct uio *, void *, t_uscalar_t, int);
static int	sodgram_direct(struct sonode *, struct sockaddr *,
		    socklen_t, struct uio *, int);
2060Sstevel@tonic-gate 
/*
 * Operations vector for TPI-based sockets.
 *
 * The initializer is positional, so the entries must stay in the same order
 * as the members of sonodeops_t; the trailing comment on each line names the
 * member the entry is intended for.
 *
 * NOTE(review): sotpi_recvmsg, sotpi_getpeername, sotpi_getsockopt and
 * sotpi_setsockopt have no prototype among the static forward declarations
 * in this file; presumably they are declared in an included header --
 * confirm.
 */
sonodeops_t sotpi_sonodeops = {
	sotpi_accept,		/* sop_accept		*/
	sotpi_bind,		/* sop_bind		*/
	sotpi_listen,		/* sop_listen		*/
	sotpi_connect,		/* sop_connect		*/
	sotpi_recvmsg,		/* sop_recvmsg		*/
	sotpi_sendmsg,		/* sop_sendmsg		*/
	sotpi_getpeername,	/* sop_getpeername	*/
	sotpi_getsockname,	/* sop_getsockname	*/
	sotpi_shutdown,		/* sop_shutdown		*/
	sotpi_getsockopt,	/* sop_getsockopt	*/
	sotpi_setsockopt	/* sop_setsockopt	*/
};
2200Sstevel@tonic-gate 
2210Sstevel@tonic-gate /*
2220Sstevel@tonic-gate  * Common create code for socket and accept. If tso is set the values
2230Sstevel@tonic-gate  * from that node is used instead of issuing a T_INFO_REQ.
2240Sstevel@tonic-gate  *
2250Sstevel@tonic-gate  * Assumes that the caller has a VN_HOLD on accessvp.
2260Sstevel@tonic-gate  * The VN_RELE will occur either when sotpi_create() fails or when
2270Sstevel@tonic-gate  * the returned sonode is freed.
2280Sstevel@tonic-gate  */
2290Sstevel@tonic-gate struct sonode *
2300Sstevel@tonic-gate sotpi_create(vnode_t *accessvp, int domain, int type, int protocol, int version,
2310Sstevel@tonic-gate     struct sonode *tso, int *errorp)
2320Sstevel@tonic-gate {
2330Sstevel@tonic-gate 	struct sonode	*so;
2340Sstevel@tonic-gate 	vnode_t		*vp;
2350Sstevel@tonic-gate 	int		flags, error;
2360Sstevel@tonic-gate 
2370Sstevel@tonic-gate 	ASSERT(accessvp != NULL);
2380Sstevel@tonic-gate 	vp = makesockvp(accessvp, domain, type, protocol);
2390Sstevel@tonic-gate 	ASSERT(vp != NULL);
2400Sstevel@tonic-gate 	so = VTOSO(vp);
2410Sstevel@tonic-gate 
2420Sstevel@tonic-gate 	flags = FREAD|FWRITE;
243741Smasputra 
244741Smasputra 	if ((type == SOCK_STREAM || type == SOCK_DGRAM) &&
245741Smasputra 	    (domain == AF_INET || domain == AF_INET6) &&
246741Smasputra 	    (protocol == IPPROTO_TCP || protocol == IPPROTO_UDP ||
247741Smasputra 	    protocol == IPPROTO_IP)) {
248741Smasputra 		/* Tell tcp or udp that it's talking to sockets */
249741Smasputra 		flags |= SO_SOCKSTR;
250741Smasputra 
251741Smasputra 		/*
252741Smasputra 		 * Here we indicate to socktpi_open() our attempt to
253741Smasputra 		 * make direct calls between sockfs and transport.
254741Smasputra 		 * The final decision is left to socktpi_open().
255741Smasputra 		 */
256741Smasputra 		so->so_state |= SS_DIRECT;
257741Smasputra 
258741Smasputra 		ASSERT(so->so_type != SOCK_DGRAM || tso == NULL);
259741Smasputra 		if (so->so_type == SOCK_STREAM && tso != NULL) {
260741Smasputra 			if (tso->so_state & SS_DIRECT) {
261741Smasputra 				/*
262741Smasputra 				 * Inherit SS_DIRECT from listener and pass
263741Smasputra 				 * SO_ACCEPTOR open flag to tcp, indicating
264741Smasputra 				 * that this is an accept fast-path instance.
265741Smasputra 				 */
266741Smasputra 				flags |= SO_ACCEPTOR;
267741Smasputra 			} else {
268741Smasputra 				/*
269741Smasputra 				 * SS_DIRECT is not set on listener, meaning
270741Smasputra 				 * that the listener has been converted from
271741Smasputra 				 * a socket to a stream.  Ensure that the
272741Smasputra 				 * acceptor inherits these settings.
273741Smasputra 				 */
274741Smasputra 				so->so_state &= ~SS_DIRECT;
275741Smasputra 				flags &= ~SO_SOCKSTR;
276741Smasputra 			}
2770Sstevel@tonic-gate 		}
2780Sstevel@tonic-gate 	}
2790Sstevel@tonic-gate 
2800Sstevel@tonic-gate 	/*
2810Sstevel@tonic-gate 	 * Tell local transport that it is talking to sockets.
2820Sstevel@tonic-gate 	 */
2830Sstevel@tonic-gate 	if (so->so_family == AF_UNIX) {
2840Sstevel@tonic-gate 		flags |= SO_SOCKSTR;
2850Sstevel@tonic-gate 	}
2860Sstevel@tonic-gate 
2871092Skais 	/* Initialize the kernel SSL proxy fields */
2881092Skais 	so->so_kssl_type = KSSL_NO_PROXY;
2891092Skais 	so->so_kssl_ent = NULL;
2901092Skais 	so->so_kssl_ctx = NULL;
2911092Skais 
2925331Samw 	if (error = socktpi_open(&vp, flags, CRED(), NULL)) {
2930Sstevel@tonic-gate 		VN_RELE(vp);
2940Sstevel@tonic-gate 		*errorp = error;
2950Sstevel@tonic-gate 		return (NULL);
2960Sstevel@tonic-gate 	}
2970Sstevel@tonic-gate 
2980Sstevel@tonic-gate 	if (error = so_strinit(so, tso)) {
2995331Samw 		(void) VOP_CLOSE(vp, 0, 1, 0, CRED(), NULL);
3000Sstevel@tonic-gate 		VN_RELE(vp);
3010Sstevel@tonic-gate 		*errorp = error;
3020Sstevel@tonic-gate 		return (NULL);
3030Sstevel@tonic-gate 	}
3040Sstevel@tonic-gate 
3050Sstevel@tonic-gate 	if (version == SOV_DEFAULT)
3060Sstevel@tonic-gate 		version = so_default_version;
3070Sstevel@tonic-gate 
3080Sstevel@tonic-gate 	so->so_version = (short)version;
309898Skais 
3100Sstevel@tonic-gate 	return (so);
3110Sstevel@tonic-gate }
3120Sstevel@tonic-gate 
3130Sstevel@tonic-gate /*
3140Sstevel@tonic-gate  * Bind the socket to an unspecified address in sockfs only.
3150Sstevel@tonic-gate  * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't
3160Sstevel@tonic-gate  * required in all cases.
3170Sstevel@tonic-gate  */
3180Sstevel@tonic-gate static void
3190Sstevel@tonic-gate so_automatic_bind(struct sonode *so)
3200Sstevel@tonic-gate {
3210Sstevel@tonic-gate 	ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6);
3220Sstevel@tonic-gate 
3230Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
3240Sstevel@tonic-gate 	ASSERT(!(so->so_state & SS_ISBOUND));
3250Sstevel@tonic-gate 	ASSERT(so->so_unbind_mp);
3260Sstevel@tonic-gate 
3270Sstevel@tonic-gate 	ASSERT(so->so_laddr_len <= so->so_laddr_maxlen);
3280Sstevel@tonic-gate 	bzero(so->so_laddr_sa, so->so_laddr_len);
3290Sstevel@tonic-gate 	so->so_laddr_sa->sa_family = so->so_family;
3300Sstevel@tonic-gate 	so->so_state |= SS_ISBOUND;
3310Sstevel@tonic-gate }
3320Sstevel@tonic-gate 
3330Sstevel@tonic-gate 
3340Sstevel@tonic-gate /*
/*
 * bind the socket.
 *
 * If the socket is already bound and neither _SOBIND_SOCKBSD nor
 * _SOBIND_XPG4_2 is passed in we allow rebinding. Note that for backwards
 * compatibility even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to
 * sobind/bind. Thus the rebinding code is currently not executed.
 *
 * The constraints for rebinding are:
 * - it is a SOCK_DGRAM, or
 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected
 *   and no listen() has been done.
 * This rebinding code was added based on some language in the XNET book
 * about not returning EINVAL if the protocol allows rebinding. However,
 * this language is not present in the Posix socket draft. Thus maybe the
 * rebinding logic should be deleted from the source.
 *
 * A null "name" can be used to unbind the socket if:
 * - it is a SOCK_DGRAM, or
 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected
 *   and no listen() has been done.
 */
3560Sstevel@tonic-gate static int
3570Sstevel@tonic-gate sotpi_bindlisten(struct sonode *so, struct sockaddr *name,
3580Sstevel@tonic-gate     socklen_t namelen, int backlog, int flags)
3590Sstevel@tonic-gate {
3600Sstevel@tonic-gate 	struct T_bind_req	bind_req;
3610Sstevel@tonic-gate 	struct T_bind_ack	*bind_ack;
3620Sstevel@tonic-gate 	int			error = 0;
3630Sstevel@tonic-gate 	mblk_t			*mp;
3640Sstevel@tonic-gate 	void			*addr;
3650Sstevel@tonic-gate 	t_uscalar_t		addrlen;
3660Sstevel@tonic-gate 	int			unbind_on_err = 1;
3670Sstevel@tonic-gate 	boolean_t		clear_acceptconn_on_err = B_FALSE;
3680Sstevel@tonic-gate 	boolean_t		restore_backlog_on_err = B_FALSE;
3690Sstevel@tonic-gate 	int			save_so_backlog;
3700Sstevel@tonic-gate 	t_scalar_t		PRIM_type = O_T_BIND_REQ;
3710Sstevel@tonic-gate 	boolean_t		tcp_udp_xport;
3720Sstevel@tonic-gate 	void			*nl7c = NULL;
3730Sstevel@tonic-gate 
3740Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n",
3757240Srh87107 	    (void *)so, (void *)name, namelen, backlog, flags,
3765240Snordmark 	    pr_state(so->so_state, so->so_mode)));
3770Sstevel@tonic-gate 
3780Sstevel@tonic-gate 	tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM;
3790Sstevel@tonic-gate 
3800Sstevel@tonic-gate 	if (!(flags & _SOBIND_LOCK_HELD)) {
3810Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
3820Sstevel@tonic-gate 		so_lock_single(so);	/* Set SOLOCKED */
3830Sstevel@tonic-gate 	} else {
3840Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(&so->so_lock));
3850Sstevel@tonic-gate 		ASSERT(so->so_flag & SOLOCKED);
3860Sstevel@tonic-gate 	}
3870Sstevel@tonic-gate 
3880Sstevel@tonic-gate 	/*
3890Sstevel@tonic-gate 	 * Make sure that there is a preallocated unbind_req message
3900Sstevel@tonic-gate 	 * before binding. This message allocated when the socket is
3910Sstevel@tonic-gate 	 * created  but it might be have been consumed.
3920Sstevel@tonic-gate 	 */
3930Sstevel@tonic-gate 	if (so->so_unbind_mp == NULL) {
3940Sstevel@tonic-gate 		dprintso(so, 1, ("sobind: allocating unbind_req\n"));
3950Sstevel@tonic-gate 		/* NOTE: holding so_lock while sleeping */
3960Sstevel@tonic-gate 		so->so_unbind_mp =
3970Sstevel@tonic-gate 		    soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP);
3980Sstevel@tonic-gate 	}
3990Sstevel@tonic-gate 
4000Sstevel@tonic-gate 	if (flags & _SOBIND_REBIND) {
4010Sstevel@tonic-gate 		/*
4020Sstevel@tonic-gate 		 * Called from solisten after doing an sotpi_unbind() or
4030Sstevel@tonic-gate 		 * potentially without the unbind (latter for AF_INET{,6}).
4040Sstevel@tonic-gate 		 */
4050Sstevel@tonic-gate 		ASSERT(name == NULL && namelen == 0);
4060Sstevel@tonic-gate 
4070Sstevel@tonic-gate 		if (so->so_family == AF_UNIX) {
4080Sstevel@tonic-gate 			ASSERT(so->so_ux_bound_vp);
4090Sstevel@tonic-gate 			addr = &so->so_ux_laddr;
4100Sstevel@tonic-gate 			addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr);
4115240Snordmark 			dprintso(so, 1, ("sobind rebind UNIX: addrlen %d, "
4125240Snordmark 			    "addr 0x%p, vp %p\n",
4130Sstevel@tonic-gate 			    addrlen,
4147240Srh87107 			    (void *)((struct so_ux_addr *)addr)->soua_vp,
4157240Srh87107 			    (void *)so->so_ux_bound_vp));
4160Sstevel@tonic-gate 		} else {
4170Sstevel@tonic-gate 			addr = so->so_laddr_sa;
4180Sstevel@tonic-gate 			addrlen = (t_uscalar_t)so->so_laddr_len;
4190Sstevel@tonic-gate 		}
4200Sstevel@tonic-gate 	} else if (flags & _SOBIND_UNSPEC) {
4210Sstevel@tonic-gate 		ASSERT(name == NULL && namelen == 0);
4220Sstevel@tonic-gate 
4230Sstevel@tonic-gate 		/*
4240Sstevel@tonic-gate 		 * The caller checked SS_ISBOUND but not necessarily
4250Sstevel@tonic-gate 		 * under so_lock
4260Sstevel@tonic-gate 		 */
4270Sstevel@tonic-gate 		if (so->so_state & SS_ISBOUND) {
4280Sstevel@tonic-gate 			/* No error */
4290Sstevel@tonic-gate 			goto done;
4300Sstevel@tonic-gate 		}
4310Sstevel@tonic-gate 
4320Sstevel@tonic-gate 		/* Set an initial local address */
4330Sstevel@tonic-gate 		switch (so->so_family) {
4340Sstevel@tonic-gate 		case AF_UNIX:
4350Sstevel@tonic-gate 			/*
4360Sstevel@tonic-gate 			 * Use an address with same size as struct sockaddr
4370Sstevel@tonic-gate 			 * just like BSD.
4380Sstevel@tonic-gate 			 */
4390Sstevel@tonic-gate 			so->so_laddr_len =
4405240Snordmark 			    (socklen_t)sizeof (struct sockaddr);
4410Sstevel@tonic-gate 			ASSERT(so->so_laddr_len <= so->so_laddr_maxlen);
4420Sstevel@tonic-gate 			bzero(so->so_laddr_sa, so->so_laddr_len);
4430Sstevel@tonic-gate 			so->so_laddr_sa->sa_family = so->so_family;
4440Sstevel@tonic-gate 
4450Sstevel@tonic-gate 			/*
4460Sstevel@tonic-gate 			 * Pass down an address with the implicit bind
4470Sstevel@tonic-gate 			 * magic number and the rest all zeros.
4480Sstevel@tonic-gate 			 * The transport will return a unique address.
4490Sstevel@tonic-gate 			 */
4500Sstevel@tonic-gate 			so->so_ux_laddr.soua_vp = NULL;
4510Sstevel@tonic-gate 			so->so_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT;
4520Sstevel@tonic-gate 			addr = &so->so_ux_laddr;
4530Sstevel@tonic-gate 			addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr);
4540Sstevel@tonic-gate 			break;
4550Sstevel@tonic-gate 
4560Sstevel@tonic-gate 		case AF_INET:
4570Sstevel@tonic-gate 		case AF_INET6:
4580Sstevel@tonic-gate 			/*
4590Sstevel@tonic-gate 			 * An unspecified bind in TPI has a NULL address.
4600Sstevel@tonic-gate 			 * Set the address in sockfs to have the sa_family.
4610Sstevel@tonic-gate 			 */
4620Sstevel@tonic-gate 			so->so_laddr_len = (so->so_family == AF_INET) ?
4630Sstevel@tonic-gate 			    (socklen_t)sizeof (sin_t) :
4640Sstevel@tonic-gate 			    (socklen_t)sizeof (sin6_t);
4650Sstevel@tonic-gate 			ASSERT(so->so_laddr_len <= so->so_laddr_maxlen);
4660Sstevel@tonic-gate 			bzero(so->so_laddr_sa, so->so_laddr_len);
4670Sstevel@tonic-gate 			so->so_laddr_sa->sa_family = so->so_family;
4680Sstevel@tonic-gate 			addr = NULL;
4690Sstevel@tonic-gate 			addrlen = 0;
4700Sstevel@tonic-gate 			break;
4710Sstevel@tonic-gate 
4720Sstevel@tonic-gate 		default:
4730Sstevel@tonic-gate 			/*
4740Sstevel@tonic-gate 			 * An unspecified bind in TPI has a NULL address.
4750Sstevel@tonic-gate 			 * Set the address in sockfs to be zero length.
4760Sstevel@tonic-gate 			 *
4770Sstevel@tonic-gate 			 * Can not assume there is a sa_family for all
4780Sstevel@tonic-gate 			 * protocol families. For example, AF_X25 does not
4790Sstevel@tonic-gate 			 * have a family field.
4800Sstevel@tonic-gate 			 */
4811548Srshoaib 			bzero(so->so_laddr_sa, so->so_laddr_len);
4820Sstevel@tonic-gate 			so->so_laddr_len = 0;	/* XXX correct? */
4830Sstevel@tonic-gate 			addr = NULL;
4840Sstevel@tonic-gate 			addrlen = 0;
4850Sstevel@tonic-gate 			break;
4860Sstevel@tonic-gate 		}
4870Sstevel@tonic-gate 
4880Sstevel@tonic-gate 	} else {
4890Sstevel@tonic-gate 		if (so->so_state & SS_ISBOUND) {
4900Sstevel@tonic-gate 			/*
4910Sstevel@tonic-gate 			 * If it is ok to rebind the socket, first unbind
4920Sstevel@tonic-gate 			 * with the transport. A rebind to the NULL address
4930Sstevel@tonic-gate 			 * is interpreted as an unbind.
4940Sstevel@tonic-gate 			 * Note that a bind to NULL in BSD does unbind the
4950Sstevel@tonic-gate 			 * socket but it fails with EINVAL.
4960Sstevel@tonic-gate 			 * Note that regular sockets set SOV_SOCKBSD i.e.
4970Sstevel@tonic-gate 			 * _SOBIND_SOCKBSD gets set here hence no type of
4980Sstevel@tonic-gate 			 * socket does currently allow rebinding.
4990Sstevel@tonic-gate 			 *
5000Sstevel@tonic-gate 			 * If the name is NULL just do an unbind.
5010Sstevel@tonic-gate 			 */
5020Sstevel@tonic-gate 			if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) &&
5030Sstevel@tonic-gate 			    name != NULL) {
5040Sstevel@tonic-gate 				error = EINVAL;
5050Sstevel@tonic-gate 				unbind_on_err = 0;
5060Sstevel@tonic-gate 				eprintsoline(so, error);
5070Sstevel@tonic-gate 				goto done;
5080Sstevel@tonic-gate 			}
5090Sstevel@tonic-gate 			if ((so->so_mode & SM_CONNREQUIRED) &&
5100Sstevel@tonic-gate 			    (so->so_state & SS_CANTREBIND)) {
5110Sstevel@tonic-gate 				error = EINVAL;
5120Sstevel@tonic-gate 				unbind_on_err = 0;
5130Sstevel@tonic-gate 				eprintsoline(so, error);
5140Sstevel@tonic-gate 				goto done;
5150Sstevel@tonic-gate 			}
5160Sstevel@tonic-gate 			error = sotpi_unbind(so, 0);
5170Sstevel@tonic-gate 			if (error) {
5180Sstevel@tonic-gate 				eprintsoline(so, error);
5190Sstevel@tonic-gate 				goto done;
5200Sstevel@tonic-gate 			}
5210Sstevel@tonic-gate 			ASSERT(!(so->so_state & SS_ISBOUND));
5220Sstevel@tonic-gate 			if (name == NULL) {
5230Sstevel@tonic-gate 				so->so_state &=
5245240Snordmark 				    ~(SS_ISCONNECTED|SS_ISCONNECTING);
5250Sstevel@tonic-gate 				goto done;
5260Sstevel@tonic-gate 			}
5270Sstevel@tonic-gate 		}
5280Sstevel@tonic-gate 		/* X/Open requires this check */
5290Sstevel@tonic-gate 		if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
5300Sstevel@tonic-gate 			if (xnet_check_print) {
5310Sstevel@tonic-gate 				printf("sockfs: X/Open bind state check "
5320Sstevel@tonic-gate 				    "caused EINVAL\n");
5330Sstevel@tonic-gate 			}
5340Sstevel@tonic-gate 			error = EINVAL;
5350Sstevel@tonic-gate 			goto done;
5360Sstevel@tonic-gate 		}
5370Sstevel@tonic-gate 
5380Sstevel@tonic-gate 		switch (so->so_family) {
5390Sstevel@tonic-gate 		case AF_UNIX:
5400Sstevel@tonic-gate 			/*
5410Sstevel@tonic-gate 			 * All AF_UNIX addresses are nul terminated
5420Sstevel@tonic-gate 			 * when copied (copyin_name) in so the minimum
5430Sstevel@tonic-gate 			 * length is 3 bytes.
5440Sstevel@tonic-gate 			 */
5450Sstevel@tonic-gate 			if (name == NULL ||
5460Sstevel@tonic-gate 			    (ssize_t)namelen <= sizeof (short) + 1) {
5470Sstevel@tonic-gate 				error = EISDIR;
5480Sstevel@tonic-gate 				eprintsoline(so, error);
5490Sstevel@tonic-gate 				goto done;
5500Sstevel@tonic-gate 			}
5510Sstevel@tonic-gate 			/*
5520Sstevel@tonic-gate 			 * Verify so_family matches the bound family.
5530Sstevel@tonic-gate 			 * BSD does not check this for AF_UNIX resulting
5540Sstevel@tonic-gate 			 * in funny mknods.
5550Sstevel@tonic-gate 			 */
5560Sstevel@tonic-gate 			if (name->sa_family != so->so_family) {
5570Sstevel@tonic-gate 				error = EAFNOSUPPORT;
5580Sstevel@tonic-gate 				goto done;
5590Sstevel@tonic-gate 			}
5600Sstevel@tonic-gate 			break;
5610Sstevel@tonic-gate 		case AF_INET:
5620Sstevel@tonic-gate 			if (name == NULL) {
5630Sstevel@tonic-gate 				error = EINVAL;
5640Sstevel@tonic-gate 				eprintsoline(so, error);
5650Sstevel@tonic-gate 				goto done;
5660Sstevel@tonic-gate 			}
5670Sstevel@tonic-gate 			if ((size_t)namelen != sizeof (sin_t)) {
5680Sstevel@tonic-gate 				error = name->sa_family != so->so_family ?
5690Sstevel@tonic-gate 				    EAFNOSUPPORT : EINVAL;
5700Sstevel@tonic-gate 				eprintsoline(so, error);
5710Sstevel@tonic-gate 				goto done;
5720Sstevel@tonic-gate 			}
5730Sstevel@tonic-gate 			if ((flags & _SOBIND_XPG4_2) &&
5740Sstevel@tonic-gate 			    (name->sa_family != so->so_family)) {
5750Sstevel@tonic-gate 				/*
5760Sstevel@tonic-gate 				 * This check has to be made for X/Open
5770Sstevel@tonic-gate 				 * sockets however application failures have
5780Sstevel@tonic-gate 				 * been observed when it is applied to
5790Sstevel@tonic-gate 				 * all sockets.
5800Sstevel@tonic-gate 				 */
5810Sstevel@tonic-gate 				error = EAFNOSUPPORT;
5820Sstevel@tonic-gate 				eprintsoline(so, error);
5830Sstevel@tonic-gate 				goto done;
5840Sstevel@tonic-gate 			}
5850Sstevel@tonic-gate 			/*
5860Sstevel@tonic-gate 			 * Force a zero sa_family to match so_family.
5870Sstevel@tonic-gate 			 *
5880Sstevel@tonic-gate 			 * Some programs like inetd(1M) don't set the
5890Sstevel@tonic-gate 			 * family field. Other programs leave
5900Sstevel@tonic-gate 			 * sin_family set to garbage - SunOS 4.X does
5910Sstevel@tonic-gate 			 * not check the family field on a bind.
5920Sstevel@tonic-gate 			 * We use the family field that
5930Sstevel@tonic-gate 			 * was passed in to the socket() call.
5940Sstevel@tonic-gate 			 */
5950Sstevel@tonic-gate 			name->sa_family = so->so_family;
5960Sstevel@tonic-gate 			break;
5970Sstevel@tonic-gate 
5980Sstevel@tonic-gate 		case AF_INET6: {
5990Sstevel@tonic-gate #ifdef DEBUG
6000Sstevel@tonic-gate 			sin6_t *sin6 = (sin6_t *)name;
6010Sstevel@tonic-gate #endif /* DEBUG */
6020Sstevel@tonic-gate 
6030Sstevel@tonic-gate 			if (name == NULL) {
6040Sstevel@tonic-gate 				error = EINVAL;
6050Sstevel@tonic-gate 				eprintsoline(so, error);
6060Sstevel@tonic-gate 				goto done;
6070Sstevel@tonic-gate 			}
6080Sstevel@tonic-gate 			if ((size_t)namelen != sizeof (sin6_t)) {
6090Sstevel@tonic-gate 				error = name->sa_family != so->so_family ?
6100Sstevel@tonic-gate 				    EAFNOSUPPORT : EINVAL;
6110Sstevel@tonic-gate 				eprintsoline(so, error);
6120Sstevel@tonic-gate 				goto done;
6130Sstevel@tonic-gate 			}
6140Sstevel@tonic-gate 			if (name->sa_family != so->so_family) {
6150Sstevel@tonic-gate 				/*
6160Sstevel@tonic-gate 				 * With IPv6 we require the family to match
6170Sstevel@tonic-gate 				 * unlike in IPv4.
6180Sstevel@tonic-gate 				 */
6190Sstevel@tonic-gate 				error = EAFNOSUPPORT;
6200Sstevel@tonic-gate 				eprintsoline(so, error);
6210Sstevel@tonic-gate 				goto done;
6220Sstevel@tonic-gate 			}
6230Sstevel@tonic-gate #ifdef DEBUG
6240Sstevel@tonic-gate 			/*
6250Sstevel@tonic-gate 			 * Verify that apps don't forget to clear
6260Sstevel@tonic-gate 			 * sin6_scope_id etc
6270Sstevel@tonic-gate 			 */
6280Sstevel@tonic-gate 			if (sin6->sin6_scope_id != 0 &&
6290Sstevel@tonic-gate 			    !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
6301548Srshoaib 				zcmn_err(getzoneid(), CE_WARN,
6310Sstevel@tonic-gate 				    "bind with uninitialized sin6_scope_id "
6320Sstevel@tonic-gate 				    "(%d) on socket. Pid = %d\n",
6330Sstevel@tonic-gate 				    (int)sin6->sin6_scope_id,
6340Sstevel@tonic-gate 				    (int)curproc->p_pid);
6350Sstevel@tonic-gate 			}
6360Sstevel@tonic-gate 			if (sin6->__sin6_src_id != 0) {
6371548Srshoaib 				zcmn_err(getzoneid(), CE_WARN,
6380Sstevel@tonic-gate 				    "bind with uninitialized __sin6_src_id "
6390Sstevel@tonic-gate 				    "(%d) on socket. Pid = %d\n",
6400Sstevel@tonic-gate 				    (int)sin6->__sin6_src_id,
6410Sstevel@tonic-gate 				    (int)curproc->p_pid);
6420Sstevel@tonic-gate 			}
6430Sstevel@tonic-gate #endif /* DEBUG */
6440Sstevel@tonic-gate 			break;
6450Sstevel@tonic-gate 		}
6460Sstevel@tonic-gate 		default:
6470Sstevel@tonic-gate 			/*
6480Sstevel@tonic-gate 			 * Don't do any length or sa_family check to allow
6490Sstevel@tonic-gate 			 * non-sockaddr style addresses.
6500Sstevel@tonic-gate 			 */
6510Sstevel@tonic-gate 			if (name == NULL) {
6520Sstevel@tonic-gate 				error = EINVAL;
6530Sstevel@tonic-gate 				eprintsoline(so, error);
6540Sstevel@tonic-gate 				goto done;
6550Sstevel@tonic-gate 			}
6560Sstevel@tonic-gate 			break;
6570Sstevel@tonic-gate 		}
6580Sstevel@tonic-gate 
6590Sstevel@tonic-gate 		if (namelen > (t_uscalar_t)so->so_laddr_maxlen) {
6600Sstevel@tonic-gate 			error = ENAMETOOLONG;
6610Sstevel@tonic-gate 			eprintsoline(so, error);
6620Sstevel@tonic-gate 			goto done;
6630Sstevel@tonic-gate 		}
6640Sstevel@tonic-gate 		/*
6650Sstevel@tonic-gate 		 * Save local address.
6660Sstevel@tonic-gate 		 */
6670Sstevel@tonic-gate 		so->so_laddr_len = (socklen_t)namelen;
6680Sstevel@tonic-gate 		ASSERT(so->so_laddr_len <= so->so_laddr_maxlen);
6690Sstevel@tonic-gate 		bcopy(name, so->so_laddr_sa, namelen);
6700Sstevel@tonic-gate 
6710Sstevel@tonic-gate 		addr = so->so_laddr_sa;
6720Sstevel@tonic-gate 		addrlen = (t_uscalar_t)so->so_laddr_len;
6730Sstevel@tonic-gate 		switch (so->so_family) {
6740Sstevel@tonic-gate 		case AF_INET6:
6750Sstevel@tonic-gate 		case AF_INET:
6760Sstevel@tonic-gate 			break;
6770Sstevel@tonic-gate 		case AF_UNIX: {
6780Sstevel@tonic-gate 			struct sockaddr_un *soun =
6795240Snordmark 			    (struct sockaddr_un *)so->so_laddr_sa;
6800Sstevel@tonic-gate 			struct vnode *vp;
6810Sstevel@tonic-gate 			struct vattr vattr;
6820Sstevel@tonic-gate 
6830Sstevel@tonic-gate 			ASSERT(so->so_ux_bound_vp == NULL);
6840Sstevel@tonic-gate 			/*
6850Sstevel@tonic-gate 			 * Create vnode for the specified path name.
6860Sstevel@tonic-gate 			 * Keep vnode held with a reference in so_ux_bound_vp.
6870Sstevel@tonic-gate 			 * Use the vnode pointer as the address used in the
6880Sstevel@tonic-gate 			 * bind with the transport.
6890Sstevel@tonic-gate 			 *
6900Sstevel@tonic-gate 			 * Use the same mode as in BSD. In particular this does
6910Sstevel@tonic-gate 			 * not observe the umask.
6920Sstevel@tonic-gate 			 */
6930Sstevel@tonic-gate 			/* MAXPATHLEN + soun_family + nul termination */
6940Sstevel@tonic-gate 			if (so->so_laddr_len >
6950Sstevel@tonic-gate 			    (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) {
6960Sstevel@tonic-gate 				error = ENAMETOOLONG;
6970Sstevel@tonic-gate 				eprintsoline(so, error);
6980Sstevel@tonic-gate 				goto done;
6990Sstevel@tonic-gate 			}
7000Sstevel@tonic-gate 			vattr.va_type = VSOCK;
7013446Smrj 			vattr.va_mode = 0777 & ~PTOU(curproc)->u_cmask;
7020Sstevel@tonic-gate 			vattr.va_mask = AT_TYPE|AT_MODE;
7030Sstevel@tonic-gate 			/* NOTE: holding so_lock */
7040Sstevel@tonic-gate 			error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr,
7055240Snordmark 			    EXCL, 0, &vp, CRMKNOD, 0, 0);
7060Sstevel@tonic-gate 			if (error) {
7070Sstevel@tonic-gate 				if (error == EEXIST)
7080Sstevel@tonic-gate 					error = EADDRINUSE;
7090Sstevel@tonic-gate 				eprintsoline(so, error);
7100Sstevel@tonic-gate 				goto done;
7110Sstevel@tonic-gate 			}
7120Sstevel@tonic-gate 			/*
7130Sstevel@tonic-gate 			 * Establish pointer from the underlying filesystem
7140Sstevel@tonic-gate 			 * vnode to the socket node.
7150Sstevel@tonic-gate 			 * so_ux_bound_vp and v_stream->sd_vnode form the
7160Sstevel@tonic-gate 			 * cross-linkage between the underlying filesystem
7170Sstevel@tonic-gate 			 * node and the socket node.
7180Sstevel@tonic-gate 			 */
7190Sstevel@tonic-gate 			ASSERT(SOTOV(so)->v_stream);
7200Sstevel@tonic-gate 			mutex_enter(&vp->v_lock);
7210Sstevel@tonic-gate 			vp->v_stream = SOTOV(so)->v_stream;
7220Sstevel@tonic-gate 			so->so_ux_bound_vp = vp;
7230Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
7240Sstevel@tonic-gate 
7250Sstevel@tonic-gate 			/*
7260Sstevel@tonic-gate 			 * Use the vnode pointer value as a unique address
7270Sstevel@tonic-gate 			 * (together with the magic number to avoid conflicts
7280Sstevel@tonic-gate 			 * with implicit binds) in the transport provider.
7290Sstevel@tonic-gate 			 */
7300Sstevel@tonic-gate 			so->so_ux_laddr.soua_vp = (void *)so->so_ux_bound_vp;
7310Sstevel@tonic-gate 			so->so_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT;
7320Sstevel@tonic-gate 			addr = &so->so_ux_laddr;
7330Sstevel@tonic-gate 			addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr);
7340Sstevel@tonic-gate 			dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n",
7350Sstevel@tonic-gate 			    addrlen,
7360Sstevel@tonic-gate 			    ((struct so_ux_addr *)addr)->soua_vp));
7370Sstevel@tonic-gate 			break;
7380Sstevel@tonic-gate 		}
7390Sstevel@tonic-gate 		} /* end switch (so->so_family) */
7400Sstevel@tonic-gate 	}
7410Sstevel@tonic-gate 
7420Sstevel@tonic-gate 	/*
7430Sstevel@tonic-gate 	 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since
7440Sstevel@tonic-gate 	 * the transport can start passing up T_CONN_IND messages
7450Sstevel@tonic-gate 	 * as soon as it receives the bind req and strsock_proto()
7460Sstevel@tonic-gate 	 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs.
7470Sstevel@tonic-gate 	 */
7480Sstevel@tonic-gate 	if (flags & _SOBIND_LISTEN) {
7490Sstevel@tonic-gate 		if ((so->so_state & SS_ACCEPTCONN) == 0)
7500Sstevel@tonic-gate 			clear_acceptconn_on_err = B_TRUE;
7510Sstevel@tonic-gate 		save_so_backlog = so->so_backlog;
7520Sstevel@tonic-gate 		restore_backlog_on_err = B_TRUE;
7530Sstevel@tonic-gate 		so->so_state |= SS_ACCEPTCONN;
7540Sstevel@tonic-gate 		so->so_backlog = backlog;
7550Sstevel@tonic-gate 	}
7560Sstevel@tonic-gate 
7570Sstevel@tonic-gate 	/*
7580Sstevel@tonic-gate 	 * If NL7C addr(s) have been configured check for addr/port match,
7590Sstevel@tonic-gate 	 * or if an implicit NL7C socket via AF_NCA mark socket as NL7C.
7600Sstevel@tonic-gate 	 *
7610Sstevel@tonic-gate 	 * NL7C supports the TCP transport only so check AF_INET and AF_INET6
7620Sstevel@tonic-gate 	 * family sockets only. If match mark as such.
7630Sstevel@tonic-gate 	 */
7641974Sbrutus 	if (nl7c_enabled && ((addr != NULL &&
7650Sstevel@tonic-gate 	    (so->so_family == AF_INET || so->so_family == AF_INET6) &&
7660Sstevel@tonic-gate 	    (nl7c = nl7c_lookup_addr(addr, addrlen))) ||
7671974Sbrutus 	    so->so_nl7c_flags == NL7C_AF_NCA)) {
7680Sstevel@tonic-gate 		/*
7690Sstevel@tonic-gate 		 * NL7C is not supported in non-global zones,
7700Sstevel@tonic-gate 		 * we enforce this restriction here.
7710Sstevel@tonic-gate 		 */
7720Sstevel@tonic-gate 		if (so->so_zoneid == GLOBAL_ZONEID) {
7730Sstevel@tonic-gate 			/* An NL7C socket, mark it */
7740Sstevel@tonic-gate 			so->so_nl7c_flags |= NL7C_ENABLED;
7751974Sbrutus 			if (nl7c == NULL) {
7761974Sbrutus 				/*
7771974Sbrutus 				 * Was an AF_NCA bind() so add it to the
7781974Sbrutus 				 * addr list for reporting purposes.
7791974Sbrutus 				 */
7801974Sbrutus 				nl7c = nl7c_add_addr(addr, addrlen);
7811974Sbrutus 			}
7820Sstevel@tonic-gate 		} else
7830Sstevel@tonic-gate 			nl7c = NULL;
7840Sstevel@tonic-gate 	}
7850Sstevel@tonic-gate 	/*
7860Sstevel@tonic-gate 	 * We send a T_BIND_REQ for TCP/UDP since we know it supports it,
7870Sstevel@tonic-gate 	 * for other transports we will send in a O_T_BIND_REQ.
7880Sstevel@tonic-gate 	 */
7890Sstevel@tonic-gate 	if (tcp_udp_xport &&
7900Sstevel@tonic-gate 	    (so->so_family == AF_INET || so->so_family == AF_INET6))
7910Sstevel@tonic-gate 		PRIM_type = T_BIND_REQ;
7920Sstevel@tonic-gate 
7930Sstevel@tonic-gate 	bind_req.PRIM_type = PRIM_type;
7940Sstevel@tonic-gate 	bind_req.ADDR_length = addrlen;
7950Sstevel@tonic-gate 	bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req);
7960Sstevel@tonic-gate 	bind_req.CONIND_number = backlog;
7970Sstevel@tonic-gate 	/* NOTE: holding so_lock while sleeping */
7980Sstevel@tonic-gate 	mp = soallocproto2(&bind_req, sizeof (bind_req),
7995240Snordmark 	    addr, addrlen, 0, _ALLOC_SLEEP);
8000Sstevel@tonic-gate 	so->so_state &= ~SS_LADDR_VALID;
801898Skais 
8020Sstevel@tonic-gate 	/* Done using so_laddr_sa - can drop the lock */
8030Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
8040Sstevel@tonic-gate 
805898Skais 	/*
806898Skais 	 * Intercept the bind_req message here to check if this <address/port>
807898Skais 	 * was configured as an SSL proxy server, or if another endpoint was
808898Skais 	 * already configured to act as a proxy for us.
8091974Sbrutus 	 *
8101974Sbrutus 	 * Note, only if NL7C not enabled for this socket.
811898Skais 	 */
8121974Sbrutus 	if (nl7c == NULL &&
8131974Sbrutus 	    (so->so_family == AF_INET || so->so_family == AF_INET6) &&
814898Skais 	    so->so_type == SOCK_STREAM) {
815898Skais 
816898Skais 		if (so->so_kssl_ent != NULL) {
817898Skais 			kssl_release_ent(so->so_kssl_ent, so, so->so_kssl_type);
818898Skais 			so->so_kssl_ent = NULL;
819898Skais 		}
820898Skais 
821898Skais 		so->so_kssl_type = kssl_check_proxy(mp, so, &so->so_kssl_ent);
822898Skais 		switch (so->so_kssl_type) {
823898Skais 		case KSSL_NO_PROXY:
824898Skais 			break;
825898Skais 
826898Skais 		case KSSL_HAS_PROXY:
827898Skais 			mutex_enter(&so->so_lock);
828898Skais 			goto skip_transport;
829898Skais 
830898Skais 		case KSSL_IS_PROXY:
831898Skais 			break;
832898Skais 		}
833898Skais 	}
834898Skais 
8350Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
8365240Snordmark 	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
8370Sstevel@tonic-gate 	if (error) {
8380Sstevel@tonic-gate 		eprintsoline(so, error);
8390Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
8400Sstevel@tonic-gate 		goto done;
8410Sstevel@tonic-gate 	}
8420Sstevel@tonic-gate 
8430Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
8440Sstevel@tonic-gate 	error = sowaitprim(so, PRIM_type, T_BIND_ACK,
8450Sstevel@tonic-gate 	    (t_uscalar_t)sizeof (*bind_ack), &mp, 0);
8460Sstevel@tonic-gate 	if (error) {
8470Sstevel@tonic-gate 		eprintsoline(so, error);
8480Sstevel@tonic-gate 		goto done;
8490Sstevel@tonic-gate 	}
850898Skais skip_transport:
8510Sstevel@tonic-gate 	ASSERT(mp);
8520Sstevel@tonic-gate 	/*
8530Sstevel@tonic-gate 	 * Even if some TPI message (e.g. T_DISCON_IND) was received in
8540Sstevel@tonic-gate 	 * strsock_proto while the lock was dropped above, the bind
8550Sstevel@tonic-gate 	 * is allowed to complete.
8560Sstevel@tonic-gate 	 */
8570Sstevel@tonic-gate 
8580Sstevel@tonic-gate 	/* Mark as bound. This will be undone if we detect errors below. */
8590Sstevel@tonic-gate 	if (flags & _SOBIND_NOXLATE) {
8600Sstevel@tonic-gate 		ASSERT(so->so_family == AF_UNIX);
8610Sstevel@tonic-gate 		so->so_state |= SS_FADDR_NOXLATE;
8620Sstevel@tonic-gate 	}
8630Sstevel@tonic-gate 	ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND));
8640Sstevel@tonic-gate 	so->so_state |= SS_ISBOUND;
8650Sstevel@tonic-gate 	ASSERT(so->so_unbind_mp);
8660Sstevel@tonic-gate 
8670Sstevel@tonic-gate 	/* note that we've already set SS_ACCEPTCONN above */
8680Sstevel@tonic-gate 
8690Sstevel@tonic-gate 	/*
8700Sstevel@tonic-gate 	 * Recompute addrlen - an unspecied bind sent down an
8710Sstevel@tonic-gate 	 * address of length zero but we expect the appropriate length
8720Sstevel@tonic-gate 	 * in return.
8730Sstevel@tonic-gate 	 */
8740Sstevel@tonic-gate 	addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ?
8750Sstevel@tonic-gate 	    sizeof (so->so_ux_laddr) : so->so_laddr_len);
8760Sstevel@tonic-gate 
8770Sstevel@tonic-gate 	bind_ack = (struct T_bind_ack *)mp->b_rptr;
8780Sstevel@tonic-gate 	/*
8790Sstevel@tonic-gate 	 * The alignment restriction is really too strict but
8800Sstevel@tonic-gate 	 * we want enough alignment to inspect the fields of
8810Sstevel@tonic-gate 	 * a sockaddr_in.
8820Sstevel@tonic-gate 	 */
8830Sstevel@tonic-gate 	addr = sogetoff(mp, bind_ack->ADDR_offset,
8845240Snordmark 	    bind_ack->ADDR_length,
8855240Snordmark 	    __TPI_ALIGN_SIZE);
8860Sstevel@tonic-gate 	if (addr == NULL) {
8870Sstevel@tonic-gate 		freemsg(mp);
8880Sstevel@tonic-gate 		error = EPROTO;
8890Sstevel@tonic-gate 		eprintsoline(so, error);
8900Sstevel@tonic-gate 		goto done;
8910Sstevel@tonic-gate 	}
8920Sstevel@tonic-gate 	if (!(flags & _SOBIND_UNSPEC)) {
8930Sstevel@tonic-gate 		/*
8940Sstevel@tonic-gate 		 * Verify that the transport didn't return something we
8950Sstevel@tonic-gate 		 * did not want e.g. an address other than what we asked for.
8960Sstevel@tonic-gate 		 *
8970Sstevel@tonic-gate 		 * NOTE: These checks would go away if/when we switch to
8980Sstevel@tonic-gate 		 * using the new TPI (in which the transport would fail
8990Sstevel@tonic-gate 		 * the request instead of assigning a different address).
9000Sstevel@tonic-gate 		 *
9010Sstevel@tonic-gate 		 * NOTE2: For protocols that we don't know (i.e. any
9020Sstevel@tonic-gate 		 * other than AF_INET6, AF_INET and AF_UNIX), we
9030Sstevel@tonic-gate 		 * cannot know if the transport should be expected to
9040Sstevel@tonic-gate 		 * return the same address as that requested.
9050Sstevel@tonic-gate 		 *
9060Sstevel@tonic-gate 		 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send
9070Sstevel@tonic-gate 		 * down a T_BIND_REQ. We use O_T_BIND_REQ for others.
9080Sstevel@tonic-gate 		 *
9090Sstevel@tonic-gate 		 * For example, in the case of netatalk it may be
9100Sstevel@tonic-gate 		 * inappropriate for the transport to return the
9110Sstevel@tonic-gate 		 * requested address (as it may have allocated a local
9120Sstevel@tonic-gate 		 * port number in behaviour similar to that of an
9130Sstevel@tonic-gate 		 * AF_INET bind request with a port number of zero).
9140Sstevel@tonic-gate 		 *
9150Sstevel@tonic-gate 		 * Given the definition of O_T_BIND_REQ, where the
9160Sstevel@tonic-gate 		 * transport may bind to an address other than the
9170Sstevel@tonic-gate 		 * requested address, it's not possible to determine
9180Sstevel@tonic-gate 		 * whether a returned address that differs from the
9190Sstevel@tonic-gate 		 * requested address is a reason to fail (because the
9200Sstevel@tonic-gate 		 * requested address was not available) or succeed
9210Sstevel@tonic-gate 		 * (because the transport allocated an appropriate
9220Sstevel@tonic-gate 		 * address and/or port).
9230Sstevel@tonic-gate 		 *
9240Sstevel@tonic-gate 		 * sockfs currently requires that the transport return
9250Sstevel@tonic-gate 		 * the requested address in the T_BIND_ACK, unless
9260Sstevel@tonic-gate 		 * there is code here to allow for any discrepancy.
9270Sstevel@tonic-gate 		 * Such code exists for AF_INET and AF_INET6.
9280Sstevel@tonic-gate 		 *
9290Sstevel@tonic-gate 		 * Netatalk chooses to return the requested address
9300Sstevel@tonic-gate 		 * rather than the (correct) allocated address.  This
9310Sstevel@tonic-gate 		 * means that netatalk violates the TPI specification
9320Sstevel@tonic-gate 		 * (and would not function correctly if used from a
9330Sstevel@tonic-gate 		 * TLI application), but it does mean that it works
9340Sstevel@tonic-gate 		 * with sockfs.
9350Sstevel@tonic-gate 		 *
9360Sstevel@tonic-gate 		 * As noted above, using the newer XTI bind primitive
9370Sstevel@tonic-gate 		 * (T_BIND_REQ) in preference to O_T_BIND_REQ would
9380Sstevel@tonic-gate 		 * allow sockfs to be more sure about whether or not
9390Sstevel@tonic-gate 		 * the bind request had succeeded (as transports are
9400Sstevel@tonic-gate 		 * not permitted to bind to a different address than
9410Sstevel@tonic-gate 		 * that requested - they must return failure).
9420Sstevel@tonic-gate 		 * Unfortunately, support for T_BIND_REQ may not be
9430Sstevel@tonic-gate 		 * present in all transport implementations (netatalk,
9440Sstevel@tonic-gate 		 * for example, doesn't have it), making the
9450Sstevel@tonic-gate 		 * transition difficult.
9460Sstevel@tonic-gate 		 */
9470Sstevel@tonic-gate 		if (bind_ack->ADDR_length != addrlen) {
9480Sstevel@tonic-gate 			/* Assumes that the requested address was in use */
9490Sstevel@tonic-gate 			freemsg(mp);
9500Sstevel@tonic-gate 			error = EADDRINUSE;
9510Sstevel@tonic-gate 			eprintsoline(so, error);
9520Sstevel@tonic-gate 			goto done;
9530Sstevel@tonic-gate 		}
9540Sstevel@tonic-gate 
9550Sstevel@tonic-gate 		switch (so->so_family) {
9560Sstevel@tonic-gate 		case AF_INET6:
9570Sstevel@tonic-gate 		case AF_INET: {
9580Sstevel@tonic-gate 			sin_t *rname, *aname;
9590Sstevel@tonic-gate 
9600Sstevel@tonic-gate 			rname = (sin_t *)addr;
9610Sstevel@tonic-gate 			aname = (sin_t *)so->so_laddr_sa;
9620Sstevel@tonic-gate 
9630Sstevel@tonic-gate 			/*
9640Sstevel@tonic-gate 			 * Take advantage of the alignment
9650Sstevel@tonic-gate 			 * of sin_port and sin6_port which fall
9660Sstevel@tonic-gate 			 * in the same place in their data structures.
9670Sstevel@tonic-gate 			 * Just use sin_port for either address family.
9680Sstevel@tonic-gate 			 *
9690Sstevel@tonic-gate 			 * This may become a problem if (heaven forbid)
9700Sstevel@tonic-gate 			 * there's a separate ipv6port_reserved... :-P
9710Sstevel@tonic-gate 			 *
9720Sstevel@tonic-gate 			 * Binding to port 0 has the semantics of letting
9730Sstevel@tonic-gate 			 * the transport bind to any port.
9740Sstevel@tonic-gate 			 *
9750Sstevel@tonic-gate 			 * If the transport is TCP or UDP since we had sent
9760Sstevel@tonic-gate 			 * a T_BIND_REQ we would not get a port other than
9770Sstevel@tonic-gate 			 * what we asked for.
9780Sstevel@tonic-gate 			 */
9790Sstevel@tonic-gate 			if (tcp_udp_xport) {
9800Sstevel@tonic-gate 				/*
9810Sstevel@tonic-gate 				 * Pick up the new port number if we bound to
9820Sstevel@tonic-gate 				 * port 0.
9830Sstevel@tonic-gate 				 */
9840Sstevel@tonic-gate 				if (aname->sin_port == 0)
9850Sstevel@tonic-gate 					aname->sin_port = rname->sin_port;
9860Sstevel@tonic-gate 				so->so_state |= SS_LADDR_VALID;
9870Sstevel@tonic-gate 				break;
9880Sstevel@tonic-gate 			}
9890Sstevel@tonic-gate 			if (aname->sin_port != 0 &&
9900Sstevel@tonic-gate 			    aname->sin_port != rname->sin_port) {
9910Sstevel@tonic-gate 				freemsg(mp);
9920Sstevel@tonic-gate 				error = EADDRINUSE;
9930Sstevel@tonic-gate 				eprintsoline(so, error);
9940Sstevel@tonic-gate 				goto done;
9950Sstevel@tonic-gate 			}
9960Sstevel@tonic-gate 			/*
9970Sstevel@tonic-gate 			 * Pick up the new port number if we bound to port 0.
9980Sstevel@tonic-gate 			 */
9990Sstevel@tonic-gate 			aname->sin_port = rname->sin_port;
10000Sstevel@tonic-gate 
10010Sstevel@tonic-gate 			/*
10020Sstevel@tonic-gate 			 * Unfortunately, addresses aren't _quite_ the same.
10030Sstevel@tonic-gate 			 */
10040Sstevel@tonic-gate 			if (so->so_family == AF_INET) {
10050Sstevel@tonic-gate 				if (aname->sin_addr.s_addr !=
10060Sstevel@tonic-gate 				    rname->sin_addr.s_addr) {
10070Sstevel@tonic-gate 					freemsg(mp);
10080Sstevel@tonic-gate 					error = EADDRNOTAVAIL;
10090Sstevel@tonic-gate 					eprintsoline(so, error);
10100Sstevel@tonic-gate 					goto done;
10110Sstevel@tonic-gate 				}
10120Sstevel@tonic-gate 			} else {
10130Sstevel@tonic-gate 				sin6_t *rname6 = (sin6_t *)rname;
10140Sstevel@tonic-gate 				sin6_t *aname6 = (sin6_t *)aname;
10150Sstevel@tonic-gate 
10160Sstevel@tonic-gate 				if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr,
10170Sstevel@tonic-gate 				    &rname6->sin6_addr)) {
10180Sstevel@tonic-gate 					freemsg(mp);
10190Sstevel@tonic-gate 					error = EADDRNOTAVAIL;
10200Sstevel@tonic-gate 					eprintsoline(so, error);
10210Sstevel@tonic-gate 					goto done;
10220Sstevel@tonic-gate 				}
10230Sstevel@tonic-gate 			}
10240Sstevel@tonic-gate 			break;
10250Sstevel@tonic-gate 		}
10260Sstevel@tonic-gate 		case AF_UNIX:
10270Sstevel@tonic-gate 			if (bcmp(addr, &so->so_ux_laddr, addrlen) != 0) {
10280Sstevel@tonic-gate 				freemsg(mp);
10290Sstevel@tonic-gate 				error = EADDRINUSE;
10300Sstevel@tonic-gate 				eprintsoline(so, error);
10310Sstevel@tonic-gate 				eprintso(so,
10325240Snordmark 				    ("addrlen %d, addr 0x%x, vp %p\n",
10335240Snordmark 				    addrlen, *((int *)addr),
10347240Srh87107 				    (void *)so->so_ux_bound_vp));
10350Sstevel@tonic-gate 				goto done;
10360Sstevel@tonic-gate 			}
10370Sstevel@tonic-gate 			so->so_state |= SS_LADDR_VALID;
10380Sstevel@tonic-gate 			break;
10390Sstevel@tonic-gate 		default:
10400Sstevel@tonic-gate 			/*
10410Sstevel@tonic-gate 			 * NOTE: This assumes that addresses can be
10420Sstevel@tonic-gate 			 * byte-compared for equivalence.
10430Sstevel@tonic-gate 			 */
10440Sstevel@tonic-gate 			if (bcmp(addr, so->so_laddr_sa, addrlen) != 0) {
10450Sstevel@tonic-gate 				freemsg(mp);
10460Sstevel@tonic-gate 				error = EADDRINUSE;
10470Sstevel@tonic-gate 				eprintsoline(so, error);
10480Sstevel@tonic-gate 				goto done;
10490Sstevel@tonic-gate 			}
10500Sstevel@tonic-gate 			/*
10510Sstevel@tonic-gate 			 * Don't mark SS_LADDR_VALID, as we cannot be
10520Sstevel@tonic-gate 			 * sure that the returned address is the real
10530Sstevel@tonic-gate 			 * bound address when talking to an unknown
10540Sstevel@tonic-gate 			 * transport.
10550Sstevel@tonic-gate 			 */
10560Sstevel@tonic-gate 			break;
10570Sstevel@tonic-gate 		}
10580Sstevel@tonic-gate 	} else {
10590Sstevel@tonic-gate 		/*
10600Sstevel@tonic-gate 		 * Save for returned address for getsockname.
10610Sstevel@tonic-gate 		 * Needed for unspecific bind unless transport supports
10620Sstevel@tonic-gate 		 * the TI_GETMYNAME ioctl.
10630Sstevel@tonic-gate 		 * Do this for AF_INET{,6} even though they do, as
10640Sstevel@tonic-gate 		 * caching info here is much better performance than
10650Sstevel@tonic-gate 		 * a TPI/STREAMS trip to the transport for getsockname.
10660Sstevel@tonic-gate 		 * Any which can't for some reason _must_ _not_ set
10670Sstevel@tonic-gate 		 * LADDR_VALID here for the caching version of getsockname
10680Sstevel@tonic-gate 		 * to not break;
10690Sstevel@tonic-gate 		 */
10700Sstevel@tonic-gate 		switch (so->so_family) {
10710Sstevel@tonic-gate 		case AF_UNIX:
10720Sstevel@tonic-gate 			/*
10730Sstevel@tonic-gate 			 * Record the address bound with the transport
10740Sstevel@tonic-gate 			 * for use by socketpair.
10750Sstevel@tonic-gate 			 */
10760Sstevel@tonic-gate 			bcopy(addr, &so->so_ux_laddr, addrlen);
10770Sstevel@tonic-gate 			so->so_state |= SS_LADDR_VALID;
10780Sstevel@tonic-gate 			break;
10790Sstevel@tonic-gate 		case AF_INET:
10800Sstevel@tonic-gate 		case AF_INET6:
10810Sstevel@tonic-gate 			ASSERT(so->so_laddr_len <= so->so_laddr_maxlen);
10820Sstevel@tonic-gate 			bcopy(addr, so->so_laddr_sa, so->so_laddr_len);
10830Sstevel@tonic-gate 			so->so_state |= SS_LADDR_VALID;
10840Sstevel@tonic-gate 			break;
10850Sstevel@tonic-gate 		default:
10860Sstevel@tonic-gate 			/*
10870Sstevel@tonic-gate 			 * Don't mark SS_LADDR_VALID, as we cannot be
10880Sstevel@tonic-gate 			 * sure that the returned address is the real
10890Sstevel@tonic-gate 			 * bound address when talking to an unknown
10900Sstevel@tonic-gate 			 * transport.
10910Sstevel@tonic-gate 			 */
10920Sstevel@tonic-gate 			break;
10930Sstevel@tonic-gate 		}
10940Sstevel@tonic-gate 	}
10950Sstevel@tonic-gate 
10960Sstevel@tonic-gate 	if (nl7c != NULL) {
10971974Sbrutus 		/* Register listen()er sonode pointer with NL7C */
10981974Sbrutus 		nl7c_listener_addr(nl7c, so);
10990Sstevel@tonic-gate 	}
11000Sstevel@tonic-gate 
11010Sstevel@tonic-gate 	freemsg(mp);
11020Sstevel@tonic-gate 
11030Sstevel@tonic-gate done:
11040Sstevel@tonic-gate 	if (error) {
11050Sstevel@tonic-gate 		/* reset state & backlog to values held on entry */
11060Sstevel@tonic-gate 		if (clear_acceptconn_on_err == B_TRUE)
11070Sstevel@tonic-gate 			so->so_state &= ~SS_ACCEPTCONN;
11080Sstevel@tonic-gate 		if (restore_backlog_on_err == B_TRUE)
11090Sstevel@tonic-gate 			so->so_backlog = save_so_backlog;
11100Sstevel@tonic-gate 
11110Sstevel@tonic-gate 		if (unbind_on_err && so->so_state & SS_ISBOUND) {
11120Sstevel@tonic-gate 			int err;
11130Sstevel@tonic-gate 
11140Sstevel@tonic-gate 			err = sotpi_unbind(so, 0);
11150Sstevel@tonic-gate 			/* LINTED - statement has no consequent: if */
11160Sstevel@tonic-gate 			if (err) {
11170Sstevel@tonic-gate 				eprintsoline(so, error);
11180Sstevel@tonic-gate 			} else {
11190Sstevel@tonic-gate 				ASSERT(!(so->so_state & SS_ISBOUND));
11200Sstevel@tonic-gate 			}
11210Sstevel@tonic-gate 		}
11220Sstevel@tonic-gate 	}
11230Sstevel@tonic-gate 	if (!(flags & _SOBIND_LOCK_HELD)) {
11240Sstevel@tonic-gate 		so_unlock_single(so, SOLOCKED);
11250Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
11260Sstevel@tonic-gate 	} else {
11270Sstevel@tonic-gate 		/* If the caller held the lock don't release it here */
11280Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(&so->so_lock));
11290Sstevel@tonic-gate 		ASSERT(so->so_flag & SOLOCKED);
11300Sstevel@tonic-gate 	}
11310Sstevel@tonic-gate 	return (error);
11320Sstevel@tonic-gate }
11330Sstevel@tonic-gate 
11340Sstevel@tonic-gate /* bind the socket */
1135741Smasputra static int
11360Sstevel@tonic-gate sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
11370Sstevel@tonic-gate     int flags)
11380Sstevel@tonic-gate {
11390Sstevel@tonic-gate 	if ((flags & _SOBIND_SOCKETPAIR) == 0)
11400Sstevel@tonic-gate 		return (sotpi_bindlisten(so, name, namelen, 0, flags));
11410Sstevel@tonic-gate 
11420Sstevel@tonic-gate 	flags &= ~_SOBIND_SOCKETPAIR;
11430Sstevel@tonic-gate 	return (sotpi_bindlisten(so, name, namelen, 1, flags));
11440Sstevel@tonic-gate }
11450Sstevel@tonic-gate 
11460Sstevel@tonic-gate /*
11470Sstevel@tonic-gate  * Unbind a socket - used when bind() fails, when bind() specifies a NULL
11480Sstevel@tonic-gate  * address, or when listen needs to unbind and bind.
11490Sstevel@tonic-gate  * If the _SOUNBIND_REBIND flag is specified the addresses are retained
11500Sstevel@tonic-gate  * so that a sobind can pick them up.
11510Sstevel@tonic-gate  */
11520Sstevel@tonic-gate static int
11530Sstevel@tonic-gate sotpi_unbind(struct sonode *so, int flags)
11540Sstevel@tonic-gate {
11550Sstevel@tonic-gate 	struct T_unbind_req	unbind_req;
11560Sstevel@tonic-gate 	int			error = 0;
11570Sstevel@tonic-gate 	mblk_t			*mp;
11580Sstevel@tonic-gate 
11590Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n",
11607240Srh87107 	    (void *)so, flags, pr_state(so->so_state, so->so_mode)));
11610Sstevel@tonic-gate 
11620Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
11630Sstevel@tonic-gate 	ASSERT(so->so_flag & SOLOCKED);
11640Sstevel@tonic-gate 
11650Sstevel@tonic-gate 	if (!(so->so_state & SS_ISBOUND)) {
11660Sstevel@tonic-gate 		error = EINVAL;
11670Sstevel@tonic-gate 		eprintsoline(so, error);
11680Sstevel@tonic-gate 		goto done;
11690Sstevel@tonic-gate 	}
11700Sstevel@tonic-gate 
11710Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
11720Sstevel@tonic-gate 
11730Sstevel@tonic-gate 	/*
11740Sstevel@tonic-gate 	 * Flush the read and write side (except stream head read queue)
11750Sstevel@tonic-gate 	 * and send down T_UNBIND_REQ.
11760Sstevel@tonic-gate 	 */
11770Sstevel@tonic-gate 	(void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW);
11780Sstevel@tonic-gate 
11790Sstevel@tonic-gate 	unbind_req.PRIM_type = T_UNBIND_REQ;
11800Sstevel@tonic-gate 	mp = soallocproto1(&unbind_req, sizeof (unbind_req),
11810Sstevel@tonic-gate 	    0, _ALLOC_SLEEP);
11820Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
11835240Snordmark 	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
11840Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
11850Sstevel@tonic-gate 	if (error) {
11860Sstevel@tonic-gate 		eprintsoline(so, error);
11870Sstevel@tonic-gate 		goto done;
11880Sstevel@tonic-gate 	}
11890Sstevel@tonic-gate 
11900Sstevel@tonic-gate 	error = sowaitokack(so, T_UNBIND_REQ);
11910Sstevel@tonic-gate 	if (error) {
11920Sstevel@tonic-gate 		eprintsoline(so, error);
11930Sstevel@tonic-gate 		goto done;
11940Sstevel@tonic-gate 	}
11950Sstevel@tonic-gate 
11960Sstevel@tonic-gate 	/*
11970Sstevel@tonic-gate 	 * Even if some TPI message (e.g. T_DISCON_IND) was received in
11980Sstevel@tonic-gate 	 * strsock_proto while the lock was dropped above, the unbind
11990Sstevel@tonic-gate 	 * is allowed to complete.
12000Sstevel@tonic-gate 	 */
12010Sstevel@tonic-gate 	if (!(flags & _SOUNBIND_REBIND)) {
12020Sstevel@tonic-gate 		/*
12030Sstevel@tonic-gate 		 * Clear out bound address.
12040Sstevel@tonic-gate 		 */
12050Sstevel@tonic-gate 		vnode_t *vp;
12060Sstevel@tonic-gate 
12070Sstevel@tonic-gate 		if ((vp = so->so_ux_bound_vp) != NULL) {
1208898Skais 
1209898Skais 			/* Undo any SSL proxy setup */
1210898Skais 			if ((so->so_family == AF_INET ||
1211898Skais 			    so->so_family == AF_INET6) &&
1212898Skais 			    (so->so_type == SOCK_STREAM) &&
1213898Skais 			    (so->so_kssl_ent != NULL)) {
1214898Skais 				kssl_release_ent(so->so_kssl_ent, so,
1215898Skais 				    so->so_kssl_type);
1216898Skais 				so->so_kssl_ent = NULL;
1217898Skais 				so->so_kssl_type = KSSL_NO_PROXY;
1218898Skais 			}
1219898Skais 
12200Sstevel@tonic-gate 			so->so_ux_bound_vp = NULL;
12210Sstevel@tonic-gate 			vn_rele_stream(vp);
12220Sstevel@tonic-gate 		}
12230Sstevel@tonic-gate 		/* Clear out address */
12240Sstevel@tonic-gate 		so->so_laddr_len = 0;
12250Sstevel@tonic-gate 	}
12260Sstevel@tonic-gate 	so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN|SS_LADDR_VALID);
12271974Sbrutus 
12280Sstevel@tonic-gate done:
1229898Skais 
12300Sstevel@tonic-gate 	/* If the caller held the lock don't release it here */
12310Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
12320Sstevel@tonic-gate 	ASSERT(so->so_flag & SOLOCKED);
12330Sstevel@tonic-gate 
12340Sstevel@tonic-gate 	return (error);
12350Sstevel@tonic-gate }
12360Sstevel@tonic-gate 
12370Sstevel@tonic-gate /*
12380Sstevel@tonic-gate  * listen on the socket.
12390Sstevel@tonic-gate  * For TPI conforming transports this has to first unbind with the transport
12400Sstevel@tonic-gate  * and then bind again using the new backlog.
12410Sstevel@tonic-gate  */
12420Sstevel@tonic-gate int
12430Sstevel@tonic-gate sotpi_listen(struct sonode *so, int backlog)
12440Sstevel@tonic-gate {
12450Sstevel@tonic-gate 	int		error = 0;
12460Sstevel@tonic-gate 
12470Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n",
12487240Srh87107 	    (void *)so, backlog, pr_state(so->so_state, so->so_mode)));
12490Sstevel@tonic-gate 
12500Sstevel@tonic-gate 	if (so->so_serv_type == T_CLTS)
12510Sstevel@tonic-gate 		return (EOPNOTSUPP);
12520Sstevel@tonic-gate 
12530Sstevel@tonic-gate 	/*
12540Sstevel@tonic-gate 	 * If the socket is ready to accept connections already, then
12550Sstevel@tonic-gate 	 * return without doing anything.  This avoids a problem where
12560Sstevel@tonic-gate 	 * a second listen() call fails if a connection is pending and
12570Sstevel@tonic-gate 	 * leaves the socket unbound. Only when we are not unbinding
12580Sstevel@tonic-gate 	 * with the transport can we safely increase the backlog.
12590Sstevel@tonic-gate 	 */
12600Sstevel@tonic-gate 	if (so->so_state & SS_ACCEPTCONN &&
12610Sstevel@tonic-gate 	    !((so->so_family == AF_INET || so->so_family == AF_INET6) &&
12625240Snordmark 	    /*CONSTCOND*/
12635240Snordmark 	    !solisten_tpi_tcp))
12640Sstevel@tonic-gate 		return (0);
12650Sstevel@tonic-gate 
12660Sstevel@tonic-gate 	if (so->so_state & SS_ISCONNECTED)
12670Sstevel@tonic-gate 		return (EINVAL);
12680Sstevel@tonic-gate 
12690Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
12700Sstevel@tonic-gate 	so_lock_single(so);	/* Set SOLOCKED */
12710Sstevel@tonic-gate 
12720Sstevel@tonic-gate 	if (backlog < 0)
12730Sstevel@tonic-gate 		backlog = 0;
12740Sstevel@tonic-gate 	/*
12750Sstevel@tonic-gate 	 * Use the same qlimit as in BSD. BSD checks the qlimit
12760Sstevel@tonic-gate 	 * before queuing the next connection implying that a
12770Sstevel@tonic-gate 	 * listen(sock, 0) allows one connection to be queued.
12780Sstevel@tonic-gate 	 * BSD also uses 1.5 times the requested backlog.
12790Sstevel@tonic-gate 	 *
12800Sstevel@tonic-gate 	 * XNS Issue 4 required a strict interpretation of the backlog.
12810Sstevel@tonic-gate 	 * This has been waived subsequently for Issue 4 and the change
12820Sstevel@tonic-gate 	 * incorporated in XNS Issue 5. So we aren't required to do
12830Sstevel@tonic-gate 	 * anything special for XPG apps.
12840Sstevel@tonic-gate 	 */
12850Sstevel@tonic-gate 	if (backlog >= (INT_MAX - 1) / 3)
12860Sstevel@tonic-gate 		backlog = INT_MAX;
12870Sstevel@tonic-gate 	else
12880Sstevel@tonic-gate 		backlog = backlog * 3 / 2 + 1;
12890Sstevel@tonic-gate 
12900Sstevel@tonic-gate 	/*
12910Sstevel@tonic-gate 	 * If the listen doesn't change the backlog we do nothing.
12920Sstevel@tonic-gate 	 * This avoids an EPROTO error from the transport.
12930Sstevel@tonic-gate 	 */
12940Sstevel@tonic-gate 	if ((so->so_state & SS_ACCEPTCONN) &&
12950Sstevel@tonic-gate 	    so->so_backlog == backlog)
12960Sstevel@tonic-gate 		goto done;
12970Sstevel@tonic-gate 
12980Sstevel@tonic-gate 	if (!(so->so_state & SS_ISBOUND)) {
12990Sstevel@tonic-gate 		/*
13000Sstevel@tonic-gate 		 * Must have been explicitly bound in the UNIX domain.
13010Sstevel@tonic-gate 		 */
13020Sstevel@tonic-gate 		if (so->so_family == AF_UNIX) {
13030Sstevel@tonic-gate 			error = EINVAL;
13040Sstevel@tonic-gate 			goto done;
13050Sstevel@tonic-gate 		}
13060Sstevel@tonic-gate 		error = sotpi_bindlisten(so, NULL, 0, backlog,
13075240Snordmark 		    _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN);
13080Sstevel@tonic-gate 	} else if (backlog > 0) {
13090Sstevel@tonic-gate 		/*
13100Sstevel@tonic-gate 		 * AF_INET{,6} hack to avoid losing the port.
13110Sstevel@tonic-gate 		 * Assumes that all AF_INET{,6} transports can handle a
13120Sstevel@tonic-gate 		 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI
13130Sstevel@tonic-gate 		 * has already bound thus it is possible to avoid the unbind.
13140Sstevel@tonic-gate 		 */
13150Sstevel@tonic-gate 		if (!((so->so_family == AF_INET || so->so_family == AF_INET6) &&
13160Sstevel@tonic-gate 		    /*CONSTCOND*/
13170Sstevel@tonic-gate 		    !solisten_tpi_tcp)) {
13180Sstevel@tonic-gate 			error = sotpi_unbind(so, _SOUNBIND_REBIND);
13190Sstevel@tonic-gate 			if (error)
13200Sstevel@tonic-gate 				goto done;
13210Sstevel@tonic-gate 		}
13220Sstevel@tonic-gate 		error = sotpi_bindlisten(so, NULL, 0, backlog,
13235240Snordmark 		    _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN);
13240Sstevel@tonic-gate 	} else {
13250Sstevel@tonic-gate 		so->so_state |= SS_ACCEPTCONN;
13260Sstevel@tonic-gate 		so->so_backlog = backlog;
13270Sstevel@tonic-gate 	}
13280Sstevel@tonic-gate 	if (error)
13290Sstevel@tonic-gate 		goto done;
13300Sstevel@tonic-gate 	ASSERT(so->so_state & SS_ACCEPTCONN);
13310Sstevel@tonic-gate done:
13320Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
13330Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
13340Sstevel@tonic-gate 	return (error);
13350Sstevel@tonic-gate }
13360Sstevel@tonic-gate 
13370Sstevel@tonic-gate /*
13380Sstevel@tonic-gate  * Disconnect either a specified seqno or all (-1).
13390Sstevel@tonic-gate  * The former is used on listening sockets only.
13400Sstevel@tonic-gate  *
13410Sstevel@tonic-gate  * When seqno == -1 sodisconnect could call sotpi_unbind. However,
13420Sstevel@tonic-gate  * the current use of sodisconnect(seqno == -1) is only for shutdown
13430Sstevel@tonic-gate  * so there is no point (and potentially incorrect) to unbind.
13440Sstevel@tonic-gate  */
13450Sstevel@tonic-gate int
13460Sstevel@tonic-gate sodisconnect(struct sonode *so, t_scalar_t seqno, int flags)
13470Sstevel@tonic-gate {
13480Sstevel@tonic-gate 	struct T_discon_req	discon_req;
13490Sstevel@tonic-gate 	int			error = 0;
13500Sstevel@tonic-gate 	mblk_t			*mp;
13510Sstevel@tonic-gate 
13520Sstevel@tonic-gate 	dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n",
13537240Srh87107 	    (void *)so, seqno, flags, pr_state(so->so_state, so->so_mode)));
13540Sstevel@tonic-gate 
13550Sstevel@tonic-gate 	if (!(flags & _SODISCONNECT_LOCK_HELD)) {
13560Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
13570Sstevel@tonic-gate 		so_lock_single(so);	/* Set SOLOCKED */
13580Sstevel@tonic-gate 	} else {
13590Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(&so->so_lock));
13600Sstevel@tonic-gate 		ASSERT(so->so_flag & SOLOCKED);
13610Sstevel@tonic-gate 	}
13620Sstevel@tonic-gate 
13630Sstevel@tonic-gate 	if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) {
13640Sstevel@tonic-gate 		error = EINVAL;
13650Sstevel@tonic-gate 		eprintsoline(so, error);
13660Sstevel@tonic-gate 		goto done;
13670Sstevel@tonic-gate 	}
13680Sstevel@tonic-gate 
13690Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
13700Sstevel@tonic-gate 	/*
13710Sstevel@tonic-gate 	 * Flush the write side (unless this is a listener)
13720Sstevel@tonic-gate 	 * and then send down a T_DISCON_REQ.
13730Sstevel@tonic-gate 	 * (Don't flush on listener since it could flush {O_}T_CONN_RES
13740Sstevel@tonic-gate 	 * and other messages.)
13750Sstevel@tonic-gate 	 */
13760Sstevel@tonic-gate 	if (!(so->so_state & SS_ACCEPTCONN))
13770Sstevel@tonic-gate 		(void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW);
13780Sstevel@tonic-gate 
13790Sstevel@tonic-gate 	discon_req.PRIM_type = T_DISCON_REQ;
13800Sstevel@tonic-gate 	discon_req.SEQ_number = seqno;
13810Sstevel@tonic-gate 	mp = soallocproto1(&discon_req, sizeof (discon_req),
13820Sstevel@tonic-gate 	    0, _ALLOC_SLEEP);
13830Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
13845240Snordmark 	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
13850Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
13860Sstevel@tonic-gate 	if (error) {
13870Sstevel@tonic-gate 		eprintsoline(so, error);
13880Sstevel@tonic-gate 		goto done;
13890Sstevel@tonic-gate 	}
13900Sstevel@tonic-gate 
13910Sstevel@tonic-gate 	error = sowaitokack(so, T_DISCON_REQ);
13920Sstevel@tonic-gate 	if (error) {
13930Sstevel@tonic-gate 		eprintsoline(so, error);
13940Sstevel@tonic-gate 		goto done;
13950Sstevel@tonic-gate 	}
13960Sstevel@tonic-gate 	/*
13970Sstevel@tonic-gate 	 * Even if some TPI message (e.g. T_DISCON_IND) was received in
13980Sstevel@tonic-gate 	 * strsock_proto while the lock was dropped above, the disconnect
13990Sstevel@tonic-gate 	 * is allowed to complete. However, it is not possible to
14000Sstevel@tonic-gate 	 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set.
14010Sstevel@tonic-gate 	 */
14020Sstevel@tonic-gate 	so->so_state &=
14030Sstevel@tonic-gate 	    ~(SS_ISCONNECTED|SS_ISCONNECTING|SS_LADDR_VALID|SS_FADDR_VALID);
14040Sstevel@tonic-gate done:
14050Sstevel@tonic-gate 	if (!(flags & _SODISCONNECT_LOCK_HELD)) {
14060Sstevel@tonic-gate 		so_unlock_single(so, SOLOCKED);
14070Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
14080Sstevel@tonic-gate 	} else {
14090Sstevel@tonic-gate 		/* If the caller held the lock don't release it here */
14100Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(&so->so_lock));
14110Sstevel@tonic-gate 		ASSERT(so->so_flag & SOLOCKED);
14120Sstevel@tonic-gate 	}
14130Sstevel@tonic-gate 	return (error);
14140Sstevel@tonic-gate }
14150Sstevel@tonic-gate 
14160Sstevel@tonic-gate int
14170Sstevel@tonic-gate sotpi_accept(struct sonode *so, int fflag, struct sonode **nsop)
14180Sstevel@tonic-gate {
14190Sstevel@tonic-gate 	struct T_conn_ind	*conn_ind;
14200Sstevel@tonic-gate 	struct T_conn_res	*conn_res;
14210Sstevel@tonic-gate 	int			error = 0;
14224379Sja97890 	mblk_t			*mp, *ctxmp, *ack_mp;
14230Sstevel@tonic-gate 	struct sonode		*nso;
14240Sstevel@tonic-gate 	vnode_t			*nvp;
14250Sstevel@tonic-gate 	void			*src;
14260Sstevel@tonic-gate 	t_uscalar_t		srclen;
14270Sstevel@tonic-gate 	void			*opt;
14280Sstevel@tonic-gate 	t_uscalar_t		optlen;
14290Sstevel@tonic-gate 	t_scalar_t		PRIM_type;
14300Sstevel@tonic-gate 	t_scalar_t		SEQ_number;
14314379Sja97890 	size_t			sinlen;
14320Sstevel@tonic-gate 
14330Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n",
14347240Srh87107 	    (void *)so, fflag, (void *)nsop,
14357240Srh87107 	    pr_state(so->so_state, so->so_mode)));
14360Sstevel@tonic-gate 
14370Sstevel@tonic-gate 	/*
14380Sstevel@tonic-gate 	 * Defer single-threading the accepting socket until
14390Sstevel@tonic-gate 	 * the T_CONN_IND has been received and parsed and the
14400Sstevel@tonic-gate 	 * new sonode has been opened.
14410Sstevel@tonic-gate 	 */
14420Sstevel@tonic-gate 
14430Sstevel@tonic-gate 	/* Check that we are not already connected */
14440Sstevel@tonic-gate 	if ((so->so_state & SS_ACCEPTCONN) == 0)
14450Sstevel@tonic-gate 		goto conn_bad;
14460Sstevel@tonic-gate again:
14470Sstevel@tonic-gate 	if ((error = sowaitconnind(so, fflag, &mp)) != 0)
14480Sstevel@tonic-gate 		goto e_bad;
14490Sstevel@tonic-gate 
14500Sstevel@tonic-gate 	ASSERT(mp);
14510Sstevel@tonic-gate 	conn_ind = (struct T_conn_ind *)mp->b_rptr;
1452898Skais 	ctxmp = mp->b_cont;
1453898Skais 
14540Sstevel@tonic-gate 	/*
14550Sstevel@tonic-gate 	 * Save SEQ_number for error paths.
14560Sstevel@tonic-gate 	 */
14570Sstevel@tonic-gate 	SEQ_number = conn_ind->SEQ_number;
14580Sstevel@tonic-gate 
14590Sstevel@tonic-gate 	srclen = conn_ind->SRC_length;
14600Sstevel@tonic-gate 	src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1);
14610Sstevel@tonic-gate 	if (src == NULL) {
14620Sstevel@tonic-gate 		error = EPROTO;
14630Sstevel@tonic-gate 		freemsg(mp);
14640Sstevel@tonic-gate 		eprintsoline(so, error);
14650Sstevel@tonic-gate 		goto disconnect_unlocked;
14660Sstevel@tonic-gate 	}
14670Sstevel@tonic-gate 	optlen = conn_ind->OPT_length;
14680Sstevel@tonic-gate 	switch (so->so_family) {
14690Sstevel@tonic-gate 	case AF_INET:
14700Sstevel@tonic-gate 	case AF_INET6:
14710Sstevel@tonic-gate 		if ((optlen == sizeof (intptr_t)) &&
1472741Smasputra 		    ((so->so_state & SS_DIRECT) != 0)) {
14730Sstevel@tonic-gate 			bcopy(mp->b_rptr + conn_ind->OPT_offset,
14740Sstevel@tonic-gate 			    &opt, conn_ind->OPT_length);
14750Sstevel@tonic-gate 		} else {
14760Sstevel@tonic-gate 			/*
14770Sstevel@tonic-gate 			 * The transport (in this case TCP) hasn't sent up
14780Sstevel@tonic-gate 			 * a pointer to an instance for the accept fast-path.
14790Sstevel@tonic-gate 			 * Disable fast-path completely because the call to
14800Sstevel@tonic-gate 			 * sotpi_create() below would otherwise create an
14810Sstevel@tonic-gate 			 * incomplete TCP instance, which would lead to
14820Sstevel@tonic-gate 			 * problems when sockfs sends a normal T_CONN_RES
14830Sstevel@tonic-gate 			 * message down the new stream.
14840Sstevel@tonic-gate 			 */
1485741Smasputra 			if (so->so_state & SS_DIRECT) {
1486741Smasputra 				int rval;
1487741Smasputra 				/*
1488741Smasputra 				 * For consistency we inform tcp to disable
1489741Smasputra 				 * direct interface on the listener, though
1490741Smasputra 				 * we can certainly live without doing this
1491741Smasputra 				 * because no data will ever travel upstream
1492741Smasputra 				 * on the listening socket.
1493741Smasputra 				 */
1494741Smasputra 				so->so_state &= ~SS_DIRECT;
1495741Smasputra 				(void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK,
1496741Smasputra 				    0, 0, K_TO_K, CRED(), &rval);
1497741Smasputra 			}
14980Sstevel@tonic-gate 			opt = NULL;
14990Sstevel@tonic-gate 			optlen = 0;
15000Sstevel@tonic-gate 		}
15010Sstevel@tonic-gate 		break;
15020Sstevel@tonic-gate 	case AF_UNIX:
15030Sstevel@tonic-gate 	default:
15040Sstevel@tonic-gate 		if (optlen != 0) {
15050Sstevel@tonic-gate 			opt = sogetoff(mp, conn_ind->OPT_offset, optlen,
15060Sstevel@tonic-gate 			    __TPI_ALIGN_SIZE);
15070Sstevel@tonic-gate 			if (opt == NULL) {
15080Sstevel@tonic-gate 				error = EPROTO;
15090Sstevel@tonic-gate 				freemsg(mp);
15100Sstevel@tonic-gate 				eprintsoline(so, error);
15110Sstevel@tonic-gate 				goto disconnect_unlocked;
15120Sstevel@tonic-gate 			}
15130Sstevel@tonic-gate 		}
15140Sstevel@tonic-gate 		if (so->so_family == AF_UNIX) {
15150Sstevel@tonic-gate 			if (!(so->so_state & SS_FADDR_NOXLATE)) {
15160Sstevel@tonic-gate 				src = NULL;
15170Sstevel@tonic-gate 				srclen = 0;
15180Sstevel@tonic-gate 			}
15190Sstevel@tonic-gate 			/* Extract src address from options */
15200Sstevel@tonic-gate 			if (optlen != 0)
15210Sstevel@tonic-gate 				so_getopt_srcaddr(opt, optlen, &src, &srclen);
15220Sstevel@tonic-gate 		}
15230Sstevel@tonic-gate 		break;
15240Sstevel@tonic-gate 	}
15250Sstevel@tonic-gate 
15260Sstevel@tonic-gate 	/*
15270Sstevel@tonic-gate 	 * Create the new socket.
15280Sstevel@tonic-gate 	 */
15290Sstevel@tonic-gate 	VN_HOLD(so->so_accessvp);
15300Sstevel@tonic-gate 	nso = sotpi_create(so->so_accessvp, so->so_family, so->so_type,
15315240Snordmark 	    so->so_protocol, so->so_version, so, &error);
15320Sstevel@tonic-gate 	if (nso == NULL) {
15330Sstevel@tonic-gate 		ASSERT(error != 0);
15340Sstevel@tonic-gate 		/*
15350Sstevel@tonic-gate 		 * Accept can not fail with ENOBUFS. sotpi_create
15360Sstevel@tonic-gate 		 * sleeps waiting for memory until a signal is caught
15370Sstevel@tonic-gate 		 * so return EINTR.
15380Sstevel@tonic-gate 		 */
15390Sstevel@tonic-gate 		freemsg(mp);
15400Sstevel@tonic-gate 		if (error == ENOBUFS)
15410Sstevel@tonic-gate 			error = EINTR;
15420Sstevel@tonic-gate 		goto e_disc_unl;
15430Sstevel@tonic-gate 	}
15440Sstevel@tonic-gate 	nvp = SOTOV(nso);
15450Sstevel@tonic-gate 
1546898Skais 	/*
1547898Skais 	 * If the transport sent up an SSL connection context, then attach
1548898Skais 	 * it the new socket, and set the (sd_wputdatafunc)() and
1549898Skais 	 * (sd_rputdatafunc)() stream head hooks to intercept and process
1550898Skais 	 * SSL records.
1551898Skais 	 */
1552898Skais 	if (ctxmp != NULL) {
1553898Skais 		/*
1554898Skais 		 * This kssl_ctx_t is already held for us by the transport.
1555898Skais 		 * So, we don't need to do a kssl_hold_ctx() here.
1556898Skais 		 */
1557898Skais 		nso->so_kssl_ctx = *((kssl_ctx_t *)ctxmp->b_rptr);
1558898Skais 		freemsg(ctxmp);
1559898Skais 		mp->b_cont = NULL;
1560898Skais 		strsetrwputdatahooks(nvp, strsock_kssl_input,
1561898Skais 		    strsock_kssl_output);
1562*7660SEric.Yu@Sun.COM 
1563*7660SEric.Yu@Sun.COM 		/* Disable sodirect if any */
1564*7660SEric.Yu@Sun.COM 		if (nso->so_direct != NULL) {
1565*7660SEric.Yu@Sun.COM 			mutex_enter(nso->so_direct->sod_lockp);
1566*7660SEric.Yu@Sun.COM 			SOD_DISABLE(nso->so_direct);
1567*7660SEric.Yu@Sun.COM 			mutex_exit(nso->so_direct->sod_lockp);
1568*7660SEric.Yu@Sun.COM 			nso->so_direct = NULL;
1569*7660SEric.Yu@Sun.COM 		}
1570898Skais 	}
15710Sstevel@tonic-gate #ifdef DEBUG
15720Sstevel@tonic-gate 	/*
15730Sstevel@tonic-gate 	 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus
15740Sstevel@tonic-gate 	 * it's inherited early to allow debugging of the accept code itself.
15750Sstevel@tonic-gate 	 */
15760Sstevel@tonic-gate 	nso->so_options |= so->so_options & SO_DEBUG;
15770Sstevel@tonic-gate #endif /* DEBUG */
15780Sstevel@tonic-gate 
15790Sstevel@tonic-gate 	/*
15800Sstevel@tonic-gate 	 * Save the SRC address from the T_CONN_IND
15810Sstevel@tonic-gate 	 * for getpeername to work on AF_UNIX and on transports that do not
15820Sstevel@tonic-gate 	 * support TI_GETPEERNAME.
15830Sstevel@tonic-gate 	 *
15840Sstevel@tonic-gate 	 * NOTE: AF_UNIX NUL termination is ensured by the sender's
15850Sstevel@tonic-gate 	 * copyin_name().
15860Sstevel@tonic-gate 	 */
15870Sstevel@tonic-gate 	if (srclen > (t_uscalar_t)nso->so_faddr_maxlen) {
15880Sstevel@tonic-gate 		error = EINVAL;
15890Sstevel@tonic-gate 		freemsg(mp);
15900Sstevel@tonic-gate 		eprintsoline(so, error);
15910Sstevel@tonic-gate 		goto disconnect_vp_unlocked;
15920Sstevel@tonic-gate 	}
15930Sstevel@tonic-gate 	nso->so_faddr_len = (socklen_t)srclen;
15940Sstevel@tonic-gate 	ASSERT(so->so_faddr_len <= so->so_faddr_maxlen);
15950Sstevel@tonic-gate 	bcopy(src, nso->so_faddr_sa, srclen);
15960Sstevel@tonic-gate 	nso->so_state |= SS_FADDR_VALID;
15970Sstevel@tonic-gate 
15980Sstevel@tonic-gate 	if ((DB_REF(mp) > 1) || MBLKSIZE(mp) <
15990Sstevel@tonic-gate 	    (sizeof (struct T_conn_res) + sizeof (intptr_t))) {
16000Sstevel@tonic-gate 		cred_t *cr;
16010Sstevel@tonic-gate 
16020Sstevel@tonic-gate 		if ((cr = DB_CRED(mp)) != NULL) {
16030Sstevel@tonic-gate 			crhold(cr);
16040Sstevel@tonic-gate 			nso->so_peercred = cr;
16050Sstevel@tonic-gate 			nso->so_cpid = DB_CPID(mp);
16060Sstevel@tonic-gate 		}
16070Sstevel@tonic-gate 		freemsg(mp);
16080Sstevel@tonic-gate 
16090Sstevel@tonic-gate 		mp = soallocproto1(NULL, sizeof (struct T_conn_res) +
16100Sstevel@tonic-gate 		    sizeof (intptr_t), 0, _ALLOC_INTR);
16110Sstevel@tonic-gate 		if (mp == NULL) {
16120Sstevel@tonic-gate 			/*
16130Sstevel@tonic-gate 			 * Accept can not fail with ENOBUFS.
16140Sstevel@tonic-gate 			 * A signal was caught so return EINTR.
16150Sstevel@tonic-gate 			 */
16160Sstevel@tonic-gate 			error = EINTR;
16170Sstevel@tonic-gate 			eprintsoline(so, error);
16180Sstevel@tonic-gate 			goto disconnect_vp_unlocked;
16190Sstevel@tonic-gate 		}
16200Sstevel@tonic-gate 		conn_res = (struct T_conn_res *)mp->b_rptr;
16210Sstevel@tonic-gate 	} else {
16220Sstevel@tonic-gate 		nso->so_peercred = DB_CRED(mp);
16230Sstevel@tonic-gate 		nso->so_cpid = DB_CPID(mp);
16240Sstevel@tonic-gate 		DB_CRED(mp) = NULL;
16250Sstevel@tonic-gate 
16260Sstevel@tonic-gate 		mp->b_rptr = DB_BASE(mp);
16270Sstevel@tonic-gate 		conn_res = (struct T_conn_res *)mp->b_rptr;
16280Sstevel@tonic-gate 		mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res);
16290Sstevel@tonic-gate 	}
16300Sstevel@tonic-gate 
16310Sstevel@tonic-gate 	/*
16320Sstevel@tonic-gate 	 * New socket must be bound at least in sockfs and, except for AF_INET,
16330Sstevel@tonic-gate 	 * (or AF_INET6) it also has to be bound in the transport provider.
16344379Sja97890 	 * We set the local address in the sonode from the T_OK_ACK of the
16354379Sja97890 	 * T_CONN_RES. For this reason the address we bind to here isn't
16364379Sja97890 	 * important.
16370Sstevel@tonic-gate 	 */
16380Sstevel@tonic-gate 	if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) &&
16390Sstevel@tonic-gate 	    /*CONSTCOND*/
16400Sstevel@tonic-gate 	    nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) {
16410Sstevel@tonic-gate 		/*
16420Sstevel@tonic-gate 		 * Optimization for AF_INET{,6} transports
16430Sstevel@tonic-gate 		 * that can handle a T_CONN_RES without being bound.
16440Sstevel@tonic-gate 		 */
16450Sstevel@tonic-gate 		mutex_enter(&nso->so_lock);
16460Sstevel@tonic-gate 		so_automatic_bind(nso);
16470Sstevel@tonic-gate 		mutex_exit(&nso->so_lock);
16480Sstevel@tonic-gate 	} else {
16490Sstevel@tonic-gate 		/* Perform NULL bind with the transport provider. */
16500Sstevel@tonic-gate 		if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC)) != 0) {
16510Sstevel@tonic-gate 			ASSERT(error != ENOBUFS);
16520Sstevel@tonic-gate 			freemsg(mp);
16530Sstevel@tonic-gate 			eprintsoline(nso, error);
16540Sstevel@tonic-gate 			goto disconnect_vp_unlocked;
16550Sstevel@tonic-gate 		}
16560Sstevel@tonic-gate 	}
16570Sstevel@tonic-gate 
16580Sstevel@tonic-gate 	/*
16590Sstevel@tonic-gate 	 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES
16600Sstevel@tonic-gate 	 * so that any data arriving on the new socket will cause the
16610Sstevel@tonic-gate 	 * appropriate signals to be delivered for the new socket.
16620Sstevel@tonic-gate 	 *
16630Sstevel@tonic-gate 	 * No other thread (except strsock_proto and strsock_misc)
16640Sstevel@tonic-gate 	 * can access the new socket thus we relax the locking.
16650Sstevel@tonic-gate 	 */
16660Sstevel@tonic-gate 	nso->so_pgrp = so->so_pgrp;
16670Sstevel@tonic-gate 	nso->so_state |= so->so_state & (SS_ASYNC|SS_FADDR_NOXLATE);
16680Sstevel@tonic-gate 
16690Sstevel@tonic-gate 	if (nso->so_pgrp != 0) {
16700Sstevel@tonic-gate 		if ((error = so_set_events(nso, nvp, CRED())) != 0) {
16710Sstevel@tonic-gate 			eprintsoline(nso, error);
16720Sstevel@tonic-gate 			error = 0;
16730Sstevel@tonic-gate 			nso->so_pgrp = 0;
16740Sstevel@tonic-gate 		}
16750Sstevel@tonic-gate 	}
16760Sstevel@tonic-gate 
16770Sstevel@tonic-gate 	/*
16780Sstevel@tonic-gate 	 * Make note of the socket level options. TCP and IP level options
16790Sstevel@tonic-gate 	 * are already inherited. We could do all this after accept is
16800Sstevel@tonic-gate 	 * successful but doing it here simplifies code and no harm done
16810Sstevel@tonic-gate 	 * for error case.
16820Sstevel@tonic-gate 	 */
16830Sstevel@tonic-gate 	nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE|
16840Sstevel@tonic-gate 	    SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK|
16850Sstevel@tonic-gate 	    SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER);
16860Sstevel@tonic-gate 	nso->so_sndbuf = so->so_sndbuf;
16870Sstevel@tonic-gate 	nso->so_rcvbuf = so->so_rcvbuf;
16880Sstevel@tonic-gate 	if (nso->so_options & SO_LINGER)
16890Sstevel@tonic-gate 		nso->so_linger = so->so_linger;
16900Sstevel@tonic-gate 
1691741Smasputra 	if ((so->so_state & SS_DIRECT) != 0) {
16920Sstevel@tonic-gate 
16930Sstevel@tonic-gate 		ASSERT(opt != NULL);
16940Sstevel@tonic-gate 
16950Sstevel@tonic-gate 		conn_res->OPT_length = optlen;
16960Sstevel@tonic-gate 		conn_res->OPT_offset = MBLKL(mp);
16970Sstevel@tonic-gate 		bcopy(&opt, mp->b_wptr, optlen);
16980Sstevel@tonic-gate 		mp->b_wptr += optlen;
16990Sstevel@tonic-gate 		conn_res->PRIM_type = T_CONN_RES;
17000Sstevel@tonic-gate 		conn_res->ACCEPTOR_id = 0;
17010Sstevel@tonic-gate 		PRIM_type = T_CONN_RES;
17020Sstevel@tonic-gate 
17030Sstevel@tonic-gate 		/* Send down the T_CONN_RES on acceptor STREAM */
17040Sstevel@tonic-gate 		error = kstrputmsg(SOTOV(nso), mp, NULL,
17050Sstevel@tonic-gate 		    0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
17060Sstevel@tonic-gate 		if (error) {
17070Sstevel@tonic-gate 			mutex_enter(&so->so_lock);
17080Sstevel@tonic-gate 			so_lock_single(so);
17090Sstevel@tonic-gate 			eprintsoline(so, error);
17100Sstevel@tonic-gate 			goto disconnect_vp;
17110Sstevel@tonic-gate 		}
17120Sstevel@tonic-gate 		mutex_enter(&nso->so_lock);
17130Sstevel@tonic-gate 		error = sowaitprim(nso, T_CONN_RES, T_OK_ACK,
17140Sstevel@tonic-gate 		    (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0);
17150Sstevel@tonic-gate 		if (error) {
17160Sstevel@tonic-gate 			mutex_exit(&nso->so_lock);
17170Sstevel@tonic-gate 			mutex_enter(&so->so_lock);
17180Sstevel@tonic-gate 			so_lock_single(so);
17190Sstevel@tonic-gate 			eprintsoline(so, error);
17200Sstevel@tonic-gate 			goto disconnect_vp;
17210Sstevel@tonic-gate 		}
17220Sstevel@tonic-gate 		if (nso->so_family == AF_INET) {
17230Sstevel@tonic-gate 			sin_t *sin;
17240Sstevel@tonic-gate 
17250Sstevel@tonic-gate 			sin = (sin_t *)(ack_mp->b_rptr +
17260Sstevel@tonic-gate 			    sizeof (struct T_ok_ack));
17270Sstevel@tonic-gate 			bcopy(sin, nso->so_laddr_sa, sizeof (sin_t));
17280Sstevel@tonic-gate 			nso->so_laddr_len = sizeof (sin_t);
17290Sstevel@tonic-gate 		} else {
17300Sstevel@tonic-gate 			sin6_t *sin6;
17310Sstevel@tonic-gate 
17320Sstevel@tonic-gate 			sin6 = (sin6_t *)(ack_mp->b_rptr +
17330Sstevel@tonic-gate 			    sizeof (struct T_ok_ack));
17340Sstevel@tonic-gate 			bcopy(sin6, nso->so_laddr_sa, sizeof (sin6_t));
17350Sstevel@tonic-gate 			nso->so_laddr_len = sizeof (sin6_t);
17360Sstevel@tonic-gate 		}
17370Sstevel@tonic-gate 		freemsg(ack_mp);
17380Sstevel@tonic-gate 
17390Sstevel@tonic-gate 		nso->so_state |= SS_ISCONNECTED | SS_LADDR_VALID;
17400Sstevel@tonic-gate 		nso->so_priv = opt;
17410Sstevel@tonic-gate 
17420Sstevel@tonic-gate 		if (so->so_nl7c_flags & NL7C_ENABLED) {
17430Sstevel@tonic-gate 			/*
17441974Sbrutus 			 * A NL7C marked listen()er so the new socket
17451974Sbrutus 			 * inherits the listen()er's NL7C state, except
17461974Sbrutus 			 * for NL7C_POLLIN.
17470Sstevel@tonic-gate 			 *
17481974Sbrutus 			 * Only call NL7C to process the new socket if
17491974Sbrutus 			 * the listen socket allows blocking i/o.
17500Sstevel@tonic-gate 			 */
17511974Sbrutus 			nso->so_nl7c_flags = so->so_nl7c_flags & (~NL7C_POLLIN);
17521974Sbrutus 			if (so->so_state & (SS_NONBLOCK|SS_NDELAY)) {
17531974Sbrutus 				/*
17541974Sbrutus 				 * Nonblocking accept() just make it
17551974Sbrutus 				 * persist to defer processing to the
17561974Sbrutus 				 * read-side syscall (e.g. read).
17571974Sbrutus 				 */
17581974Sbrutus 				nso->so_nl7c_flags |= NL7C_SOPERSIST;
17591974Sbrutus 			} else if (nl7c_process(nso, B_FALSE)) {
17600Sstevel@tonic-gate 				/*
17610Sstevel@tonic-gate 				 * NL7C has completed processing on the
17620Sstevel@tonic-gate 				 * socket, close the socket and back to
17630Sstevel@tonic-gate 				 * the top to await the next T_CONN_IND.
17640Sstevel@tonic-gate 				 */
17650Sstevel@tonic-gate 				mutex_exit(&nso->so_lock);
17660Sstevel@tonic-gate 				(void) VOP_CLOSE(nvp, 0, 1, (offset_t)0,
17675331Samw 				    CRED(), NULL);
17680Sstevel@tonic-gate 				VN_RELE(nvp);
17690Sstevel@tonic-gate 				goto again;
17700Sstevel@tonic-gate 			}
17710Sstevel@tonic-gate 			/* Pass the new socket out */
17720Sstevel@tonic-gate 		}
17730Sstevel@tonic-gate 
17740Sstevel@tonic-gate 		mutex_exit(&nso->so_lock);
17750Sstevel@tonic-gate 
17760Sstevel@tonic-gate 		/*
17772811Sja97890 		 * It's possible, through the use of autopush for example,
17782811Sja97890 		 * that the acceptor stream may not support SS_DIRECT
17792811Sja97890 		 * semantics. If the new socket does not support SS_DIRECT
17802811Sja97890 		 * we issue a _SIOCSOCKFALLBACK to inform the transport
17812811Sja97890 		 * as we would in the I_PUSH case.
17822811Sja97890 		 */
17832811Sja97890 		if (!(nso->so_state & SS_DIRECT)) {
17842811Sja97890 			int	rval;
17852811Sja97890 
17862811Sja97890 			if ((error = strioctl(SOTOV(nso), _SIOCSOCKFALLBACK,
17872811Sja97890 			    0, 0, K_TO_K, CRED(), &rval)) != 0) {
17882811Sja97890 				mutex_enter(&so->so_lock);
17892811Sja97890 				so_lock_single(so);
17902811Sja97890 				eprintsoline(so, error);
17912811Sja97890 				goto disconnect_vp;
17922811Sja97890 			}
17932811Sja97890 		}
17942811Sja97890 
17952811Sja97890 		/*
17960Sstevel@tonic-gate 		 * Pass out new socket.
17970Sstevel@tonic-gate 		 */
17980Sstevel@tonic-gate 		if (nsop != NULL)
17990Sstevel@tonic-gate 			*nsop = nso;
18000Sstevel@tonic-gate 
18010Sstevel@tonic-gate 		return (0);
18020Sstevel@tonic-gate 	}
18030Sstevel@tonic-gate 
18040Sstevel@tonic-gate 	/*
18050Sstevel@tonic-gate 	 * This is the non-performance case for sockets (e.g. AF_UNIX sockets)
18060Sstevel@tonic-gate 	 * which don't support the FireEngine accept fast-path. It is also
18070Sstevel@tonic-gate 	 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd
18080Sstevel@tonic-gate 	 * again. Neither sockfs nor TCP attempt to find out if some other
18090Sstevel@tonic-gate 	 * random module has been inserted in between (in which case we
18100Sstevel@tonic-gate 	 * should follow TLI accept behaviour). We blindly assume the worst
18110Sstevel@tonic-gate 	 * case and revert back to old behaviour i.e. TCP will not send us
18120Sstevel@tonic-gate 	 * any option (eager) and the accept should happen on the listener
18130Sstevel@tonic-gate 	 * queue. Any queued T_conn_ind have already got their options removed
18140Sstevel@tonic-gate 	 * by so_sock2_stream() when "sockmod" was I_POP'd.
18150Sstevel@tonic-gate 	 */
18160Sstevel@tonic-gate 	/*
18170Sstevel@tonic-gate 	 * Fill in the {O_}T_CONN_RES before getting SOLOCKED.
18180Sstevel@tonic-gate 	 */
18190Sstevel@tonic-gate 	if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) {
18200Sstevel@tonic-gate #ifdef	_ILP32
18210Sstevel@tonic-gate 		queue_t	*q;
18220Sstevel@tonic-gate 
18230Sstevel@tonic-gate 		/*
18240Sstevel@tonic-gate 		 * Find read queue in driver
18250Sstevel@tonic-gate 		 * Can safely do this since we "own" nso/nvp.
18260Sstevel@tonic-gate 		 */
18270Sstevel@tonic-gate 		q = strvp2wq(nvp)->q_next;
18280Sstevel@tonic-gate 		while (SAMESTR(q))
18290Sstevel@tonic-gate 			q = q->q_next;
18300Sstevel@tonic-gate 		q = RD(q);
18310Sstevel@tonic-gate 		conn_res->ACCEPTOR_id = (t_uscalar_t)q;
18320Sstevel@tonic-gate #else
18330Sstevel@tonic-gate 		conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev);
18340Sstevel@tonic-gate #endif	/* _ILP32 */
18350Sstevel@tonic-gate 		conn_res->PRIM_type = O_T_CONN_RES;
18360Sstevel@tonic-gate 		PRIM_type = O_T_CONN_RES;
18370Sstevel@tonic-gate 	} else {
18380Sstevel@tonic-gate 		conn_res->ACCEPTOR_id = nso->so_acceptor_id;
18390Sstevel@tonic-gate 		conn_res->PRIM_type = T_CONN_RES;
18400Sstevel@tonic-gate 		PRIM_type = T_CONN_RES;
18410Sstevel@tonic-gate 	}
18420Sstevel@tonic-gate 	conn_res->SEQ_number = SEQ_number;
18430Sstevel@tonic-gate 	conn_res->OPT_length = 0;
18440Sstevel@tonic-gate 	conn_res->OPT_offset = 0;
18450Sstevel@tonic-gate 
18460Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
18470Sstevel@tonic-gate 	so_lock_single(so);	/* Set SOLOCKED */
18480Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
18490Sstevel@tonic-gate 
18500Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, NULL,
18510Sstevel@tonic-gate 	    0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
18520Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
18530Sstevel@tonic-gate 	if (error) {
18540Sstevel@tonic-gate 		eprintsoline(so, error);
18550Sstevel@tonic-gate 		goto disconnect_vp;
18560Sstevel@tonic-gate 	}
18574379Sja97890 	error = sowaitprim(so, PRIM_type, T_OK_ACK,
18584379Sja97890 	    (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0);
18590Sstevel@tonic-gate 	if (error) {
18600Sstevel@tonic-gate 		eprintsoline(so, error);
18610Sstevel@tonic-gate 		goto disconnect_vp;
18620Sstevel@tonic-gate 	}
18634379Sja97890 	/*
18644379Sja97890 	 * If there is a sin/sin6 appended onto the T_OK_ACK use
18654379Sja97890 	 * that to set the local address. If this is not present
18664379Sja97890 	 * then we zero out the address and don't set the
18674678Sja97890 	 * SS_LADDR_VALID bit. For AF_UNIX endpoints we copy over
18684678Sja97890 	 * the pathname from the listening socket.
18694379Sja97890 	 */
18704379Sja97890 	sinlen = (nso->so_family == AF_INET) ? sizeof (sin_t) : sizeof (sin6_t);
18714379Sja97890 	if ((nso->so_family == AF_INET) || (nso->so_family == AF_INET6) &&
18724379Sja97890 	    MBLKL(ack_mp) == (sizeof (struct T_ok_ack) + sinlen)) {
18734379Sja97890 		ack_mp->b_rptr += sizeof (struct T_ok_ack);
18744379Sja97890 		bcopy(ack_mp->b_rptr, nso->so_laddr_sa, sinlen);
18754379Sja97890 		nso->so_laddr_len = sinlen;
18764379Sja97890 		nso->so_state |= SS_LADDR_VALID;
18774678Sja97890 	} else if (nso->so_family == AF_UNIX) {
18784678Sja97890 		ASSERT(so->so_family == AF_UNIX);
18794678Sja97890 		nso->so_laddr_len = so->so_laddr_len;
18804678Sja97890 		ASSERT(nso->so_laddr_len <= nso->so_laddr_maxlen);
18814678Sja97890 		bcopy(so->so_laddr_sa, nso->so_laddr_sa, nso->so_laddr_len);
18824678Sja97890 		nso->so_state |= SS_LADDR_VALID;
18834379Sja97890 	} else {
18844379Sja97890 		nso->so_laddr_len = so->so_laddr_len;
18854379Sja97890 		ASSERT(nso->so_laddr_len <= nso->so_laddr_maxlen);
18864379Sja97890 		bzero(nso->so_laddr_sa, nso->so_addr_size);
18874678Sja97890 		nso->so_laddr_sa->sa_family = nso->so_family;
18884379Sja97890 	}
18894379Sja97890 	freemsg(ack_mp);
18904379Sja97890 
18910Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
18920Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
18930Sstevel@tonic-gate 
18940Sstevel@tonic-gate 	nso->so_state |= SS_ISCONNECTED;
18950Sstevel@tonic-gate 
18960Sstevel@tonic-gate 	/*
18970Sstevel@tonic-gate 	 * Pass out new socket.
18980Sstevel@tonic-gate 	 */
18990Sstevel@tonic-gate 	if (nsop != NULL)
19000Sstevel@tonic-gate 		*nsop = nso;
19010Sstevel@tonic-gate 
19020Sstevel@tonic-gate 	return (0);
19030Sstevel@tonic-gate 
19040Sstevel@tonic-gate 
19050Sstevel@tonic-gate eproto_disc_unl:
19060Sstevel@tonic-gate 	error = EPROTO;
19070Sstevel@tonic-gate e_disc_unl:
19080Sstevel@tonic-gate 	eprintsoline(so, error);
19090Sstevel@tonic-gate 	goto disconnect_unlocked;
19100Sstevel@tonic-gate 
19110Sstevel@tonic-gate pr_disc_vp_unl:
19120Sstevel@tonic-gate 	eprintsoline(so, error);
19130Sstevel@tonic-gate disconnect_vp_unlocked:
19145331Samw 	(void) VOP_CLOSE(nvp, 0, 1, 0, CRED(), NULL);
19150Sstevel@tonic-gate 	VN_RELE(nvp);
19160Sstevel@tonic-gate disconnect_unlocked:
19170Sstevel@tonic-gate 	(void) sodisconnect(so, SEQ_number, 0);
19180Sstevel@tonic-gate 	return (error);
19190Sstevel@tonic-gate 
19200Sstevel@tonic-gate pr_disc_vp:
19210Sstevel@tonic-gate 	eprintsoline(so, error);
19220Sstevel@tonic-gate disconnect_vp:
19230Sstevel@tonic-gate 	(void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD);
19240Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
19250Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
19265331Samw 	(void) VOP_CLOSE(nvp, 0, 1, 0, CRED(), NULL);
19270Sstevel@tonic-gate 	VN_RELE(nvp);
19280Sstevel@tonic-gate 	return (error);
19290Sstevel@tonic-gate 
19300Sstevel@tonic-gate conn_bad:	/* Note: SunOS 4/BSD unconditionally returns EINVAL here */
19310Sstevel@tonic-gate 	error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW)
19320Sstevel@tonic-gate 	    ? EOPNOTSUPP : EINVAL;
19330Sstevel@tonic-gate e_bad:
19340Sstevel@tonic-gate 	eprintsoline(so, error);
19350Sstevel@tonic-gate 	return (error);
19360Sstevel@tonic-gate }
19370Sstevel@tonic-gate 
19380Sstevel@tonic-gate /*
19390Sstevel@tonic-gate  * connect a socket.
19400Sstevel@tonic-gate  *
19410Sstevel@tonic-gate  * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to
19420Sstevel@tonic-gate  * unconnect (by specifying a null address).
19430Sstevel@tonic-gate  */
19440Sstevel@tonic-gate int
19450Sstevel@tonic-gate sotpi_connect(struct sonode *so,
19460Sstevel@tonic-gate 	const struct sockaddr *name,
19470Sstevel@tonic-gate 	socklen_t namelen,
19480Sstevel@tonic-gate 	int fflag,
19490Sstevel@tonic-gate 	int flags)
19500Sstevel@tonic-gate {
19510Sstevel@tonic-gate 	struct T_conn_req	conn_req;
19520Sstevel@tonic-gate 	int			error = 0;
19530Sstevel@tonic-gate 	mblk_t			*mp;
19540Sstevel@tonic-gate 	void			*src;
19550Sstevel@tonic-gate 	socklen_t		srclen;
19560Sstevel@tonic-gate 	void			*addr;
19570Sstevel@tonic-gate 	socklen_t		addrlen;
19580Sstevel@tonic-gate 	boolean_t		need_unlock;
19590Sstevel@tonic-gate 
19600Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n",
19617240Srh87107 	    (void *)so, (void *)name, namelen, fflag, flags,
19625240Snordmark 	    pr_state(so->so_state, so->so_mode)));
19630Sstevel@tonic-gate 
19640Sstevel@tonic-gate 	/*
19650Sstevel@tonic-gate 	 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to
19660Sstevel@tonic-gate 	 * avoid sleeping for memory with SOLOCKED held.
19670Sstevel@tonic-gate 	 * We know that the T_CONN_REQ can't be larger than 2 * so_faddr_maxlen
19680Sstevel@tonic-gate 	 * + sizeof (struct T_opthdr).
19690Sstevel@tonic-gate 	 * (the AF_UNIX so_ux_addr_xlate() does not make the address
19700Sstevel@tonic-gate 	 * exceed so_faddr_maxlen).
19710Sstevel@tonic-gate 	 */
19720Sstevel@tonic-gate 	mp = soallocproto(sizeof (struct T_conn_req) +
19730Sstevel@tonic-gate 	    2 * so->so_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR);
19740Sstevel@tonic-gate 	if (mp == NULL) {
19750Sstevel@tonic-gate 		/*
19760Sstevel@tonic-gate 		 * Connect can not fail with ENOBUFS. A signal was
19770Sstevel@tonic-gate 		 * caught so return EINTR.
19780Sstevel@tonic-gate 		 */
19790Sstevel@tonic-gate 		error = EINTR;
19800Sstevel@tonic-gate 		eprintsoline(so, error);
19810Sstevel@tonic-gate 		return (error);
19820Sstevel@tonic-gate 	}
19830Sstevel@tonic-gate 
19840Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
19850Sstevel@tonic-gate 	/*
19865694Sjprakash 	 * Make sure there is a preallocated T_unbind_req message
19875694Sjprakash 	 * before any binding. This message is allocated when the
19885694Sjprakash 	 * socket is created. Since another thread can consume
19895694Sjprakash 	 * so_unbind_mp by the time we return from so_lock_single(),
19905694Sjprakash 	 * we should check the availability of so_unbind_mp after
19915694Sjprakash 	 * we return from so_lock_single().
19920Sstevel@tonic-gate 	 */
19935694Sjprakash 
19945694Sjprakash 	so_lock_single(so);	/* Set SOLOCKED */
19955694Sjprakash 	need_unlock = B_TRUE;
19965694Sjprakash 
19970Sstevel@tonic-gate 	if (so->so_unbind_mp == NULL) {
19980Sstevel@tonic-gate 		dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n"));
19990Sstevel@tonic-gate 		/* NOTE: holding so_lock while sleeping */
20000Sstevel@tonic-gate 		so->so_unbind_mp =
20010Sstevel@tonic-gate 		    soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR);
20020Sstevel@tonic-gate 		if (so->so_unbind_mp == NULL) {
20030Sstevel@tonic-gate 			error = EINTR;
20040Sstevel@tonic-gate 			goto done;
20050Sstevel@tonic-gate 		}
20060Sstevel@tonic-gate 	}
20070Sstevel@tonic-gate 
20080Sstevel@tonic-gate 	/*
20090Sstevel@tonic-gate 	 * Can't have done a listen before connecting.
20100Sstevel@tonic-gate 	 */
20110Sstevel@tonic-gate 	if (so->so_state & SS_ACCEPTCONN) {
20120Sstevel@tonic-gate 		error = EOPNOTSUPP;
20130Sstevel@tonic-gate 		goto done;
20140Sstevel@tonic-gate 	}
20150Sstevel@tonic-gate 
20160Sstevel@tonic-gate 	/*
20170Sstevel@tonic-gate 	 * Must be bound with the transport
20180Sstevel@tonic-gate 	 */
20190Sstevel@tonic-gate 	if (!(so->so_state & SS_ISBOUND)) {
20200Sstevel@tonic-gate 		if ((so->so_family == AF_INET || so->so_family == AF_INET6) &&
20210Sstevel@tonic-gate 		    /*CONSTCOND*/
20220Sstevel@tonic-gate 		    so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) {
20230Sstevel@tonic-gate 			/*
20240Sstevel@tonic-gate 			 * Optimization for AF_INET{,6} transports
20250Sstevel@tonic-gate 			 * that can handle a T_CONN_REQ without being bound.
20260Sstevel@tonic-gate 			 */
20270Sstevel@tonic-gate 			so_automatic_bind(so);
20280Sstevel@tonic-gate 		} else {
20290Sstevel@tonic-gate 			error = sotpi_bind(so, NULL, 0,
20300Sstevel@tonic-gate 			    _SOBIND_UNSPEC|_SOBIND_LOCK_HELD);
20310Sstevel@tonic-gate 			if (error)
20320Sstevel@tonic-gate 				goto done;
20330Sstevel@tonic-gate 		}
20340Sstevel@tonic-gate 		ASSERT(so->so_state & SS_ISBOUND);
20350Sstevel@tonic-gate 		flags |= _SOCONNECT_DID_BIND;
20360Sstevel@tonic-gate 	}
20370Sstevel@tonic-gate 
20380Sstevel@tonic-gate 	/*
20390Sstevel@tonic-gate 	 * Handle a connect to a name parameter of type AF_UNSPEC like a
20400Sstevel@tonic-gate 	 * connect to a null address. This is the portable method to
20410Sstevel@tonic-gate 	 * unconnect a socket.
20420Sstevel@tonic-gate 	 */
20430Sstevel@tonic-gate 	if ((namelen >= sizeof (sa_family_t)) &&
20440Sstevel@tonic-gate 	    (name->sa_family == AF_UNSPEC)) {
20450Sstevel@tonic-gate 		name = NULL;
20460Sstevel@tonic-gate 		namelen = 0;
20470Sstevel@tonic-gate 	}
20480Sstevel@tonic-gate 
20490Sstevel@tonic-gate 	/*
20500Sstevel@tonic-gate 	 * Check that we are not already connected.
20510Sstevel@tonic-gate 	 * A connection-oriented socket cannot be reconnected.
20520Sstevel@tonic-gate 	 * A connected connection-less socket can be
20530Sstevel@tonic-gate 	 * - connected to a different address by a subsequent connect
20540Sstevel@tonic-gate 	 * - "unconnected" by a connect to the NULL address
20550Sstevel@tonic-gate 	 */
20560Sstevel@tonic-gate 	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) {
20570Sstevel@tonic-gate 		ASSERT(!(flags & _SOCONNECT_DID_BIND));
20580Sstevel@tonic-gate 		if (so->so_mode & SM_CONNREQUIRED) {
20590Sstevel@tonic-gate 			/* Connection-oriented socket */
20600Sstevel@tonic-gate 			error = so->so_state & SS_ISCONNECTED ?
20610Sstevel@tonic-gate 			    EISCONN : EALREADY;
20620Sstevel@tonic-gate 			goto done;
20630Sstevel@tonic-gate 		}
20640Sstevel@tonic-gate 		/* Connection-less socket */
20650Sstevel@tonic-gate 		if (name == NULL) {
20660Sstevel@tonic-gate 			/*
20670Sstevel@tonic-gate 			 * Remove the connected state and clear SO_DGRAM_ERRIND
20680Sstevel@tonic-gate 			 * since it was set when the socket was connected.
20690Sstevel@tonic-gate 			 * If this is UDP also send down a T_DISCON_REQ.
20700Sstevel@tonic-gate 			 */
20710Sstevel@tonic-gate 			int val;
20720Sstevel@tonic-gate 
20730Sstevel@tonic-gate 			if ((so->so_family == AF_INET ||
20745240Snordmark 			    so->so_family == AF_INET6) &&
20750Sstevel@tonic-gate 			    (so->so_type == SOCK_DGRAM ||
20765240Snordmark 			    so->so_type == SOCK_RAW) &&
20770Sstevel@tonic-gate 			    /*CONSTCOND*/
20780Sstevel@tonic-gate 			    !soconnect_tpi_udp) {
20790Sstevel@tonic-gate 				/* XXX What about implicitly unbinding here? */
20800Sstevel@tonic-gate 				error = sodisconnect(so, -1,
20815240Snordmark 				    _SODISCONNECT_LOCK_HELD);
20820Sstevel@tonic-gate 			} else {
20830Sstevel@tonic-gate 				so->so_state &=
20840Sstevel@tonic-gate 				    ~(SS_ISCONNECTED | SS_ISCONNECTING |
20850Sstevel@tonic-gate 				    SS_FADDR_VALID);
20860Sstevel@tonic-gate 				so->so_faddr_len = 0;
20870Sstevel@tonic-gate 			}
20880Sstevel@tonic-gate 
20890Sstevel@tonic-gate 			so_unlock_single(so, SOLOCKED);
20900Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
20910Sstevel@tonic-gate 
20920Sstevel@tonic-gate 			val = 0;
20930Sstevel@tonic-gate 			(void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND,
20945240Snordmark 			    &val, (t_uscalar_t)sizeof (val));
20950Sstevel@tonic-gate 
20960Sstevel@tonic-gate 			mutex_enter(&so->so_lock);
20970Sstevel@tonic-gate 			so_lock_single(so);	/* Set SOLOCKED */
20980Sstevel@tonic-gate 			goto done;
20990Sstevel@tonic-gate 		}
21000Sstevel@tonic-gate 	}
21010Sstevel@tonic-gate 	ASSERT(so->so_state & SS_ISBOUND);
21020Sstevel@tonic-gate 
21030Sstevel@tonic-gate 	if (name == NULL || namelen == 0) {
21040Sstevel@tonic-gate 		error = EINVAL;
21050Sstevel@tonic-gate 		goto done;
21060Sstevel@tonic-gate 	}
21070Sstevel@tonic-gate 	/*
21080Sstevel@tonic-gate 	 * Mark the socket if so_faddr_sa represents the transport level
21090Sstevel@tonic-gate 	 * address.
21100Sstevel@tonic-gate 	 */
21110Sstevel@tonic-gate 	if (flags & _SOCONNECT_NOXLATE) {
21120Sstevel@tonic-gate 		struct sockaddr_ux	*soaddr_ux;
21130Sstevel@tonic-gate 
21140Sstevel@tonic-gate 		ASSERT(so->so_family == AF_UNIX);
21150Sstevel@tonic-gate 		if (namelen != sizeof (struct sockaddr_ux)) {
21160Sstevel@tonic-gate 			error = EINVAL;
21170Sstevel@tonic-gate 			goto done;
21180Sstevel@tonic-gate 		}
21190Sstevel@tonic-gate 		soaddr_ux = (struct sockaddr_ux *)name;
21200Sstevel@tonic-gate 		name = (struct sockaddr *)&soaddr_ux->sou_addr;
21210Sstevel@tonic-gate 		namelen = sizeof (soaddr_ux->sou_addr);
21220Sstevel@tonic-gate 		so->so_state |= SS_FADDR_NOXLATE;
21230Sstevel@tonic-gate 	}
21240Sstevel@tonic-gate 
21250Sstevel@tonic-gate 	/*
21260Sstevel@tonic-gate 	 * Length and family checks.
21270Sstevel@tonic-gate 	 */
21280Sstevel@tonic-gate 	error = so_addr_verify(so, name, namelen);
21290Sstevel@tonic-gate 	if (error)
21300Sstevel@tonic-gate 		goto bad;
21310Sstevel@tonic-gate 
21320Sstevel@tonic-gate 	/*
21330Sstevel@tonic-gate 	 * Save foreign address. Needed for AF_UNIX as well as
21340Sstevel@tonic-gate 	 * transport providers that do not support TI_GETPEERNAME.
21350Sstevel@tonic-gate 	 * Also used for cached foreign address for TCP and UDP.
21360Sstevel@tonic-gate 	 */
21370Sstevel@tonic-gate 	if (namelen > (t_uscalar_t)so->so_faddr_maxlen) {
21380Sstevel@tonic-gate 		error = EINVAL;
21390Sstevel@tonic-gate 		goto done;
21400Sstevel@tonic-gate 	}
21410Sstevel@tonic-gate 	so->so_faddr_len = (socklen_t)namelen;
21420Sstevel@tonic-gate 	ASSERT(so->so_faddr_len <= so->so_faddr_maxlen);
21430Sstevel@tonic-gate 	bcopy(name, so->so_faddr_sa, namelen);
21440Sstevel@tonic-gate 	so->so_state |= SS_FADDR_VALID;
21450Sstevel@tonic-gate 
21460Sstevel@tonic-gate 	if (so->so_family == AF_UNIX) {
21470Sstevel@tonic-gate 		if (so->so_state & SS_FADDR_NOXLATE) {
21480Sstevel@tonic-gate 			/*
21490Sstevel@tonic-gate 			 * Already have a transport internal address. Do not
21500Sstevel@tonic-gate 			 * pass any (transport internal) source address.
21510Sstevel@tonic-gate 			 */
21520Sstevel@tonic-gate 			addr = so->so_faddr_sa;
21530Sstevel@tonic-gate 			addrlen = (t_uscalar_t)so->so_faddr_len;
21540Sstevel@tonic-gate 			src = NULL;
21550Sstevel@tonic-gate 			srclen = 0;
21560Sstevel@tonic-gate 		} else {
21570Sstevel@tonic-gate 			/*
21580Sstevel@tonic-gate 			 * Pass the sockaddr_un source address as an option
21590Sstevel@tonic-gate 			 * and translate the remote address.
21600Sstevel@tonic-gate 			 * Holding so_lock thus so_laddr_sa can not change.
21610Sstevel@tonic-gate 			 */
21620Sstevel@tonic-gate 			src = so->so_laddr_sa;
21630Sstevel@tonic-gate 			srclen = (t_uscalar_t)so->so_laddr_len;
21640Sstevel@tonic-gate 			dprintso(so, 1,
21655240Snordmark 			    ("sotpi_connect UNIX: srclen %d, src %p\n",
21665240Snordmark 			    srclen, src));
21670Sstevel@tonic-gate 			error = so_ux_addr_xlate(so,
21685240Snordmark 			    so->so_faddr_sa, (socklen_t)so->so_faddr_len,
21695240Snordmark 			    (flags & _SOCONNECT_XPG4_2),
21705240Snordmark 			    &addr, &addrlen);
21710Sstevel@tonic-gate 			if (error)
21720Sstevel@tonic-gate 				goto bad;
21730Sstevel@tonic-gate 		}
21740Sstevel@tonic-gate 	} else {
21750Sstevel@tonic-gate 		addr = so->so_faddr_sa;
21760Sstevel@tonic-gate 		addrlen = (t_uscalar_t)so->so_faddr_len;
21770Sstevel@tonic-gate 		src = NULL;
21780Sstevel@tonic-gate 		srclen = 0;
21790Sstevel@tonic-gate 	}
21800Sstevel@tonic-gate 	/*
21810Sstevel@tonic-gate 	 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND
21820Sstevel@tonic-gate 	 * option which asks the transport provider to send T_UDERR_IND
21830Sstevel@tonic-gate 	 * messages. These T_UDERR_IND messages are used to return connected
21840Sstevel@tonic-gate 	 * style errors (e.g. ECONNRESET) for connected datagram sockets.
21850Sstevel@tonic-gate 	 *
21860Sstevel@tonic-gate 	 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets)
21870Sstevel@tonic-gate 	 * we send down a T_CONN_REQ. This is needed to let the
21880Sstevel@tonic-gate 	 * transport assign a local address that is consistent with
21890Sstevel@tonic-gate 	 * the remote address. Applications depend on a getsockname()
21900Sstevel@tonic-gate 	 * after a connect() to retrieve the "source" IP address for
21910Sstevel@tonic-gate 	 * the connected socket.  Invalidate the cached local address
21920Sstevel@tonic-gate 	 * to force getsockname() to enquire of the transport.
21930Sstevel@tonic-gate 	 */
21940Sstevel@tonic-gate 	if (!(so->so_mode & SM_CONNREQUIRED)) {
21950Sstevel@tonic-gate 		/*
21960Sstevel@tonic-gate 		 * Datagram socket.
21970Sstevel@tonic-gate 		 */
21980Sstevel@tonic-gate 		int32_t val;
21990Sstevel@tonic-gate 
22000Sstevel@tonic-gate 		so_unlock_single(so, SOLOCKED);
22010Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
22020Sstevel@tonic-gate 
22030Sstevel@tonic-gate 		val = 1;
22040Sstevel@tonic-gate 		(void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND,
22055240Snordmark 		    &val, (t_uscalar_t)sizeof (val));
22060Sstevel@tonic-gate 
22070Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
22080Sstevel@tonic-gate 		so_lock_single(so);	/* Set SOLOCKED */
22090Sstevel@tonic-gate 		if ((so->so_family != AF_INET && so->so_family != AF_INET6) ||
22100Sstevel@tonic-gate 		    (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) ||
22110Sstevel@tonic-gate 		    soconnect_tpi_udp) {
22120Sstevel@tonic-gate 			soisconnected(so);
22130Sstevel@tonic-gate 			goto done;
22140Sstevel@tonic-gate 		}
22150Sstevel@tonic-gate 		/*
22160Sstevel@tonic-gate 		 * Send down T_CONN_REQ etc.
22170Sstevel@tonic-gate 		 * Clear fflag to avoid returning EWOULDBLOCK.
22180Sstevel@tonic-gate 		 */
22190Sstevel@tonic-gate 		fflag = 0;
22200Sstevel@tonic-gate 		ASSERT(so->so_family != AF_UNIX);
22210Sstevel@tonic-gate 		so->so_state &= ~SS_LADDR_VALID;
22220Sstevel@tonic-gate 	} else if (so->so_laddr_len != 0) {
22230Sstevel@tonic-gate 		/*
22240Sstevel@tonic-gate 		 * If the local address or port was "any" then it may be
22250Sstevel@tonic-gate 		 * changed by the transport as a result of the
22260Sstevel@tonic-gate 		 * connect.  Invalidate the cached version if we have one.
22270Sstevel@tonic-gate 		 */
22280Sstevel@tonic-gate 		switch (so->so_family) {
22290Sstevel@tonic-gate 		case AF_INET:
22300Sstevel@tonic-gate 			ASSERT(so->so_laddr_len == (socklen_t)sizeof (sin_t));
22310Sstevel@tonic-gate 			if (((sin_t *)so->so_laddr_sa)->sin_addr.s_addr ==
22320Sstevel@tonic-gate 			    INADDR_ANY ||
22330Sstevel@tonic-gate 			    ((sin_t *)so->so_laddr_sa)->sin_port == 0)
22340Sstevel@tonic-gate 				so->so_state &= ~SS_LADDR_VALID;
22350Sstevel@tonic-gate 			break;
22360Sstevel@tonic-gate 
22370Sstevel@tonic-gate 		case AF_INET6:
22380Sstevel@tonic-gate 			ASSERT(so->so_laddr_len == (socklen_t)sizeof (sin6_t));
22390Sstevel@tonic-gate 			if (IN6_IS_ADDR_UNSPECIFIED(
22400Sstevel@tonic-gate 			    &((sin6_t *)so->so_laddr_sa) ->sin6_addr) ||
22410Sstevel@tonic-gate 			    IN6_IS_ADDR_V4MAPPED_ANY(
22420Sstevel@tonic-gate 			    &((sin6_t *)so->so_laddr_sa)->sin6_addr) ||
22430Sstevel@tonic-gate 			    ((sin6_t *)so->so_laddr_sa)->sin6_port == 0)
22445240Snordmark 				so->so_state &= ~SS_LADDR_VALID;
22450Sstevel@tonic-gate 			break;
22460Sstevel@tonic-gate 
22470Sstevel@tonic-gate 		default:
22480Sstevel@tonic-gate 			break;
22490Sstevel@tonic-gate 		}
22500Sstevel@tonic-gate 	}
22510Sstevel@tonic-gate 
22520Sstevel@tonic-gate 	/*
22530Sstevel@tonic-gate 	 * Check for failure of an earlier call
22540Sstevel@tonic-gate 	 */
22550Sstevel@tonic-gate 	if (so->so_error != 0)
22560Sstevel@tonic-gate 		goto so_bad;
22570Sstevel@tonic-gate 
22580Sstevel@tonic-gate 	/*
22590Sstevel@tonic-gate 	 * Send down T_CONN_REQ. Message was allocated above.
22600Sstevel@tonic-gate 	 */
22610Sstevel@tonic-gate 	conn_req.PRIM_type = T_CONN_REQ;
22620Sstevel@tonic-gate 	conn_req.DEST_length = addrlen;
22630Sstevel@tonic-gate 	conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req);
22640Sstevel@tonic-gate 	if (srclen == 0) {
22650Sstevel@tonic-gate 		conn_req.OPT_length = 0;
22660Sstevel@tonic-gate 		conn_req.OPT_offset = 0;
22670Sstevel@tonic-gate 		soappendmsg(mp, &conn_req, sizeof (conn_req));
22680Sstevel@tonic-gate 		soappendmsg(mp, addr, addrlen);
22690Sstevel@tonic-gate 	} else {
22700Sstevel@tonic-gate 		/*
22710Sstevel@tonic-gate 		 * There is a AF_UNIX sockaddr_un to include as a source
22720Sstevel@tonic-gate 		 * address option.
22730Sstevel@tonic-gate 		 */
22740Sstevel@tonic-gate 		struct T_opthdr toh;
22750Sstevel@tonic-gate 
22760Sstevel@tonic-gate 		toh.level = SOL_SOCKET;
22770Sstevel@tonic-gate 		toh.name = SO_SRCADDR;
22780Sstevel@tonic-gate 		toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr));
22790Sstevel@tonic-gate 		toh.status = 0;
22800Sstevel@tonic-gate 		conn_req.OPT_length =
22815240Snordmark 		    (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen));
22820Sstevel@tonic-gate 		conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) +
22835240Snordmark 		    _TPI_ALIGN_TOPT(addrlen));
22840Sstevel@tonic-gate 
22850Sstevel@tonic-gate 		soappendmsg(mp, &conn_req, sizeof (conn_req));
22860Sstevel@tonic-gate 		soappendmsg(mp, addr, addrlen);
22870Sstevel@tonic-gate 		mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen;
22880Sstevel@tonic-gate 		soappendmsg(mp, &toh, sizeof (toh));
22890Sstevel@tonic-gate 		soappendmsg(mp, src, srclen);
22900Sstevel@tonic-gate 		mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen;
22910Sstevel@tonic-gate 		ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
22920Sstevel@tonic-gate 	}
22930Sstevel@tonic-gate 	/*
22940Sstevel@tonic-gate 	 * Set SS_ISCONNECTING before sending down the T_CONN_REQ
22950Sstevel@tonic-gate 	 * in order to have the right state when the T_CONN_CON shows up.
22960Sstevel@tonic-gate 	 */
22970Sstevel@tonic-gate 	soisconnecting(so);
22980Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
22990Sstevel@tonic-gate 
23000Sstevel@tonic-gate 	if (audit_active)
23010Sstevel@tonic-gate 		audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0);
23020Sstevel@tonic-gate 
23030Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
23045240Snordmark 	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
23050Sstevel@tonic-gate 	mp = NULL;
23060Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
23070Sstevel@tonic-gate 	if (error != 0)
23080Sstevel@tonic-gate 		goto bad;
23090Sstevel@tonic-gate 
23100Sstevel@tonic-gate 	if ((error = sowaitokack(so, T_CONN_REQ)) != 0)
23110Sstevel@tonic-gate 		goto bad;
23120Sstevel@tonic-gate 
23130Sstevel@tonic-gate 	/* Allow other threads to access the socket */
23140Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
23150Sstevel@tonic-gate 	need_unlock = B_FALSE;
23160Sstevel@tonic-gate 
23170Sstevel@tonic-gate 	/*
23180Sstevel@tonic-gate 	 * Wait until we get a T_CONN_CON or an error
23190Sstevel@tonic-gate 	 */
23200Sstevel@tonic-gate 	if ((error = sowaitconnected(so, fflag, 0)) != 0) {
23210Sstevel@tonic-gate 		so_lock_single(so);	/* Set SOLOCKED */
23220Sstevel@tonic-gate 		need_unlock = B_TRUE;
23230Sstevel@tonic-gate 	}
23240Sstevel@tonic-gate 
23250Sstevel@tonic-gate done:
23260Sstevel@tonic-gate 	freemsg(mp);
23270Sstevel@tonic-gate 	switch (error) {
23280Sstevel@tonic-gate 	case EINPROGRESS:
23290Sstevel@tonic-gate 	case EALREADY:
23300Sstevel@tonic-gate 	case EISCONN:
23310Sstevel@tonic-gate 	case EINTR:
23320Sstevel@tonic-gate 		/* Non-fatal errors */
23330Sstevel@tonic-gate 		so->so_state &= ~SS_LADDR_VALID;
23340Sstevel@tonic-gate 		/* FALLTHRU */
23350Sstevel@tonic-gate 	case 0:
23360Sstevel@tonic-gate 		break;
23370Sstevel@tonic-gate 
23380Sstevel@tonic-gate 	case EHOSTUNREACH:
23390Sstevel@tonic-gate 		if (flags & _SOCONNECT_XPG4_2) {
23400Sstevel@tonic-gate 			/*
23410Sstevel@tonic-gate 			 * X/Open specification contains a requirement that
23420Sstevel@tonic-gate 			 * ENETUNREACH be returned but does not require
23430Sstevel@tonic-gate 			 * EHOSTUNREACH. In order to keep the test suite
23440Sstevel@tonic-gate 			 * happy we mess with the errno here.
23450Sstevel@tonic-gate 			 */
23460Sstevel@tonic-gate 			error = ENETUNREACH;
23470Sstevel@tonic-gate 		}
23480Sstevel@tonic-gate 		/* FALLTHRU */
23490Sstevel@tonic-gate 
23500Sstevel@tonic-gate 	default:
23510Sstevel@tonic-gate 		ASSERT(need_unlock);
23520Sstevel@tonic-gate 		/*
23530Sstevel@tonic-gate 		 * Fatal errors: clear SS_ISCONNECTING in case it was set,
23540Sstevel@tonic-gate 		 * and invalidate local-address cache
23550Sstevel@tonic-gate 		 */
23560Sstevel@tonic-gate 		so->so_state &= ~(SS_ISCONNECTING | SS_LADDR_VALID);
23570Sstevel@tonic-gate 		/* A discon_ind might have already unbound us */
23580Sstevel@tonic-gate 		if ((flags & _SOCONNECT_DID_BIND) &&
23590Sstevel@tonic-gate 		    (so->so_state & SS_ISBOUND)) {
23600Sstevel@tonic-gate 			int err;
23610Sstevel@tonic-gate 
23620Sstevel@tonic-gate 			err = sotpi_unbind(so, 0);
23630Sstevel@tonic-gate 			/* LINTED - statement has no conseq */
23640Sstevel@tonic-gate 			if (err) {
23650Sstevel@tonic-gate 				eprintsoline(so, err);
23660Sstevel@tonic-gate 			}
23670Sstevel@tonic-gate 		}
23680Sstevel@tonic-gate 		break;
23690Sstevel@tonic-gate 	}
23700Sstevel@tonic-gate 	if (need_unlock)
23710Sstevel@tonic-gate 		so_unlock_single(so, SOLOCKED);
23720Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
23730Sstevel@tonic-gate 	return (error);
23740Sstevel@tonic-gate 
23750Sstevel@tonic-gate so_bad:	error = sogeterr(so);
23760Sstevel@tonic-gate bad:	eprintsoline(so, error);
23770Sstevel@tonic-gate 	goto done;
23780Sstevel@tonic-gate }
23790Sstevel@tonic-gate 
23800Sstevel@tonic-gate int
23810Sstevel@tonic-gate sotpi_shutdown(struct sonode *so, int how)
23820Sstevel@tonic-gate {
23830Sstevel@tonic-gate 	struct T_ordrel_req	ordrel_req;
23840Sstevel@tonic-gate 	mblk_t			*mp;
23850Sstevel@tonic-gate 	uint_t			old_state, state_change;
23860Sstevel@tonic-gate 	int			error = 0;
23870Sstevel@tonic-gate 
23880Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n",
23897240Srh87107 	    (void *)so, how, pr_state(so->so_state, so->so_mode)));
23900Sstevel@tonic-gate 
23910Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
23920Sstevel@tonic-gate 	so_lock_single(so);	/* Set SOLOCKED */
23930Sstevel@tonic-gate 
23940Sstevel@tonic-gate 	/*
23950Sstevel@tonic-gate 	 * SunOS 4.X has no check for datagram sockets.
23960Sstevel@tonic-gate 	 * 5.X checks that it is connected (ENOTCONN)
23970Sstevel@tonic-gate 	 * X/Open requires that we check the connected state.
23980Sstevel@tonic-gate 	 */
23990Sstevel@tonic-gate 	if (!(so->so_state & SS_ISCONNECTED)) {
24000Sstevel@tonic-gate 		if (!xnet_skip_checks) {
24010Sstevel@tonic-gate 			error = ENOTCONN;
24020Sstevel@tonic-gate 			if (xnet_check_print) {
24030Sstevel@tonic-gate 				printf("sockfs: X/Open shutdown check "
24045240Snordmark 				    "caused ENOTCONN\n");
24050Sstevel@tonic-gate 			}
24060Sstevel@tonic-gate 		}
24070Sstevel@tonic-gate 		goto done;
24080Sstevel@tonic-gate 	}
24090Sstevel@tonic-gate 	/*
24100Sstevel@tonic-gate 	 * Record the current state and then perform any state changes.
24110Sstevel@tonic-gate 	 * Then use the difference between the old and new states to
24120Sstevel@tonic-gate 	 * determine which messages need to be sent.
24130Sstevel@tonic-gate 	 * This prevents e.g. duplicate T_ORDREL_REQ when there are
24140Sstevel@tonic-gate 	 * duplicate calls to shutdown().
24150Sstevel@tonic-gate 	 */
24160Sstevel@tonic-gate 	old_state = so->so_state;
24170Sstevel@tonic-gate 
24180Sstevel@tonic-gate 	switch (how) {
24190Sstevel@tonic-gate 	case 0:
24200Sstevel@tonic-gate 		socantrcvmore(so);
24210Sstevel@tonic-gate 		break;
24220Sstevel@tonic-gate 	case 1:
24230Sstevel@tonic-gate 		socantsendmore(so);
24240Sstevel@tonic-gate 		break;
24250Sstevel@tonic-gate 	case 2:
24260Sstevel@tonic-gate 		socantsendmore(so);
24270Sstevel@tonic-gate 		socantrcvmore(so);
24280Sstevel@tonic-gate 		break;
24290Sstevel@tonic-gate 	default:
24300Sstevel@tonic-gate 		error = EINVAL;
24310Sstevel@tonic-gate 		goto done;
24320Sstevel@tonic-gate 	}
24330Sstevel@tonic-gate 
24340Sstevel@tonic-gate 	/*
24350Sstevel@tonic-gate 	 * Assumes that the SS_CANT* flags are never cleared in the above code.
24360Sstevel@tonic-gate 	 */
24370Sstevel@tonic-gate 	state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) -
24385240Snordmark 	    (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE));
24390Sstevel@tonic-gate 	ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0);
24400Sstevel@tonic-gate 
24410Sstevel@tonic-gate 	switch (state_change) {
24420Sstevel@tonic-gate 	case 0:
24430Sstevel@tonic-gate 		dprintso(so, 1,
24440Sstevel@tonic-gate 		    ("sotpi_shutdown: nothing to send in state 0x%x\n",
24450Sstevel@tonic-gate 		    so->so_state));
24460Sstevel@tonic-gate 		goto done;
24470Sstevel@tonic-gate 
24480Sstevel@tonic-gate 	case SS_CANTRCVMORE:
24490Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
24500Sstevel@tonic-gate 		strseteof(SOTOV(so), 1);
24510Sstevel@tonic-gate 		/*
24520Sstevel@tonic-gate 		 * strseteof takes care of read side wakeups,
24530Sstevel@tonic-gate 		 * pollwakeups, and signals.
24540Sstevel@tonic-gate 		 */
24550Sstevel@tonic-gate 		/*
24560Sstevel@tonic-gate 		 * Get the read lock before flushing data to avoid problems
24570Sstevel@tonic-gate 		 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg.
24580Sstevel@tonic-gate 		 */
24590Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
24600Sstevel@tonic-gate 		(void) so_lock_read(so, 0);	/* Set SOREADLOCKED */
24610Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
24620Sstevel@tonic-gate 
24630Sstevel@tonic-gate 		/* Flush read side queue */
24640Sstevel@tonic-gate 		strflushrq(SOTOV(so), FLUSHALL);
24650Sstevel@tonic-gate 
24660Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
24670Sstevel@tonic-gate 		so_unlock_read(so);		/* Clear SOREADLOCKED */
24680Sstevel@tonic-gate 		break;
24690Sstevel@tonic-gate 
24700Sstevel@tonic-gate 	case SS_CANTSENDMORE:
24710Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
24720Sstevel@tonic-gate 		strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
24730Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
24740Sstevel@tonic-gate 		break;
24750Sstevel@tonic-gate 
24760Sstevel@tonic-gate 	case SS_CANTSENDMORE|SS_CANTRCVMORE:
24770Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
24780Sstevel@tonic-gate 		strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
24790Sstevel@tonic-gate 		strseteof(SOTOV(so), 1);
24800Sstevel@tonic-gate 		/*
24810Sstevel@tonic-gate 		 * strseteof takes care of read side wakeups,
24820Sstevel@tonic-gate 		 * pollwakeups, and signals.
24830Sstevel@tonic-gate 		 */
24840Sstevel@tonic-gate 		/*
24850Sstevel@tonic-gate 		 * Get the read lock before flushing data to avoid problems
24860Sstevel@tonic-gate 		 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg.
24870Sstevel@tonic-gate 		 */
24880Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
24890Sstevel@tonic-gate 		(void) so_lock_read(so, 0);	/* Set SOREADLOCKED */
24900Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
24910Sstevel@tonic-gate 
24920Sstevel@tonic-gate 		/* Flush read side queue */
24930Sstevel@tonic-gate 		strflushrq(SOTOV(so), FLUSHALL);
24940Sstevel@tonic-gate 
24950Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
24960Sstevel@tonic-gate 		so_unlock_read(so);		/* Clear SOREADLOCKED */
24970Sstevel@tonic-gate 		break;
24980Sstevel@tonic-gate 	}
24990Sstevel@tonic-gate 
25000Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
25010Sstevel@tonic-gate 
25020Sstevel@tonic-gate 	/*
25030Sstevel@tonic-gate 	 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them
25040Sstevel@tonic-gate 	 * was set due to this call and the new state has both of them set:
25050Sstevel@tonic-gate 	 *	Send the AF_UNIX close indication
25060Sstevel@tonic-gate 	 *	For T_COTS send a discon_ind
25070Sstevel@tonic-gate 	 *
25080Sstevel@tonic-gate 	 * If cantsend was set due to this call:
25090Sstevel@tonic-gate 	 *	For T_COTSORD send an ordrel_ind
25100Sstevel@tonic-gate 	 *
25110Sstevel@tonic-gate 	 * Note that for T_CLTS there is no message sent here.
25120Sstevel@tonic-gate 	 */
25130Sstevel@tonic-gate 	if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) ==
25140Sstevel@tonic-gate 	    (SS_CANTRCVMORE|SS_CANTSENDMORE)) {
25150Sstevel@tonic-gate 		/*
25160Sstevel@tonic-gate 		 * For SunOS 4.X compatibility we tell the other end
25170Sstevel@tonic-gate 		 * that we are unable to receive at this point.
25180Sstevel@tonic-gate 		 */
25190Sstevel@tonic-gate 		if (so->so_family == AF_UNIX && so->so_serv_type != T_CLTS)
25200Sstevel@tonic-gate 			so_unix_close(so);
25210Sstevel@tonic-gate 
25220Sstevel@tonic-gate 		if (so->so_serv_type == T_COTS)
25230Sstevel@tonic-gate 			error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD);
25240Sstevel@tonic-gate 	}
25250Sstevel@tonic-gate 	if ((state_change & SS_CANTSENDMORE) &&
25260Sstevel@tonic-gate 	    (so->so_serv_type == T_COTS_ORD)) {
25270Sstevel@tonic-gate 		/* Send an orderly release */
25280Sstevel@tonic-gate 		ordrel_req.PRIM_type = T_ORDREL_REQ;
25290Sstevel@tonic-gate 
25300Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
25310Sstevel@tonic-gate 		mp = soallocproto1(&ordrel_req, sizeof (ordrel_req),
25320Sstevel@tonic-gate 		    0, _ALLOC_SLEEP);
25330Sstevel@tonic-gate 		/*
25340Sstevel@tonic-gate 		 * Send down the T_ORDREL_REQ even if there is flow control.
25350Sstevel@tonic-gate 		 * This prevents shutdown from blocking.
25360Sstevel@tonic-gate 		 * Note that there is no T_OK_ACK for ordrel_req.
25370Sstevel@tonic-gate 		 */
25380Sstevel@tonic-gate 		error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
25395240Snordmark 		    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
25400Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
25410Sstevel@tonic-gate 		if (error) {
25420Sstevel@tonic-gate 			eprintsoline(so, error);
25430Sstevel@tonic-gate 			goto done;
25440Sstevel@tonic-gate 		}
25450Sstevel@tonic-gate 	}
25460Sstevel@tonic-gate 
25470Sstevel@tonic-gate done:
25480Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
25490Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
25500Sstevel@tonic-gate 	return (error);
25510Sstevel@tonic-gate }
25520Sstevel@tonic-gate 
25530Sstevel@tonic-gate /*
25540Sstevel@tonic-gate  * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send
25550Sstevel@tonic-gate  * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer
25560Sstevel@tonic-gate  * that we have closed.
25570Sstevel@tonic-gate  * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length
25580Sstevel@tonic-gate  * T_UNITDATA_REQ containing the same option.
25590Sstevel@tonic-gate  *
25600Sstevel@tonic-gate  * For SOCK_DGRAM half-connections (somebody connected to this end
25610Sstevel@tonic-gate  * but this end is not connect) we don't know where to send any
25620Sstevel@tonic-gate  * SO_UNIX_CLOSE.
25630Sstevel@tonic-gate  *
25640Sstevel@tonic-gate  * We have to ignore stream head errors just in case there has been
25650Sstevel@tonic-gate  * a shutdown(output).
25660Sstevel@tonic-gate  * Ignore any flow control to try to get the message more quickly to the peer.
25670Sstevel@tonic-gate  * While locally ignoring flow control solves the problem when there
25680Sstevel@tonic-gate  * is only the loopback transport on the stream it would not provide
25690Sstevel@tonic-gate  * the correct AF_UNIX socket semantics when one or more modules have
25700Sstevel@tonic-gate  * been pushed.
25710Sstevel@tonic-gate  */
25720Sstevel@tonic-gate void
25730Sstevel@tonic-gate so_unix_close(struct sonode *so)
25740Sstevel@tonic-gate {
25750Sstevel@tonic-gate 	int		error;
25760Sstevel@tonic-gate 	struct T_opthdr	toh;
25770Sstevel@tonic-gate 	mblk_t		*mp;
25780Sstevel@tonic-gate 
25790Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
25800Sstevel@tonic-gate 
25810Sstevel@tonic-gate 	ASSERT(so->so_family == AF_UNIX);
25820Sstevel@tonic-gate 
25830Sstevel@tonic-gate 	if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) !=
25840Sstevel@tonic-gate 	    (SS_ISCONNECTED|SS_ISBOUND))
25850Sstevel@tonic-gate 		return;
25860Sstevel@tonic-gate 
25870Sstevel@tonic-gate 	dprintso(so, 1, ("so_unix_close(%p) %s\n",
25887240Srh87107 	    (void *)so, pr_state(so->so_state, so->so_mode)));
25890Sstevel@tonic-gate 
25900Sstevel@tonic-gate 	toh.level = SOL_SOCKET;
25910Sstevel@tonic-gate 	toh.name = SO_UNIX_CLOSE;
25920Sstevel@tonic-gate 
25930Sstevel@tonic-gate 	/* zero length + header */
25940Sstevel@tonic-gate 	toh.len = (t_uscalar_t)sizeof (struct T_opthdr);
25950Sstevel@tonic-gate 	toh.status = 0;
25960Sstevel@tonic-gate 
25970Sstevel@tonic-gate 	if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) {
25980Sstevel@tonic-gate 		struct T_optdata_req tdr;
25990Sstevel@tonic-gate 
26000Sstevel@tonic-gate 		tdr.PRIM_type = T_OPTDATA_REQ;
26010Sstevel@tonic-gate 		tdr.DATA_flag = 0;
26020Sstevel@tonic-gate 
26030Sstevel@tonic-gate 		tdr.OPT_length = (t_scalar_t)sizeof (toh);
26040Sstevel@tonic-gate 		tdr.OPT_offset = (t_scalar_t)sizeof (tdr);
26050Sstevel@tonic-gate 
26060Sstevel@tonic-gate 		/* NOTE: holding so_lock while sleeping */
26070Sstevel@tonic-gate 		mp = soallocproto2(&tdr, sizeof (tdr),
26080Sstevel@tonic-gate 		    &toh, sizeof (toh), 0, _ALLOC_SLEEP);
26090Sstevel@tonic-gate 	} else {
26100Sstevel@tonic-gate 		struct T_unitdata_req	tudr;
26110Sstevel@tonic-gate 		void			*addr;
26120Sstevel@tonic-gate 		socklen_t		addrlen;
26130Sstevel@tonic-gate 		void			*src;
26140Sstevel@tonic-gate 		socklen_t		srclen;
26150Sstevel@tonic-gate 		struct T_opthdr		toh2;
26160Sstevel@tonic-gate 		t_scalar_t		size;
26170Sstevel@tonic-gate 
26180Sstevel@tonic-gate 		/* Connecteded DGRAM socket */
26190Sstevel@tonic-gate 
26200Sstevel@tonic-gate 		/*
26210Sstevel@tonic-gate 		 * For AF_UNIX the destination address is translated to
26220Sstevel@tonic-gate 		 * an internal name and the source address is passed as
26230Sstevel@tonic-gate 		 * an option.
26240Sstevel@tonic-gate 		 */
26250Sstevel@tonic-gate 		/*
26260Sstevel@tonic-gate 		 * Length and family checks.
26270Sstevel@tonic-gate 		 */
26280Sstevel@tonic-gate 		error = so_addr_verify(so, so->so_faddr_sa,
26295240Snordmark 		    (t_uscalar_t)so->so_faddr_len);
26300Sstevel@tonic-gate 		if (error) {
26310Sstevel@tonic-gate 			eprintsoline(so, error);
26320Sstevel@tonic-gate 			return;
26330Sstevel@tonic-gate 		}
26340Sstevel@tonic-gate 		if (so->so_state & SS_FADDR_NOXLATE) {
26350Sstevel@tonic-gate 			/*
26360Sstevel@tonic-gate 			 * Already have a transport internal address. Do not
26370Sstevel@tonic-gate 			 * pass any (transport internal) source address.
26380Sstevel@tonic-gate 			 */
26390Sstevel@tonic-gate 			addr = so->so_faddr_sa;
26400Sstevel@tonic-gate 			addrlen = (t_uscalar_t)so->so_faddr_len;
26410Sstevel@tonic-gate 			src = NULL;
26420Sstevel@tonic-gate 			srclen = 0;
26430Sstevel@tonic-gate 		} else {
26440Sstevel@tonic-gate 			/*
26450Sstevel@tonic-gate 			 * Pass the sockaddr_un source address as an option
26460Sstevel@tonic-gate 			 * and translate the remote address.
26470Sstevel@tonic-gate 			 * Holding so_lock thus so_laddr_sa can not change.
26480Sstevel@tonic-gate 			 */
26490Sstevel@tonic-gate 			src = so->so_laddr_sa;
26500Sstevel@tonic-gate 			srclen = (socklen_t)so->so_laddr_len;
26510Sstevel@tonic-gate 			dprintso(so, 1,
26525240Snordmark 			    ("so_ux_close: srclen %d, src %p\n",
26535240Snordmark 			    srclen, src));
26540Sstevel@tonic-gate 			error = so_ux_addr_xlate(so,
26555240Snordmark 			    so->so_faddr_sa,
26565240Snordmark 			    (socklen_t)so->so_faddr_len, 0,
26575240Snordmark 			    &addr, &addrlen);
26580Sstevel@tonic-gate 			if (error) {
26590Sstevel@tonic-gate 				eprintsoline(so, error);
26600Sstevel@tonic-gate 				return;
26610Sstevel@tonic-gate 			}
26620Sstevel@tonic-gate 		}
26630Sstevel@tonic-gate 		tudr.PRIM_type = T_UNITDATA_REQ;
26640Sstevel@tonic-gate 		tudr.DEST_length = addrlen;
26650Sstevel@tonic-gate 		tudr.DEST_offset = (t_scalar_t)sizeof (tudr);
26660Sstevel@tonic-gate 		if (srclen == 0) {
26670Sstevel@tonic-gate 			tudr.OPT_length = (t_scalar_t)sizeof (toh);
26680Sstevel@tonic-gate 			tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) +
26695240Snordmark 			    _TPI_ALIGN_TOPT(addrlen));
26700Sstevel@tonic-gate 
26710Sstevel@tonic-gate 			size = tudr.OPT_offset + tudr.OPT_length;
26720Sstevel@tonic-gate 			/* NOTE: holding so_lock while sleeping */
26730Sstevel@tonic-gate 			mp = soallocproto2(&tudr, sizeof (tudr),
26740Sstevel@tonic-gate 			    addr, addrlen, size, _ALLOC_SLEEP);
26750Sstevel@tonic-gate 			mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen);
26760Sstevel@tonic-gate 			soappendmsg(mp, &toh, sizeof (toh));
26770Sstevel@tonic-gate 		} else {
26780Sstevel@tonic-gate 			/*
26790Sstevel@tonic-gate 			 * There is a AF_UNIX sockaddr_un to include as a
26800Sstevel@tonic-gate 			 * source address option.
26810Sstevel@tonic-gate 			 */
26820Sstevel@tonic-gate 			tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) +
26830Sstevel@tonic-gate 			    _TPI_ALIGN_TOPT(srclen));
26840Sstevel@tonic-gate 			tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) +
26850Sstevel@tonic-gate 			    _TPI_ALIGN_TOPT(addrlen));
26860Sstevel@tonic-gate 
26870Sstevel@tonic-gate 			toh2.level = SOL_SOCKET;
26880Sstevel@tonic-gate 			toh2.name = SO_SRCADDR;
26890Sstevel@tonic-gate 			toh2.len = (t_uscalar_t)(srclen +
26905240Snordmark 			    sizeof (struct T_opthdr));
26910Sstevel@tonic-gate 			toh2.status = 0;
26920Sstevel@tonic-gate 
26930Sstevel@tonic-gate 			size = tudr.OPT_offset + tudr.OPT_length;
26940Sstevel@tonic-gate 
26950Sstevel@tonic-gate 			/* NOTE: holding so_lock while sleeping */
26960Sstevel@tonic-gate 			mp = soallocproto2(&tudr, sizeof (tudr),
26970Sstevel@tonic-gate 			    addr, addrlen, size, _ALLOC_SLEEP);
26980Sstevel@tonic-gate 			mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen;
26990Sstevel@tonic-gate 			soappendmsg(mp, &toh, sizeof (toh));
27000Sstevel@tonic-gate 			soappendmsg(mp, &toh2, sizeof (toh2));
27010Sstevel@tonic-gate 			soappendmsg(mp, src, srclen);
27020Sstevel@tonic-gate 			mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen;
27030Sstevel@tonic-gate 		}
27040Sstevel@tonic-gate 		ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
27050Sstevel@tonic-gate 	}
27060Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
27070Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
27085240Snordmark 	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
27090Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
27100Sstevel@tonic-gate }
27110Sstevel@tonic-gate 
27120Sstevel@tonic-gate /*
27130Sstevel@tonic-gate  * Handle recv* calls that set MSG_OOB or MSG_OOB together with MSG_PEEK.
27140Sstevel@tonic-gate  */
27150Sstevel@tonic-gate int
27160Sstevel@tonic-gate sorecvoob(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, int flags)
27170Sstevel@tonic-gate {
27180Sstevel@tonic-gate 	mblk_t		*mp, *nmp;
27190Sstevel@tonic-gate 	int		error;
27200Sstevel@tonic-gate 
27217240Srh87107 	dprintso(so, 1, ("sorecvoob(%p, %p, 0x%x)\n",
27227240Srh87107 	    (void *)so, (void *)msg, flags));
27230Sstevel@tonic-gate 
27240Sstevel@tonic-gate 	/*
27250Sstevel@tonic-gate 	 * There is never any oob data with addresses or control since
27260Sstevel@tonic-gate 	 * the T_EXDATA_IND does not carry any options.
27270Sstevel@tonic-gate 	 */
27280Sstevel@tonic-gate 	msg->msg_controllen = 0;
27290Sstevel@tonic-gate 	msg->msg_namelen = 0;
27300Sstevel@tonic-gate 
27310Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
27320Sstevel@tonic-gate 	ASSERT(so_verify_oobstate(so));
27330Sstevel@tonic-gate 	if ((so->so_options & SO_OOBINLINE) ||
27340Sstevel@tonic-gate 	    (so->so_state & (SS_OOBPEND|SS_HADOOBDATA)) != SS_OOBPEND) {
27350Sstevel@tonic-gate 		dprintso(so, 1, ("sorecvoob: inline or data consumed\n"));
27360Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
27370Sstevel@tonic-gate 		return (EINVAL);
27380Sstevel@tonic-gate 	}
27390Sstevel@tonic-gate 	if (!(so->so_state & SS_HAVEOOBDATA)) {
27400Sstevel@tonic-gate 		dprintso(so, 1, ("sorecvoob: no data yet\n"));
27410Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
27420Sstevel@tonic-gate 		return (EWOULDBLOCK);
27430Sstevel@tonic-gate 	}
27440Sstevel@tonic-gate 	ASSERT(so->so_oobmsg != NULL);
27450Sstevel@tonic-gate 	mp = so->so_oobmsg;
27460Sstevel@tonic-gate 	if (flags & MSG_PEEK) {
27470Sstevel@tonic-gate 		/*
27480Sstevel@tonic-gate 		 * Since recv* can not return ENOBUFS we can not use dupmsg.
27490Sstevel@tonic-gate 		 * Instead we revert to the consolidation private
27500Sstevel@tonic-gate 		 * allocb_wait plus bcopy.
27510Sstevel@tonic-gate 		 */
27520Sstevel@tonic-gate 		mblk_t *mp1;
27530Sstevel@tonic-gate 
27540Sstevel@tonic-gate 		mp1 = allocb_wait(msgdsize(mp), BPRI_MED, STR_NOSIG, NULL);
27550Sstevel@tonic-gate 		ASSERT(mp1);
27560Sstevel@tonic-gate 
27570Sstevel@tonic-gate 		while (mp != NULL) {
27580Sstevel@tonic-gate 			ssize_t size;
27590Sstevel@tonic-gate 
27600Sstevel@tonic-gate 			size = MBLKL(mp);
27610Sstevel@tonic-gate 			bcopy(mp->b_rptr, mp1->b_wptr, size);
27620Sstevel@tonic-gate 			mp1->b_wptr += size;
27630Sstevel@tonic-gate 			ASSERT(mp1->b_wptr <= mp1->b_datap->db_lim);
27640Sstevel@tonic-gate 			mp = mp->b_cont;
27650Sstevel@tonic-gate 		}
27660Sstevel@tonic-gate 		mp = mp1;
27670Sstevel@tonic-gate 	} else {
27680Sstevel@tonic-gate 		/*
27690Sstevel@tonic-gate 		 * Update the state indicating that the data has been consumed.
27700Sstevel@tonic-gate 		 * Keep SS_OOBPEND set until data is consumed past the mark.
27710Sstevel@tonic-gate 		 */
27720Sstevel@tonic-gate 		so->so_oobmsg = NULL;
27730Sstevel@tonic-gate 		so->so_state ^= SS_HAVEOOBDATA|SS_HADOOBDATA;
27740Sstevel@tonic-gate 	}
27750Sstevel@tonic-gate 	dprintso(so, 1,
27765240Snordmark 	    ("after recvoob(%p): counts %d/%d state %s\n",
27777240Srh87107 	    (void *)so, so->so_oobsigcnt,
27785240Snordmark 	    so->so_oobcnt, pr_state(so->so_state, so->so_mode)));
27790Sstevel@tonic-gate 	ASSERT(so_verify_oobstate(so));
27800Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
27810Sstevel@tonic-gate 
27820Sstevel@tonic-gate 	error = 0;
27830Sstevel@tonic-gate 	nmp = mp;
27840Sstevel@tonic-gate 	while (nmp != NULL && uiop->uio_resid > 0) {
27850Sstevel@tonic-gate 		ssize_t n = MBLKL(nmp);
27860Sstevel@tonic-gate 
27870Sstevel@tonic-gate 		n = MIN(n, uiop->uio_resid);
27880Sstevel@tonic-gate 		if (n > 0)
27890Sstevel@tonic-gate 			error = uiomove(nmp->b_rptr, n,
27905240Snordmark 			    UIO_READ, uiop);
27910Sstevel@tonic-gate 		if (error)
27920Sstevel@tonic-gate 			break;
27930Sstevel@tonic-gate 		nmp = nmp->b_cont;
27940Sstevel@tonic-gate 	}
27950Sstevel@tonic-gate 	freemsg(mp);
27960Sstevel@tonic-gate 	return (error);
27970Sstevel@tonic-gate }
27980Sstevel@tonic-gate 
27990Sstevel@tonic-gate /*
28000Sstevel@tonic-gate  * Called by sotpi_recvmsg when reading a non-zero amount of data.
28010Sstevel@tonic-gate  * In addition, the caller typically verifies that there is some
28020Sstevel@tonic-gate  * potential state to clear by checking
28030Sstevel@tonic-gate  *	if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK))
28040Sstevel@tonic-gate  * before calling this routine.
28050Sstevel@tonic-gate  * Note that such a check can be made without holding so_lock since
28060Sstevel@tonic-gate  * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg
28070Sstevel@tonic-gate  * decrements so_oobsigcnt.
28080Sstevel@tonic-gate  *
28090Sstevel@tonic-gate  * When data is read *after* the point that all pending
28100Sstevel@tonic-gate  * oob data has been consumed the oob indication is cleared.
28110Sstevel@tonic-gate  *
28120Sstevel@tonic-gate  * This logic keeps select/poll returning POLLRDBAND and
28130Sstevel@tonic-gate  * SIOCATMARK returning true until we have read past
28140Sstevel@tonic-gate  * the mark.
28150Sstevel@tonic-gate  */
28160Sstevel@tonic-gate static void
28170Sstevel@tonic-gate sorecv_update_oobstate(struct sonode *so)
28180Sstevel@tonic-gate {
28190Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
28200Sstevel@tonic-gate 	ASSERT(so_verify_oobstate(so));
28210Sstevel@tonic-gate 	dprintso(so, 1,
28225240Snordmark 	    ("sorecv_update_oobstate: counts %d/%d state %s\n",
28235240Snordmark 	    so->so_oobsigcnt,
28245240Snordmark 	    so->so_oobcnt, pr_state(so->so_state, so->so_mode)));
28250Sstevel@tonic-gate 	if (so->so_oobsigcnt == 0) {
28260Sstevel@tonic-gate 		/* No more pending oob indications */
28270Sstevel@tonic-gate 		so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK);
28280Sstevel@tonic-gate 		freemsg(so->so_oobmsg);
28290Sstevel@tonic-gate 		so->so_oobmsg = NULL;
28300Sstevel@tonic-gate 	}
28310Sstevel@tonic-gate 	ASSERT(so_verify_oobstate(so));
28320Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
28330Sstevel@tonic-gate }
28340Sstevel@tonic-gate 
28350Sstevel@tonic-gate /*
28360Sstevel@tonic-gate  * Handle recv* calls for an so which has NL7C saved recv mblk_t(s).
28370Sstevel@tonic-gate  */
28380Sstevel@tonic-gate static int
28390Sstevel@tonic-gate nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp)
28400Sstevel@tonic-gate {
28410Sstevel@tonic-gate 	int	error = 0;
28420Sstevel@tonic-gate 	mblk_t *tmp = NULL;
28430Sstevel@tonic-gate 	mblk_t *pmp = NULL;
28440Sstevel@tonic-gate 	mblk_t *nmp = so->so_nl7c_rcv_mp;
28450Sstevel@tonic-gate 
28460Sstevel@tonic-gate 	ASSERT(nmp != NULL);
28470Sstevel@tonic-gate 
28480Sstevel@tonic-gate 	while (nmp != NULL && uiop->uio_resid > 0) {
28490Sstevel@tonic-gate 		ssize_t n;
28500Sstevel@tonic-gate 
28510Sstevel@tonic-gate 		if (DB_TYPE(nmp) == M_DATA) {
28520Sstevel@tonic-gate 			/*
28530Sstevel@tonic-gate 			 * We have some data, uiomove up to resid bytes.
28540Sstevel@tonic-gate 			 */
28550Sstevel@tonic-gate 			n = MIN(MBLKL(nmp), uiop->uio_resid);
28560Sstevel@tonic-gate 			if (n > 0)
28570Sstevel@tonic-gate 				error = uiomove(nmp->b_rptr, n, UIO_READ, uiop);
28580Sstevel@tonic-gate 			nmp->b_rptr += n;
28590Sstevel@tonic-gate 			if (nmp->b_rptr == nmp->b_wptr) {
28600Sstevel@tonic-gate 				pmp = nmp;
28610Sstevel@tonic-gate 				nmp = nmp->b_cont;
28620Sstevel@tonic-gate 			}
28631974Sbrutus 			if (error)
28641974Sbrutus 				break;
28650Sstevel@tonic-gate 		} else {
28660Sstevel@tonic-gate 			/*
28670Sstevel@tonic-gate 			 * We only handle data, save for caller to handle.
28680Sstevel@tonic-gate 			 */
28690Sstevel@tonic-gate 			if (pmp != NULL) {
28700Sstevel@tonic-gate 				pmp->b_cont = nmp->b_cont;
28710Sstevel@tonic-gate 			}
28720Sstevel@tonic-gate 			nmp->b_cont = NULL;
28730Sstevel@tonic-gate 			if (*rmp == NULL) {
28740Sstevel@tonic-gate 				*rmp = nmp;
28750Sstevel@tonic-gate 			} else {
28761974Sbrutus 				tmp->b_cont = nmp;
28770Sstevel@tonic-gate 			}
28780Sstevel@tonic-gate 			nmp = nmp->b_cont;
28790Sstevel@tonic-gate 			tmp = nmp;
28800Sstevel@tonic-gate 		}
28810Sstevel@tonic-gate 	}
28820Sstevel@tonic-gate 	if (pmp != NULL) {
28830Sstevel@tonic-gate 		/* Free any mblk_t(s) which we have consumed */
28840Sstevel@tonic-gate 		pmp->b_cont = NULL;
28850Sstevel@tonic-gate 		freemsg(so->so_nl7c_rcv_mp);
28860Sstevel@tonic-gate 	}
28870Sstevel@tonic-gate 	if ((so->so_nl7c_rcv_mp = nmp) == NULL) {
28881974Sbrutus 		/* Last mblk_t so return the saved kstrgetmsg() rval/error */
28891974Sbrutus 		if (error == 0) {
28901974Sbrutus 			rval_t	*p = (rval_t *)&so->so_nl7c_rcv_rval;
28911974Sbrutus 
28921974Sbrutus 			error = p->r_v.r_v2;
28931974Sbrutus 			p->r_v.r_v2 = 0;
28941974Sbrutus 		}
28950Sstevel@tonic-gate 		rp->r_vals = so->so_nl7c_rcv_rval;
28960Sstevel@tonic-gate 		so->so_nl7c_rcv_rval = 0;
28970Sstevel@tonic-gate 	} else {
28980Sstevel@tonic-gate 		/* More mblk_t(s) to process so no rval to return */
28990Sstevel@tonic-gate 		rp->r_vals = 0;
29000Sstevel@tonic-gate 	}
29010Sstevel@tonic-gate 	return (error);
29020Sstevel@tonic-gate }
29030Sstevel@tonic-gate 
29040Sstevel@tonic-gate /*
29050Sstevel@tonic-gate  * Receive the next message on the queue.
29060Sstevel@tonic-gate  * If msg_controllen is non-zero when called the caller is interested in
29070Sstevel@tonic-gate  * any received control info (options).
29080Sstevel@tonic-gate  * If msg_namelen is non-zero when called the caller is interested in
29090Sstevel@tonic-gate  * any received source address.
29100Sstevel@tonic-gate  * The routine returns with msg_control and msg_name pointing to
29110Sstevel@tonic-gate  * kmem_alloc'ed memory which the caller has to free.
29120Sstevel@tonic-gate  */
29130Sstevel@tonic-gate int
29140Sstevel@tonic-gate sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
29150Sstevel@tonic-gate {
29160Sstevel@tonic-gate 	union T_primitives	*tpr;
29170Sstevel@tonic-gate 	mblk_t			*mp;
29180Sstevel@tonic-gate 	uchar_t			pri;
29190Sstevel@tonic-gate 	int			pflag, opflag;
29200Sstevel@tonic-gate 	void			*control;
29210Sstevel@tonic-gate 	t_uscalar_t		controllen;
29220Sstevel@tonic-gate 	t_uscalar_t		namelen;
29230Sstevel@tonic-gate 	int			so_state = so->so_state; /* Snapshot */
29240Sstevel@tonic-gate 	ssize_t			saved_resid;
29250Sstevel@tonic-gate 	rval_t			rval;
29260Sstevel@tonic-gate 	int			flags;
29270Sstevel@tonic-gate 	clock_t			timout;
29280Sstevel@tonic-gate 	int			first;
29296707Sbrutus 	int			error = 0;
29306707Sbrutus 	struct uio		*suiop = NULL;
29316707Sbrutus 	sodirect_t		*sodp = so->so_direct;
29320Sstevel@tonic-gate 
29330Sstevel@tonic-gate 	flags = msg->msg_flags;
29340Sstevel@tonic-gate 	msg->msg_flags = 0;
29350Sstevel@tonic-gate 
29360Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n",
29377240Srh87107 	    (void *)so, (void *)msg, flags,
29385240Snordmark 	    pr_state(so->so_state, so->so_mode), so->so_error));
29390Sstevel@tonic-gate 
29400Sstevel@tonic-gate 	/*
29410Sstevel@tonic-gate 	 * If we are not connected because we have never been connected
29420Sstevel@tonic-gate 	 * we return ENOTCONN. If we have been connected (but are no longer
29430Sstevel@tonic-gate 	 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return
29440Sstevel@tonic-gate 	 * the EOF.
29450Sstevel@tonic-gate 	 *
29460Sstevel@tonic-gate 	 * An alternative would be to post an ENOTCONN error in stream head
29470Sstevel@tonic-gate 	 * (read+write) and clear it when we're connected. However, that error
29480Sstevel@tonic-gate 	 * would cause incorrect poll/select behavior!
29490Sstevel@tonic-gate 	 */
29500Sstevel@tonic-gate 	if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 &&
29510Sstevel@tonic-gate 	    (so->so_mode & SM_CONNREQUIRED)) {
29520Sstevel@tonic-gate 		return (ENOTCONN);
29530Sstevel@tonic-gate 	}
29540Sstevel@tonic-gate 
29550Sstevel@tonic-gate 	/*
29560Sstevel@tonic-gate 	 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but
29570Sstevel@tonic-gate 	 * after checking that the read queue is empty) and returns zero.
29580Sstevel@tonic-gate 	 * This implementation will sleep (in kstrgetmsg) even if uio_resid
29590Sstevel@tonic-gate 	 * is zero.
29600Sstevel@tonic-gate 	 */
29610Sstevel@tonic-gate 
29620Sstevel@tonic-gate 	if (flags & MSG_OOB) {
29630Sstevel@tonic-gate 		/* Check that the transport supports OOB */
29640Sstevel@tonic-gate 		if (!(so->so_mode & SM_EXDATA))
29650Sstevel@tonic-gate 			return (EOPNOTSUPP);
29660Sstevel@tonic-gate 		return (sorecvoob(so, msg, uiop, flags));
29670Sstevel@tonic-gate 	}
29680Sstevel@tonic-gate 
29690Sstevel@tonic-gate 	/*
29700Sstevel@tonic-gate 	 * Set msg_controllen and msg_namelen to zero here to make it
29710Sstevel@tonic-gate 	 * simpler in the cases that no control or name is returned.
29720Sstevel@tonic-gate 	 */
29730Sstevel@tonic-gate 	controllen = msg->msg_controllen;
29740Sstevel@tonic-gate 	namelen = msg->msg_namelen;
29750Sstevel@tonic-gate 	msg->msg_controllen = 0;
29760Sstevel@tonic-gate 	msg->msg_namelen = 0;
29770Sstevel@tonic-gate 
29780Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n",
29795240Snordmark 	    namelen, controllen));
29800Sstevel@tonic-gate 
29811974Sbrutus 	mutex_enter(&so->so_lock);
29820Sstevel@tonic-gate 	/*
29830Sstevel@tonic-gate 	 * If an NL7C enabled socket and not waiting for write data.
29840Sstevel@tonic-gate 	 */
29851974Sbrutus 	if ((so->so_nl7c_flags & (NL7C_ENABLED | NL7C_WAITWRITE)) ==
29860Sstevel@tonic-gate 	    NL7C_ENABLED) {
29870Sstevel@tonic-gate 		if (so->so_nl7c_uri) {
29881974Sbrutus 			/* Close uri processing for a previous request */
29890Sstevel@tonic-gate 			nl7c_close(so);
29900Sstevel@tonic-gate 		}
29911974Sbrutus 		if ((so_state & SS_CANTRCVMORE) && so->so_nl7c_rcv_mp == NULL) {
29921974Sbrutus 			/* Nothing to process, EOF */
29931974Sbrutus 			mutex_exit(&so->so_lock);
29941974Sbrutus 			return (0);
29951974Sbrutus 		} else if (so->so_nl7c_flags & NL7C_SOPERSIST) {
29961974Sbrutus 			/* Persistent NL7C socket, try to process request */
29971974Sbrutus 			boolean_t ret;
29981974Sbrutus 
29991974Sbrutus 			ret = nl7c_process(so,
30001974Sbrutus 			    (so->so_state & (SS_NONBLOCK|SS_NDELAY)));
30011974Sbrutus 			rval.r_vals = so->so_nl7c_rcv_rval;
30021974Sbrutus 			error = rval.r_v.r_v2;
30031974Sbrutus 			if (error) {
30041974Sbrutus 				/* Error of some sort, return it */
30051974Sbrutus 				mutex_exit(&so->so_lock);
30061974Sbrutus 				return (error);
30071974Sbrutus 			}
30081974Sbrutus 			if (so->so_nl7c_flags &&
30091974Sbrutus 			    ! (so->so_nl7c_flags & NL7C_WAITWRITE)) {
30101974Sbrutus 				/*
30111974Sbrutus 				 * Still an NL7C socket and no data
30121974Sbrutus 				 * to pass up to the caller.
30131974Sbrutus 				 */
30141974Sbrutus 				mutex_exit(&so->so_lock);
30151974Sbrutus 				if (ret) {
30161974Sbrutus 					/* EOF */
30171974Sbrutus 					return (0);
30181974Sbrutus 				} else {
30191974Sbrutus 					/* Need more data */
30201974Sbrutus 					return (EAGAIN);
30211974Sbrutus 				}
30221974Sbrutus 			}
30231974Sbrutus 		} else {
30240Sstevel@tonic-gate 			/*
30251974Sbrutus 			 * Not persistent so no further NL7C processing.
30260Sstevel@tonic-gate 			 */
30270Sstevel@tonic-gate 			so->so_nl7c_flags = 0;
30280Sstevel@tonic-gate 		}
30290Sstevel@tonic-gate 	}
30300Sstevel@tonic-gate 	/*
30310Sstevel@tonic-gate 	 * Only one reader is allowed at any given time. This is needed
30320Sstevel@tonic-gate 	 * for T_EXDATA handling and, in the future, MSG_WAITALL.
30330Sstevel@tonic-gate 	 *
30340Sstevel@tonic-gate 	 * This is slightly different than BSD behavior in that it fails with
30350Sstevel@tonic-gate 	 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access
30360Sstevel@tonic-gate 	 * is single-threaded using sblock(), which is dropped while waiting
30370Sstevel@tonic-gate 	 * for data to appear. The difference shows up e.g. if one
30380Sstevel@tonic-gate 	 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor
30390Sstevel@tonic-gate 	 * does use nonblocking io and different threads are reading each
30400Sstevel@tonic-gate 	 * file descriptor. In BSD there would never be an EWOULDBLOCK error
30410Sstevel@tonic-gate 	 * in this case as long as the read queue doesn't get empty.
30420Sstevel@tonic-gate 	 * In this implementation the thread using nonblocking io can
30430Sstevel@tonic-gate 	 * get an EWOULDBLOCK error due to the blocking thread executing
30440Sstevel@tonic-gate 	 * e.g. in the uiomove in kstrgetmsg.
30450Sstevel@tonic-gate 	 * This difference is not believed to be significant.
30460Sstevel@tonic-gate 	 */
30473749Sethindra 	/* Set SOREADLOCKED */
30483749Sethindra 	error = so_lock_read_intr(so,
30493749Sethindra 	    uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0));
30500Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
30510Sstevel@tonic-gate 	if (error)
30520Sstevel@tonic-gate 		return (error);
30530Sstevel@tonic-gate 
30540Sstevel@tonic-gate 	/*
30550Sstevel@tonic-gate 	 * Tell kstrgetmsg to not inspect the stream head errors until all
30560Sstevel@tonic-gate 	 * queued data has been consumed.
30570Sstevel@tonic-gate 	 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set.
30580Sstevel@tonic-gate 	 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block.
30590Sstevel@tonic-gate 	 *
30600Sstevel@tonic-gate 	 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and
30610Sstevel@tonic-gate 	 * to T_OPTDATA_IND that do not contain any user-visible control msg.
30620Sstevel@tonic-gate 	 * Note that MSG_WAITALL set with MSG_PEEK is a noop.
30630Sstevel@tonic-gate 	 */
30640Sstevel@tonic-gate 	pflag = MSG_ANY | MSG_DELAYERROR;
30650Sstevel@tonic-gate 	if (flags & MSG_PEEK) {
30660Sstevel@tonic-gate 		pflag |= MSG_IPEEK;
30670Sstevel@tonic-gate 		flags &= ~MSG_WAITALL;
30680Sstevel@tonic-gate 	}
30690Sstevel@tonic-gate 	if (so->so_mode & SM_ATOMIC)
30700Sstevel@tonic-gate 		pflag |= MSG_DISCARDTAIL;
30710Sstevel@tonic-gate 
30720Sstevel@tonic-gate 	if (flags & MSG_DONTWAIT)
30730Sstevel@tonic-gate 		timout = 0;
30740Sstevel@tonic-gate 	else
30750Sstevel@tonic-gate 		timout = -1;
30760Sstevel@tonic-gate 	opflag = pflag;
30770Sstevel@tonic-gate 	first = 1;
30780Sstevel@tonic-gate 
30796707Sbrutus 	if (uiop->uio_resid >= uioasync.mincnt &&
30806707Sbrutus 	    sodp != NULL && (sodp->sod_state & SOD_ENABLED) &&
30816707Sbrutus 	    uioasync.enabled && !(flags & MSG_PEEK) &&
30826707Sbrutus 	    !(so_state & SS_CANTRCVMORE)) {
30836707Sbrutus 		/*
30846707Sbrutus 		 * Big enough I/O for uioa min setup and an sodirect socket
30856707Sbrutus 		 * and sodirect enabled and uioa enabled and I/O will be done
30866707Sbrutus 		 * and not EOF so initialize the sodirect_t uioa_t with "uiop".
30876707Sbrutus 		 */
3088*7660SEric.Yu@Sun.COM 		mutex_enter(sodp->sod_lockp);
30896707Sbrutus 		if (!uioainit(uiop, &sodp->sod_uioa)) {
30906707Sbrutus 			/*
30916707Sbrutus 			 * Successful uioainit() so the uio_t part of the
30926707Sbrutus 			 * uioa_t will be used for all uio_t work to follow,
30936707Sbrutus 			 * we save the original "uiop" in "suiop".
30946707Sbrutus 			 */
30956707Sbrutus 			suiop = uiop;
30966707Sbrutus 			uiop = (uio_t *)&sodp->sod_uioa;
30976707Sbrutus 			/*
30986707Sbrutus 			 * Before returning to the caller the passed in uio_t
30996707Sbrutus 			 * "uiop" will be updated via a call to uioafini()
31006707Sbrutus 			 * below.
31016707Sbrutus 			 *
31026707Sbrutus 			 * Note, the uioa.uioa_state isn't set to UIOA_ENABLED
31036707Sbrutus 			 * here as first we have to uioamove() any currently
31046707Sbrutus 			 * queued M_DATA mblk_t(s) so it will be done in
31056707Sbrutus 			 * kstrgetmsg().
31066707Sbrutus 			 */
31076707Sbrutus 		}
31086707Sbrutus 		/*
31096707Sbrutus 		 * In either uioainit() success or not case note the number
31106707Sbrutus 		 * of uio bytes the caller wants for sod framework and/or
31116707Sbrutus 		 * transport (e.g. TCP) strategy.
31126707Sbrutus 		 */
31136707Sbrutus 		sodp->sod_want = uiop->uio_resid;
3114*7660SEric.Yu@Sun.COM 		mutex_exit(sodp->sod_lockp);
31156707Sbrutus 	} else if (sodp != NULL && (sodp->sod_state & SOD_ENABLED)) {
31166707Sbrutus 		/*
31176707Sbrutus 		 * No uioa but still using sodirect so note the number of
31186707Sbrutus 		 * uio bytes the caller wants for sodirect framework and/or
31196707Sbrutus 		 * transport (e.g. TCP) strategy.
31206707Sbrutus 		 *
3121*7660SEric.Yu@Sun.COM 		 * Note, sod_lockp not held, only writer is in this function
31226707Sbrutus 		 * and only one thread at a time so not needed just to init.
31236707Sbrutus 		 */
31246707Sbrutus 		sodp->sod_want = uiop->uio_resid;
31256707Sbrutus 	}
31260Sstevel@tonic-gate retry:
31270Sstevel@tonic-gate 	saved_resid = uiop->uio_resid;
31280Sstevel@tonic-gate 	pri = 0;
31290Sstevel@tonic-gate 	mp = NULL;
31300Sstevel@tonic-gate 	if (so->so_nl7c_rcv_mp != NULL) {
31311974Sbrutus 		/* Already kstrgetmsg()ed saved mblk(s) from NL7C */
31320Sstevel@tonic-gate 		error = nl7c_sorecv(so, &mp, uiop, &rval);
31330Sstevel@tonic-gate 	} else {
31340Sstevel@tonic-gate 		error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag,
31350Sstevel@tonic-gate 		    timout, &rval);
31360Sstevel@tonic-gate 	}
31370Sstevel@tonic-gate 	if (error) {
31380Sstevel@tonic-gate 		switch (error) {
31390Sstevel@tonic-gate 		case EINTR:
31400Sstevel@tonic-gate 		case EWOULDBLOCK:
31410Sstevel@tonic-gate 			if (!first)
31420Sstevel@tonic-gate 				error = 0;
31430Sstevel@tonic-gate 			break;
31440Sstevel@tonic-gate 		case ETIME:
31450Sstevel@tonic-gate 			/* Returned from kstrgetmsg when timeout expires */
31460Sstevel@tonic-gate 			if (!first)
31470Sstevel@tonic-gate 				error = 0;
31480Sstevel@tonic-gate 			else
31490Sstevel@tonic-gate 				error = EWOULDBLOCK;
31500Sstevel@tonic-gate 			break;
31510Sstevel@tonic-gate 		default:
31520Sstevel@tonic-gate 			eprintsoline(so, error);
31530Sstevel@tonic-gate 			break;
31540Sstevel@tonic-gate 		}
31556707Sbrutus 		goto out;
31560Sstevel@tonic-gate 	}
31570Sstevel@tonic-gate 	/*
31580Sstevel@tonic-gate 	 * For datagrams the MOREDATA flag is used to set MSG_TRUNC.
31590Sstevel@tonic-gate 	 * For non-datagrams MOREDATA is used to set MSG_EOR.
31600Sstevel@tonic-gate 	 */
31610Sstevel@tonic-gate 	ASSERT(!(rval.r_val1 & MORECTL));
31620Sstevel@tonic-gate 	if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC))
31630Sstevel@tonic-gate 		msg->msg_flags |= MSG_TRUNC;
31640Sstevel@tonic-gate 
31650Sstevel@tonic-gate 	if (mp == NULL) {
31660Sstevel@tonic-gate 		dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n"));
31670Sstevel@tonic-gate 		/*
31680Sstevel@tonic-gate 		 * 4.3BSD and 4.4BSD clears the mark when peeking across it.
31690Sstevel@tonic-gate 		 * The draft Posix socket spec states that the mark should
31700Sstevel@tonic-gate 		 * not be cleared when peeking. We follow the latter.
31710Sstevel@tonic-gate 		 */
31720Sstevel@tonic-gate 		if ((so->so_state &
31730Sstevel@tonic-gate 		    (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) &&
31740Sstevel@tonic-gate 		    (uiop->uio_resid != saved_resid) &&
31750Sstevel@tonic-gate 		    !(flags & MSG_PEEK)) {
31760Sstevel@tonic-gate 			sorecv_update_oobstate(so);
31770Sstevel@tonic-gate 		}
31780Sstevel@tonic-gate 
31790Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
31800Sstevel@tonic-gate 		/* Set MSG_EOR based on MOREDATA */
31810Sstevel@tonic-gate 		if (!(rval.r_val1 & MOREDATA)) {
31820Sstevel@tonic-gate 			if (so->so_state & SS_SAVEDEOR) {
31830Sstevel@tonic-gate 				msg->msg_flags |= MSG_EOR;
31840Sstevel@tonic-gate 				so->so_state &= ~SS_SAVEDEOR;
31850Sstevel@tonic-gate 			}
31860Sstevel@tonic-gate 		}
31870Sstevel@tonic-gate 		/*
31880Sstevel@tonic-gate 		 * If some data was received (i.e. not EOF) and the
31890Sstevel@tonic-gate 		 * read/recv* has not been satisfied wait for some more.
31900Sstevel@tonic-gate 		 */
31910Sstevel@tonic-gate 		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
31920Sstevel@tonic-gate 		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
31930Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
31940Sstevel@tonic-gate 			first = 0;
31950Sstevel@tonic-gate 			pflag = opflag | MSG_NOMARK;
31960Sstevel@tonic-gate 			goto retry;
31970Sstevel@tonic-gate 		}
31986707Sbrutus 		goto out_locked;
31990Sstevel@tonic-gate 	}
32000Sstevel@tonic-gate 
32010Sstevel@tonic-gate 	/* strsock_proto has already verified length and alignment */
32020Sstevel@tonic-gate 	tpr = (union T_primitives *)mp->b_rptr;
32030Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type));
32040Sstevel@tonic-gate 
32050Sstevel@tonic-gate 	switch (tpr->type) {
32060Sstevel@tonic-gate 	case T_DATA_IND: {
32070Sstevel@tonic-gate 		if ((so->so_state &
32080Sstevel@tonic-gate 		    (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) &&
32090Sstevel@tonic-gate 		    (uiop->uio_resid != saved_resid) &&
32100Sstevel@tonic-gate 		    !(flags & MSG_PEEK)) {
32110Sstevel@tonic-gate 			sorecv_update_oobstate(so);
32120Sstevel@tonic-gate 		}
32130Sstevel@tonic-gate 
32140Sstevel@tonic-gate 		/*
32150Sstevel@tonic-gate 		 * Set msg_flags to MSG_EOR based on
32160Sstevel@tonic-gate 		 * MORE_flag and MOREDATA.
32170Sstevel@tonic-gate 		 */
32180Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
32190Sstevel@tonic-gate 		so->so_state &= ~SS_SAVEDEOR;
32200Sstevel@tonic-gate 		if (!(tpr->data_ind.MORE_flag & 1)) {
32210Sstevel@tonic-gate 			if (!(rval.r_val1 & MOREDATA))
32220Sstevel@tonic-gate 				msg->msg_flags |= MSG_EOR;
32230Sstevel@tonic-gate 			else
32240Sstevel@tonic-gate 				so->so_state |= SS_SAVEDEOR;
32250Sstevel@tonic-gate 		}
32260Sstevel@tonic-gate 		freemsg(mp);
32270Sstevel@tonic-gate 		/*
32280Sstevel@tonic-gate 		 * If some data was received (i.e. not EOF) and the
32290Sstevel@tonic-gate 		 * read/recv* has not been satisfied wait for some more.
32300Sstevel@tonic-gate 		 */
32310Sstevel@tonic-gate 		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
32320Sstevel@tonic-gate 		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
32330Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
32340Sstevel@tonic-gate 			first = 0;
32350Sstevel@tonic-gate 			pflag = opflag | MSG_NOMARK;
32360Sstevel@tonic-gate 			goto retry;
32370Sstevel@tonic-gate 		}
32386707Sbrutus 		goto out_locked;
32390Sstevel@tonic-gate 	}
32400Sstevel@tonic-gate 	case T_UNITDATA_IND: {
32410Sstevel@tonic-gate 		void *addr;
32420Sstevel@tonic-gate 		t_uscalar_t addrlen;
32430Sstevel@tonic-gate 		void *abuf;
32440Sstevel@tonic-gate 		t_uscalar_t optlen;
32450Sstevel@tonic-gate 		void *opt;
32460Sstevel@tonic-gate 
32470Sstevel@tonic-gate 		if ((so->so_state &
32480Sstevel@tonic-gate 		    (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) &&
32490Sstevel@tonic-gate 		    (uiop->uio_resid != saved_resid) &&
32500Sstevel@tonic-gate 		    !(flags & MSG_PEEK)) {
32510Sstevel@tonic-gate 			sorecv_update_oobstate(so);
32520Sstevel@tonic-gate 		}
32530Sstevel@tonic-gate 
32540Sstevel@tonic-gate 		if (namelen != 0) {
32550Sstevel@tonic-gate 			/* Caller wants source address */
32560Sstevel@tonic-gate 			addrlen = tpr->unitdata_ind.SRC_length;
32570Sstevel@tonic-gate 			addr = sogetoff(mp,
32585240Snordmark 			    tpr->unitdata_ind.SRC_offset,
32595240Snordmark 			    addrlen, 1);
32600Sstevel@tonic-gate 			if (addr == NULL) {
32610Sstevel@tonic-gate 				freemsg(mp);
32620Sstevel@tonic-gate 				error = EPROTO;
32630Sstevel@tonic-gate 				eprintsoline(so, error);
32646707Sbrutus 				goto out;
32650Sstevel@tonic-gate 			}
32660Sstevel@tonic-gate 			if (so->so_family == AF_UNIX) {
32670Sstevel@tonic-gate 				/*
32680Sstevel@tonic-gate 				 * Can not use the transport level address.
32690Sstevel@tonic-gate 				 * If there is a SO_SRCADDR option carrying
32700Sstevel@tonic-gate 				 * the socket level address it will be
32710Sstevel@tonic-gate 				 * extracted below.
32720Sstevel@tonic-gate 				 */
32730Sstevel@tonic-gate 				addr = NULL;
32740Sstevel@tonic-gate 				addrlen = 0;
32750Sstevel@tonic-gate 			}
32760Sstevel@tonic-gate 		}
32770Sstevel@tonic-gate 		optlen = tpr->unitdata_ind.OPT_length;
32780Sstevel@tonic-gate 		if (optlen != 0) {
32790Sstevel@tonic-gate 			t_uscalar_t ncontrollen;
32800Sstevel@tonic-gate 
32810Sstevel@tonic-gate 			/*
32820Sstevel@tonic-gate 			 * Extract any source address option.
32830Sstevel@tonic-gate 			 * Determine how large cmsg buffer is needed.
32840Sstevel@tonic-gate 			 */
32850Sstevel@tonic-gate 			opt = sogetoff(mp,
32865240Snordmark 			    tpr->unitdata_ind.OPT_offset,
32875240Snordmark 			    optlen, __TPI_ALIGN_SIZE);
32880Sstevel@tonic-gate 
32890Sstevel@tonic-gate 			if (opt == NULL) {
32900Sstevel@tonic-gate 				freemsg(mp);
32910Sstevel@tonic-gate 				error = EPROTO;
32920Sstevel@tonic-gate 				eprintsoline(so, error);
32936707Sbrutus 				goto out;
32940Sstevel@tonic-gate 			}
32950Sstevel@tonic-gate 			if (so->so_family == AF_UNIX)
32960Sstevel@tonic-gate 				so_getopt_srcaddr(opt, optlen, &addr, &addrlen);
32970Sstevel@tonic-gate 			ncontrollen = so_cmsglen(mp, opt, optlen,
32985240Snordmark 			    !(flags & MSG_XPG4_2));
32990Sstevel@tonic-gate 			if (controllen != 0)
33000Sstevel@tonic-gate 				controllen = ncontrollen;
33010Sstevel@tonic-gate 			else if (ncontrollen != 0)
33020Sstevel@tonic-gate 				msg->msg_flags |= MSG_CTRUNC;
33030Sstevel@tonic-gate 		} else {
33040Sstevel@tonic-gate 			controllen = 0;
33050Sstevel@tonic-gate 		}
33060Sstevel@tonic-gate 
33070Sstevel@tonic-gate 		if (namelen != 0) {
33080Sstevel@tonic-gate 			/*
33090Sstevel@tonic-gate 			 * Return address to caller.
33100Sstevel@tonic-gate 			 * Caller handles truncation if length
33110Sstevel@tonic-gate 			 * exceeds msg_namelen.
33120Sstevel@tonic-gate 			 * NOTE: AF_UNIX NUL termination is ensured by
33130Sstevel@tonic-gate 			 * the sender's copyin_name().
33140Sstevel@tonic-gate 			 */
33150Sstevel@tonic-gate 			abuf = kmem_alloc(addrlen, KM_SLEEP);
33160Sstevel@tonic-gate 
33170Sstevel@tonic-gate 			bcopy(addr, abuf, addrlen);
33180Sstevel@tonic-gate 			msg->msg_name = abuf;
33190Sstevel@tonic-gate 			msg->msg_namelen = addrlen;
33200Sstevel@tonic-gate 		}
33210Sstevel@tonic-gate 
33220Sstevel@tonic-gate 		if (controllen != 0) {
33230Sstevel@tonic-gate 			/*
33240Sstevel@tonic-gate 			 * Return control msg to caller.
33250Sstevel@tonic-gate 			 * Caller handles truncation if length
33260Sstevel@tonic-gate 			 * exceeds msg_controllen.
33270Sstevel@tonic-gate 			 */
33284420Samehta 			control = kmem_zalloc(controllen, KM_SLEEP);
33290Sstevel@tonic-gate 
33300Sstevel@tonic-gate 			error = so_opt2cmsg(mp, opt, optlen,
33315240Snordmark 			    !(flags & MSG_XPG4_2),
33325240Snordmark 			    control, controllen);
33330Sstevel@tonic-gate 			if (error) {
33340Sstevel@tonic-gate 				freemsg(mp);
33350Sstevel@tonic-gate 				if (msg->msg_namelen != 0)
33360Sstevel@tonic-gate 					kmem_free(msg->msg_name,
33375240Snordmark 					    msg->msg_namelen);
33380Sstevel@tonic-gate 				kmem_free(control, controllen);
33390Sstevel@tonic-gate 				eprintsoline(so, error);
33406707Sbrutus 				goto out;
33410Sstevel@tonic-gate 			}
33420Sstevel@tonic-gate 			msg->msg_control = control;
33430Sstevel@tonic-gate 			msg->msg_controllen = controllen;
33440Sstevel@tonic-gate 		}
33450Sstevel@tonic-gate 
33460Sstevel@tonic-gate 		freemsg(mp);
33476707Sbrutus 		goto out;
33480Sstevel@tonic-gate 	}
33490Sstevel@tonic-gate 	case T_OPTDATA_IND: {
33500Sstevel@tonic-gate 		struct T_optdata_req *tdr;
33510Sstevel@tonic-gate 		void *opt;
33520Sstevel@tonic-gate 		t_uscalar_t optlen;
33530Sstevel@tonic-gate 
33540Sstevel@tonic-gate 		if ((so->so_state &
33550Sstevel@tonic-gate 		    (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) &&
33560Sstevel@tonic-gate 		    (uiop->uio_resid != saved_resid) &&
33570Sstevel@tonic-gate 		    !(flags & MSG_PEEK)) {
33580Sstevel@tonic-gate 			sorecv_update_oobstate(so);
33590Sstevel@tonic-gate 		}
33600Sstevel@tonic-gate 
33610Sstevel@tonic-gate 		tdr = (struct T_optdata_req *)mp->b_rptr;
33620Sstevel@tonic-gate 		optlen = tdr->OPT_length;
33630Sstevel@tonic-gate 		if (optlen != 0) {
33640Sstevel@tonic-gate 			t_uscalar_t ncontrollen;
33650Sstevel@tonic-gate 			/*
33660Sstevel@tonic-gate 			 * Determine how large cmsg buffer is needed.
33670Sstevel@tonic-gate 			 */
33680Sstevel@tonic-gate 			opt = sogetoff(mp,
33695240Snordmark 			    tpr->optdata_ind.OPT_offset,
33705240Snordmark 			    optlen, __TPI_ALIGN_SIZE);
33710Sstevel@tonic-gate 
33720Sstevel@tonic-gate 			if (opt == NULL) {
33730Sstevel@tonic-gate 				freemsg(mp);
33740Sstevel@tonic-gate 				error = EPROTO;
33750Sstevel@tonic-gate 				eprintsoline(so, error);
33766707Sbrutus 				goto out;
33770Sstevel@tonic-gate 			}
33780Sstevel@tonic-gate 
33790Sstevel@tonic-gate 			ncontrollen = so_cmsglen(mp, opt, optlen,
33805240Snordmark 			    !(flags & MSG_XPG4_2));
33810Sstevel@tonic-gate 			if (controllen != 0)
33820Sstevel@tonic-gate 				controllen = ncontrollen;
33830Sstevel@tonic-gate 			else if (ncontrollen != 0)
33840Sstevel@tonic-gate 				msg->msg_flags |= MSG_CTRUNC;
33850Sstevel@tonic-gate 		} else {
33860Sstevel@tonic-gate 			controllen = 0;
33870Sstevel@tonic-gate 		}
33880Sstevel@tonic-gate 
33890Sstevel@tonic-gate 		if (controllen != 0) {
33900Sstevel@tonic-gate 			/*
33910Sstevel@tonic-gate 			 * Return control msg to caller.
33920Sstevel@tonic-gate 			 * Caller handles truncation if length
33930Sstevel@tonic-gate 			 * exceeds msg_controllen.
33940Sstevel@tonic-gate 			 */
33954420Samehta 			control = kmem_zalloc(controllen, KM_SLEEP);
33960Sstevel@tonic-gate 
33970Sstevel@tonic-gate 			error = so_opt2cmsg(mp, opt, optlen,
33985240Snordmark 			    !(flags & MSG_XPG4_2),
33995240Snordmark 			    control, controllen);
34000Sstevel@tonic-gate 			if (error) {
34010Sstevel@tonic-gate 				freemsg(mp);
34020Sstevel@tonic-gate 				kmem_free(control, controllen);
34030Sstevel@tonic-gate 				eprintsoline(so, error);
34046707Sbrutus 				goto out;
34050Sstevel@tonic-gate 			}
34060Sstevel@tonic-gate 			msg->msg_control = control;
34070Sstevel@tonic-gate 			msg->msg_controllen = controllen;
34080Sstevel@tonic-gate 		}
34090Sstevel@tonic-gate 
34100Sstevel@tonic-gate 		/*
34110Sstevel@tonic-gate 		 * Set msg_flags to MSG_EOR based on
34120Sstevel@tonic-gate 		 * DATA_flag and MOREDATA.
34130Sstevel@tonic-gate 		 */
34140Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
34150Sstevel@tonic-gate 		so->so_state &= ~SS_SAVEDEOR;
34160Sstevel@tonic-gate 		if (!(tpr->data_ind.MORE_flag & 1)) {
34170Sstevel@tonic-gate 			if (!(rval.r_val1 & MOREDATA))
34180Sstevel@tonic-gate 				msg->msg_flags |= MSG_EOR;
34190Sstevel@tonic-gate 			else
34200Sstevel@tonic-gate 				so->so_state |= SS_SAVEDEOR;
34210Sstevel@tonic-gate 		}
34220Sstevel@tonic-gate 		freemsg(mp);
34230Sstevel@tonic-gate 		/*
34240Sstevel@tonic-gate 		 * If some data was received (i.e. not EOF) and the
34250Sstevel@tonic-gate 		 * read/recv* has not been satisfied wait for some more.
34260Sstevel@tonic-gate 		 * Not possible to wait if control info was received.
34270Sstevel@tonic-gate 		 */
34280Sstevel@tonic-gate 		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
34290Sstevel@tonic-gate 		    controllen == 0 &&
34300Sstevel@tonic-gate 		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
34310Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
34320Sstevel@tonic-gate 			first = 0;
34330Sstevel@tonic-gate 			pflag = opflag | MSG_NOMARK;
34340Sstevel@tonic-gate 			goto retry;
34350Sstevel@tonic-gate 		}
34366707Sbrutus 		goto out_locked;
34370Sstevel@tonic-gate 	}
34380Sstevel@tonic-gate 	case T_EXDATA_IND: {
34390Sstevel@tonic-gate 		dprintso(so, 1,
34405240Snordmark 		    ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld "
34415240Snordmark 		    "state %s\n",
34425240Snordmark 		    so->so_oobsigcnt, so->so_oobcnt,
34435240Snordmark 		    saved_resid - uiop->uio_resid,
34445240Snordmark 		    pr_state(so->so_state, so->so_mode)));
34450Sstevel@tonic-gate 		/*
34460Sstevel@tonic-gate 		 * kstrgetmsg handles MSGMARK so there is nothing to
34470Sstevel@tonic-gate 		 * inspect in the T_EXDATA_IND.
34480Sstevel@tonic-gate 		 * strsock_proto makes the stream head queue the T_EXDATA_IND
34490Sstevel@tonic-gate 		 * as a separate message with no M_DATA component. Furthermore,
34500Sstevel@tonic-gate 		 * the stream head does not consolidate M_DATA messages onto
34510Sstevel@tonic-gate 		 * an MSGMARK'ed message ensuring that the T_EXDATA_IND
34520Sstevel@tonic-gate 		 * remains a message by itself. This is needed since MSGMARK
34530Sstevel@tonic-gate 		 * marks both the whole message as well as the last byte
34540Sstevel@tonic-gate 		 * of the message.
34550Sstevel@tonic-gate 		 */
34560Sstevel@tonic-gate 		freemsg(mp);
34570Sstevel@tonic-gate 		ASSERT(uiop->uio_resid == saved_resid);	/* No data */
34580Sstevel@tonic-gate 		if (flags & MSG_PEEK) {
34590Sstevel@tonic-gate 			/*
34600Sstevel@tonic-gate 			 * Even though we are peeking we consume the
34610Sstevel@tonic-gate 			 * T_EXDATA_IND thereby moving the mark information
34620Sstevel@tonic-gate 			 * to SS_RCVATMARK. Then the oob code below will
34630Sstevel@tonic-gate 			 * retry the peeking kstrgetmsg.
34640Sstevel@tonic-gate 			 * Note that the stream head read queue is
34650Sstevel@tonic-gate 			 * never flushed without holding SOREADLOCKED
34660Sstevel@tonic-gate 			 * thus the T_EXDATA_IND can not disappear
34670Sstevel@tonic-gate 			 * underneath us.
34680Sstevel@tonic-gate 			 */
34690Sstevel@tonic-gate 			dprintso(so, 1,
34705240Snordmark 			    ("sotpi_recvmsg: consume EXDATA_IND "
34715240Snordmark 			    "counts %d/%d state %s\n",
34725240Snordmark 			    so->so_oobsigcnt,
34735240Snordmark 			    so->so_oobcnt,
34745240Snordmark 			    pr_state(so->so_state, so->so_mode)));
34750Sstevel@tonic-gate 
34760Sstevel@tonic-gate 			pflag = MSG_ANY | MSG_DELAYERROR;
34770Sstevel@tonic-gate 			if (so->so_mode & SM_ATOMIC)
34780Sstevel@tonic-gate 				pflag |= MSG_DISCARDTAIL;
34790Sstevel@tonic-gate 
34800Sstevel@tonic-gate 			pri = 0;
34810Sstevel@tonic-gate 			mp = NULL;
34820Sstevel@tonic-gate 
34830Sstevel@tonic-gate 			error = kstrgetmsg(SOTOV(so), &mp, uiop,
34845240Snordmark 			    &pri, &pflag, (clock_t)-1, &rval);
34850Sstevel@tonic-gate 			ASSERT(uiop->uio_resid == saved_resid);
34860Sstevel@tonic-gate 
34870Sstevel@tonic-gate 			if (error) {
34880Sstevel@tonic-gate #ifdef SOCK_DEBUG
34890Sstevel@tonic-gate 				if (error != EWOULDBLOCK && error != EINTR) {
34900Sstevel@tonic-gate 					eprintsoline(so, error);
34910Sstevel@tonic-gate 				}
34920Sstevel@tonic-gate #endif /* SOCK_DEBUG */
34936707Sbrutus 				goto out;
34940Sstevel@tonic-gate 			}
34950Sstevel@tonic-gate 			ASSERT(mp);
34960Sstevel@tonic-gate 			tpr = (union T_primitives *)mp->b_rptr;
34970Sstevel@tonic-gate 			ASSERT(tpr->type == T_EXDATA_IND);
34980Sstevel@tonic-gate 			freemsg(mp);
34990Sstevel@tonic-gate 		} /* end "if (flags & MSG_PEEK)" */
35000Sstevel@tonic-gate 
35010Sstevel@tonic-gate 		/*
35020Sstevel@tonic-gate 		 * Decrement the number of queued and pending oob.
35030Sstevel@tonic-gate 		 *
35040Sstevel@tonic-gate 		 * SS_RCVATMARK is cleared when we read past a mark.
35050Sstevel@tonic-gate 		 * SS_HAVEOOBDATA is cleared when we've read past the
35060Sstevel@tonic-gate 		 * last mark.
35070Sstevel@tonic-gate 		 * SS_OOBPEND is cleared if we've read past the last
35080Sstevel@tonic-gate 		 * mark and no (new) SIGURG has been posted.
35090Sstevel@tonic-gate 		 */
35100Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
35110Sstevel@tonic-gate 		ASSERT(so_verify_oobstate(so));
35120Sstevel@tonic-gate 		ASSERT(so->so_oobsigcnt >= so->so_oobcnt);
35130Sstevel@tonic-gate 		ASSERT(so->so_oobsigcnt > 0);
35140Sstevel@tonic-gate 		so->so_oobsigcnt--;
35150Sstevel@tonic-gate 		ASSERT(so->so_oobcnt > 0);
35160Sstevel@tonic-gate 		so->so_oobcnt--;
35170Sstevel@tonic-gate 		/*
35180Sstevel@tonic-gate 		 * Since the T_EXDATA_IND has been removed from the stream
35190Sstevel@tonic-gate 		 * head, but we have not read data past the mark,
35200Sstevel@tonic-gate 		 * sockfs needs to track that the socket is still at the mark.
35210Sstevel@tonic-gate 		 *
35220Sstevel@tonic-gate 		 * Since no data was received call kstrgetmsg again to wait
35230Sstevel@tonic-gate 		 * for data.
35240Sstevel@tonic-gate 		 */
35250Sstevel@tonic-gate 		so->so_state |= SS_RCVATMARK;
35260Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
35270Sstevel@tonic-gate 		dprintso(so, 1,
35280Sstevel@tonic-gate 		    ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n",
35290Sstevel@tonic-gate 		    so->so_oobsigcnt, so->so_oobcnt,
35300Sstevel@tonic-gate 		    pr_state(so->so_state, so->so_mode)));
35310Sstevel@tonic-gate 		pflag = opflag;
35320Sstevel@tonic-gate 		goto retry;
35330Sstevel@tonic-gate 	}
35340Sstevel@tonic-gate 	default:
35350Sstevel@tonic-gate 		ASSERT(0);
35360Sstevel@tonic-gate 		freemsg(mp);
35370Sstevel@tonic-gate 		error = EPROTO;
35380Sstevel@tonic-gate 		eprintsoline(so, error);
35396707Sbrutus 		goto out;
35400Sstevel@tonic-gate 	}
35410Sstevel@tonic-gate 	/* NOTREACHED */
35426707Sbrutus out:
35430Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
35446707Sbrutus out_locked:
35456707Sbrutus 	if (sodp != NULL) {
35466707Sbrutus 		/* Finish any sodirect and uioa processing */
3547*7660SEric.Yu@Sun.COM 		mutex_enter(sodp->sod_lockp);
35486707Sbrutus 		if (suiop != NULL) {
35496707Sbrutus 			/* Finish any uioa_t processing */
35506707Sbrutus 			int ret;
35516707Sbrutus 
35526707Sbrutus 			ASSERT(uiop == (uio_t *)&sodp->sod_uioa);
35536707Sbrutus 			ret = uioafini(suiop, (uioa_t *)uiop);
35546707Sbrutus 			if (error == 0 && ret != 0) {
35556707Sbrutus 				/* If no error yet, set it */
35566707Sbrutus 				error = ret;
35576707Sbrutus 			}
35586707Sbrutus 			if ((mp = sodp->sod_uioafh) != NULL) {
35596707Sbrutus 				sodp->sod_uioafh = NULL;
35606707Sbrutus 				sodp->sod_uioaft = NULL;
35616707Sbrutus 				freemsg(mp);
35626707Sbrutus 			}
35636707Sbrutus 		}
3564*7660SEric.Yu@Sun.COM 		ASSERT(sodp->sod_uioafh == NULL);
35656707Sbrutus 		if (!(sodp->sod_state & SOD_WAKE_NOT)) {
35666707Sbrutus 			/* Awoke */
35676707Sbrutus 			sodp->sod_state &= SOD_WAKE_CLR;
35686707Sbrutus 			sodp->sod_state |= SOD_WAKE_NOT;
35696707Sbrutus 		}
35706707Sbrutus 		/* Last, clear sod_want value */
35716707Sbrutus 		sodp->sod_want = 0;
3572*7660SEric.Yu@Sun.COM 		mutex_exit(sodp->sod_lockp);
35736707Sbrutus 	}
35740Sstevel@tonic-gate 	so_unlock_read(so);	/* Clear SOREADLOCKED */
35750Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
35760Sstevel@tonic-gate 	return (error);
35770Sstevel@tonic-gate }
35780Sstevel@tonic-gate 
35790Sstevel@tonic-gate /*
35800Sstevel@tonic-gate  * Sending data with options on a datagram socket.
35810Sstevel@tonic-gate  * Assumes caller has verified that SS_ISBOUND etc. are set.
35820Sstevel@tonic-gate  */
35830Sstevel@tonic-gate static int
3584741Smasputra sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen,
3585741Smasputra     struct uio *uiop, void *control, t_uscalar_t controllen, int flags)
35860Sstevel@tonic-gate {
35870Sstevel@tonic-gate 	struct T_unitdata_req	tudr;
35880Sstevel@tonic-gate 	mblk_t			*mp;
35890Sstevel@tonic-gate 	int			error;
35900Sstevel@tonic-gate 	void			*addr;
35910Sstevel@tonic-gate 	socklen_t		addrlen;
35920Sstevel@tonic-gate 	void			*src;
35930Sstevel@tonic-gate 	socklen_t		srclen;
35940Sstevel@tonic-gate 	ssize_t			len;
35950Sstevel@tonic-gate 	int			size;
35960Sstevel@tonic-gate 	struct T_opthdr		toh;
35970Sstevel@tonic-gate 	struct fdbuf		*fdbuf;
35980Sstevel@tonic-gate 	t_uscalar_t		optlen;
35990Sstevel@tonic-gate 	void			*fds;
36000Sstevel@tonic-gate 	int			fdlen;
36010Sstevel@tonic-gate 
36020Sstevel@tonic-gate 	ASSERT(name && namelen);
36030Sstevel@tonic-gate 	ASSERT(control && controllen);
36040Sstevel@tonic-gate 
36050Sstevel@tonic-gate 	len = uiop->uio_resid;
36060Sstevel@tonic-gate 	if (len > (ssize_t)so->so_tidu_size) {
36070Sstevel@tonic-gate 		return (EMSGSIZE);
36080Sstevel@tonic-gate 	}
36090Sstevel@tonic-gate 
36100Sstevel@tonic-gate 	/*
36110Sstevel@tonic-gate 	 * For AF_UNIX the destination address is translated to an internal
36120Sstevel@tonic-gate 	 * name and the source address is passed as an option.
36130Sstevel@tonic-gate 	 * Also, file descriptors are passed as file pointers in an
36140Sstevel@tonic-gate 	 * option.
36150Sstevel@tonic-gate 	 */
36160Sstevel@tonic-gate 
36170Sstevel@tonic-gate 	/*
36180Sstevel@tonic-gate 	 * Length and family checks.
36190Sstevel@tonic-gate 	 */
36200Sstevel@tonic-gate 	error = so_addr_verify(so, name, namelen);
36210Sstevel@tonic-gate 	if (error) {
36220Sstevel@tonic-gate 		eprintsoline(so, error);
36230Sstevel@tonic-gate 		return (error);
36240Sstevel@tonic-gate 	}
36250Sstevel@tonic-gate 	if (so->so_family == AF_UNIX) {
36260Sstevel@tonic-gate 		if (so->so_state & SS_FADDR_NOXLATE) {
36270Sstevel@tonic-gate 			/*
36280Sstevel@tonic-gate 			 * Already have a transport internal address. Do not
36290Sstevel@tonic-gate 			 * pass any (transport internal) source address.
36300Sstevel@tonic-gate 			 */
36310Sstevel@tonic-gate 			addr = name;
36320Sstevel@tonic-gate 			addrlen = namelen;
36330Sstevel@tonic-gate 			src = NULL;
36340Sstevel@tonic-gate 			srclen = 0;
36350Sstevel@tonic-gate 		} else {
36360Sstevel@tonic-gate 			/*
36370Sstevel@tonic-gate 			 * Pass the sockaddr_un source address as an option
36380Sstevel@tonic-gate 			 * and translate the remote address.
36390Sstevel@tonic-gate 			 *
36400Sstevel@tonic-gate 			 * Note that this code does not prevent so_laddr_sa
36410Sstevel@tonic-gate 			 * from changing while it is being used. Thus
36420Sstevel@tonic-gate 			 * if an unbind+bind occurs concurrently with this
36430Sstevel@tonic-gate 			 * send the peer might see a partially new and a
36440Sstevel@tonic-gate 			 * partially old "from" address.
36450Sstevel@tonic-gate 			 */
36460Sstevel@tonic-gate 			src = so->so_laddr_sa;
36470Sstevel@tonic-gate 			srclen = (t_uscalar_t)so->so_laddr_len;
36480Sstevel@tonic-gate 			dprintso(so, 1,
36490Sstevel@tonic-gate 			    ("sosend_dgramcmsg UNIX: srclen %d, src %p\n",
36500Sstevel@tonic-gate 			    srclen, src));
36510Sstevel@tonic-gate 			error = so_ux_addr_xlate(so, name, namelen,
36525240Snordmark 			    (flags & MSG_XPG4_2),
36535240Snordmark 			    &addr, &addrlen);
36540Sstevel@tonic-gate 			if (error) {
36550Sstevel@tonic-gate 				eprintsoline(so, error);
36560Sstevel@tonic-gate 				return (error);
36570Sstevel@tonic-gate 			}
36580Sstevel@tonic-gate 		}
36590Sstevel@tonic-gate 	} else {
36600Sstevel@tonic-gate 		addr = name;
36610Sstevel@tonic-gate 		addrlen = namelen;
36620Sstevel@tonic-gate 		src = NULL;
36630Sstevel@tonic-gate 		srclen = 0;
36640Sstevel@tonic-gate 	}
36650Sstevel@tonic-gate 	optlen = so_optlen(control, controllen,
36665240Snordmark 	    !(flags & MSG_XPG4_2));
36670Sstevel@tonic-gate 	tudr.PRIM_type = T_UNITDATA_REQ;
36680Sstevel@tonic-gate 	tudr.DEST_length = addrlen;
36690Sstevel@tonic-gate 	tudr.DEST_offset = (t_scalar_t)sizeof (tudr);
36700Sstevel@tonic-gate 	if (srclen != 0)
36710Sstevel@tonic-gate 		tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) +
36720Sstevel@tonic-gate 		    _TPI_ALIGN_TOPT(srclen));
36730Sstevel@tonic-gate 	else
36740Sstevel@tonic-gate 		tudr.OPT_length = optlen;
36750Sstevel@tonic-gate 	tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) +
36765240Snordmark 	    _TPI_ALIGN_TOPT(addrlen));
36770Sstevel@tonic-gate 
36780Sstevel@tonic-gate 	size = tudr.OPT_offset + tudr.OPT_length;
36790Sstevel@tonic-gate 
36800Sstevel@tonic-gate 	/*
36810Sstevel@tonic-gate 	 * File descriptors only when SM_FDPASSING set.
36820Sstevel@tonic-gate 	 */
36830Sstevel@tonic-gate 	error = so_getfdopt(control, controllen,
36845240Snordmark 	    !(flags & MSG_XPG4_2), &fds, &fdlen);
36850Sstevel@tonic-gate 	if (error)
36860Sstevel@tonic-gate 		return (error);
36870Sstevel@tonic-gate 	if (fdlen != -1) {
36880Sstevel@tonic-gate 		if (!(so->so_mode & SM_FDPASSING))
36890Sstevel@tonic-gate 			return (EOPNOTSUPP);
36900Sstevel@tonic-gate 
36910Sstevel@tonic-gate 		error = fdbuf_create(fds, fdlen, &fdbuf);
36920Sstevel@tonic-gate 		if (error)
36930Sstevel@tonic-gate 			return (error);
36940Sstevel@tonic-gate 		mp = fdbuf_allocmsg(size, fdbuf);
36950Sstevel@tonic-gate 	} else {
36960Sstevel@tonic-gate 		mp = soallocproto(size, _ALLOC_INTR);
3697455Smeem 		if (mp == NULL) {
3698455Smeem 			/*
3699455Smeem 			 * Caught a signal waiting for memory.
3700455Smeem 			 * Let send* return EINTR.
3701455Smeem 			 */
3702455Smeem 			return (EINTR);
3703455Smeem 		}
37040Sstevel@tonic-gate 	}
37050Sstevel@tonic-gate 	soappendmsg(mp, &tudr, sizeof (tudr));
37060Sstevel@tonic-gate 	soappendmsg(mp, addr, addrlen);
37070Sstevel@tonic-gate 	mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen;
37080Sstevel@tonic-gate 
37090Sstevel@tonic-gate 	if (fdlen != -1) {
37100Sstevel@tonic-gate 		ASSERT(fdbuf != NULL);
37110Sstevel@tonic-gate 		toh.level = SOL_SOCKET;
37120Sstevel@tonic-gate 		toh.name = SO_FILEP;
37130Sstevel@tonic-gate 		toh.len = fdbuf->fd_size +
37145240Snordmark 		    (t_uscalar_t)sizeof (struct T_opthdr);
37150Sstevel@tonic-gate 		toh.status = 0;
37160Sstevel@tonic-gate 		soappendmsg(mp, &toh, sizeof (toh));
37170Sstevel@tonic-gate 		soappendmsg(mp, fdbuf, fdbuf->fd_size);
37180Sstevel@tonic-gate 		ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr));
37190Sstevel@tonic-gate 	}
37200Sstevel@tonic-gate 	if (srclen != 0) {
37210Sstevel@tonic-gate 		/*
37220Sstevel@tonic-gate 		 * There is a AF_UNIX sockaddr_un to include as a source
37230Sstevel@tonic-gate 		 * address option.
37240Sstevel@tonic-gate 		 */
37250Sstevel@tonic-gate 		toh.level = SOL_SOCKET;
37260Sstevel@tonic-gate 		toh.name = SO_SRCADDR;
37270Sstevel@tonic-gate 		toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr));
37280Sstevel@tonic-gate 		toh.status = 0;
37290Sstevel@tonic-gate 		soappendmsg(mp, &toh, sizeof (toh));
37300Sstevel@tonic-gate 		soappendmsg(mp, src, srclen);
37310Sstevel@tonic-gate 		mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen;
37320Sstevel@tonic-gate 		ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr));
37330Sstevel@tonic-gate 	}
37340Sstevel@tonic-gate 	ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
37350Sstevel@tonic-gate 	so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp);
37360Sstevel@tonic-gate 	/* At most 3 bytes left in the message */
37370Sstevel@tonic-gate 	ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE));
37380Sstevel@tonic-gate 	ASSERT(MBLKL(mp) <= (ssize_t)size);
37390Sstevel@tonic-gate 
37400Sstevel@tonic-gate 	ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
37410Sstevel@tonic-gate 	if (audit_active)
37420Sstevel@tonic-gate 		audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0);
37430Sstevel@tonic-gate 
37440Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0);
37450Sstevel@tonic-gate #ifdef SOCK_DEBUG
37460Sstevel@tonic-gate 	if (error) {
37470Sstevel@tonic-gate 		eprintsoline(so, error);
37480Sstevel@tonic-gate 	}
37490Sstevel@tonic-gate #endif /* SOCK_DEBUG */
37500Sstevel@tonic-gate 	return (error);
37510Sstevel@tonic-gate }
37520Sstevel@tonic-gate 
37530Sstevel@tonic-gate /*
37540Sstevel@tonic-gate  * Sending data with options on a connected stream socket.
37550Sstevel@tonic-gate  * Assumes caller has verified that SS_ISCONNECTED is set.
37560Sstevel@tonic-gate  */
37570Sstevel@tonic-gate static int
37580Sstevel@tonic-gate sosend_svccmsg(struct sonode *so,
37590Sstevel@tonic-gate 		struct uio *uiop,
37600Sstevel@tonic-gate 		int more,
37610Sstevel@tonic-gate 		void *control,
37620Sstevel@tonic-gate 		t_uscalar_t controllen,
37630Sstevel@tonic-gate 		int flags)
37640Sstevel@tonic-gate {
37650Sstevel@tonic-gate 	struct T_optdata_req	tdr;
37660Sstevel@tonic-gate 	mblk_t			*mp;
37670Sstevel@tonic-gate 	int			error;
37680Sstevel@tonic-gate 	ssize_t			iosize;
37690Sstevel@tonic-gate 	int			first = 1;
37700Sstevel@tonic-gate 	int			size;
37710Sstevel@tonic-gate 	struct fdbuf		*fdbuf;
37720Sstevel@tonic-gate 	t_uscalar_t		optlen;
37730Sstevel@tonic-gate 	void			*fds;
37740Sstevel@tonic-gate 	int			fdlen;
37750Sstevel@tonic-gate 	struct T_opthdr		toh;
37760Sstevel@tonic-gate 
37770Sstevel@tonic-gate 	dprintso(so, 1,
37785240Snordmark 	    ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid));
37790Sstevel@tonic-gate 
37800Sstevel@tonic-gate 	/*
37810Sstevel@tonic-gate 	 * Has to be bound and connected. However, since no locks are
37820Sstevel@tonic-gate 	 * held the state could have changed after sotpi_sendmsg checked it
37830Sstevel@tonic-gate 	 * thus it is not possible to ASSERT on the state.
37840Sstevel@tonic-gate 	 */
37850Sstevel@tonic-gate 
37860Sstevel@tonic-gate 	/* Options on connection-oriented only when SM_OPTDATA set. */
37870Sstevel@tonic-gate 	if (!(so->so_mode & SM_OPTDATA))
37880Sstevel@tonic-gate 		return (EOPNOTSUPP);
37890Sstevel@tonic-gate 
37900Sstevel@tonic-gate 	do {
37910Sstevel@tonic-gate 		/*
37920Sstevel@tonic-gate 		 * Set the MORE flag if uio_resid does not fit in this
37930Sstevel@tonic-gate 		 * message or if the caller passed in "more".
37940Sstevel@tonic-gate 		 * Error for transports with zero tidu_size.
37950Sstevel@tonic-gate 		 */
37960Sstevel@tonic-gate 		tdr.PRIM_type = T_OPTDATA_REQ;
37970Sstevel@tonic-gate 		iosize = so->so_tidu_size;
37980Sstevel@tonic-gate 		if (iosize <= 0)
37990Sstevel@tonic-gate 			return (EMSGSIZE);
38000Sstevel@tonic-gate 		if (uiop->uio_resid > iosize) {
38010Sstevel@tonic-gate 			tdr.DATA_flag = 1;
38020Sstevel@tonic-gate 		} else {
38030Sstevel@tonic-gate 			if (more)
38040Sstevel@tonic-gate 				tdr.DATA_flag = 1;
38050Sstevel@tonic-gate 			else
38060Sstevel@tonic-gate 				tdr.DATA_flag = 0;
38070Sstevel@tonic-gate 			iosize = uiop->uio_resid;
38080Sstevel@tonic-gate 		}
38090Sstevel@tonic-gate 		dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n",
38105240Snordmark 		    tdr.DATA_flag, iosize));
38110Sstevel@tonic-gate 
38120Sstevel@tonic-gate 		optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2));
38130Sstevel@tonic-gate 		tdr.OPT_length = optlen;
38140Sstevel@tonic-gate 		tdr.OPT_offset = (t_scalar_t)sizeof (tdr);
38150Sstevel@tonic-gate 
38160Sstevel@tonic-gate 		size = (int)sizeof (tdr) + optlen;
38170Sstevel@tonic-gate 		/*
38180Sstevel@tonic-gate 		 * File descriptors only when SM_FDPASSING set.
38190Sstevel@tonic-gate 		 */
38200Sstevel@tonic-gate 		error = so_getfdopt(control, controllen,
38215240Snordmark 		    !(flags & MSG_XPG4_2), &fds, &fdlen);
38220Sstevel@tonic-gate 		if (error)
38230Sstevel@tonic-gate 			return (error);
38240Sstevel@tonic-gate 		if (fdlen != -1) {
38250Sstevel@tonic-gate 			if (!(so->so_mode & SM_FDPASSING))
38260Sstevel@tonic-gate 				return (EOPNOTSUPP);
38270Sstevel@tonic-gate 
38280Sstevel@tonic-gate 			error = fdbuf_create(fds, fdlen, &fdbuf);
38290Sstevel@tonic-gate 			if (error)
38300Sstevel@tonic-gate 				return (error);
38310Sstevel@tonic-gate 			mp = fdbuf_allocmsg(size, fdbuf);
38320Sstevel@tonic-gate 		} else {
38330Sstevel@tonic-gate 			mp = soallocproto(size, _ALLOC_INTR);
3834455Smeem 			if (mp == NULL) {
3835455Smeem 				/*
3836455Smeem 				 * Caught a signal waiting for memory.
3837455Smeem 				 * Let send* return EINTR.
3838455Smeem 				 */
3839455Smeem 				return (first ? EINTR : 0);
3840455Smeem 			}
38410Sstevel@tonic-gate 		}
38420Sstevel@tonic-gate 		soappendmsg(mp, &tdr, sizeof (tdr));
38430Sstevel@tonic-gate 
38440Sstevel@tonic-gate 		if (fdlen != -1) {
38450Sstevel@tonic-gate 			ASSERT(fdbuf != NULL);
38460Sstevel@tonic-gate 			toh.level = SOL_SOCKET;
38470Sstevel@tonic-gate 			toh.name = SO_FILEP;
38480Sstevel@tonic-gate 			toh.len = fdbuf->fd_size +
38495240Snordmark 			    (t_uscalar_t)sizeof (struct T_opthdr);
38500Sstevel@tonic-gate 			toh.status = 0;
38510Sstevel@tonic-gate 			soappendmsg(mp, &toh, sizeof (toh));
38520Sstevel@tonic-gate 			soappendmsg(mp, fdbuf, fdbuf->fd_size);
38530Sstevel@tonic-gate 			ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr));
38540Sstevel@tonic-gate 		}
38550Sstevel@tonic-gate 		so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp);
38560Sstevel@tonic-gate 		/* At most 3 bytes left in the message */
38570Sstevel@tonic-gate 		ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE));
38580Sstevel@tonic-gate 		ASSERT(MBLKL(mp) <= (ssize_t)size);
38590Sstevel@tonic-gate 
38600Sstevel@tonic-gate 		ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
38610Sstevel@tonic-gate 
38620Sstevel@tonic-gate 		error = kstrputmsg(SOTOV(so), mp, uiop, iosize,
38635240Snordmark 		    0, MSG_BAND, 0);
38640Sstevel@tonic-gate 		if (error) {
38650Sstevel@tonic-gate 			if (!first && error == EWOULDBLOCK)
38660Sstevel@tonic-gate 				return (0);
38670Sstevel@tonic-gate 			eprintsoline(so, error);
38680Sstevel@tonic-gate 			return (error);
38690Sstevel@tonic-gate 		}
38700Sstevel@tonic-gate 		control = NULL;
38710Sstevel@tonic-gate 		first = 0;
38720Sstevel@tonic-gate 		if (uiop->uio_resid > 0) {
38730Sstevel@tonic-gate 			/*
38740Sstevel@tonic-gate 			 * Recheck for fatal errors. Fail write even though
38750Sstevel@tonic-gate 			 * some data have been written. This is consistent
38760Sstevel@tonic-gate 			 * with strwrite semantics and BSD sockets semantics.
38770Sstevel@tonic-gate 			 */
38780Sstevel@tonic-gate 			if (so->so_state & SS_CANTSENDMORE) {
38790Sstevel@tonic-gate 				tsignal(curthread, SIGPIPE);
38800Sstevel@tonic-gate 				eprintsoline(so, error);
38810Sstevel@tonic-gate 				return (EPIPE);
38820Sstevel@tonic-gate 			}
38830Sstevel@tonic-gate 			if (so->so_error != 0) {
38840Sstevel@tonic-gate 				mutex_enter(&so->so_lock);
38850Sstevel@tonic-gate 				error = sogeterr(so);
38860Sstevel@tonic-gate 				mutex_exit(&so->so_lock);
38870Sstevel@tonic-gate 				if (error != 0) {
38880Sstevel@tonic-gate 					eprintsoline(so, error);
38890Sstevel@tonic-gate 					return (error);
38900Sstevel@tonic-gate 				}
38910Sstevel@tonic-gate 			}
38920Sstevel@tonic-gate 		}
38930Sstevel@tonic-gate 	} while (uiop->uio_resid > 0);
38940Sstevel@tonic-gate 	return (0);
38950Sstevel@tonic-gate }
38960Sstevel@tonic-gate 
38970Sstevel@tonic-gate /*
38980Sstevel@tonic-gate  * Sending data on a datagram socket.
38990Sstevel@tonic-gate  * Assumes caller has verified that SS_ISBOUND etc. are set.
39000Sstevel@tonic-gate  *
39010Sstevel@tonic-gate  * For AF_UNIX the destination address is translated to an internal
39020Sstevel@tonic-gate  * name and the source address is passed as an option.
39030Sstevel@tonic-gate  */
39040Sstevel@tonic-gate int
3905741Smasputra sosend_dgram(struct sonode *so, struct sockaddr	*name, socklen_t namelen,
3906741Smasputra     struct uio *uiop, int flags)
39070Sstevel@tonic-gate {
39080Sstevel@tonic-gate 	struct T_unitdata_req	tudr;
39090Sstevel@tonic-gate 	mblk_t			*mp;
39100Sstevel@tonic-gate 	int			error;
39110Sstevel@tonic-gate 	void			*addr;
39120Sstevel@tonic-gate 	socklen_t		addrlen;
39130Sstevel@tonic-gate 	void			*src;
39140Sstevel@tonic-gate 	socklen_t		srclen;
39150Sstevel@tonic-gate 	ssize_t			len;
39160Sstevel@tonic-gate 
3917741Smasputra 	ASSERT(name != NULL && namelen != 0);
39180Sstevel@tonic-gate 
39190Sstevel@tonic-gate 	len = uiop->uio_resid;
39200Sstevel@tonic-gate 	if (len > so->so_tidu_size) {
39210Sstevel@tonic-gate 		error = EMSGSIZE;
39220Sstevel@tonic-gate 		goto done;
39230Sstevel@tonic-gate 	}
39240Sstevel@tonic-gate 
3925741Smasputra 	/* Length and family checks */
39260Sstevel@tonic-gate 	error = so_addr_verify(so, name, namelen);
3927741Smasputra 	if (error != 0)
39280Sstevel@tonic-gate 		goto done;
3929741Smasputra 
3930741Smasputra 	if (so->so_state & SS_DIRECT)
3931741Smasputra 		return (sodgram_direct(so, name, namelen, uiop, flags));
3932741Smasputra 
39330Sstevel@tonic-gate 	if (so->so_family == AF_UNIX) {
39340Sstevel@tonic-gate 		if (so->so_state & SS_FADDR_NOXLATE) {
39350Sstevel@tonic-gate 			/*
39360Sstevel@tonic-gate 			 * Already have a transport internal address. Do not
39370Sstevel@tonic-gate 			 * pass any (transport internal) source address.
39380Sstevel@tonic-gate 			 */
39390Sstevel@tonic-gate 			addr = name;
39400Sstevel@tonic-gate 			addrlen = namelen;
39410Sstevel@tonic-gate 			src = NULL;
39420Sstevel@tonic-gate 			srclen = 0;
39430Sstevel@tonic-gate 		} else {
39440Sstevel@tonic-gate 			/*
39450Sstevel@tonic-gate 			 * Pass the sockaddr_un source address as an option
39460Sstevel@tonic-gate 			 * and translate the remote address.
39470Sstevel@tonic-gate 			 *
39480Sstevel@tonic-gate 			 * Note that this code does not prevent so_laddr_sa
39490Sstevel@tonic-gate 			 * from changing while it is being used. Thus
39500Sstevel@tonic-gate 			 * if an unbind+bind occurs concurrently with this
39510Sstevel@tonic-gate 			 * send the peer might see a partially new and a
39520Sstevel@tonic-gate 			 * partially old "from" address.
39530Sstevel@tonic-gate 			 */
39540Sstevel@tonic-gate 			src = so->so_laddr_sa;
39550Sstevel@tonic-gate 			srclen = (socklen_t)so->so_laddr_len;
39560Sstevel@tonic-gate 			dprintso(so, 1,
39575240Snordmark 			    ("sosend_dgram UNIX: srclen %d, src %p\n",
39585240Snordmark 			    srclen, src));
39590Sstevel@tonic-gate 			error = so_ux_addr_xlate(so, name, namelen,
39605240Snordmark 			    (flags & MSG_XPG4_2),
39615240Snordmark 			    &addr, &addrlen);
39620Sstevel@tonic-gate 			if (error) {
39630Sstevel@tonic-gate 				eprintsoline(so, error);
39640Sstevel@tonic-gate 				goto done;
39650Sstevel@tonic-gate 			}
39660Sstevel@tonic-gate 		}
39670Sstevel@tonic-gate 	} else {
39680Sstevel@tonic-gate 		addr = name;
39690Sstevel@tonic-gate 		addrlen = namelen;
39700Sstevel@tonic-gate 		src = NULL;
39710Sstevel@tonic-gate 		srclen = 0;
39720Sstevel@tonic-gate 	}
39730Sstevel@tonic-gate 	tudr.PRIM_type = T_UNITDATA_REQ;
39740Sstevel@tonic-gate 	tudr.DEST_length = addrlen;
39750Sstevel@tonic-gate 	tudr.DEST_offset = (t_scalar_t)sizeof (tudr);
39760Sstevel@tonic-gate 	if (srclen == 0) {
39770Sstevel@tonic-gate 		tudr.OPT_length = 0;
39780Sstevel@tonic-gate 		tudr.OPT_offset = 0;
39790Sstevel@tonic-gate 
39800Sstevel@tonic-gate 		mp = soallocproto2(&tudr, sizeof (tudr),
39810Sstevel@tonic-gate 		    addr, addrlen, 0, _ALLOC_INTR);
39820Sstevel@tonic-gate 		if (mp == NULL) {
39830Sstevel@tonic-gate 			/*
39840Sstevel@tonic-gate 			 * Caught a signal waiting for memory.
39850Sstevel@tonic-gate 			 * Let send* return EINTR.
39860Sstevel@tonic-gate 			 */
39870Sstevel@tonic-gate 			error = EINTR;
39880Sstevel@tonic-gate 			goto done;
39890Sstevel@tonic-gate 		}
39900Sstevel@tonic-gate 	} else {
39910Sstevel@tonic-gate 		/*
39920Sstevel@tonic-gate 		 * There is a AF_UNIX sockaddr_un to include as a source
39930Sstevel@tonic-gate 		 * address option.
39940Sstevel@tonic-gate 		 */
39950Sstevel@tonic-gate 		struct T_opthdr toh;
39960Sstevel@tonic-gate 		ssize_t size;
39970Sstevel@tonic-gate 
39980Sstevel@tonic-gate 		tudr.OPT_length = (t_scalar_t)(sizeof (toh) +
39995240Snordmark 		    _TPI_ALIGN_TOPT(srclen));
40000Sstevel@tonic-gate 		tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) +
40015240Snordmark 		    _TPI_ALIGN_TOPT(addrlen));
40020Sstevel@tonic-gate 
40030Sstevel@tonic-gate 		toh.level = SOL_SOCKET;
40040Sstevel@tonic-gate 		toh.name = SO_SRCADDR;
40050Sstevel@tonic-gate 		toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr));
40060Sstevel@tonic-gate 		toh.status = 0;
40070Sstevel@tonic-gate 
40080Sstevel@tonic-gate 		size = tudr.OPT_offset + tudr.OPT_length;
40090Sstevel@tonic-gate 		mp = soallocproto2(&tudr, sizeof (tudr),
40100Sstevel@tonic-gate 		    addr, addrlen, size, _ALLOC_INTR);
40110Sstevel@tonic-gate 		if (mp == NULL) {
40120Sstevel@tonic-gate 			/*
40130Sstevel@tonic-gate 			 * Caught a signal waiting for memory.
40140Sstevel@tonic-gate 			 * Let send* return EINTR.
40150Sstevel@tonic-gate 			 */
40160Sstevel@tonic-gate 			error = EINTR;
40170Sstevel@tonic-gate 			goto done;
40180Sstevel@tonic-gate 		}
40190Sstevel@tonic-gate 		mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen;
40200Sstevel@tonic-gate 		soappendmsg(mp, &toh, sizeof (toh));
40210Sstevel@tonic-gate 		soappendmsg(mp, src, srclen);
40220Sstevel@tonic-gate 		mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen;
40230Sstevel@tonic-gate 		ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
40240Sstevel@tonic-gate 	}
40250Sstevel@tonic-gate 
40260Sstevel@tonic-gate 	if (audit_active)
40270Sstevel@tonic-gate 		audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0);
40280Sstevel@tonic-gate 
40290Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0);
40300Sstevel@tonic-gate done:
40310Sstevel@tonic-gate #ifdef SOCK_DEBUG
40320Sstevel@tonic-gate 	if (error) {
40330Sstevel@tonic-gate 		eprintsoline(so, error);
40340Sstevel@tonic-gate 	}
40350Sstevel@tonic-gate #endif /* SOCK_DEBUG */
40360Sstevel@tonic-gate 	return (error);
40370Sstevel@tonic-gate }
40380Sstevel@tonic-gate 
40390Sstevel@tonic-gate /*
40400Sstevel@tonic-gate  * Sending data on a connected stream socket.
40410Sstevel@tonic-gate  * Assumes caller has verified that SS_ISCONNECTED is set.
40420Sstevel@tonic-gate  */
40430Sstevel@tonic-gate int
40440Sstevel@tonic-gate sosend_svc(struct sonode *so,
40450Sstevel@tonic-gate 	struct uio *uiop,
40460Sstevel@tonic-gate 	t_scalar_t prim,
40470Sstevel@tonic-gate 	int more,
40480Sstevel@tonic-gate 	int sflag)
40490Sstevel@tonic-gate {
40500Sstevel@tonic-gate 	struct T_data_req	tdr;
40510Sstevel@tonic-gate 	mblk_t			*mp;
40520Sstevel@tonic-gate 	int			error;
40530Sstevel@tonic-gate 	ssize_t			iosize;
40540Sstevel@tonic-gate 	int			first = 1;
40550Sstevel@tonic-gate 
40560Sstevel@tonic-gate 	dprintso(so, 1,
40575240Snordmark 	    ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n",
40587240Srh87107 	    (void *)so, uiop->uio_resid, prim, sflag));
40590Sstevel@tonic-gate 
40600Sstevel@tonic-gate 	/*
40610Sstevel@tonic-gate 	 * Has to be bound and connected. However, since no locks are
40620Sstevel@tonic-gate 	 * held the state could have changed after sotpi_sendmsg checked it
40630Sstevel@tonic-gate 	 * thus it is not possible to ASSERT on the state.
40640Sstevel@tonic-gate 	 */
40650Sstevel@tonic-gate 
40660Sstevel@tonic-gate 	do {
40670Sstevel@tonic-gate 		/*
40680Sstevel@tonic-gate 		 * Set the MORE flag if uio_resid does not fit in this
40690Sstevel@tonic-gate 		 * message or if the caller passed in "more".
40700Sstevel@tonic-gate 		 * Error for transports with zero tidu_size.
40710Sstevel@tonic-gate 		 */
40720Sstevel@tonic-gate 		tdr.PRIM_type = prim;
40730Sstevel@tonic-gate 		iosize = so->so_tidu_size;
40740Sstevel@tonic-gate 		if (iosize <= 0)
40750Sstevel@tonic-gate 			return (EMSGSIZE);
40760Sstevel@tonic-gate 		if (uiop->uio_resid > iosize) {
40770Sstevel@tonic-gate 			tdr.MORE_flag = 1;
40780Sstevel@tonic-gate 		} else {
40790Sstevel@tonic-gate 			if (more)
40800Sstevel@tonic-gate 				tdr.MORE_flag = 1;
40810Sstevel@tonic-gate 			else
40820Sstevel@tonic-gate 				tdr.MORE_flag = 0;
40830Sstevel@tonic-gate 			iosize = uiop->uio_resid;
40840Sstevel@tonic-gate 		}
40850Sstevel@tonic-gate 		dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n",
40865240Snordmark 		    prim, tdr.MORE_flag, iosize));
40870Sstevel@tonic-gate 		mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR);
40880Sstevel@tonic-gate 		if (mp == NULL) {
40890Sstevel@tonic-gate 			/*
40900Sstevel@tonic-gate 			 * Caught a signal waiting for memory.
40910Sstevel@tonic-gate 			 * Let send* return EINTR.
40920Sstevel@tonic-gate 			 */
40930Sstevel@tonic-gate 			if (first)
40940Sstevel@tonic-gate 				return (EINTR);
40950Sstevel@tonic-gate 			else
40960Sstevel@tonic-gate 				return (0);
40970Sstevel@tonic-gate 		}
40980Sstevel@tonic-gate 
40990Sstevel@tonic-gate 		error = kstrputmsg(SOTOV(so), mp, uiop, iosize,
41005240Snordmark 		    0, sflag | MSG_BAND, 0);
41010Sstevel@tonic-gate 		if (error) {
41020Sstevel@tonic-gate 			if (!first && error == EWOULDBLOCK)
41030Sstevel@tonic-gate 				return (0);
41040Sstevel@tonic-gate 			eprintsoline(so, error);
41050Sstevel@tonic-gate 			return (error);
41060Sstevel@tonic-gate 		}
41070Sstevel@tonic-gate 		first = 0;
41080Sstevel@tonic-gate 		if (uiop->uio_resid > 0) {
41090Sstevel@tonic-gate 			/*
41100Sstevel@tonic-gate 			 * Recheck for fatal errors. Fail write even though
41110Sstevel@tonic-gate 			 * some data have been written. This is consistent
41120Sstevel@tonic-gate 			 * with strwrite semantics and BSD sockets semantics.
41130Sstevel@tonic-gate 			 */
41140Sstevel@tonic-gate 			if (so->so_state & SS_CANTSENDMORE) {
41150Sstevel@tonic-gate 				tsignal(curthread, SIGPIPE);
41160Sstevel@tonic-gate 				eprintsoline(so, error);
41170Sstevel@tonic-gate 				return (EPIPE);
41180Sstevel@tonic-gate 			}
41190Sstevel@tonic-gate 			if (so->so_error != 0) {
41200Sstevel@tonic-gate 				mutex_enter(&so->so_lock);
41210Sstevel@tonic-gate 				error = sogeterr(so);
41220Sstevel@tonic-gate 				mutex_exit(&so->so_lock);
41230Sstevel@tonic-gate 				if (error != 0) {
41240Sstevel@tonic-gate 					eprintsoline(so, error);
41250Sstevel@tonic-gate 					return (error);
41260Sstevel@tonic-gate 				}
41270Sstevel@tonic-gate 			}
41280Sstevel@tonic-gate 		}
41290Sstevel@tonic-gate 	} while (uiop->uio_resid > 0);
41300Sstevel@tonic-gate 	return (0);
41310Sstevel@tonic-gate }
41320Sstevel@tonic-gate 
41330Sstevel@tonic-gate /*
41340Sstevel@tonic-gate  * Check the state for errors and call the appropriate send function.
41350Sstevel@tonic-gate  *
41360Sstevel@tonic-gate  * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set)
41370Sstevel@tonic-gate  * this function issues a setsockopt to toggle SO_DONTROUTE before and
41380Sstevel@tonic-gate  * after sending the message.
41390Sstevel@tonic-gate  */
41400Sstevel@tonic-gate static int
41410Sstevel@tonic-gate sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
41420Sstevel@tonic-gate {
41430Sstevel@tonic-gate 	int		so_state;
41440Sstevel@tonic-gate 	int		so_mode;
41450Sstevel@tonic-gate 	int		error;
41460Sstevel@tonic-gate 	struct sockaddr *name;
41470Sstevel@tonic-gate 	t_uscalar_t	namelen;
41480Sstevel@tonic-gate 	int		dontroute;
41490Sstevel@tonic-gate 	int		flags;
41500Sstevel@tonic-gate 
41510Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n",
41527240Srh87107 	    (void *)so, (void *)msg, msg->msg_flags,
41535240Snordmark 	    pr_state(so->so_state, so->so_mode), so->so_error));
41540Sstevel@tonic-gate 
41550Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
41560Sstevel@tonic-gate 	so_state = so->so_state;
41570Sstevel@tonic-gate 
41580Sstevel@tonic-gate 	if (so_state & SS_CANTSENDMORE) {
41590Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
41600Sstevel@tonic-gate 		tsignal(curthread, SIGPIPE);
41610Sstevel@tonic-gate 		return (EPIPE);
41620Sstevel@tonic-gate 	}
41630Sstevel@tonic-gate 
41640Sstevel@tonic-gate 	if (so->so_error != 0) {
41650Sstevel@tonic-gate 		error = sogeterr(so);
41660Sstevel@tonic-gate 		if (error != 0) {
41670Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
41680Sstevel@tonic-gate 			return (error);
41690Sstevel@tonic-gate 		}
41700Sstevel@tonic-gate 	}
41710Sstevel@tonic-gate 
41720Sstevel@tonic-gate 	name = (struct sockaddr *)msg->msg_name;
41730Sstevel@tonic-gate 	namelen = msg->msg_namelen;
41740Sstevel@tonic-gate 
41750Sstevel@tonic-gate 	so_mode = so->so_mode;
41760Sstevel@tonic-gate 
41770Sstevel@tonic-gate 	if (name == NULL) {
41780Sstevel@tonic-gate 		if (!(so_state & SS_ISCONNECTED)) {
41790Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
41800Sstevel@tonic-gate 			if (so_mode & SM_CONNREQUIRED)
41810Sstevel@tonic-gate 				return (ENOTCONN);
41820Sstevel@tonic-gate 			else
41830Sstevel@tonic-gate 				return (EDESTADDRREQ);
41840Sstevel@tonic-gate 		}
41850Sstevel@tonic-gate 		if (so_mode & SM_CONNREQUIRED) {
41860Sstevel@tonic-gate 			name = NULL;
41870Sstevel@tonic-gate 			namelen = 0;
41880Sstevel@tonic-gate 		} else {
41890Sstevel@tonic-gate 			/*
41900Sstevel@tonic-gate 			 * Note that this code does not prevent so_faddr_sa
41910Sstevel@tonic-gate 			 * from changing while it is being used. Thus
41920Sstevel@tonic-gate 			 * if an "unconnect"+connect occurs concurrently with
41930Sstevel@tonic-gate 			 * this send the datagram might be delivered to a
41940Sstevel@tonic-gate 			 * garbaled address.
41950Sstevel@tonic-gate 			 */
41960Sstevel@tonic-gate 			ASSERT(so->so_faddr_sa);
41970Sstevel@tonic-gate 			name = so->so_faddr_sa;
41980Sstevel@tonic-gate 			namelen = (t_uscalar_t)so->so_faddr_len;
41990Sstevel@tonic-gate 		}
42000Sstevel@tonic-gate 	} else {
42010Sstevel@tonic-gate 		if (!(so_state & SS_ISCONNECTED) &&
42020Sstevel@tonic-gate 		    (so_mode & SM_CONNREQUIRED)) {
42030Sstevel@tonic-gate 			/* Required but not connected */
42040Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
42050Sstevel@tonic-gate 			return (ENOTCONN);
42060Sstevel@tonic-gate 		}
42070Sstevel@tonic-gate 		/*
42080Sstevel@tonic-gate 		 * Ignore the address on connection-oriented sockets.
42090Sstevel@tonic-gate 		 * Just like BSD this code does not generate an error for
42100Sstevel@tonic-gate 		 * TCP (a CONNREQUIRED socket) when sending to an address
42110Sstevel@tonic-gate 		 * passed in with sendto/sendmsg. Instead the data is
42120Sstevel@tonic-gate 		 * delivered on the connection as if no address had been
42130Sstevel@tonic-gate 		 * supplied.
42140Sstevel@tonic-gate 		 */
42150Sstevel@tonic-gate 		if ((so_state & SS_ISCONNECTED) &&
42160Sstevel@tonic-gate 		    !(so_mode & SM_CONNREQUIRED)) {
42170Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
42180Sstevel@tonic-gate 			return (EISCONN);
42190Sstevel@tonic-gate 		}
42200Sstevel@tonic-gate 		if (!(so_state & SS_ISBOUND)) {
42210Sstevel@tonic-gate 			so_lock_single(so);	/* Set SOLOCKED */
42220Sstevel@tonic-gate 			error = sotpi_bind(so, NULL, 0,
42230Sstevel@tonic-gate 			    _SOBIND_UNSPEC|_SOBIND_LOCK_HELD);
42240Sstevel@tonic-gate 			so_unlock_single(so, SOLOCKED);
42250Sstevel@tonic-gate 			if (error) {
42260Sstevel@tonic-gate 				mutex_exit(&so->so_lock);
42270Sstevel@tonic-gate 				eprintsoline(so, error);
42280Sstevel@tonic-gate 				return (error);
42290Sstevel@tonic-gate 			}
42300Sstevel@tonic-gate 		}
42310Sstevel@tonic-gate 		/*
42320Sstevel@tonic-gate 		 * Handle delayed datagram errors. These are only queued
42330Sstevel@tonic-gate 		 * when the application sets SO_DGRAM_ERRIND.
42340Sstevel@tonic-gate 		 * Return the error if we are sending to the address
42350Sstevel@tonic-gate 		 * that was returned in the last T_UDERROR_IND.
42360Sstevel@tonic-gate 		 * If sending to some other address discard the delayed
42370Sstevel@tonic-gate 		 * error indication.
42380Sstevel@tonic-gate 		 */
42390Sstevel@tonic-gate 		if (so->so_delayed_error) {
42400Sstevel@tonic-gate 			struct T_uderror_ind	*tudi;
42410Sstevel@tonic-gate 			void			*addr;
42420Sstevel@tonic-gate 			t_uscalar_t		addrlen;
42430Sstevel@tonic-gate 			boolean_t		match = B_FALSE;
42440Sstevel@tonic-gate 
42450Sstevel@tonic-gate 			ASSERT(so->so_eaddr_mp);
42460Sstevel@tonic-gate 			error = so->so_delayed_error;
42470Sstevel@tonic-gate 			so->so_delayed_error = 0;
42480Sstevel@tonic-gate 			tudi = (struct T_uderror_ind *)so->so_eaddr_mp->b_rptr;
42490Sstevel@tonic-gate 			addrlen = tudi->DEST_length;
42500Sstevel@tonic-gate 			addr = sogetoff(so->so_eaddr_mp,
42515240Snordmark 			    tudi->DEST_offset,
42525240Snordmark 			    addrlen, 1);
42530Sstevel@tonic-gate 			ASSERT(addr);	/* Checked by strsock_proto */
42540Sstevel@tonic-gate 			switch (so->so_family) {
42550Sstevel@tonic-gate 			case AF_INET: {
42560Sstevel@tonic-gate 				/* Compare just IP address and port */
42570Sstevel@tonic-gate 				sin_t *sin1 = (sin_t *)name;
42580Sstevel@tonic-gate 				sin_t *sin2 = (sin_t *)addr;
42590Sstevel@tonic-gate 
42600Sstevel@tonic-gate 				if (addrlen == sizeof (sin_t) &&
42610Sstevel@tonic-gate 				    namelen == addrlen &&
42620Sstevel@tonic-gate 				    sin1->sin_port == sin2->sin_port &&
42630Sstevel@tonic-gate 				    sin1->sin_addr.s_addr ==
42640Sstevel@tonic-gate 				    sin2->sin_addr.s_addr)
42650Sstevel@tonic-gate 					match = B_TRUE;
42660Sstevel@tonic-gate 				break;
42670Sstevel@tonic-gate 			}
42680Sstevel@tonic-gate 			case AF_INET6: {
42690Sstevel@tonic-gate 				/* Compare just IP address and port. Not flow */
42700Sstevel@tonic-gate 				sin6_t *sin1 = (sin6_t *)name;
42710Sstevel@tonic-gate 				sin6_t *sin2 = (sin6_t *)addr;
42720Sstevel@tonic-gate 
42730Sstevel@tonic-gate 				if (addrlen == sizeof (sin6_t) &&
42740Sstevel@tonic-gate 				    namelen == addrlen &&
42750Sstevel@tonic-gate 				    sin1->sin6_port == sin2->sin6_port &&
42760Sstevel@tonic-gate 				    IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
42775240Snordmark 				    &sin2->sin6_addr))
42780Sstevel@tonic-gate 					match = B_TRUE;
42790Sstevel@tonic-gate 				break;
42800Sstevel@tonic-gate 			}
42810Sstevel@tonic-gate 			case AF_UNIX:
42820Sstevel@tonic-gate 			default:
42830Sstevel@tonic-gate 				if (namelen == addrlen &&
42840Sstevel@tonic-gate 				    bcmp(name, addr, namelen) == 0)
42850Sstevel@tonic-gate 					match = B_TRUE;
42860Sstevel@tonic-gate 			}
42870Sstevel@tonic-gate 			if (match) {
42880Sstevel@tonic-gate 				freemsg(so->so_eaddr_mp);
42890Sstevel@tonic-gate 				so->so_eaddr_mp = NULL;
42900Sstevel@tonic-gate 				mutex_exit(&so->so_lock);
42910Sstevel@tonic-gate #ifdef DEBUG
42920Sstevel@tonic-gate 				dprintso(so, 0,
42935240Snordmark 				    ("sockfs delayed error %d for %s\n",
42945240Snordmark 				    error,
42955240Snordmark 				    pr_addr(so->so_family, name, namelen)));
42960Sstevel@tonic-gate #endif /* DEBUG */
42970Sstevel@tonic-gate 				return (error);
42980Sstevel@tonic-gate 			}
42990Sstevel@tonic-gate 			freemsg(so->so_eaddr_mp);
43000Sstevel@tonic-gate 			so->so_eaddr_mp = NULL;
43010Sstevel@tonic-gate 		}
43020Sstevel@tonic-gate 	}
43030Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
43040Sstevel@tonic-gate 
43050Sstevel@tonic-gate 	flags = msg->msg_flags;
43060Sstevel@tonic-gate 	dontroute = 0;
43070Sstevel@tonic-gate 	if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) {
43080Sstevel@tonic-gate 		uint32_t	val;
43090Sstevel@tonic-gate 
43100Sstevel@tonic-gate 		val = 1;
43110Sstevel@tonic-gate 		error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE,
43125240Snordmark 		    &val, (t_uscalar_t)sizeof (val));
43130Sstevel@tonic-gate 		if (error)
43140Sstevel@tonic-gate 			return (error);
43150Sstevel@tonic-gate 		dontroute = 1;
43160Sstevel@tonic-gate 	}
43170Sstevel@tonic-gate 
43180Sstevel@tonic-gate 	if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) {
43190Sstevel@tonic-gate 		error = EOPNOTSUPP;
43200Sstevel@tonic-gate 		goto done;
43210Sstevel@tonic-gate 	}
43220Sstevel@tonic-gate 	if (msg->msg_controllen != 0) {
43230Sstevel@tonic-gate 		if (!(so_mode & SM_CONNREQUIRED)) {
43240Sstevel@tonic-gate 			error = sosend_dgramcmsg(so, name, namelen, uiop,
4325741Smasputra 			    msg->msg_control, msg->msg_controllen, flags);
43260Sstevel@tonic-gate 		} else {
43270Sstevel@tonic-gate 			if (flags & MSG_OOB) {
43280Sstevel@tonic-gate 				/* Can't generate T_EXDATA_REQ with options */
43290Sstevel@tonic-gate 				error = EOPNOTSUPP;
43300Sstevel@tonic-gate 				goto done;
43310Sstevel@tonic-gate 			}
43320Sstevel@tonic-gate 			error = sosend_svccmsg(so, uiop,
43335240Snordmark 			    !(flags & MSG_EOR),
43345240Snordmark 			    msg->msg_control, msg->msg_controllen,
43355240Snordmark 			    flags);
43360Sstevel@tonic-gate 		}
43370Sstevel@tonic-gate 		goto done;
43380Sstevel@tonic-gate 	}
43390Sstevel@tonic-gate 
43400Sstevel@tonic-gate 	if (!(so_mode & SM_CONNREQUIRED)) {
43410Sstevel@tonic-gate 		/*
43420Sstevel@tonic-gate 		 * If there is no SO_DONTROUTE to turn off return immediately
4343741Smasputra 		 * from send_dgram. This can allow tail-call optimizations.
43440Sstevel@tonic-gate 		 */
43450Sstevel@tonic-gate 		if (!dontroute) {
43460Sstevel@tonic-gate 			return (sosend_dgram(so, name, namelen, uiop, flags));
43470Sstevel@tonic-gate 		}
43480Sstevel@tonic-gate 		error = sosend_dgram(so, name, namelen, uiop, flags);
43490Sstevel@tonic-gate 	} else {
43500Sstevel@tonic-gate 		t_scalar_t prim;
43510Sstevel@tonic-gate 		int sflag;
43520Sstevel@tonic-gate 
43530Sstevel@tonic-gate 		/* Ignore msg_name in the connected state */
43540Sstevel@tonic-gate 		if (flags & MSG_OOB) {
43550Sstevel@tonic-gate 			prim = T_EXDATA_REQ;
43560Sstevel@tonic-gate 			/*
43570Sstevel@tonic-gate 			 * Send down T_EXDATA_REQ even if there is flow
43580Sstevel@tonic-gate 			 * control for data.
43590Sstevel@tonic-gate 			 */
43600Sstevel@tonic-gate 			sflag = MSG_IGNFLOW;
43610Sstevel@tonic-gate 		} else {
43620Sstevel@tonic-gate 			if (so_mode & SM_BYTESTREAM) {
43630Sstevel@tonic-gate 				/* Byte stream transport - use write */
43640Sstevel@tonic-gate 
43650Sstevel@tonic-gate 				dprintso(so, 1, ("sotpi_sendmsg: write\n"));
43660Sstevel@tonic-gate 				/*
4367741Smasputra 				 * If there is no SO_DONTROUTE to turn off,
4368741Smasputra 				 * SS_DIRECT is on, and there is no flow
4369741Smasputra 				 * control, we can take the fast path.
43700Sstevel@tonic-gate 				 */
4371741Smasputra 				if (!dontroute &&
4372741Smasputra 				    (so_state & SS_DIRECT) &&
4373741Smasputra 				    canputnext(SOTOV(so)->v_stream->sd_wrq)) {
4374741Smasputra 					return (sostream_direct(so, uiop,
4375741Smasputra 					    NULL, CRED()));
4376741Smasputra 				}
43770Sstevel@tonic-gate 				error = strwrite(SOTOV(so), uiop, CRED());
43780Sstevel@tonic-gate 				goto done;
43790Sstevel@tonic-gate 			}
43800Sstevel@tonic-gate 			prim = T_DATA_REQ;
43810Sstevel@tonic-gate 			sflag = 0;
43820Sstevel@tonic-gate 		}
43830Sstevel@tonic-gate 		/*
43840Sstevel@tonic-gate 		 * If there is no SO_DONTROUTE to turn off return immediately
43850Sstevel@tonic-gate 		 * from sosend_svc. This can allow tail-call optimizations.
43860Sstevel@tonic-gate 		 */
43870Sstevel@tonic-gate 		if (!dontroute)
43880Sstevel@tonic-gate 			return (sosend_svc(so, uiop, prim,
43895240Snordmark 			    !(flags & MSG_EOR), sflag));
43900Sstevel@tonic-gate 		error = sosend_svc(so, uiop, prim,
43915240Snordmark 		    !(flags & MSG_EOR), sflag);
43920Sstevel@tonic-gate 	}
43930Sstevel@tonic-gate 	ASSERT(dontroute);
43940Sstevel@tonic-gate done:
43950Sstevel@tonic-gate 	if (dontroute) {
43960Sstevel@tonic-gate 		uint32_t	val;
43970Sstevel@tonic-gate 
43980Sstevel@tonic-gate 		val = 0;
43990Sstevel@tonic-gate 		(void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE,
44005240Snordmark 		    &val, (t_uscalar_t)sizeof (val));
44010Sstevel@tonic-gate 	}
44020Sstevel@tonic-gate 	return (error);
44030Sstevel@tonic-gate }
44040Sstevel@tonic-gate 
44050Sstevel@tonic-gate /*
4406741Smasputra  * Sending data on a datagram socket.
4407741Smasputra  * Assumes caller has verified that SS_ISBOUND etc. are set.
4408741Smasputra  */
4409741Smasputra /* ARGSUSED */
4410741Smasputra static int
4411741Smasputra sodgram_direct(struct sonode *so, struct sockaddr *name,
4412741Smasputra     socklen_t namelen, struct uio *uiop, int flags)
4413741Smasputra {
4414741Smasputra 	struct T_unitdata_req	tudr;
44155240Snordmark 	mblk_t			*mp = NULL;
4416741Smasputra 	int			error = 0;
4417741Smasputra 	void			*addr;
4418741Smasputra 	socklen_t		addrlen;
4419741Smasputra 	ssize_t			len;
4420741Smasputra 	struct stdata		*stp = SOTOV(so)->v_stream;
4421741Smasputra 	int			so_state;
4422741Smasputra 	queue_t			*udp_wq;
44235240Snordmark 	boolean_t		connected;
44245240Snordmark 	mblk_t			*mpdata = NULL;
4425741Smasputra 
4426741Smasputra 	ASSERT(name != NULL && namelen != 0);
4427741Smasputra 	ASSERT(!(so->so_mode & SM_CONNREQUIRED));
4428741Smasputra 	ASSERT(!(so->so_mode & SM_EXDATA));
4429741Smasputra 	ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6);
4430741Smasputra 	ASSERT(SOTOV(so)->v_type == VSOCK);
4431741Smasputra 
4432741Smasputra 	/* Caller checked for proper length */
4433741Smasputra 	len = uiop->uio_resid;
4434741Smasputra 	ASSERT(len <= so->so_tidu_size);
4435741Smasputra 
4436741Smasputra 	/* Length and family checks have been done by caller */
4437741Smasputra 	ASSERT(name->sa_family == so->so_family);
4438741Smasputra 	ASSERT(so->so_family == AF_INET ||
4439741Smasputra 	    (namelen == (socklen_t)sizeof (struct sockaddr_in6)));
4440741Smasputra 	ASSERT(so->so_family == AF_INET6 ||
4441741Smasputra 	    (namelen == (socklen_t)sizeof (struct sockaddr_in)));
4442741Smasputra 
4443741Smasputra 	addr = name;
4444741Smasputra 	addrlen = namelen;
4445741Smasputra 
4446741Smasputra 	if (stp->sd_sidp != NULL &&
4447741Smasputra 	    (error = straccess(stp, JCWRITE)) != 0)
4448741Smasputra 		goto done;
4449741Smasputra 
4450741Smasputra 	so_state = so->so_state;
4451741Smasputra 
44525240Snordmark 	connected = so_state & SS_ISCONNECTED;
44535240Snordmark 	if (!connected) {
44545240Snordmark 		tudr.PRIM_type = T_UNITDATA_REQ;
44555240Snordmark 		tudr.DEST_length = addrlen;
44565240Snordmark 		tudr.DEST_offset = (t_scalar_t)sizeof (tudr);
44575240Snordmark 		tudr.OPT_length = 0;
44585240Snordmark 		tudr.OPT_offset = 0;
44595240Snordmark 
44605240Snordmark 		mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0,
44615240Snordmark 		    _ALLOC_INTR);
44625240Snordmark 		if (mp == NULL) {
44635240Snordmark 			/*
44645240Snordmark 			 * Caught a signal waiting for memory.
44655240Snordmark 			 * Let send* return EINTR.
44665240Snordmark 			 */
44675240Snordmark 			error = EINTR;
44685240Snordmark 			goto done;
44695240Snordmark 		}
44705240Snordmark 	}
44715240Snordmark 
4472741Smasputra 	/*
4473741Smasputra 	 * For UDP we don't break up the copyin into smaller pieces
4474741Smasputra 	 * as in the TCP case.  That means if ENOMEM is returned by
4475741Smasputra 	 * mcopyinuio() then the uio vector has not been modified at
4476741Smasputra 	 * all and we fallback to either strwrite() or kstrputmsg()
4477741Smasputra 	 * below.  Note also that we never generate priority messages
4478741Smasputra 	 * from here.
4479741Smasputra 	 */
4480741Smasputra 	udp_wq = stp->sd_wrq->q_next;
4481741Smasputra 	if (canput(udp_wq) &&
44825240Snordmark 	    (mpdata = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) {
44835240Snordmark 		ASSERT(DB_TYPE(mpdata) == M_DATA);
4484741Smasputra 		ASSERT(uiop->uio_resid == 0);
44855240Snordmark 		if (!connected)
44865240Snordmark 			linkb(mp, mpdata);
44875240Snordmark 		else
44885240Snordmark 			mp = mpdata;
4489741Smasputra 		if (audit_active)
4490741Smasputra 			audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0);
44915240Snordmark 
44925240Snordmark 		udp_wput(udp_wq, mp);
4493741Smasputra 		return (0);
4494741Smasputra 	}
44955240Snordmark 
44965240Snordmark 	ASSERT(mpdata == NULL);
44975240Snordmark 	if (error != 0 && error != ENOMEM) {
44985240Snordmark 		freemsg(mp);
4499741Smasputra 		return (error);
45005240Snordmark 	}
4501741Smasputra 
4502741Smasputra 	/*
4503741Smasputra 	 * For connected, let strwrite() handle the blocking case.
4504741Smasputra 	 * Otherwise we fall thru and use kstrputmsg().
4505741Smasputra 	 */
45065240Snordmark 	if (connected)
4507741Smasputra 		return (strwrite(SOTOV(so), uiop, CRED()));
4508741Smasputra 
4509741Smasputra 	if (audit_active)
4510741Smasputra 		audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0);
4511741Smasputra 
4512741Smasputra 	error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0);
4513741Smasputra done:
4514741Smasputra #ifdef SOCK_DEBUG
4515741Smasputra 	if (error != 0) {
4516741Smasputra 		eprintsoline(so, error);
4517741Smasputra 	}
4518741Smasputra #endif /* SOCK_DEBUG */
4519741Smasputra 	return (error);
4520741Smasputra }
4521741Smasputra 
4522741Smasputra int
4523741Smasputra sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr)
4524741Smasputra {
4525741Smasputra 	struct stdata *stp = SOTOV(so)->v_stream;
4526741Smasputra 	ssize_t iosize, rmax, maxblk;
4527741Smasputra 	queue_t *tcp_wq = stp->sd_wrq->q_next;
4528898Skais 	mblk_t *newmp;
4529741Smasputra 	int error = 0, wflag = 0;
4530741Smasputra 
4531741Smasputra 	ASSERT(so->so_mode & SM_BYTESTREAM);
4532741Smasputra 	ASSERT(SOTOV(so)->v_type == VSOCK);
4533741Smasputra 
4534741Smasputra 	if (stp->sd_sidp != NULL &&
4535741Smasputra 	    (error = straccess(stp, JCWRITE)) != 0)
4536741Smasputra 		return (error);
4537741Smasputra 
4538741Smasputra 	if (uiop == NULL) {
4539741Smasputra 		/*
4540741Smasputra 		 * kstrwritemp() should have checked sd_flag and
4541741Smasputra 		 * flow-control before coming here.  If we end up
4542741Smasputra 		 * here it means that we can simply pass down the
4543741Smasputra 		 * data to tcp.
4544741Smasputra 		 */
4545741Smasputra 		ASSERT(mp != NULL);
4546898Skais 		if (stp->sd_wputdatafunc != NULL) {
4547898Skais 			newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL,
4548898Skais 			    NULL, NULL, NULL);
4549898Skais 			if (newmp == NULL) {
4550898Skais 				/* The caller will free mp */
4551898Skais 				return (ECOMM);
4552898Skais 			}
4553898Skais 			mp = newmp;
4554898Skais 		}
4555741Smasputra 		tcp_wput(tcp_wq, mp);
4556741Smasputra 		return (0);
4557741Smasputra 	}
4558741Smasputra 
4559741Smasputra 	/* Fallback to strwrite() to do proper error handling */
4560741Smasputra 	if (stp->sd_flag & (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))
4561741Smasputra 		return (strwrite(SOTOV(so), uiop, cr));
4562741Smasputra 
4563741Smasputra 	rmax = stp->sd_qn_maxpsz;
4564741Smasputra 	ASSERT(rmax >= 0 || rmax == INFPSZ);
4565741Smasputra 	if (rmax == 0 || uiop->uio_resid <= 0)
4566741Smasputra 		return (0);
4567741Smasputra 
4568741Smasputra 	if (rmax == INFPSZ)
4569741Smasputra 		rmax = uiop->uio_resid;
4570741Smasputra 
4571741Smasputra 	maxblk = stp->sd_maxblk;
4572741Smasputra 
4573741Smasputra 	for (;;) {
4574741Smasputra 		iosize = MIN(uiop->uio_resid, rmax);
4575741Smasputra 
4576741Smasputra 		mp = mcopyinuio(stp, uiop, iosize, maxblk, &error);
4577741Smasputra 		if (mp == NULL) {
4578741Smasputra 			/*
4579741Smasputra 			 * Fallback to strwrite() for ENOMEM; if this
4580741Smasputra 			 * is our first time in this routine and the uio
4581741Smasputra 			 * vector has not been modified, we will end up
4582741Smasputra 			 * calling strwrite() without any flag set.
4583741Smasputra 			 */
4584741Smasputra 			if (error == ENOMEM)
4585741Smasputra 				goto slow_send;
4586741Smasputra 			else
4587741Smasputra 				return (error);
4588741Smasputra 		}
4589741Smasputra 		ASSERT(uiop->uio_resid >= 0);
4590741Smasputra 		/*
4591741Smasputra 		 * If mp is non-NULL and ENOMEM is set, it means that
4592741Smasputra 		 * mcopyinuio() was able to break down some of the user
4593741Smasputra 		 * data into one or more mblks.  Send the partial data
4594741Smasputra 		 * to tcp and let the rest be handled in strwrite().
4595741Smasputra 		 */
4596741Smasputra 		ASSERT(error == 0 || error == ENOMEM);
4597898Skais 		if (stp->sd_wputdatafunc != NULL) {
4598898Skais 			newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL,
4599898Skais 			    NULL, NULL, NULL);
4600898Skais 			if (newmp == NULL) {
4601898Skais 				/* The caller will free mp */
4602898Skais 				return (ECOMM);
4603898Skais 			}
4604898Skais 			mp = newmp;
4605898Skais 		}
4606741Smasputra 		tcp_wput(tcp_wq, mp);
4607741Smasputra 
4608741Smasputra 		wflag |= NOINTR;
4609741Smasputra 
4610741Smasputra 		if (uiop->uio_resid == 0) {	/* No more data; we're done */
4611741Smasputra 			ASSERT(error == 0);
4612741Smasputra 			break;
4613741Smasputra 		} else if (error == ENOMEM || !canput(tcp_wq) || (stp->sd_flag &
4614741Smasputra 		    (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))) {
4615741Smasputra slow_send:
4616741Smasputra 			/*
4617741Smasputra 			 * We were able to send down partial data using
4618741Smasputra 			 * the direct call interface, but are now relying
4619741Smasputra 			 * on strwrite() to handle the non-fastpath cases.
4620741Smasputra 			 * If the socket is blocking we will sleep in
4621741Smasputra 			 * strwaitq() until write is permitted, otherwise,
4622741Smasputra 			 * we will need to return the amount of bytes
4623741Smasputra 			 * written so far back to the app.  This is the
4624741Smasputra 			 * reason why we pass NOINTR flag to strwrite()
4625741Smasputra 			 * for non-blocking socket, because we don't want
4626741Smasputra 			 * to return EAGAIN when portion of the user data
4627741Smasputra 			 * has actually been sent down.
4628741Smasputra 			 */
4629741Smasputra 			return (strwrite_common(SOTOV(so), uiop, cr, wflag));
4630741Smasputra 		}
4631741Smasputra 	}
4632741Smasputra 	return (0);
4633741Smasputra }
4634741Smasputra 
4635741Smasputra /*
46360Sstevel@tonic-gate  * Update so_faddr by asking the transport (unless AF_UNIX).
46370Sstevel@tonic-gate  */
46380Sstevel@tonic-gate int
46390Sstevel@tonic-gate sotpi_getpeername(struct sonode *so)
46400Sstevel@tonic-gate {
46410Sstevel@tonic-gate 	struct strbuf	strbuf;
46420Sstevel@tonic-gate 	int		error = 0, res;
46430Sstevel@tonic-gate 	void		*addr;
46440Sstevel@tonic-gate 	t_uscalar_t	addrlen;
46450Sstevel@tonic-gate 	k_sigset_t	smask;
46460Sstevel@tonic-gate 
46470Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_getpeername(%p) %s\n",
46487240Srh87107 	    (void *)so, pr_state(so->so_state, so->so_mode)));
46490Sstevel@tonic-gate 
46500Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
46510Sstevel@tonic-gate 	so_lock_single(so);	/* Set SOLOCKED */
46520Sstevel@tonic-gate 	if (!(so->so_state & SS_ISCONNECTED)) {
46530Sstevel@tonic-gate 		error = ENOTCONN;
46540Sstevel@tonic-gate 		goto done;
46550Sstevel@tonic-gate 	}
46560Sstevel@tonic-gate 	/* Added this check for X/Open */
46570Sstevel@tonic-gate 	if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
46580Sstevel@tonic-gate 		error = EINVAL;
46590Sstevel@tonic-gate 		if (xnet_check_print) {
46600Sstevel@tonic-gate 			printf("sockfs: X/Open getpeername check => EINVAL\n");
46610Sstevel@tonic-gate 		}
46620Sstevel@tonic-gate 		goto done;
46630Sstevel@tonic-gate 	}
46640Sstevel@tonic-gate #ifdef DEBUG
46650Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_getpeername (local): %s\n",
46665240Snordmark 	    pr_addr(so->so_family, so->so_faddr_sa,
46675240Snordmark 	    (t_uscalar_t)so->so_faddr_len)));
46680Sstevel@tonic-gate #endif /* DEBUG */
46690Sstevel@tonic-gate 
46701548Srshoaib 	if (so->so_family == AF_UNIX) {
46710Sstevel@tonic-gate 		/* Transport has different name space - return local info */
46720Sstevel@tonic-gate 		error = 0;
46730Sstevel@tonic-gate 		goto done;
46740Sstevel@tonic-gate 	}
46750Sstevel@tonic-gate 
46760Sstevel@tonic-gate 	ASSERT(so->so_faddr_sa);
46770Sstevel@tonic-gate 	/* Allocate local buffer to use with ioctl */
46780Sstevel@tonic-gate 	addrlen = (t_uscalar_t)so->so_faddr_maxlen;
46790Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
46800Sstevel@tonic-gate 	addr = kmem_alloc(addrlen, KM_SLEEP);
46810Sstevel@tonic-gate 
46820Sstevel@tonic-gate 	/*
46830Sstevel@tonic-gate 	 * Issue TI_GETPEERNAME with signals masked.
46840Sstevel@tonic-gate 	 * Put the result in so_faddr_sa so that getpeername works after
46850Sstevel@tonic-gate 	 * a shutdown(output).
46860Sstevel@tonic-gate 	 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted
46870Sstevel@tonic-gate 	 * back to the socket.
46880Sstevel@tonic-gate 	 */
46890Sstevel@tonic-gate 	strbuf.buf = addr;
46900Sstevel@tonic-gate 	strbuf.maxlen = addrlen;
46910Sstevel@tonic-gate 	strbuf.len = 0;
46920Sstevel@tonic-gate 
46930Sstevel@tonic-gate 	sigintr(&smask, 0);
46940Sstevel@tonic-gate 	res = 0;
46950Sstevel@tonic-gate 	ASSERT(CRED());
46960Sstevel@tonic-gate 	error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf,
46975240Snordmark 	    0, K_TO_K, CRED(), &res);
46980Sstevel@tonic-gate 	sigunintr(&smask);
46990Sstevel@tonic-gate 
47000Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
47010Sstevel@tonic-gate 	/*
47020Sstevel@tonic-gate 	 * If there is an error record the error in so_error put don't fail
47030Sstevel@tonic-gate 	 * the getpeername. Instead fallback on the recorded
47040Sstevel@tonic-gate 	 * so->so_faddr_sa.
47050Sstevel@tonic-gate 	 */
47060Sstevel@tonic-gate 	if (error) {
47070Sstevel@tonic-gate 		/*
47080Sstevel@tonic-gate 		 * Various stream head errors can be returned to the ioctl.
47090Sstevel@tonic-gate 		 * However, it is impossible to determine which ones of
47100Sstevel@tonic-gate 		 * these are really socket level errors that were incorrectly
47110Sstevel@tonic-gate 		 * consumed by the ioctl. Thus this code silently ignores the
47120Sstevel@tonic-gate 		 * error - to code explicitly does not reinstate the error
47130Sstevel@tonic-gate 		 * using soseterror().
47140Sstevel@tonic-gate 		 * Experiments have shows that at least this set of
47150Sstevel@tonic-gate 		 * errors are reported and should not be reinstated on the
47160Sstevel@tonic-gate 		 * socket:
47170Sstevel@tonic-gate 		 *	EINVAL	E.g. if an I_LINK was in effect when
47180Sstevel@tonic-gate 		 *		getpeername was called.
47190Sstevel@tonic-gate 		 *	EPIPE	The ioctl error semantics prefer the write
47200Sstevel@tonic-gate 		 *		side error over the read side error.
47210Sstevel@tonic-gate 		 *	ENOTCONN The transport just got disconnected but
47220Sstevel@tonic-gate 		 *		sockfs had not yet seen the T_DISCON_IND
47230Sstevel@tonic-gate 		 *		when issuing the ioctl.
47240Sstevel@tonic-gate 		 */
47250Sstevel@tonic-gate 		error = 0;
47260Sstevel@tonic-gate 	} else if (res == 0 && strbuf.len > 0 &&
47270Sstevel@tonic-gate 	    (so->so_state & SS_ISCONNECTED)) {
47280Sstevel@tonic-gate 		ASSERT(strbuf.len <= (int)so->so_faddr_maxlen);
47290Sstevel@tonic-gate 		so->so_faddr_len = (socklen_t)strbuf.len;
47300Sstevel@tonic-gate 		bcopy(addr, so->so_faddr_sa, so->so_faddr_len);
47310Sstevel@tonic-gate 		so->so_state |= SS_FADDR_VALID;
47320Sstevel@tonic-gate 	}
47330Sstevel@tonic-gate 	kmem_free(addr, addrlen);
47340Sstevel@tonic-gate #ifdef DEBUG
47350Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_getpeername (tp): %s\n",
47365240Snordmark 	    pr_addr(so->so_family, so->so_faddr_sa,
47375240Snordmark 	    (t_uscalar_t)so->so_faddr_len)));
47380Sstevel@tonic-gate #endif /* DEBUG */
47390Sstevel@tonic-gate done:
47400Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
47410Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
47420Sstevel@tonic-gate 	return (error);
47430Sstevel@tonic-gate }
47440Sstevel@tonic-gate 
47450Sstevel@tonic-gate /*
47460Sstevel@tonic-gate  * Update so_laddr by asking the transport (unless AF_UNIX).
47470Sstevel@tonic-gate  */
47480Sstevel@tonic-gate int
47490Sstevel@tonic-gate sotpi_getsockname(struct sonode *so)
47500Sstevel@tonic-gate {
47510Sstevel@tonic-gate 	struct strbuf	strbuf;
47520Sstevel@tonic-gate 	int		error = 0, res;
47530Sstevel@tonic-gate 	void		*addr;
47540Sstevel@tonic-gate 	t_uscalar_t	addrlen;
47550Sstevel@tonic-gate 	k_sigset_t	smask;
47560Sstevel@tonic-gate 
47570Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_getsockname(%p) %s\n",
47587240Srh87107 	    (void *)so, pr_state(so->so_state, so->so_mode)));
47590Sstevel@tonic-gate 
47600Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
47610Sstevel@tonic-gate 	so_lock_single(so);	/* Set SOLOCKED */
47620Sstevel@tonic-gate 	if (!(so->so_state & SS_ISBOUND) && so->so_family != AF_UNIX) {
47630Sstevel@tonic-gate 		/* Return an all zero address except for the family */
47640Sstevel@tonic-gate 		if (so->so_family == AF_INET)
47650Sstevel@tonic-gate 			so->so_laddr_len = (socklen_t)sizeof (sin_t);
47660Sstevel@tonic-gate 		else if (so->so_family == AF_INET6)
47670Sstevel@tonic-gate 			so->so_laddr_len = (socklen_t)sizeof (sin6_t);
47680Sstevel@tonic-gate 		ASSERT(so->so_laddr_len <= so->so_laddr_maxlen);
47690Sstevel@tonic-gate 		bzero(so->so_laddr_sa, so->so_laddr_len);
47700Sstevel@tonic-gate 		/*
47710Sstevel@tonic-gate 		 * Can not assume there is a sa_family for all
47720Sstevel@tonic-gate 		 * protocol families.
47730Sstevel@tonic-gate 		 */
47740Sstevel@tonic-gate 		if (so->so_family == AF_INET || so->so_family == AF_INET6)
47750Sstevel@tonic-gate 			so->so_laddr_sa->sa_family = so->so_family;
47760Sstevel@tonic-gate 	}
47770Sstevel@tonic-gate #ifdef DEBUG
47780Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_getsockname (local): %s\n",
47795240Snordmark 	    pr_addr(so->so_family, so->so_laddr_sa,
47805240Snordmark 	    (t_uscalar_t)so->so_laddr_len)));
47810Sstevel@tonic-gate #endif /* DEBUG */
47820Sstevel@tonic-gate 	if (so->so_family == AF_UNIX) {
47830Sstevel@tonic-gate 		/* Transport has different name space - return local info */
47840Sstevel@tonic-gate 		error = 0;
47850Sstevel@tonic-gate 		goto done;
47860Sstevel@tonic-gate 	}
47871156Sblu 	if (!(so->so_state & SS_ISBOUND)) {
47881156Sblu 		/* If not bound, then nothing to return. */
47891156Sblu 		error = 0;
47901156Sblu 		goto done;
47911156Sblu 	}
47920Sstevel@tonic-gate 	/* Allocate local buffer to use with ioctl */
47930Sstevel@tonic-gate 	addrlen = (t_uscalar_t)so->so_laddr_maxlen;
47940Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
47950Sstevel@tonic-gate 	addr = kmem_alloc(addrlen, KM_SLEEP);
47960Sstevel@tonic-gate 
47970Sstevel@tonic-gate 	/*
47980Sstevel@tonic-gate 	 * Issue TI_GETMYNAME with signals masked.
47990Sstevel@tonic-gate 	 * Put the result in so_laddr_sa so that getsockname works after
48000Sstevel@tonic-gate 	 * a shutdown(output).
48010Sstevel@tonic-gate 	 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted
48020Sstevel@tonic-gate 	 * back to the socket.
48030Sstevel@tonic-gate 	 */
48040Sstevel@tonic-gate 	strbuf.buf = addr;
48050Sstevel@tonic-gate 	strbuf.maxlen = addrlen;
48060Sstevel@tonic-gate 	strbuf.len = 0;
48070Sstevel@tonic-gate 
48080Sstevel@tonic-gate 	sigintr(&smask, 0);
48090Sstevel@tonic-gate 	res = 0;
48100Sstevel@tonic-gate 	ASSERT(CRED());
48110Sstevel@tonic-gate 	error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf,
48125240Snordmark 	    0, K_TO_K, CRED(), &res);
48130Sstevel@tonic-gate 	sigunintr(&smask);
48140Sstevel@tonic-gate 
48150Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
48160Sstevel@tonic-gate 	/*
48170Sstevel@tonic-gate 	 * If there is an error record the error in so_error put don't fail
48180Sstevel@tonic-gate 	 * the getsockname. Instead fallback on the recorded
48190Sstevel@tonic-gate 	 * so->so_laddr_sa.
48200Sstevel@tonic-gate 	 */
48210Sstevel@tonic-gate 	if (error) {
48220Sstevel@tonic-gate 		/*
48230Sstevel@tonic-gate 		 * Various stream head errors can be returned to the ioctl.
48240Sstevel@tonic-gate 		 * However, it is impossible to determine which ones of
48250Sstevel@tonic-gate 		 * these are really socket level errors that were incorrectly
48260Sstevel@tonic-gate 		 * consumed by the ioctl. Thus this code silently ignores the
48270Sstevel@tonic-gate 		 * error - to code explicitly does not reinstate the error
48280Sstevel@tonic-gate 		 * using soseterror().
48290Sstevel@tonic-gate 		 * Experiments have shows that at least this set of
48300Sstevel@tonic-gate 		 * errors are reported and should not be reinstated on the
48310Sstevel@tonic-gate 		 * socket:
48320Sstevel@tonic-gate 		 *	EINVAL	E.g. if an I_LINK was in effect when
48330Sstevel@tonic-gate 		 *		getsockname was called.
48340Sstevel@tonic-gate 		 *	EPIPE	The ioctl error semantics prefer the write
48350Sstevel@tonic-gate 		 *		side error over the read side error.
48360Sstevel@tonic-gate 		 */
48370Sstevel@tonic-gate 		error = 0;
48380Sstevel@tonic-gate 	} else if (res == 0 && strbuf.len > 0 &&
48390Sstevel@tonic-gate 	    (so->so_state & SS_ISBOUND)) {
48400Sstevel@tonic-gate 		ASSERT(strbuf.len <= (int)so->so_laddr_maxlen);
48410Sstevel@tonic-gate 		so->so_laddr_len = (socklen_t)strbuf.len;
48420Sstevel@tonic-gate 		bcopy(addr, so->so_laddr_sa, so->so_laddr_len);
48430Sstevel@tonic-gate 		so->so_state |= SS_LADDR_VALID;
48440Sstevel@tonic-gate 	}
48450Sstevel@tonic-gate 	kmem_free(addr, addrlen);
48460Sstevel@tonic-gate #ifdef DEBUG
48470Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_getsockname (tp): %s\n",
48485240Snordmark 	    pr_addr(so->so_family, so->so_laddr_sa,
48495240Snordmark 	    (t_uscalar_t)so->so_laddr_len)));
48500Sstevel@tonic-gate #endif /* DEBUG */
48510Sstevel@tonic-gate done:
48520Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
48530Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
48540Sstevel@tonic-gate 	return (error);
48550Sstevel@tonic-gate }
48560Sstevel@tonic-gate 
48570Sstevel@tonic-gate /*
48580Sstevel@tonic-gate  * Get socket options. For SOL_SOCKET options some options are handled
48590Sstevel@tonic-gate  * by the sockfs while others use the value recorded in the sonode as a
48600Sstevel@tonic-gate  * fallback should the T_SVR4_OPTMGMT_REQ fail.
48610Sstevel@tonic-gate  *
48620Sstevel@tonic-gate  * On the return most *optlenp bytes are copied to optval.
48630Sstevel@tonic-gate  */
48640Sstevel@tonic-gate int
48650Sstevel@tonic-gate sotpi_getsockopt(struct sonode *so, int level, int option_name,
48660Sstevel@tonic-gate 		void *optval, socklen_t *optlenp, int flags)
48670Sstevel@tonic-gate {
48680Sstevel@tonic-gate 	struct T_optmgmt_req	optmgmt_req;
48690Sstevel@tonic-gate 	struct T_optmgmt_ack	*optmgmt_ack;
48700Sstevel@tonic-gate 	struct opthdr		oh;
48710Sstevel@tonic-gate 	struct opthdr		*opt_res;
48720Sstevel@tonic-gate 	mblk_t			*mp = NULL;
48730Sstevel@tonic-gate 	int			error = 0;
48740Sstevel@tonic-gate 	void			*option = NULL;	/* Set if fallback value */
48750Sstevel@tonic-gate 	t_uscalar_t		maxlen = *optlenp;
48760Sstevel@tonic-gate 	t_uscalar_t		len;
48770Sstevel@tonic-gate 	uint32_t		value;
48780Sstevel@tonic-gate 
48790Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n",
48807240Srh87107 	    (void *)so, level, option_name, optval, (void *)optlenp,
48815240Snordmark 	    pr_state(so->so_state, so->so_mode)));
48820Sstevel@tonic-gate 
48830Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
48840Sstevel@tonic-gate 	so_lock_single(so);	/* Set SOLOCKED */
48850Sstevel@tonic-gate 
48860Sstevel@tonic-gate 	/*
48870Sstevel@tonic-gate 	 * Check for SOL_SOCKET options.
48880Sstevel@tonic-gate 	 * Certain SOL_SOCKET options are returned directly whereas
48890Sstevel@tonic-gate 	 * others only provide a default (fallback) value should
48900Sstevel@tonic-gate 	 * the T_SVR4_OPTMGMT_REQ fail.
48910Sstevel@tonic-gate 	 */
48920Sstevel@tonic-gate 	if (level == SOL_SOCKET) {
48930Sstevel@tonic-gate 		/* Check parameters */
48940Sstevel@tonic-gate 		switch (option_name) {
48950Sstevel@tonic-gate 		case SO_TYPE:
48960Sstevel@tonic-gate 		case SO_ERROR:
48970Sstevel@tonic-gate 		case SO_DEBUG:
48980Sstevel@tonic-gate 		case SO_ACCEPTCONN:
48990Sstevel@tonic-gate 		case SO_REUSEADDR:
49000Sstevel@tonic-gate 		case SO_KEEPALIVE:
49010Sstevel@tonic-gate 		case SO_DONTROUTE:
49020Sstevel@tonic-gate 		case SO_BROADCAST:
49030Sstevel@tonic-gate 		case SO_USELOOPBACK:
49040Sstevel@tonic-gate 		case SO_OOBINLINE:
49050Sstevel@tonic-gate 		case SO_SNDBUF:
49060Sstevel@tonic-gate 		case SO_RCVBUF:
49070Sstevel@tonic-gate #ifdef notyet
49080Sstevel@tonic-gate 		case SO_SNDLOWAT:
49090Sstevel@tonic-gate 		case SO_RCVLOWAT:
49100Sstevel@tonic-gate 		case SO_SNDTIMEO:
49110Sstevel@tonic-gate 		case SO_RCVTIMEO:
49120Sstevel@tonic-gate #endif /* notyet */
49133388Skcpoon 		case SO_DOMAIN:
49140Sstevel@tonic-gate 		case SO_DGRAM_ERRIND:
49150Sstevel@tonic-gate 			if (maxlen < (t_uscalar_t)sizeof (int32_t)) {
49160Sstevel@tonic-gate 				error = EINVAL;
49170Sstevel@tonic-gate 				eprintsoline(so, error);
49180Sstevel@tonic-gate 				goto done2;
49190Sstevel@tonic-gate 			}
49200Sstevel@tonic-gate 			break;
49210Sstevel@tonic-gate 		case SO_LINGER:
49220Sstevel@tonic-gate 			if (maxlen < (t_uscalar_t)sizeof (struct linger)) {
49230Sstevel@tonic-gate 				error = EINVAL;
49240Sstevel@tonic-gate 				eprintsoline(so, error);
49250Sstevel@tonic-gate 				goto done2;
49260Sstevel@tonic-gate 			}
49270Sstevel@tonic-gate 			break;
49280Sstevel@tonic-gate 		}
49290Sstevel@tonic-gate 
49300Sstevel@tonic-gate 		len = (t_uscalar_t)sizeof (uint32_t);	/* Default */
49310Sstevel@tonic-gate 
49320Sstevel@tonic-gate 		switch (option_name) {
49330Sstevel@tonic-gate 		case SO_TYPE:
49340Sstevel@tonic-gate 			value = so->so_type;
49350Sstevel@tonic-gate 			option = &value;
49360Sstevel@tonic-gate 			goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */
49370Sstevel@tonic-gate 
49380Sstevel@tonic-gate 		case SO_ERROR:
49390Sstevel@tonic-gate 			value = sogeterr(so);
49400Sstevel@tonic-gate 			option = &value;
49410Sstevel@tonic-gate 			goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */
49420Sstevel@tonic-gate 
49430Sstevel@tonic-gate 		case SO_ACCEPTCONN:
49440Sstevel@tonic-gate 			if (so->so_state & SS_ACCEPTCONN)
49450Sstevel@tonic-gate 				value = SO_ACCEPTCONN;
49460Sstevel@tonic-gate 			else
49470Sstevel@tonic-gate 				value = 0;
49480Sstevel@tonic-gate #ifdef DEBUG
49490Sstevel@tonic-gate 			if (value) {
49500Sstevel@tonic-gate 				dprintso(so, 1,
49510Sstevel@tonic-gate 				    ("sotpi_getsockopt: 0x%x is set\n",
49520Sstevel@tonic-gate 				    option_name));
49530Sstevel@tonic-gate 			} else {
49540Sstevel@tonic-gate 				dprintso(so, 1,
49550Sstevel@tonic-gate 				    ("sotpi_getsockopt: 0x%x not set\n",
49560Sstevel@tonic-gate 				    option_name));
49570Sstevel@tonic-gate 			}
49580Sstevel@tonic-gate #endif /* DEBUG */
49590Sstevel@tonic-gate 			option = &value;
49600Sstevel@tonic-gate 			goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */
49610Sstevel@tonic-gate 
49620Sstevel@tonic-gate 		case SO_DEBUG:
49630Sstevel@tonic-gate 		case SO_REUSEADDR:
49640Sstevel@tonic-gate 		case SO_KEEPALIVE:
49650Sstevel@tonic-gate 		case SO_DONTROUTE:
49660Sstevel@tonic-gate 		case SO_BROADCAST:
49670Sstevel@tonic-gate 		case SO_USELOOPBACK:
49680Sstevel@tonic-gate 		case SO_OOBINLINE:
49690Sstevel@tonic-gate 		case SO_DGRAM_ERRIND:
49700Sstevel@tonic-gate 			value = (so->so_options & option_name);
49710Sstevel@tonic-gate #ifdef DEBUG
49720Sstevel@tonic-gate 			if (value) {
49730Sstevel@tonic-gate 				dprintso(so, 1,
49740Sstevel@tonic-gate 				    ("sotpi_getsockopt: 0x%x is set\n",
49750Sstevel@tonic-gate 				    option_name));
49760Sstevel@tonic-gate 			} else {
49770Sstevel@tonic-gate 				dprintso(so, 1,
49780Sstevel@tonic-gate 				    ("sotpi_getsockopt: 0x%x not set\n",
49790Sstevel@tonic-gate 				    option_name));
49800Sstevel@tonic-gate 			}
49810Sstevel@tonic-gate #endif /* DEBUG */
49820Sstevel@tonic-gate 			option = &value;
49830Sstevel@tonic-gate 			goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */
49840Sstevel@tonic-gate 
49850Sstevel@tonic-gate 		/*
49860Sstevel@tonic-gate 		 * The following options are only returned by sockfs when the
49870Sstevel@tonic-gate 		 * T_SVR4_OPTMGMT_REQ fails.
49880Sstevel@tonic-gate 		 */
49890Sstevel@tonic-gate 		case SO_LINGER:
49900Sstevel@tonic-gate 			option = &so->so_linger;
49910Sstevel@tonic-gate 			len = (t_uscalar_t)sizeof (struct linger);
49920Sstevel@tonic-gate 			break;
49930Sstevel@tonic-gate 		case SO_SNDBUF: {
49940Sstevel@tonic-gate 			ssize_t lvalue;
49950Sstevel@tonic-gate 
49960Sstevel@tonic-gate 			/*
49970Sstevel@tonic-gate 			 * If the option has not been set then get a default
49980Sstevel@tonic-gate 			 * value from the read queue. This value is
49990Sstevel@tonic-gate 			 * returned if the transport fails
50000Sstevel@tonic-gate 			 * the T_SVR4_OPTMGMT_REQ.
50010Sstevel@tonic-gate 			 */
50020Sstevel@tonic-gate 			lvalue = so->so_sndbuf;
50030Sstevel@tonic-gate 			if (lvalue == 0) {
50040Sstevel@tonic-gate 				mutex_exit(&so->so_lock);
50050Sstevel@tonic-gate 				(void) strqget(strvp2wq(SOTOV(so))->q_next,
50065240Snordmark 				    QHIWAT, 0, &lvalue);
50070Sstevel@tonic-gate 				mutex_enter(&so->so_lock);
50080Sstevel@tonic-gate 				dprintso(so, 1,
50090Sstevel@tonic-gate 				    ("got SO_SNDBUF %ld from q\n", lvalue));
50100Sstevel@tonic-gate 			}
50110Sstevel@tonic-gate 			value = (int)lvalue;
50120Sstevel@tonic-gate 			option = &value;
50130Sstevel@tonic-gate 			len = (t_uscalar_t)sizeof (so->so_sndbuf);
50140Sstevel@tonic-gate 			break;
50150Sstevel@tonic-gate 		}
50160Sstevel@tonic-gate 		case SO_RCVBUF: {
50170Sstevel@tonic-gate 			ssize_t lvalue;
50180Sstevel@tonic-gate 
50190Sstevel@tonic-gate 			/*
50200Sstevel@tonic-gate 			 * If the option has not been set then get a default
50210Sstevel@tonic-gate 			 * value from the read queue. This value is
50220Sstevel@tonic-gate 			 * returned if the transport fails
50230Sstevel@tonic-gate 			 * the T_SVR4_OPTMGMT_REQ.
50240Sstevel@tonic-gate 			 *
50250Sstevel@tonic-gate 			 * XXX If SO_RCVBUF has been set and this is an
50260Sstevel@tonic-gate 			 * XPG 4.2 application then do not ask the transport
50270Sstevel@tonic-gate 			 * since the transport might adjust the value and not
50280Sstevel@tonic-gate 			 * return exactly what was set by the application.
50290Sstevel@tonic-gate 			 * For non-XPG 4.2 application we return the value
50300Sstevel@tonic-gate 			 * that the transport is actually using.
50310Sstevel@tonic-gate 			 */
50320Sstevel@tonic-gate 			lvalue = so->so_rcvbuf;
50330Sstevel@tonic-gate 			if (lvalue == 0) {
50340Sstevel@tonic-gate 				mutex_exit(&so->so_lock);
50350Sstevel@tonic-gate 				(void) strqget(RD(strvp2wq(SOTOV(so))),
50365240Snordmark 				    QHIWAT, 0, &lvalue);
50370Sstevel@tonic-gate 				mutex_enter(&so->so_lock);
50380Sstevel@tonic-gate 				dprintso(so, 1,
50390Sstevel@tonic-gate 				    ("got SO_RCVBUF %ld from q\n", lvalue));
50400Sstevel@tonic-gate 			} else if (flags & _SOGETSOCKOPT_XPG4_2) {
50410Sstevel@tonic-gate 				value = (int)lvalue;
50420Sstevel@tonic-gate 				option = &value;
50430Sstevel@tonic-gate 				goto copyout;	/* skip asking transport */
50440Sstevel@tonic-gate 			}
50450Sstevel@tonic-gate 			value = (int)lvalue;
50460Sstevel@tonic-gate 			option = &value;
50470Sstevel@tonic-gate 			len = (t_uscalar_t)sizeof (so->so_rcvbuf);
50480Sstevel@tonic-gate 			break;
50490Sstevel@tonic-gate 		}
50503388Skcpoon 		case SO_DOMAIN:
50513388Skcpoon 			value = so->so_family;
50523388Skcpoon 			option = &value;
50533388Skcpoon 			goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */
50543388Skcpoon 
50550Sstevel@tonic-gate #ifdef notyet
50560Sstevel@tonic-gate 		/*
50570Sstevel@tonic-gate 		 * We do not implement the semantics of these options
50580Sstevel@tonic-gate 		 * thus we shouldn't implement the options either.
50590Sstevel@tonic-gate 		 */
50600Sstevel@tonic-gate 		case SO_SNDLOWAT:
50610Sstevel@tonic-gate 			value = so->so_sndlowat;
50620Sstevel@tonic-gate 			option = &value;
50630Sstevel@tonic-gate 			break;
50640Sstevel@tonic-gate 		case SO_RCVLOWAT:
50650Sstevel@tonic-gate 			value = so->so_rcvlowat;
50660Sstevel@tonic-gate 			option = &value;
50670Sstevel@tonic-gate 			break;
50680Sstevel@tonic-gate 		case SO_SNDTIMEO:
50690Sstevel@tonic-gate 			value = so->so_sndtimeo;
50700Sstevel@tonic-gate 			option = &value;
50710Sstevel@tonic-gate 			break;
50720Sstevel@tonic-gate 		case SO_RCVTIMEO:
50730Sstevel@tonic-gate 			value = so->so_rcvtimeo;
50740Sstevel@tonic-gate 			option = &value;
50750Sstevel@tonic-gate 			break;
50760Sstevel@tonic-gate #endif /* notyet */
50770Sstevel@tonic-gate 		}
50780Sstevel@tonic-gate 	}
50790Sstevel@tonic-gate 
50800Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
50810Sstevel@tonic-gate 
50820Sstevel@tonic-gate 	/* Send request */
50830Sstevel@tonic-gate 	optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ;
50840Sstevel@tonic-gate 	optmgmt_req.MGMT_flags = T_CHECK;
50850Sstevel@tonic-gate 	optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen);
50860Sstevel@tonic-gate 	optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req);
50870Sstevel@tonic-gate 
50880Sstevel@tonic-gate 	oh.level = level;
50890Sstevel@tonic-gate 	oh.name = option_name;
50900Sstevel@tonic-gate 	oh.len = maxlen;
50910Sstevel@tonic-gate 
50920Sstevel@tonic-gate 	mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req),
50930Sstevel@tonic-gate 	    &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP);
50940Sstevel@tonic-gate 	/* Let option management work in the presence of data flow control */
50950Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
50965240Snordmark 	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
50970Sstevel@tonic-gate 	mp = NULL;
50980Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
50990Sstevel@tonic-gate 	if (error) {
51000Sstevel@tonic-gate 		eprintsoline(so, error);
51010Sstevel@tonic-gate 		goto done2;
51020Sstevel@tonic-gate 	}
51030Sstevel@tonic-gate 	error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK,
51040Sstevel@tonic-gate 	    (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0);
51050Sstevel@tonic-gate 	if (error) {
51060Sstevel@tonic-gate 		if (option != NULL) {
51070Sstevel@tonic-gate 			/* We have a fallback value */
51080Sstevel@tonic-gate 			error = 0;
51090Sstevel@tonic-gate 			goto copyout;
51100Sstevel@tonic-gate 		}
51110Sstevel@tonic-gate 		eprintsoline(so, error);
51120Sstevel@tonic-gate 		goto done2;
51130Sstevel@tonic-gate 	}
51140Sstevel@tonic-gate 	ASSERT(mp);
51150Sstevel@tonic-gate 	optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr;
51160Sstevel@tonic-gate 	opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset,
51175240Snordmark 	    optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE);
51180Sstevel@tonic-gate 	if (opt_res == NULL) {
51190Sstevel@tonic-gate 		if (option != NULL) {
51200Sstevel@tonic-gate 			/* We have a fallback value */
51210Sstevel@tonic-gate 			error = 0;
51220Sstevel@tonic-gate 			goto copyout;
51230Sstevel@tonic-gate 		}
51240Sstevel@tonic-gate 		error = EPROTO;
51250Sstevel@tonic-gate 		eprintsoline(so, error);
51260Sstevel@tonic-gate 		goto done;
51270Sstevel@tonic-gate 	}
51280Sstevel@tonic-gate 	option = &opt_res[1];
51290Sstevel@tonic-gate 
51300Sstevel@tonic-gate 	/* check to ensure that the option is within bounds */
51310Sstevel@tonic-gate 	if (((uintptr_t)option + opt_res->len < (uintptr_t)option) ||
51325240Snordmark 	    (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) {
51330Sstevel@tonic-gate 		if (option != NULL) {
51340Sstevel@tonic-gate 			/* We have a fallback value */
51350Sstevel@tonic-gate 			error = 0;
51360Sstevel@tonic-gate 			goto copyout;
51370Sstevel@tonic-gate 		}
51380Sstevel@tonic-gate 		error = EPROTO;
51390Sstevel@tonic-gate 		eprintsoline(so, error);
51400Sstevel@tonic-gate 		goto done;
51410Sstevel@tonic-gate 	}
51420Sstevel@tonic-gate 
51430Sstevel@tonic-gate 	len = opt_res->len;
51440Sstevel@tonic-gate 
51450Sstevel@tonic-gate copyout: {
51460Sstevel@tonic-gate 		t_uscalar_t size = MIN(len, maxlen);
51470Sstevel@tonic-gate 		bcopy(option, optval, size);
51480Sstevel@tonic-gate 		bcopy(&size, optlenp, sizeof (size));
51490Sstevel@tonic-gate 	}
51500Sstevel@tonic-gate done:
51510Sstevel@tonic-gate 	freemsg(mp);
51520Sstevel@tonic-gate done2:
51530Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
51540Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
51550Sstevel@tonic-gate 	return (error);
51560Sstevel@tonic-gate }
51570Sstevel@tonic-gate 
51580Sstevel@tonic-gate /*
51590Sstevel@tonic-gate  * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ.
51600Sstevel@tonic-gate  * SOL_SOCKET options are also recorded in the sonode. A setsockopt for
51610Sstevel@tonic-gate  * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails -
51620Sstevel@tonic-gate  * setsockopt has to work even if the transport does not support the option.
 *
 * Exception to "all options are passed down": for AF_INET/AF_INET6
 * SOV_SOCKSTREAM sockets with a non-NULL so_priv, a handful of SOL_SOCKET
 * options and TCP_NODELAY are applied directly to the tcp_t and no
 * T_SVR4_OPTMGMT_REQ is sent for them.
 *
 * so		- socket being configured
 * level	- option level (SOL_SOCKET, IPPROTO_TCP, ...)
 * option_name	- option within that level
 * optval	- option value; asserted to be t_scalar_t aligned, and to be
 *		  NULL exactly when optlen is 0
 * optlen	- length of optval in bytes
 *
 * Returns 0 on success, otherwise an errno value. EINVAL is returned
 * immediately (before SOLOCKED is taken) when the socket has
 * SS_CANTSENDMORE set and xnet_skip_checks is off.
 *
 * NOTE(review): the error-forgiveness code at the end of the function
 * clears EINVAL as well as ENOPROTOOPT and EPROTO for the SOL_SOCKET
 * options whose length it validated.
51630Sstevel@tonic-gate  */
51640Sstevel@tonic-gate int
51650Sstevel@tonic-gate sotpi_setsockopt(struct sonode *so, int level, int option_name,
51660Sstevel@tonic-gate 	const void *optval, t_uscalar_t optlen)
51670Sstevel@tonic-gate {
51680Sstevel@tonic-gate 	struct T_optmgmt_req	optmgmt_req;
51690Sstevel@tonic-gate 	struct opthdr		oh;
51700Sstevel@tonic-gate 	mblk_t			*mp;
51710Sstevel@tonic-gate 	int			error = 0;
	/*
	 * handled is used twice with different meanings: in the tcp fast
	 * path it means "option fully applied, skip the TPI request"; in the
	 * SOL_SOCKET recording code after done: it means "length validated,
	 * transport errors may be forgiven".
	 */
51720Sstevel@tonic-gate 	boolean_t		handled = B_FALSE;
51730Sstevel@tonic-gate 
51740Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n",
51757240Srh87107 	    (void *)so, level, option_name, optval, optlen,
51765240Snordmark 	    pr_state(so->so_state, so->so_mode)));
51770Sstevel@tonic-gate 
51780Sstevel@tonic-gate 
51790Sstevel@tonic-gate 	/* X/Open requires this check */
	/* Plain return is safe here: SOLOCKED has not been taken yet. */
51800Sstevel@tonic-gate 	if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
51810Sstevel@tonic-gate 		if (xnet_check_print)
51820Sstevel@tonic-gate 			printf("sockfs: X/Open setsockopt check => EINVAL\n");
51830Sstevel@tonic-gate 		return (EINVAL);
51840Sstevel@tonic-gate 	}
51850Sstevel@tonic-gate 
51860Sstevel@tonic-gate 	/* Caller allocates aligned optval, or passes null */
51870Sstevel@tonic-gate 	ASSERT(((uintptr_t)optval & (sizeof (t_scalar_t) - 1)) == 0);
51880Sstevel@tonic-gate 	/* If optval is null optlen is 0, and vice-versa */
51890Sstevel@tonic-gate 	ASSERT(optval != NULL || optlen == 0);
51900Sstevel@tonic-gate 	ASSERT(optlen != 0 || optval == NULL);
51910Sstevel@tonic-gate 
	/*
	 * Take SOLOCKED while holding so_lock, then drop so_lock for the
	 * option processing. Every path from here on reacquires so_lock
	 * before reaching done/done2, where SOLOCKED is released.
	 */
51920Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
51930Sstevel@tonic-gate 	so_lock_single(so);	/* Set SOLOCKED */
51940Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
51950Sstevel@tonic-gate 
51960Sstevel@tonic-gate 	/*
51970Sstevel@tonic-gate 	 * For SOCKET or TCP level options, try to set it here itself
51980Sstevel@tonic-gate 	 * provided socket has not been popped and we know the tcp
51990Sstevel@tonic-gate 	 * structure (stored in so_priv).
52000Sstevel@tonic-gate 	 */
52010Sstevel@tonic-gate 	if ((level == SOL_SOCKET || level == IPPROTO_TCP) &&
52020Sstevel@tonic-gate 	    (so->so_family == AF_INET || so->so_family == AF_INET6) &&
52030Sstevel@tonic-gate 	    (so->so_version == SOV_SOCKSTREAM) && (so->so_priv != NULL)) {
52040Sstevel@tonic-gate 		tcp_t		*tcp = so->so_priv;
		/* Set only for the boolean options validated below. */
52050Sstevel@tonic-gate 		boolean_t	onoff;
52060Sstevel@tonic-gate 
		/*
		 * intvalue reads optval as an int32_t; it must only be used
		 * once optlen has been checked. It is defined again,
		 * identically, in the SOL_SOCKET recording code further down;
		 * the single #undef after that code ends both definitions.
		 */
52070Sstevel@tonic-gate #define	intvalue	(*(int32_t *)optval)
52080Sstevel@tonic-gate 
52090Sstevel@tonic-gate 		switch (level) {
52100Sstevel@tonic-gate 		case SOL_SOCKET:
			/*
			 * Options not listed in this first switch leave
			 * handled B_FALSE, match nothing in the action switch,
			 * and fall through to the T_SVR4_OPTMGMT_REQ path.
			 */
52110Sstevel@tonic-gate 			switch (option_name) {		/* Check length param */
52120Sstevel@tonic-gate 			case SO_DEBUG:
52130Sstevel@tonic-gate 			case SO_REUSEADDR:
52140Sstevel@tonic-gate 			case SO_DONTROUTE:
52150Sstevel@tonic-gate 			case SO_BROADCAST:
52160Sstevel@tonic-gate 			case SO_USELOOPBACK:
52170Sstevel@tonic-gate 			case SO_OOBINLINE:
52180Sstevel@tonic-gate 			case SO_DGRAM_ERRIND:
52190Sstevel@tonic-gate 				if (optlen != (t_uscalar_t)sizeof (int32_t)) {
52200Sstevel@tonic-gate 					error = EINVAL;
52210Sstevel@tonic-gate 					eprintsoline(so, error);
					/* done2 expects so_lock held */
52220Sstevel@tonic-gate 					mutex_enter(&so->so_lock);
52230Sstevel@tonic-gate 					goto done2;
52240Sstevel@tonic-gate 				}
52250Sstevel@tonic-gate 				ASSERT(optval);
				/* Any non-zero value turns the option on. */
52260Sstevel@tonic-gate 				onoff = intvalue != 0;
52270Sstevel@tonic-gate 				handled = B_TRUE;
52280Sstevel@tonic-gate 				break;
52290Sstevel@tonic-gate 			case SO_LINGER:
52300Sstevel@tonic-gate 				if (optlen !=
52310Sstevel@tonic-gate 				    (t_uscalar_t)sizeof (struct linger)) {
52320Sstevel@tonic-gate 					error = EINVAL;
52330Sstevel@tonic-gate 					eprintsoline(so, error);
					/* done2 expects so_lock held */
52340Sstevel@tonic-gate 					mutex_enter(&so->so_lock);
52350Sstevel@tonic-gate 					goto done2;
52360Sstevel@tonic-gate 				}
52370Sstevel@tonic-gate 				ASSERT(optval);
52380Sstevel@tonic-gate 				handled = B_TRUE;
52390Sstevel@tonic-gate 				break;
52400Sstevel@tonic-gate 			}
52410Sstevel@tonic-gate 
			/*
			 * Apply the option to both the tcp_t and the sonode.
			 * Note so_lock is not held in this section; only
			 * SOLOCKED is.
			 */
52420Sstevel@tonic-gate 			switch (option_name) {			/* Do actions */
52430Sstevel@tonic-gate 			case SO_LINGER: {
52440Sstevel@tonic-gate 				struct linger *lgr = (struct linger *)optval;
52450Sstevel@tonic-gate 
52460Sstevel@tonic-gate 				if (lgr->l_onoff) {
52470Sstevel@tonic-gate 					tcp->tcp_linger = 1;
52480Sstevel@tonic-gate 					tcp->tcp_lingertime = lgr->l_linger;
52490Sstevel@tonic-gate 					so->so_linger.l_onoff = SO_LINGER;
52500Sstevel@tonic-gate 					so->so_options |= SO_LINGER;
52510Sstevel@tonic-gate 				} else {
52520Sstevel@tonic-gate 					tcp->tcp_linger = 0;
52530Sstevel@tonic-gate 					tcp->tcp_lingertime = 0;
52540Sstevel@tonic-gate 					so->so_linger.l_onoff = 0;
52550Sstevel@tonic-gate 					so->so_options &= ~SO_LINGER;
52560Sstevel@tonic-gate 				}
				/*
				 * The sonode keeps the caller's l_linger even
				 * when lingering is off, although the tcp_t's
				 * lingertime was zeroed above.
				 */
52570Sstevel@tonic-gate 				so->so_linger.l_linger = lgr->l_linger;
52580Sstevel@tonic-gate 				handled = B_TRUE;
52590Sstevel@tonic-gate 				break;
52600Sstevel@tonic-gate 			}
			/*
			 * NOTE(review): unlike the other boolean options in
			 * this switch, SO_DEBUG does not update so_options
			 * here, whereas the non-fast-path code after done:
			 * does. Confirm whether that is intentional.
			 */
52610Sstevel@tonic-gate 			case SO_DEBUG:
52620Sstevel@tonic-gate 				tcp->tcp_debug = onoff;
52630Sstevel@tonic-gate #ifdef SOCK_TEST
52640Sstevel@tonic-gate 				if (intvalue & 2)
52650Sstevel@tonic-gate 					sock_test_timelimit = 10 * hz;
52660Sstevel@tonic-gate 				else
52670Sstevel@tonic-gate 					sock_test_timelimit = 0;
52680Sstevel@tonic-gate 
52690Sstevel@tonic-gate 				if (intvalue & 4)
52700Sstevel@tonic-gate 					do_useracc = 0;
52710Sstevel@tonic-gate 				else
52720Sstevel@tonic-gate 					do_useracc = 1;
52730Sstevel@tonic-gate #endif /* SOCK_TEST */
52740Sstevel@tonic-gate 				break;
52750Sstevel@tonic-gate 			case SO_DONTROUTE:
52760Sstevel@tonic-gate 				/*
52770Sstevel@tonic-gate 				 * SO_DONTROUTE, SO_USELOOPBACK and
52780Sstevel@tonic-gate 				 * SO_BROADCAST are only of interest to IP.
52790Sstevel@tonic-gate 				 * We track them here only so
52800Sstevel@tonic-gate 				 * that we can report their current value.
52810Sstevel@tonic-gate 				 */
52820Sstevel@tonic-gate 				tcp->tcp_dontroute = onoff;
52830Sstevel@tonic-gate 				if (onoff)
52840Sstevel@tonic-gate 					so->so_options |= option_name;
52850Sstevel@tonic-gate 				else
52860Sstevel@tonic-gate 					so->so_options &= ~option_name;
52870Sstevel@tonic-gate 				break;
52880Sstevel@tonic-gate 			case SO_USELOOPBACK:
52890Sstevel@tonic-gate 				tcp->tcp_useloopback = onoff;
52900Sstevel@tonic-gate 				if (onoff)
52910Sstevel@tonic-gate 					so->so_options |= option_name;
52920Sstevel@tonic-gate 				else
52930Sstevel@tonic-gate 					so->so_options &= ~option_name;
52940Sstevel@tonic-gate 				break;
52950Sstevel@tonic-gate 			case SO_BROADCAST:
52960Sstevel@tonic-gate 				tcp->tcp_broadcast = onoff;
52970Sstevel@tonic-gate 				if (onoff)
52980Sstevel@tonic-gate 					so->so_options |= option_name;
52990Sstevel@tonic-gate 				else
53000Sstevel@tonic-gate 					so->so_options &= ~option_name;
53010Sstevel@tonic-gate 				break;
53020Sstevel@tonic-gate 			case SO_REUSEADDR:
53030Sstevel@tonic-gate 				tcp->tcp_reuseaddr = onoff;
53040Sstevel@tonic-gate 				if (onoff)
53050Sstevel@tonic-gate 					so->so_options |= option_name;
53060Sstevel@tonic-gate 				else
53070Sstevel@tonic-gate 					so->so_options &= ~option_name;
53080Sstevel@tonic-gate 				break;
53090Sstevel@tonic-gate 			case SO_OOBINLINE:
53100Sstevel@tonic-gate 				tcp->tcp_oobinline = onoff;
53110Sstevel@tonic-gate 				if (onoff)
53120Sstevel@tonic-gate 					so->so_options |= option_name;
53130Sstevel@tonic-gate 				else
53140Sstevel@tonic-gate 					so->so_options &= ~option_name;
53150Sstevel@tonic-gate 				break;
53160Sstevel@tonic-gate 			case SO_DGRAM_ERRIND:
53170Sstevel@tonic-gate 				tcp->tcp_dgram_errind = onoff;
53180Sstevel@tonic-gate 				if (onoff)
53190Sstevel@tonic-gate 					so->so_options |= option_name;
53200Sstevel@tonic-gate 				else
53210Sstevel@tonic-gate 					so->so_options &= ~option_name;
53220Sstevel@tonic-gate 				break;
53230Sstevel@tonic-gate 			}
53240Sstevel@tonic-gate 			break;
53250Sstevel@tonic-gate 		case IPPROTO_TCP:
			/* TCP_NODELAY is the only TCP-level option handled here. */
53260Sstevel@tonic-gate 			switch (option_name) {
53270Sstevel@tonic-gate 			case TCP_NODELAY:
53280Sstevel@tonic-gate 				if (optlen != (t_uscalar_t)sizeof (int32_t)) {
53290Sstevel@tonic-gate 					error = EINVAL;
53300Sstevel@tonic-gate 					eprintsoline(so, error);
					/* done2 expects so_lock held */
53310Sstevel@tonic-gate 					mutex_enter(&so->so_lock);
53320Sstevel@tonic-gate 					goto done2;
53330Sstevel@tonic-gate 				}
53340Sstevel@tonic-gate 				ASSERT(optval);
				/*
				 * Non-zero sets tcp_naglim to 1; zero sets it
				 * to the current tcp_mss.
				 */
53350Sstevel@tonic-gate 				tcp->tcp_naglim = intvalue ? 1 : tcp->tcp_mss;
53360Sstevel@tonic-gate 				handled = B_TRUE;
53370Sstevel@tonic-gate 				break;
53380Sstevel@tonic-gate 			}
53390Sstevel@tonic-gate 			break;
53400Sstevel@tonic-gate 		default:
53410Sstevel@tonic-gate 			handled = B_FALSE;
53420Sstevel@tonic-gate 			break;
53430Sstevel@tonic-gate 		}
53440Sstevel@tonic-gate 	}
53450Sstevel@tonic-gate 
	/*
	 * The fast path applied the option completely: no T_SVR4_OPTMGMT_REQ
	 * is sent and the SOL_SOCKET recording code after done: is skipped.
	 */
53460Sstevel@tonic-gate 	if (handled) {
53470Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
53480Sstevel@tonic-gate 		goto done2;
53490Sstevel@tonic-gate 	}
53500Sstevel@tonic-gate 
	/*
	 * Build a T_SVR4_OPTMGMT_REQ with T_NEGOTIATE whose option area is a
	 * single opthdr immediately followed by optlen bytes of value.
	 */
53510Sstevel@tonic-gate 	optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ;
53520Sstevel@tonic-gate 	optmgmt_req.MGMT_flags = T_NEGOTIATE;
53530Sstevel@tonic-gate 	optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen;
53540Sstevel@tonic-gate 	optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req);
53550Sstevel@tonic-gate 
53560Sstevel@tonic-gate 	oh.level = level;
53570Sstevel@tonic-gate 	oh.name = option_name;
53580Sstevel@tonic-gate 	oh.len = optlen;
53590Sstevel@tonic-gate 
	/*
	 * NOTE(review): soallocproto3 presumably copies the three pieces
	 * (request header, opthdr, value) into one message; with _ALLOC_SLEEP
	 * the result is used without a NULL check - confirm it cannot fail.
	 */
53600Sstevel@tonic-gate 	mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req),
53610Sstevel@tonic-gate 	    &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP);
53620Sstevel@tonic-gate 	/* Let option management work in the presence of data flow control */
53630Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
53645240Snordmark 	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
	/*
	 * mp is not freed here on either outcome (presumably kstrputmsg
	 * consumes it - confirm); it is cleared so it can receive the ack.
	 */
53650Sstevel@tonic-gate 	mp = NULL;
	/* so_lock is held from here through done and done2. */
53660Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
53670Sstevel@tonic-gate 	if (error) {
53680Sstevel@tonic-gate 		eprintsoline(so, error);
53690Sstevel@tonic-gate 		goto done;
53700Sstevel@tonic-gate 	}
	/* Wait for the T_OPTMGMT_ACK answering the request just sent. */
53710Sstevel@tonic-gate 	error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK,
53720Sstevel@tonic-gate 	    (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0);
53730Sstevel@tonic-gate 	if (error) {
53740Sstevel@tonic-gate 		eprintsoline(so, error);
53750Sstevel@tonic-gate 		goto done;
53760Sstevel@tonic-gate 	}
53770Sstevel@tonic-gate 	ASSERT(mp);
53780Sstevel@tonic-gate 	/* No need to verify T_optmgmt_ack */
53790Sstevel@tonic-gate 	freemsg(mp);
	/*
	 * done is reached with so_lock held, either with error == 0 or with
	 * the error returned by kstrputmsg/sowaitprim.
	 */
53800Sstevel@tonic-gate done:
53810Sstevel@tonic-gate 	/*
53820Sstevel@tonic-gate 	 * Check for SOL_SOCKET options and record their values.
53830Sstevel@tonic-gate 	 * If we know about a SOL_SOCKET parameter and the transport
53840Sstevel@tonic-gate 	 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or
53850Sstevel@tonic-gate 	 * EPROTO) we let the setsockopt succeed.
53860Sstevel@tonic-gate 	 */
53870Sstevel@tonic-gate 	if (level == SOL_SOCKET) {
53880Sstevel@tonic-gate 		/* Check parameters */
		/*
		 * This validation runs after the request has already been
		 * sent to the transport. A bad length replaces whatever
		 * error the transport path produced with EINVAL and skips
		 * the recording below.
		 */
53890Sstevel@tonic-gate 		switch (option_name) {
53900Sstevel@tonic-gate 		case SO_DEBUG:
53910Sstevel@tonic-gate 		case SO_REUSEADDR:
53920Sstevel@tonic-gate 		case SO_KEEPALIVE:
53930Sstevel@tonic-gate 		case SO_DONTROUTE:
53940Sstevel@tonic-gate 		case SO_BROADCAST:
53950Sstevel@tonic-gate 		case SO_USELOOPBACK:
53960Sstevel@tonic-gate 		case SO_OOBINLINE:
53970Sstevel@tonic-gate 		case SO_SNDBUF:
53980Sstevel@tonic-gate 		case SO_RCVBUF:
53990Sstevel@tonic-gate #ifdef notyet
54000Sstevel@tonic-gate 		case SO_SNDLOWAT:
54010Sstevel@tonic-gate 		case SO_RCVLOWAT:
54020Sstevel@tonic-gate 		case SO_SNDTIMEO:
54030Sstevel@tonic-gate 		case SO_RCVTIMEO:
54040Sstevel@tonic-gate #endif /* notyet */
54050Sstevel@tonic-gate 		case SO_DGRAM_ERRIND:
54060Sstevel@tonic-gate 			if (optlen != (t_uscalar_t)sizeof (int32_t)) {
54070Sstevel@tonic-gate 				error = EINVAL;
54080Sstevel@tonic-gate 				eprintsoline(so, error);
54090Sstevel@tonic-gate 				goto done2;
54100Sstevel@tonic-gate 			}
54110Sstevel@tonic-gate 			ASSERT(optval);
54120Sstevel@tonic-gate 			handled = B_TRUE;
54130Sstevel@tonic-gate 			break;
54140Sstevel@tonic-gate 		case SO_LINGER:
54150Sstevel@tonic-gate 			if (optlen != (t_uscalar_t)sizeof (struct linger)) {
54160Sstevel@tonic-gate 				error = EINVAL;
54170Sstevel@tonic-gate 				eprintsoline(so, error);
54180Sstevel@tonic-gate 				goto done2;
54190Sstevel@tonic-gate 			}
54200Sstevel@tonic-gate 			ASSERT(optval);
54210Sstevel@tonic-gate 			handled = B_TRUE;
54220Sstevel@tonic-gate 			break;
54230Sstevel@tonic-gate 		}
54240Sstevel@tonic-gate 
54250Sstevel@tonic-gate #define	intvalue	(*(int32_t *)optval)
54260Sstevel@tonic-gate 
		/* Record the value in the sonode (so_lock is held here). */
54270Sstevel@tonic-gate 		switch (option_name) {
54280Sstevel@tonic-gate 		case SO_TYPE:
54290Sstevel@tonic-gate 		case SO_ERROR:
54300Sstevel@tonic-gate 		case SO_ACCEPTCONN:
54310Sstevel@tonic-gate 			/* Can't be set */
54320Sstevel@tonic-gate 			error = ENOPROTOOPT;
54330Sstevel@tonic-gate 			goto done2;
54340Sstevel@tonic-gate 		case SO_LINGER: {
54350Sstevel@tonic-gate 			struct linger *l = (struct linger *)optval;
54360Sstevel@tonic-gate 
54370Sstevel@tonic-gate 			so->so_linger.l_linger = l->l_linger;
54380Sstevel@tonic-gate 			if (l->l_onoff) {
54390Sstevel@tonic-gate 				so->so_linger.l_onoff = SO_LINGER;
54400Sstevel@tonic-gate 				so->so_options |= SO_LINGER;
54410Sstevel@tonic-gate 			} else {
54420Sstevel@tonic-gate 				so->so_linger.l_onoff = 0;
54430Sstevel@tonic-gate 				so->so_options &= ~SO_LINGER;
54440Sstevel@tonic-gate 			}
54450Sstevel@tonic-gate 			break;
54460Sstevel@tonic-gate 		}
54470Sstevel@tonic-gate 
54480Sstevel@tonic-gate 		case SO_DEBUG:
54490Sstevel@tonic-gate #ifdef SOCK_TEST
54500Sstevel@tonic-gate 			if (intvalue & 2)
54510Sstevel@tonic-gate 				sock_test_timelimit = 10 * hz;
54520Sstevel@tonic-gate 			else
54530Sstevel@tonic-gate 				sock_test_timelimit = 0;
54540Sstevel@tonic-gate 
54550Sstevel@tonic-gate 			if (intvalue & 4)
54560Sstevel@tonic-gate 				do_useracc = 0;
54570Sstevel@tonic-gate 			else
54580Sstevel@tonic-gate 				do_useracc = 1;
54590Sstevel@tonic-gate #endif /* SOCK_TEST */
54600Sstevel@tonic-gate 			/* FALLTHRU */
54610Sstevel@tonic-gate 		case SO_REUSEADDR:
54620Sstevel@tonic-gate 		case SO_KEEPALIVE:
54630Sstevel@tonic-gate 		case SO_DONTROUTE:
54640Sstevel@tonic-gate 		case SO_BROADCAST:
54650Sstevel@tonic-gate 		case SO_USELOOPBACK:
54660Sstevel@tonic-gate 		case SO_OOBINLINE:
54670Sstevel@tonic-gate 		case SO_DGRAM_ERRIND:
			/*
			 * The option name is used directly as the bit to set
			 * or clear in so_options.
			 */
54680Sstevel@tonic-gate 			if (intvalue != 0) {
54690Sstevel@tonic-gate 				dprintso(so, 1,
54705240Snordmark 				    ("sotpi_setsockopt: setting 0x%x\n",
54715240Snordmark 				    option_name));
54720Sstevel@tonic-gate 				so->so_options |= option_name;
54730Sstevel@tonic-gate 			} else {
54740Sstevel@tonic-gate 				dprintso(so, 1,
54755240Snordmark 				    ("sotpi_setsockopt: clearing 0x%x\n",
54765240Snordmark 				    option_name));
54770Sstevel@tonic-gate 				so->so_options &= ~option_name;
54780Sstevel@tonic-gate 			}
54790Sstevel@tonic-gate 			break;
54800Sstevel@tonic-gate 		/*
54810Sstevel@tonic-gate 		 * The following options are only returned by us when the
54820Sstevel@tonic-gate 		 * T_SVR4_OPTMGMT_REQ fails.
54830Sstevel@tonic-gate 		 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs
54840Sstevel@tonic-gate 		 * since the transport might adjust the value and not
54850Sstevel@tonic-gate 		 * return exactly what was set by the application.
54860Sstevel@tonic-gate 		 */
54870Sstevel@tonic-gate 		case SO_SNDBUF:
54880Sstevel@tonic-gate 			so->so_sndbuf = intvalue;
54890Sstevel@tonic-gate 			break;
54900Sstevel@tonic-gate 		case SO_RCVBUF:
54910Sstevel@tonic-gate 			so->so_rcvbuf = intvalue;
54920Sstevel@tonic-gate 			break;
54930Sstevel@tonic-gate #ifdef notyet
54940Sstevel@tonic-gate 		/*
54950Sstevel@tonic-gate 		 * We do not implement the semantics of these options
54960Sstevel@tonic-gate 		 * thus we shouldn't implement the options either.
54970Sstevel@tonic-gate 		 */
54980Sstevel@tonic-gate 		case SO_SNDLOWAT:
54990Sstevel@tonic-gate 			so->so_sndlowat = intvalue;
55000Sstevel@tonic-gate 			break;
55010Sstevel@tonic-gate 		case SO_RCVLOWAT:
55020Sstevel@tonic-gate 			so->so_rcvlowat = intvalue;
55030Sstevel@tonic-gate 			break;
55040Sstevel@tonic-gate 		case SO_SNDTIMEO:
55050Sstevel@tonic-gate 			so->so_sndtimeo = intvalue;
55060Sstevel@tonic-gate 			break;
55070Sstevel@tonic-gate 		case SO_RCVTIMEO:
55080Sstevel@tonic-gate 			so->so_rcvtimeo = intvalue;
55090Sstevel@tonic-gate 			break;
55100Sstevel@tonic-gate #endif /* notyet */
55110Sstevel@tonic-gate 		}
55120Sstevel@tonic-gate #undef	intvalue
55130Sstevel@tonic-gate 
		/*
		 * handled can only be B_TRUE here because the length check
		 * above accepted a known SOL_SOCKET option (a fast-path
		 * "handled" jumps straight to done2). For those options the
		 * value has been recorded, so ENOPROTOOPT, EPROTO and EINVAL
		 * from the transport path are cleared; any other error is
		 * still returned.
		 */
55140Sstevel@tonic-gate 		if (error) {
55150Sstevel@tonic-gate 			if ((error == ENOPROTOOPT || error == EPROTO ||
55160Sstevel@tonic-gate 			    error == EINVAL) && handled) {
55170Sstevel@tonic-gate 				dprintso(so, 1,
55180Sstevel@tonic-gate 				    ("setsockopt: ignoring error %d for 0x%x\n",
55190Sstevel@tonic-gate 				    error, option_name));
55200Sstevel@tonic-gate 				error = 0;
55210Sstevel@tonic-gate 			}
55220Sstevel@tonic-gate 		}
55230Sstevel@tonic-gate 	}
	/*
	 * Common exit: so_lock must be held on arrival. SOLOCKED is released
	 * and then so_lock is dropped. No goto in this function targets the
	 * ret label.
	 */
55240Sstevel@tonic-gate done2:
55250Sstevel@tonic-gate ret:
55260Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
55270Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
55280Sstevel@tonic-gate 	return (error);
55290Sstevel@tonic-gate }
5530