xref: /onnv-gate/usr/src/uts/common/fs/sockfs/sockcommon_sops.c (revision 11042:2d6e217af1b4)
18348SEric.Yu@Sun.COM /*
28348SEric.Yu@Sun.COM  * CDDL HEADER START
38348SEric.Yu@Sun.COM  *
48348SEric.Yu@Sun.COM  * The contents of this file are subject to the terms of the
58348SEric.Yu@Sun.COM  * Common Development and Distribution License (the "License").
68348SEric.Yu@Sun.COM  * You may not use this file except in compliance with the License.
78348SEric.Yu@Sun.COM  *
88348SEric.Yu@Sun.COM  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
98348SEric.Yu@Sun.COM  * or http://www.opensolaris.org/os/licensing.
108348SEric.Yu@Sun.COM  * See the License for the specific language governing permissions
118348SEric.Yu@Sun.COM  * and limitations under the License.
128348SEric.Yu@Sun.COM  *
138348SEric.Yu@Sun.COM  * When distributing Covered Code, include this CDDL HEADER in each
148348SEric.Yu@Sun.COM  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
158348SEric.Yu@Sun.COM  * If applicable, add the following below this CDDL HEADER, with the
168348SEric.Yu@Sun.COM  * fields enclosed by brackets "[]" replaced with your own identifying
178348SEric.Yu@Sun.COM  * information: Portions Copyright [yyyy] [name of copyright owner]
188348SEric.Yu@Sun.COM  *
198348SEric.Yu@Sun.COM  * CDDL HEADER END
208348SEric.Yu@Sun.COM  */
218348SEric.Yu@Sun.COM 
228348SEric.Yu@Sun.COM /*
238489Sshenjian  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
248348SEric.Yu@Sun.COM  * Use is subject to license terms.
258348SEric.Yu@Sun.COM  */
268348SEric.Yu@Sun.COM 
278348SEric.Yu@Sun.COM #include <sys/types.h>
288348SEric.Yu@Sun.COM #include <sys/param.h>
298348SEric.Yu@Sun.COM #include <sys/systm.h>
308348SEric.Yu@Sun.COM #include <sys/sysmacros.h>
318348SEric.Yu@Sun.COM #include <sys/debug.h>
328348SEric.Yu@Sun.COM #include <sys/cmn_err.h>
338348SEric.Yu@Sun.COM 
348348SEric.Yu@Sun.COM #include <sys/stropts.h>
358348SEric.Yu@Sun.COM #include <sys/socket.h>
368348SEric.Yu@Sun.COM #include <sys/socketvar.h>
378348SEric.Yu@Sun.COM 
388348SEric.Yu@Sun.COM #define	_SUN_TPI_VERSION	2
398348SEric.Yu@Sun.COM #include <sys/tihdr.h>
408348SEric.Yu@Sun.COM #include <sys/sockio.h>
418348SEric.Yu@Sun.COM #include <sys/kmem_impl.h>
428348SEric.Yu@Sun.COM 
438348SEric.Yu@Sun.COM #include <sys/strsubr.h>
448348SEric.Yu@Sun.COM #include <sys/strsun.h>
458348SEric.Yu@Sun.COM #include <sys/ddi.h>
468348SEric.Yu@Sun.COM #include <netinet/in.h>
478348SEric.Yu@Sun.COM #include <inet/ip.h>
488348SEric.Yu@Sun.COM 
498348SEric.Yu@Sun.COM #include <fs/sockfs/sockcommon.h>
508348SEric.Yu@Sun.COM 
518348SEric.Yu@Sun.COM #include <sys/socket_proto.h>
528348SEric.Yu@Sun.COM 
538348SEric.Yu@Sun.COM #include <fs/sockfs/socktpi_impl.h>
549491SAnders.Persson@Sun.COM #include <fs/sockfs/sodirect.h>
558348SEric.Yu@Sun.COM #include <sys/tihdr.h>
568348SEric.Yu@Sun.COM #include <fs/sockfs/nl7c.h>
578348SEric.Yu@Sun.COM #include <inet/kssl/ksslapi.h>
588348SEric.Yu@Sun.COM 
598348SEric.Yu@Sun.COM 
608348SEric.Yu@Sun.COM extern int xnet_skip_checks;
618348SEric.Yu@Sun.COM extern int xnet_check_print;
628348SEric.Yu@Sun.COM 
638348SEric.Yu@Sun.COM static void so_queue_oob(sock_upper_handle_t, mblk_t *, size_t);
648348SEric.Yu@Sun.COM 
658348SEric.Yu@Sun.COM 
/*
 * Stub for the accept socket operation: rejects the request by always
 * returning EOPNOTSUPP. None of the arguments are examined or modified.
 */
/*ARGSUSED*/
int
so_accept_notsupp(struct sonode *lso, int fflag, struct cred *cr,
    struct sonode **nsop)
{
	return (EOPNOTSUPP);
}
738348SEric.Yu@Sun.COM 
/*
 * Stub for the listen socket operation: always returns EOPNOTSUPP,
 * regardless of the socket, backlog or credentials passed in.
 */
/*ARGSUSED*/
int
so_listen_notsupp(struct sonode *so, int backlog, struct cred *cr)
{
	return (EOPNOTSUPP);
}
808348SEric.Yu@Sun.COM 
/*
 * Stub for the getsockname socket operation: always returns EOPNOTSUPP.
 * Neither the address buffer nor the length it points at is written.
 */
/*ARGSUSED*/
int
so_getsockname_notsupp(struct sonode *so, struct sockaddr *sa, socklen_t *len,
    struct cred *cr)
{
	return (EOPNOTSUPP);
}
888348SEric.Yu@Sun.COM 
898348SEric.Yu@Sun.COM /*ARGSUSED*/
908348SEric.Yu@Sun.COM int
918348SEric.Yu@Sun.COM so_getpeername_notsupp(struct sonode *so, struct sockaddr *addr,
928348SEric.Yu@Sun.COM     socklen_t *addrlen, boolean_t accept, struct cred *cr)
938348SEric.Yu@Sun.COM {
948348SEric.Yu@Sun.COM 	return (EOPNOTSUPP);
958348SEric.Yu@Sun.COM }
968348SEric.Yu@Sun.COM 
/*
 * Stub for the shutdown socket operation: always returns EOPNOTSUPP,
 * whatever value of "how" is requested.
 */
/*ARGSUSED*/
int
so_shutdown_notsupp(struct sonode *so, int how, struct cred *cr)
{
	return (EOPNOTSUPP);
}
1038348SEric.Yu@Sun.COM 
1048348SEric.Yu@Sun.COM /*ARGSUSED*/
1058348SEric.Yu@Sun.COM int
1068348SEric.Yu@Sun.COM so_sendmblk_notsupp(struct sonode *so, struct msghdr *msg, int fflag,
1078348SEric.Yu@Sun.COM     struct cred *cr, mblk_t **mpp)
1088348SEric.Yu@Sun.COM {
1098348SEric.Yu@Sun.COM 	return (EOPNOTSUPP);
1108348SEric.Yu@Sun.COM }
1118348SEric.Yu@Sun.COM 
1128348SEric.Yu@Sun.COM /*
1138348SEric.Yu@Sun.COM  * Generic Socket Ops
1148348SEric.Yu@Sun.COM  */
1158348SEric.Yu@Sun.COM 
/*
 * Generic init operation. Forwards so, pso, flags and cr to
 * socket_init_common() and returns whatever that routine returns.
 */
/* ARGSUSED */
int
so_init(struct sonode *so, struct sonode *pso, struct cred *cr, int flags)
{
	int error;

	error = socket_init_common(so, pso, flags, cr);
	return (error);
}
1228348SEric.Yu@Sun.COM 
1238348SEric.Yu@Sun.COM int
1248348SEric.Yu@Sun.COM so_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
1258348SEric.Yu@Sun.COM     int flags, struct cred *cr)
1268348SEric.Yu@Sun.COM {
1278348SEric.Yu@Sun.COM 	int error;
1288348SEric.Yu@Sun.COM 
1298348SEric.Yu@Sun.COM 	SO_BLOCK_FALLBACK(so, SOP_BIND(so, name, namelen, flags, cr));
1308348SEric.Yu@Sun.COM 
1318348SEric.Yu@Sun.COM 	ASSERT(flags == _SOBIND_XPG4_2 || flags == _SOBIND_SOCKBSD);
1328348SEric.Yu@Sun.COM 
1338348SEric.Yu@Sun.COM 	/* X/Open requires this check */
1348348SEric.Yu@Sun.COM 	if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
1358348SEric.Yu@Sun.COM 		if (xnet_check_print) {
1368348SEric.Yu@Sun.COM 			printf("sockfs: X/Open bind state check "
1378348SEric.Yu@Sun.COM 			    "caused EINVAL\n");
1388348SEric.Yu@Sun.COM 		}
1398348SEric.Yu@Sun.COM 		error = EINVAL;
1408348SEric.Yu@Sun.COM 		goto done;
1418348SEric.Yu@Sun.COM 	}
1428348SEric.Yu@Sun.COM 
1438348SEric.Yu@Sun.COM 	/*
1448348SEric.Yu@Sun.COM 	 * a bind to a NULL address is interpreted as unbind. So just
1458348SEric.Yu@Sun.COM 	 * do the downcall.
1468348SEric.Yu@Sun.COM 	 */
1478348SEric.Yu@Sun.COM 	if (name == NULL)
1488348SEric.Yu@Sun.COM 		goto dobind;
1498348SEric.Yu@Sun.COM 
1508348SEric.Yu@Sun.COM 	switch (so->so_family) {
1518348SEric.Yu@Sun.COM 	case AF_INET:
1528348SEric.Yu@Sun.COM 		if ((size_t)namelen != sizeof (sin_t)) {
1538348SEric.Yu@Sun.COM 			error = name->sa_family != so->so_family ?
1548348SEric.Yu@Sun.COM 			    EAFNOSUPPORT : EINVAL;
1558348SEric.Yu@Sun.COM 			eprintsoline(so, error);
1568348SEric.Yu@Sun.COM 			goto done;
1578348SEric.Yu@Sun.COM 		}
1588348SEric.Yu@Sun.COM 
1598348SEric.Yu@Sun.COM 		if ((flags & _SOBIND_XPG4_2) &&
1608348SEric.Yu@Sun.COM 		    (name->sa_family != so->so_family)) {
1618348SEric.Yu@Sun.COM 			/*
1628348SEric.Yu@Sun.COM 			 * This check has to be made for X/Open
1638348SEric.Yu@Sun.COM 			 * sockets however application failures have
1648348SEric.Yu@Sun.COM 			 * been observed when it is applied to
1658348SEric.Yu@Sun.COM 			 * all sockets.
1668348SEric.Yu@Sun.COM 			 */
1678348SEric.Yu@Sun.COM 			error = EAFNOSUPPORT;
1688348SEric.Yu@Sun.COM 			eprintsoline(so, error);
1698348SEric.Yu@Sun.COM 			goto done;
1708348SEric.Yu@Sun.COM 		}
1718348SEric.Yu@Sun.COM 		/*
1728348SEric.Yu@Sun.COM 		 * Force a zero sa_family to match so_family.
1738348SEric.Yu@Sun.COM 		 *
1748348SEric.Yu@Sun.COM 		 * Some programs like inetd(1M) don't set the
1758348SEric.Yu@Sun.COM 		 * family field. Other programs leave
1768348SEric.Yu@Sun.COM 		 * sin_family set to garbage - SunOS 4.X does
1778348SEric.Yu@Sun.COM 		 * not check the family field on a bind.
1788348SEric.Yu@Sun.COM 		 * We use the family field that
1798348SEric.Yu@Sun.COM 		 * was passed in to the socket() call.
1808348SEric.Yu@Sun.COM 		 */
1818348SEric.Yu@Sun.COM 		name->sa_family = so->so_family;
1828348SEric.Yu@Sun.COM 		break;
1838348SEric.Yu@Sun.COM 
1848348SEric.Yu@Sun.COM 	case AF_INET6: {
1858348SEric.Yu@Sun.COM #ifdef DEBUG
1868348SEric.Yu@Sun.COM 		sin6_t *sin6 = (sin6_t *)name;
1878348SEric.Yu@Sun.COM #endif
1888348SEric.Yu@Sun.COM 		if ((size_t)namelen != sizeof (sin6_t)) {
1898348SEric.Yu@Sun.COM 			error = name->sa_family != so->so_family ?
1908348SEric.Yu@Sun.COM 			    EAFNOSUPPORT : EINVAL;
1918348SEric.Yu@Sun.COM 			eprintsoline(so, error);
1928348SEric.Yu@Sun.COM 			goto done;
1938348SEric.Yu@Sun.COM 		}
1948348SEric.Yu@Sun.COM 
1958348SEric.Yu@Sun.COM 		if (name->sa_family != so->so_family) {
1968348SEric.Yu@Sun.COM 			/*
1978348SEric.Yu@Sun.COM 			 * With IPv6 we require the family to match
1988348SEric.Yu@Sun.COM 			 * unlike in IPv4.
1998348SEric.Yu@Sun.COM 			 */
2008348SEric.Yu@Sun.COM 			error = EAFNOSUPPORT;
2018348SEric.Yu@Sun.COM 			eprintsoline(so, error);
2028348SEric.Yu@Sun.COM 			goto done;
2038348SEric.Yu@Sun.COM 		}
2048348SEric.Yu@Sun.COM #ifdef DEBUG
2058348SEric.Yu@Sun.COM 		/*
2068348SEric.Yu@Sun.COM 		 * Verify that apps don't forget to clear
2078348SEric.Yu@Sun.COM 		 * sin6_scope_id etc
2088348SEric.Yu@Sun.COM 		 */
2098348SEric.Yu@Sun.COM 		if (sin6->sin6_scope_id != 0 &&
2108348SEric.Yu@Sun.COM 		    !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
2118348SEric.Yu@Sun.COM 			zcmn_err(getzoneid(), CE_WARN,
2128348SEric.Yu@Sun.COM 			    "bind with uninitialized sin6_scope_id "
2138348SEric.Yu@Sun.COM 			    "(%d) on socket. Pid = %d\n",
2148348SEric.Yu@Sun.COM 			    (int)sin6->sin6_scope_id,
2158348SEric.Yu@Sun.COM 			    (int)curproc->p_pid);
2168348SEric.Yu@Sun.COM 		}
2178348SEric.Yu@Sun.COM 		if (sin6->__sin6_src_id != 0) {
2188348SEric.Yu@Sun.COM 			zcmn_err(getzoneid(), CE_WARN,
2198348SEric.Yu@Sun.COM 			    "bind with uninitialized __sin6_src_id "
2208348SEric.Yu@Sun.COM 			    "(%d) on socket. Pid = %d\n",
2218348SEric.Yu@Sun.COM 			    (int)sin6->__sin6_src_id,
2228348SEric.Yu@Sun.COM 			    (int)curproc->p_pid);
2238348SEric.Yu@Sun.COM 		}
2248348SEric.Yu@Sun.COM #endif /* DEBUG */
2258348SEric.Yu@Sun.COM 
2268348SEric.Yu@Sun.COM 		break;
2278348SEric.Yu@Sun.COM 	}
2288348SEric.Yu@Sun.COM 	default:
2298348SEric.Yu@Sun.COM 		/* Just pass the request to the protocol */
2308348SEric.Yu@Sun.COM 		goto dobind;
2318348SEric.Yu@Sun.COM 	}
2328348SEric.Yu@Sun.COM 
2338348SEric.Yu@Sun.COM 	/*
2348348SEric.Yu@Sun.COM 	 * First we check if either NCA or KSSL has been enabled for
2358348SEric.Yu@Sun.COM 	 * the requested address, and if so, we fall back to TPI.
2368348SEric.Yu@Sun.COM 	 * If neither of those two services are enabled, then we just
2378348SEric.Yu@Sun.COM 	 * pass the request to the protocol.
2388348SEric.Yu@Sun.COM 	 *
2398348SEric.Yu@Sun.COM 	 * Note that KSSL can only be enabled on a socket if NCA is NOT
2408348SEric.Yu@Sun.COM 	 * enabled for that socket, hence the else-statement below.
2418348SEric.Yu@Sun.COM 	 */
2428348SEric.Yu@Sun.COM 	if (nl7c_enabled && ((so->so_family == AF_INET ||
2438348SEric.Yu@Sun.COM 	    so->so_family == AF_INET6) &&
2448348SEric.Yu@Sun.COM 	    nl7c_lookup_addr(name, namelen) != NULL)) {
2458348SEric.Yu@Sun.COM 		/*
2468348SEric.Yu@Sun.COM 		 * NL7C is not supported in non-global zones,
2478348SEric.Yu@Sun.COM 		 * we enforce this restriction here.
2488348SEric.Yu@Sun.COM 		 */
2498348SEric.Yu@Sun.COM 		if (so->so_zoneid == GLOBAL_ZONEID) {
2508348SEric.Yu@Sun.COM 			/* NCA should be used, so fall back to TPI */
2518348SEric.Yu@Sun.COM 			error = so_tpi_fallback(so, cr);
2528348SEric.Yu@Sun.COM 			SO_UNBLOCK_FALLBACK(so);
2538348SEric.Yu@Sun.COM 			if (error)
2548348SEric.Yu@Sun.COM 				return (error);
2558348SEric.Yu@Sun.COM 			else
2568348SEric.Yu@Sun.COM 				return (SOP_BIND(so, name, namelen, flags, cr));
2578348SEric.Yu@Sun.COM 		}
2588348SEric.Yu@Sun.COM 	} else if (so->so_type == SOCK_STREAM) {
2598348SEric.Yu@Sun.COM 		/* Check if KSSL has been configured for this address */
2608348SEric.Yu@Sun.COM 		kssl_ent_t ent;
2618348SEric.Yu@Sun.COM 		kssl_endpt_type_t type;
2628348SEric.Yu@Sun.COM 		struct T_bind_req bind_req;
2638348SEric.Yu@Sun.COM 		mblk_t *mp;
2648348SEric.Yu@Sun.COM 
2658348SEric.Yu@Sun.COM 		/*
2668348SEric.Yu@Sun.COM 		 * TODO: Check with KSSL team if we could add a function call
2678348SEric.Yu@Sun.COM 		 * that only queries whether KSSL is enabled for the given
2688348SEric.Yu@Sun.COM 		 * address.
2698348SEric.Yu@Sun.COM 		 */
2708348SEric.Yu@Sun.COM 		bind_req.PRIM_type = T_BIND_REQ;
2718348SEric.Yu@Sun.COM 		bind_req.ADDR_length = namelen;
2728348SEric.Yu@Sun.COM 		bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req);
2738348SEric.Yu@Sun.COM 		mp = soallocproto2(&bind_req, sizeof (bind_req),
2748778SErik.Nordmark@Sun.COM 		    name, namelen, 0, _ALLOC_SLEEP, cr);
2758348SEric.Yu@Sun.COM 
2768348SEric.Yu@Sun.COM 		type = kssl_check_proxy(mp, so, &ent);
2778348SEric.Yu@Sun.COM 		freemsg(mp);
2788348SEric.Yu@Sun.COM 
2798348SEric.Yu@Sun.COM 		if (type != KSSL_NO_PROXY) {
2808348SEric.Yu@Sun.COM 			/*
2818348SEric.Yu@Sun.COM 			 * KSSL has been configured for this address, so
2828348SEric.Yu@Sun.COM 			 * we must fall back to TPI.
2838348SEric.Yu@Sun.COM 			 */
2848348SEric.Yu@Sun.COM 			kssl_release_ent(ent, so, type);
2858348SEric.Yu@Sun.COM 			error = so_tpi_fallback(so, cr);
2868348SEric.Yu@Sun.COM 			SO_UNBLOCK_FALLBACK(so);
2878348SEric.Yu@Sun.COM 			if (error)
2888348SEric.Yu@Sun.COM 				return (error);
2898348SEric.Yu@Sun.COM 			else
2908348SEric.Yu@Sun.COM 				return (SOP_BIND(so, name, namelen, flags, cr));
2918348SEric.Yu@Sun.COM 		}
2928348SEric.Yu@Sun.COM 	}
2938348SEric.Yu@Sun.COM 
2948348SEric.Yu@Sun.COM dobind:
2958348SEric.Yu@Sun.COM 	error = (*so->so_downcalls->sd_bind)
2968348SEric.Yu@Sun.COM 	    (so->so_proto_handle, name, namelen, cr);
2978348SEric.Yu@Sun.COM done:
2988348SEric.Yu@Sun.COM 	SO_UNBLOCK_FALLBACK(so);
2998348SEric.Yu@Sun.COM 
3008348SEric.Yu@Sun.COM 	return (error);
3018348SEric.Yu@Sun.COM }
3028348SEric.Yu@Sun.COM 
3038348SEric.Yu@Sun.COM int
3048348SEric.Yu@Sun.COM so_listen(struct sonode *so, int backlog, struct cred *cr)
3058348SEric.Yu@Sun.COM {
3068348SEric.Yu@Sun.COM 	int	error = 0;
3078348SEric.Yu@Sun.COM 
3088348SEric.Yu@Sun.COM 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
3098348SEric.Yu@Sun.COM 	SO_BLOCK_FALLBACK(so, SOP_LISTEN(so, backlog, cr));
3108348SEric.Yu@Sun.COM 
3118348SEric.Yu@Sun.COM 	error = (*so->so_downcalls->sd_listen)(so->so_proto_handle, backlog,
3128348SEric.Yu@Sun.COM 	    cr);
3138348SEric.Yu@Sun.COM 
3148348SEric.Yu@Sun.COM 	SO_UNBLOCK_FALLBACK(so);
3158348SEric.Yu@Sun.COM 
3168348SEric.Yu@Sun.COM 	return (error);
3178348SEric.Yu@Sun.COM }
3188348SEric.Yu@Sun.COM 
3198348SEric.Yu@Sun.COM 
3208348SEric.Yu@Sun.COM int
3218348SEric.Yu@Sun.COM so_connect(struct sonode *so, const struct sockaddr *name,
3228348SEric.Yu@Sun.COM     socklen_t namelen, int fflag, int flags, struct cred *cr)
3238348SEric.Yu@Sun.COM {
3248348SEric.Yu@Sun.COM 	int error = 0;
3258348SEric.Yu@Sun.COM 	sock_connid_t id;
3268348SEric.Yu@Sun.COM 
3278348SEric.Yu@Sun.COM 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
3288348SEric.Yu@Sun.COM 	SO_BLOCK_FALLBACK(so, SOP_CONNECT(so, name, namelen, fflag, flags, cr));
3298348SEric.Yu@Sun.COM 
3308348SEric.Yu@Sun.COM 	/*
3318348SEric.Yu@Sun.COM 	 * If there is a pending error, return error
3328348SEric.Yu@Sun.COM 	 * This can happen if a non blocking operation caused an error.
3338348SEric.Yu@Sun.COM 	 */
3348348SEric.Yu@Sun.COM 
3358348SEric.Yu@Sun.COM 	if (so->so_error != 0) {
3368348SEric.Yu@Sun.COM 		mutex_enter(&so->so_lock);
3378348SEric.Yu@Sun.COM 		error = sogeterr(so, B_TRUE);
3388348SEric.Yu@Sun.COM 		mutex_exit(&so->so_lock);
3398348SEric.Yu@Sun.COM 		if (error != 0)
3408348SEric.Yu@Sun.COM 			goto done;
3418348SEric.Yu@Sun.COM 	}
3428348SEric.Yu@Sun.COM 
3438348SEric.Yu@Sun.COM 	error = (*so->so_downcalls->sd_connect)(so->so_proto_handle,
3448348SEric.Yu@Sun.COM 	    name, namelen, &id, cr);
3458348SEric.Yu@Sun.COM 
3468348SEric.Yu@Sun.COM 	if (error == EINPROGRESS)
3478348SEric.Yu@Sun.COM 		error = so_wait_connected(so, fflag & (FNONBLOCK|FNDELAY), id);
3488348SEric.Yu@Sun.COM 
3498348SEric.Yu@Sun.COM done:
3508348SEric.Yu@Sun.COM 	SO_UNBLOCK_FALLBACK(so);
3518348SEric.Yu@Sun.COM 	return (error);
3528348SEric.Yu@Sun.COM }
3538348SEric.Yu@Sun.COM 
3548348SEric.Yu@Sun.COM /*ARGSUSED*/
3558348SEric.Yu@Sun.COM int
3568348SEric.Yu@Sun.COM so_accept(struct sonode *so, int fflag, struct cred *cr, struct sonode **nsop)
3578348SEric.Yu@Sun.COM {
3588348SEric.Yu@Sun.COM 	int error = 0;
3598348SEric.Yu@Sun.COM 	struct sonode *nso;
3608348SEric.Yu@Sun.COM 
3618348SEric.Yu@Sun.COM 	*nsop = NULL;
3628348SEric.Yu@Sun.COM 
3638348SEric.Yu@Sun.COM 	SO_BLOCK_FALLBACK(so, SOP_ACCEPT(so, fflag, cr, nsop));
3648348SEric.Yu@Sun.COM 	if ((so->so_state & SS_ACCEPTCONN) == 0) {
3658348SEric.Yu@Sun.COM 		SO_UNBLOCK_FALLBACK(so);
3668348SEric.Yu@Sun.COM 		return ((so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) ?
3678348SEric.Yu@Sun.COM 		    EOPNOTSUPP : EINVAL);
3688348SEric.Yu@Sun.COM 	}
3698348SEric.Yu@Sun.COM 
3708348SEric.Yu@Sun.COM 	if ((error = so_acceptq_dequeue(so, (fflag & (FNONBLOCK|FNDELAY)),
3718348SEric.Yu@Sun.COM 	    &nso)) == 0) {
3728348SEric.Yu@Sun.COM 		ASSERT(nso != NULL);
3738348SEric.Yu@Sun.COM 
3748348SEric.Yu@Sun.COM 		/* finish the accept */
3758348SEric.Yu@Sun.COM 		error = (*so->so_downcalls->sd_accept)(so->so_proto_handle,
3768348SEric.Yu@Sun.COM 		    nso->so_proto_handle, (sock_upper_handle_t)nso, cr);
3778348SEric.Yu@Sun.COM 		if (error != 0) {
3788348SEric.Yu@Sun.COM 			(void) socket_close(nso, 0, cr);
3798348SEric.Yu@Sun.COM 			socket_destroy(nso);
3808348SEric.Yu@Sun.COM 		} else {
3818348SEric.Yu@Sun.COM 			*nsop = nso;
3828348SEric.Yu@Sun.COM 		}
3838348SEric.Yu@Sun.COM 	}
3848348SEric.Yu@Sun.COM 
3858348SEric.Yu@Sun.COM 	SO_UNBLOCK_FALLBACK(so);
3868348SEric.Yu@Sun.COM 	return (error);
3878348SEric.Yu@Sun.COM }
3888348SEric.Yu@Sun.COM 
3898348SEric.Yu@Sun.COM int
3908348SEric.Yu@Sun.COM so_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
3918348SEric.Yu@Sun.COM     struct cred *cr)
3928348SEric.Yu@Sun.COM {
3938348SEric.Yu@Sun.COM 	int error, flags;
3948348SEric.Yu@Sun.COM 	boolean_t dontblock;
3958348SEric.Yu@Sun.COM 	ssize_t orig_resid;
3968348SEric.Yu@Sun.COM 	mblk_t  *mp;
3978348SEric.Yu@Sun.COM 
3988348SEric.Yu@Sun.COM 	SO_BLOCK_FALLBACK(so, SOP_SENDMSG(so, msg, uiop, cr));
3998348SEric.Yu@Sun.COM 
4008348SEric.Yu@Sun.COM 	flags = msg->msg_flags;
4018348SEric.Yu@Sun.COM 	error = 0;
4028348SEric.Yu@Sun.COM 	dontblock = (flags & MSG_DONTWAIT) ||
4038348SEric.Yu@Sun.COM 	    (uiop->uio_fmode & (FNONBLOCK|FNDELAY));
4048348SEric.Yu@Sun.COM 
4058348SEric.Yu@Sun.COM 	if (!(flags & MSG_XPG4_2) && msg->msg_controllen != 0) {
4068348SEric.Yu@Sun.COM 		/*
4078348SEric.Yu@Sun.COM 		 * Old way of passing fd's is not supported
4088348SEric.Yu@Sun.COM 		 */
4098348SEric.Yu@Sun.COM 		SO_UNBLOCK_FALLBACK(so);
4108348SEric.Yu@Sun.COM 		return (EOPNOTSUPP);
4118348SEric.Yu@Sun.COM 	}
4128348SEric.Yu@Sun.COM 
4138348SEric.Yu@Sun.COM 	if ((so->so_mode & SM_ATOMIC) &&
4148348SEric.Yu@Sun.COM 	    uiop->uio_resid > so->so_proto_props.sopp_maxpsz &&
4158348SEric.Yu@Sun.COM 	    so->so_proto_props.sopp_maxpsz != -1) {
4168348SEric.Yu@Sun.COM 		SO_UNBLOCK_FALLBACK(so);
4178348SEric.Yu@Sun.COM 		return (EMSGSIZE);
4188348SEric.Yu@Sun.COM 	}
4198348SEric.Yu@Sun.COM 
4208348SEric.Yu@Sun.COM 	/*
4218348SEric.Yu@Sun.COM 	 * For atomic sends we will only do one iteration.
4228348SEric.Yu@Sun.COM 	 */
4238348SEric.Yu@Sun.COM 	do {
4248348SEric.Yu@Sun.COM 		if (so->so_state & SS_CANTSENDMORE) {
4258348SEric.Yu@Sun.COM 			error = EPIPE;
4268348SEric.Yu@Sun.COM 			break;
4278348SEric.Yu@Sun.COM 		}
4288348SEric.Yu@Sun.COM 
4298348SEric.Yu@Sun.COM 		if (so->so_error != 0) {
4308348SEric.Yu@Sun.COM 			mutex_enter(&so->so_lock);
4318348SEric.Yu@Sun.COM 			error = sogeterr(so, B_TRUE);
4328348SEric.Yu@Sun.COM 			mutex_exit(&so->so_lock);
4338348SEric.Yu@Sun.COM 			if (error != 0)
4348348SEric.Yu@Sun.COM 				break;
4358348SEric.Yu@Sun.COM 		}
4368348SEric.Yu@Sun.COM 
4378348SEric.Yu@Sun.COM 		/*
4388348SEric.Yu@Sun.COM 		 * Send down OOB messages even if the send path is being
4398348SEric.Yu@Sun.COM 		 * flow controlled (assuming the protocol supports OOB data).
4408348SEric.Yu@Sun.COM 		 */
4418348SEric.Yu@Sun.COM 		if (flags & MSG_OOB) {
4428348SEric.Yu@Sun.COM 			if ((so->so_mode & SM_EXDATA) == 0) {
4438348SEric.Yu@Sun.COM 				error = EOPNOTSUPP;
4448348SEric.Yu@Sun.COM 				break;
4458348SEric.Yu@Sun.COM 			}
4468348SEric.Yu@Sun.COM 		} else if (so->so_snd_qfull) {
4478348SEric.Yu@Sun.COM 			/*
4488348SEric.Yu@Sun.COM 			 * Need to wait until the protocol is ready to receive
4498348SEric.Yu@Sun.COM 			 * more data for transmission.
4508348SEric.Yu@Sun.COM 			 */
4518348SEric.Yu@Sun.COM 			if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
4528348SEric.Yu@Sun.COM 				break;
4538348SEric.Yu@Sun.COM 		}
4548348SEric.Yu@Sun.COM 
4558348SEric.Yu@Sun.COM 		/*
4568348SEric.Yu@Sun.COM 		 * Time to send data to the protocol. We either copy the
4578348SEric.Yu@Sun.COM 		 * data into mblks or pass the uio directly to the protocol.
4588348SEric.Yu@Sun.COM 		 * We decide what to do based on the available down calls.
4598348SEric.Yu@Sun.COM 		 */
4608348SEric.Yu@Sun.COM 		if (so->so_downcalls->sd_send_uio != NULL) {
4618348SEric.Yu@Sun.COM 			error = (*so->so_downcalls->sd_send_uio)
4628348SEric.Yu@Sun.COM 			    (so->so_proto_handle, uiop, msg, cr);
4638348SEric.Yu@Sun.COM 			if (error != 0)
4648348SEric.Yu@Sun.COM 				break;
4658348SEric.Yu@Sun.COM 		} else {
4668348SEric.Yu@Sun.COM 			/* save the resid in case of failure */
4678348SEric.Yu@Sun.COM 			orig_resid = uiop->uio_resid;
4688348SEric.Yu@Sun.COM 
4698348SEric.Yu@Sun.COM 			if ((mp = socopyinuio(uiop,
4708348SEric.Yu@Sun.COM 			    so->so_proto_props.sopp_maxpsz,
4718348SEric.Yu@Sun.COM 			    so->so_proto_props.sopp_wroff,
4728348SEric.Yu@Sun.COM 			    so->so_proto_props.sopp_maxblk,
473*11042SErik.Nordmark@Sun.COM 			    so->so_proto_props.sopp_tail, &error)) == NULL) {
4748348SEric.Yu@Sun.COM 				break;
4758348SEric.Yu@Sun.COM 			}
4768348SEric.Yu@Sun.COM 			ASSERT(uiop->uio_resid >= 0);
4778348SEric.Yu@Sun.COM 
4788348SEric.Yu@Sun.COM 			error = (*so->so_downcalls->sd_send)
4798348SEric.Yu@Sun.COM 			    (so->so_proto_handle, mp, msg, cr);
4808348SEric.Yu@Sun.COM 			if (error != 0) {
4818348SEric.Yu@Sun.COM 				/*
4828348SEric.Yu@Sun.COM 				 * The send failed. We do not have to free the
4838348SEric.Yu@Sun.COM 				 * mblks, because that is the protocol's
4848348SEric.Yu@Sun.COM 				 * responsibility. However, uio_resid must
4858348SEric.Yu@Sun.COM 				 * remain accurate, so adjust that here.
4868348SEric.Yu@Sun.COM 				 */
4878348SEric.Yu@Sun.COM 				uiop->uio_resid = orig_resid;
4888348SEric.Yu@Sun.COM 					break;
4898348SEric.Yu@Sun.COM 			}
4908348SEric.Yu@Sun.COM 		}
4918348SEric.Yu@Sun.COM 	} while (uiop->uio_resid > 0);
4928348SEric.Yu@Sun.COM 
4938348SEric.Yu@Sun.COM 	SO_UNBLOCK_FALLBACK(so);
4948348SEric.Yu@Sun.COM 
4958348SEric.Yu@Sun.COM 	return (error);
4968348SEric.Yu@Sun.COM }
4978348SEric.Yu@Sun.COM 
4988348SEric.Yu@Sun.COM int
4998348SEric.Yu@Sun.COM so_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag,
5008348SEric.Yu@Sun.COM     struct cred *cr, mblk_t **mpp)
5018348SEric.Yu@Sun.COM {
5028348SEric.Yu@Sun.COM 	int error;
5038348SEric.Yu@Sun.COM 	boolean_t dontblock;
5048348SEric.Yu@Sun.COM 	size_t size;
5058348SEric.Yu@Sun.COM 	mblk_t *mp = *mpp;
5068348SEric.Yu@Sun.COM 
5078348SEric.Yu@Sun.COM 	SO_BLOCK_FALLBACK(so, SOP_SENDMBLK(so, msg, fflag, cr, mpp));
5088348SEric.Yu@Sun.COM 
5098348SEric.Yu@Sun.COM 	error = 0;
5108348SEric.Yu@Sun.COM 	dontblock = (msg->msg_flags & MSG_DONTWAIT) ||
5118348SEric.Yu@Sun.COM 	    (fflag & (FNONBLOCK|FNDELAY));
5128348SEric.Yu@Sun.COM 	size = msgdsize(mp);
5138348SEric.Yu@Sun.COM 
5148401SAnders.Persson@Sun.COM 	if ((so->so_mode & SM_SENDFILESUPP) == 0 ||
5158401SAnders.Persson@Sun.COM 	    so->so_downcalls->sd_send == NULL) {
5168348SEric.Yu@Sun.COM 		SO_UNBLOCK_FALLBACK(so);
5178348SEric.Yu@Sun.COM 		return (EOPNOTSUPP);
5188348SEric.Yu@Sun.COM 	}
5198348SEric.Yu@Sun.COM 
5208348SEric.Yu@Sun.COM 	if ((so->so_mode & SM_ATOMIC) &&
5218348SEric.Yu@Sun.COM 	    size > so->so_proto_props.sopp_maxpsz &&
5228348SEric.Yu@Sun.COM 	    so->so_proto_props.sopp_maxpsz != -1) {
5238348SEric.Yu@Sun.COM 		SO_UNBLOCK_FALLBACK(so);
5248348SEric.Yu@Sun.COM 		return (EMSGSIZE);
5258348SEric.Yu@Sun.COM 	}
5268348SEric.Yu@Sun.COM 
5278348SEric.Yu@Sun.COM 	while (mp != NULL) {
5288348SEric.Yu@Sun.COM 		mblk_t *nmp, *last_mblk;
5298348SEric.Yu@Sun.COM 		size_t mlen;
5308348SEric.Yu@Sun.COM 
5318348SEric.Yu@Sun.COM 		if (so->so_state & SS_CANTSENDMORE) {
5328348SEric.Yu@Sun.COM 			error = EPIPE;
5338348SEric.Yu@Sun.COM 			break;
5348348SEric.Yu@Sun.COM 		}
5358348SEric.Yu@Sun.COM 		if (so->so_error != 0) {
5368348SEric.Yu@Sun.COM 			mutex_enter(&so->so_lock);
5378348SEric.Yu@Sun.COM 			error = sogeterr(so, B_TRUE);
5388348SEric.Yu@Sun.COM 			mutex_exit(&so->so_lock);
5398348SEric.Yu@Sun.COM 			if (error != 0)
5408348SEric.Yu@Sun.COM 				break;
5418348SEric.Yu@Sun.COM 		}
5428348SEric.Yu@Sun.COM 		if (so->so_snd_qfull) {
5438348SEric.Yu@Sun.COM 			/*
5448348SEric.Yu@Sun.COM 			 * Need to wait until the protocol is ready to receive
5458348SEric.Yu@Sun.COM 			 * more data for transmission.
5468348SEric.Yu@Sun.COM 			 */
5478348SEric.Yu@Sun.COM 			if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
5488348SEric.Yu@Sun.COM 				break;
5498348SEric.Yu@Sun.COM 		}
5508348SEric.Yu@Sun.COM 
5518348SEric.Yu@Sun.COM 		/*
5528348SEric.Yu@Sun.COM 		 * We only allow so_maxpsz of data to be sent down to
5538348SEric.Yu@Sun.COM 		 * the protocol at time.
5548348SEric.Yu@Sun.COM 		 */
5558348SEric.Yu@Sun.COM 		mlen = MBLKL(mp);
5568348SEric.Yu@Sun.COM 		nmp = mp->b_cont;
5578348SEric.Yu@Sun.COM 		last_mblk = mp;
5588348SEric.Yu@Sun.COM 		while (nmp != NULL) {
5598348SEric.Yu@Sun.COM 			mlen += MBLKL(nmp);
5608348SEric.Yu@Sun.COM 			if (mlen > so->so_proto_props.sopp_maxpsz) {
5618348SEric.Yu@Sun.COM 				last_mblk->b_cont = NULL;
5628348SEric.Yu@Sun.COM 				break;
5638348SEric.Yu@Sun.COM 			}
5648348SEric.Yu@Sun.COM 			last_mblk = nmp;
5658348SEric.Yu@Sun.COM 			nmp = nmp->b_cont;
5668348SEric.Yu@Sun.COM 		}
5678348SEric.Yu@Sun.COM 
5688348SEric.Yu@Sun.COM 		error = (*so->so_downcalls->sd_send)
5698348SEric.Yu@Sun.COM 		    (so->so_proto_handle, mp, msg, cr);
5708348SEric.Yu@Sun.COM 		if (error != 0) {
5718348SEric.Yu@Sun.COM 			/*
5728348SEric.Yu@Sun.COM 			 * The send failed. The protocol will free the mblks
5738348SEric.Yu@Sun.COM 			 * that were sent down. Let the caller deal with the
5748348SEric.Yu@Sun.COM 			 * rest.
5758348SEric.Yu@Sun.COM 			 */
5768348SEric.Yu@Sun.COM 			*mpp = nmp;
5778348SEric.Yu@Sun.COM 			break;
5788348SEric.Yu@Sun.COM 		}
5798348SEric.Yu@Sun.COM 
5808348SEric.Yu@Sun.COM 		*mpp = mp = nmp;
5818348SEric.Yu@Sun.COM 	}
5828348SEric.Yu@Sun.COM 
5838348SEric.Yu@Sun.COM 	SO_UNBLOCK_FALLBACK(so);
5848348SEric.Yu@Sun.COM 
5858348SEric.Yu@Sun.COM 	return (error);
5868348SEric.Yu@Sun.COM }
5878348SEric.Yu@Sun.COM 
5888348SEric.Yu@Sun.COM int
5898348SEric.Yu@Sun.COM so_shutdown(struct sonode *so, int how, struct cred *cr)
5908348SEric.Yu@Sun.COM {
5918348SEric.Yu@Sun.COM 	int error;
5928348SEric.Yu@Sun.COM 
5938348SEric.Yu@Sun.COM 	SO_BLOCK_FALLBACK(so, SOP_SHUTDOWN(so, how, cr));
5948348SEric.Yu@Sun.COM 
5958348SEric.Yu@Sun.COM 	/*
5968348SEric.Yu@Sun.COM 	 * SunOS 4.X has no check for datagram sockets.
5978348SEric.Yu@Sun.COM 	 * 5.X checks that it is connected (ENOTCONN)
5988348SEric.Yu@Sun.COM 	 * X/Open requires that we check the connected state.
5998348SEric.Yu@Sun.COM 	 */
6008348SEric.Yu@Sun.COM 	if (!(so->so_state & SS_ISCONNECTED)) {
6018348SEric.Yu@Sun.COM 		if (!xnet_skip_checks) {
6028348SEric.Yu@Sun.COM 			error = ENOTCONN;
6038348SEric.Yu@Sun.COM 			if (xnet_check_print) {
6048348SEric.Yu@Sun.COM 				printf("sockfs: X/Open shutdown check "
6058348SEric.Yu@Sun.COM 				    "caused ENOTCONN\n");
6068348SEric.Yu@Sun.COM 			}
6078348SEric.Yu@Sun.COM 		}
6088348SEric.Yu@Sun.COM 		goto done;
6098348SEric.Yu@Sun.COM 	}
6108348SEric.Yu@Sun.COM 
6118348SEric.Yu@Sun.COM 	error = ((*so->so_downcalls->sd_shutdown)(so->so_proto_handle,
6128348SEric.Yu@Sun.COM 	    how, cr));
6138348SEric.Yu@Sun.COM 
6148348SEric.Yu@Sun.COM 	/*
6158348SEric.Yu@Sun.COM 	 * Protocol agreed to shutdown. We need to flush the
6168348SEric.Yu@Sun.COM 	 * receive buffer if the receive side is being shutdown.
6178348SEric.Yu@Sun.COM 	 */
6188348SEric.Yu@Sun.COM 	if (error == 0 && how != SHUT_WR) {
6198348SEric.Yu@Sun.COM 		mutex_enter(&so->so_lock);
6208348SEric.Yu@Sun.COM 		/* wait for active reader to finish */
6218348SEric.Yu@Sun.COM 		(void) so_lock_read(so, 0);
6228348SEric.Yu@Sun.COM 
6238348SEric.Yu@Sun.COM 		so_rcv_flush(so);
6248348SEric.Yu@Sun.COM 
6258348SEric.Yu@Sun.COM 		so_unlock_read(so);
6268348SEric.Yu@Sun.COM 		mutex_exit(&so->so_lock);
6278348SEric.Yu@Sun.COM 	}
6288348SEric.Yu@Sun.COM 
6298348SEric.Yu@Sun.COM done:
6308348SEric.Yu@Sun.COM 	SO_UNBLOCK_FALLBACK(so);
6318348SEric.Yu@Sun.COM 	return (error);
6328348SEric.Yu@Sun.COM }
6338348SEric.Yu@Sun.COM 
6348348SEric.Yu@Sun.COM int
6358348SEric.Yu@Sun.COM so_getsockname(struct sonode *so, struct sockaddr *addr,
6368348SEric.Yu@Sun.COM     socklen_t *addrlen, struct cred *cr)
6378348SEric.Yu@Sun.COM {
6388348SEric.Yu@Sun.COM 	int error;
6398348SEric.Yu@Sun.COM 
6408348SEric.Yu@Sun.COM 	SO_BLOCK_FALLBACK(so, SOP_GETSOCKNAME(so, addr, addrlen, cr));
6418348SEric.Yu@Sun.COM 
6428348SEric.Yu@Sun.COM 	error = (*so->so_downcalls->sd_getsockname)
6438348SEric.Yu@Sun.COM 	    (so->so_proto_handle, addr, addrlen, cr);
6448348SEric.Yu@Sun.COM 
6458348SEric.Yu@Sun.COM 	SO_UNBLOCK_FALLBACK(so);
6468348SEric.Yu@Sun.COM 	return (error);
6478348SEric.Yu@Sun.COM }
6488348SEric.Yu@Sun.COM 
6498348SEric.Yu@Sun.COM int
6508348SEric.Yu@Sun.COM so_getpeername(struct sonode *so, struct sockaddr *addr,
6518348SEric.Yu@Sun.COM     socklen_t *addrlen, boolean_t accept, struct cred *cr)
6528348SEric.Yu@Sun.COM {
6538348SEric.Yu@Sun.COM 	int error;
6548348SEric.Yu@Sun.COM 
6558348SEric.Yu@Sun.COM 	SO_BLOCK_FALLBACK(so, SOP_GETPEERNAME(so, addr, addrlen, accept, cr));
6568348SEric.Yu@Sun.COM 
6578348SEric.Yu@Sun.COM 	if (accept) {
6588348SEric.Yu@Sun.COM 		error = (*so->so_downcalls->sd_getpeername)
6598348SEric.Yu@Sun.COM 		    (so->so_proto_handle, addr, addrlen, cr);
6608348SEric.Yu@Sun.COM 	} else if (!(so->so_state & SS_ISCONNECTED)) {
6618348SEric.Yu@Sun.COM 		error = ENOTCONN;
6628348SEric.Yu@Sun.COM 	} else if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
6638348SEric.Yu@Sun.COM 		/* Added this check for X/Open */
6648348SEric.Yu@Sun.COM 		error = EINVAL;
6658348SEric.Yu@Sun.COM 		if (xnet_check_print) {
6668348SEric.Yu@Sun.COM 			printf("sockfs: X/Open getpeername check => EINVAL\n");
6678348SEric.Yu@Sun.COM 		}
6688348SEric.Yu@Sun.COM 	} else {
6698348SEric.Yu@Sun.COM 		error = (*so->so_downcalls->sd_getpeername)
6708348SEric.Yu@Sun.COM 		    (so->so_proto_handle, addr, addrlen, cr);
6718348SEric.Yu@Sun.COM 	}
6728348SEric.Yu@Sun.COM 
6738348SEric.Yu@Sun.COM 	SO_UNBLOCK_FALLBACK(so);
6748348SEric.Yu@Sun.COM 	return (error);
6758348SEric.Yu@Sun.COM }
6768348SEric.Yu@Sun.COM 
6778348SEric.Yu@Sun.COM int
6788348SEric.Yu@Sun.COM so_getsockopt(struct sonode *so, int level, int option_name,
6798348SEric.Yu@Sun.COM     void *optval, socklen_t *optlenp, int flags, struct cred *cr)
6808348SEric.Yu@Sun.COM {
6818348SEric.Yu@Sun.COM 	int error = 0;
6828348SEric.Yu@Sun.COM 
6838348SEric.Yu@Sun.COM 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
6848348SEric.Yu@Sun.COM 	SO_BLOCK_FALLBACK(so,
6858348SEric.Yu@Sun.COM 	    SOP_GETSOCKOPT(so, level, option_name, optval, optlenp, flags, cr));
6868348SEric.Yu@Sun.COM 
6878465SEric.Yu@Sun.COM 	error = socket_getopt_common(so, level, option_name, optval, optlenp,
6888465SEric.Yu@Sun.COM 	    flags);
6898348SEric.Yu@Sun.COM 	if (error < 0) {
6908348SEric.Yu@Sun.COM 		error = (*so->so_downcalls->sd_getsockopt)
6918348SEric.Yu@Sun.COM 		    (so->so_proto_handle, level, option_name, optval, optlenp,
6928348SEric.Yu@Sun.COM 		    cr);
6938348SEric.Yu@Sun.COM 		if (error ==  ENOPROTOOPT) {
6948348SEric.Yu@Sun.COM 			if (level == SOL_SOCKET) {
6958348SEric.Yu@Sun.COM 				/*
6968348SEric.Yu@Sun.COM 				 * If a protocol does not support a particular
6978348SEric.Yu@Sun.COM 				 * socket option, set can fail (not allowed)
6988348SEric.Yu@Sun.COM 				 * but get can not fail. This is the previous
6998348SEric.Yu@Sun.COM 				 * sockfs bahvior.
7008348SEric.Yu@Sun.COM 				 */
7018348SEric.Yu@Sun.COM 				switch (option_name) {
7028348SEric.Yu@Sun.COM 				case SO_LINGER:
7038348SEric.Yu@Sun.COM 					if (*optlenp < (t_uscalar_t)
7048348SEric.Yu@Sun.COM 					    sizeof (struct linger)) {
7058348SEric.Yu@Sun.COM 						error = EINVAL;
7068348SEric.Yu@Sun.COM 						break;
7078348SEric.Yu@Sun.COM 					}
7088348SEric.Yu@Sun.COM 					error = 0;
7098348SEric.Yu@Sun.COM 					bzero(optval, sizeof (struct linger));
7108348SEric.Yu@Sun.COM 					*optlenp = sizeof (struct linger);
7118348SEric.Yu@Sun.COM 					break;
7128348SEric.Yu@Sun.COM 				case SO_RCVTIMEO:
7138348SEric.Yu@Sun.COM 				case SO_SNDTIMEO:
7148348SEric.Yu@Sun.COM 					if (*optlenp < (t_uscalar_t)
7158348SEric.Yu@Sun.COM 					    sizeof (struct timeval)) {
7168348SEric.Yu@Sun.COM 						error = EINVAL;
7178348SEric.Yu@Sun.COM 						break;
7188348SEric.Yu@Sun.COM 					}
7198348SEric.Yu@Sun.COM 					error = 0;
7208348SEric.Yu@Sun.COM 					bzero(optval, sizeof (struct timeval));
7218348SEric.Yu@Sun.COM 					*optlenp = sizeof (struct timeval);
7228348SEric.Yu@Sun.COM 					break;
7238348SEric.Yu@Sun.COM 				case SO_SND_BUFINFO:
7248348SEric.Yu@Sun.COM 					if (*optlenp < (t_uscalar_t)
7258348SEric.Yu@Sun.COM 					    sizeof (struct so_snd_bufinfo)) {
7268348SEric.Yu@Sun.COM 						error = EINVAL;
7278348SEric.Yu@Sun.COM 						break;
7288348SEric.Yu@Sun.COM 					}
7298348SEric.Yu@Sun.COM 					error = 0;
7308348SEric.Yu@Sun.COM 					bzero(optval,
7318348SEric.Yu@Sun.COM 					    sizeof (struct so_snd_bufinfo));
7328348SEric.Yu@Sun.COM 					*optlenp =
7338348SEric.Yu@Sun.COM 					    sizeof (struct so_snd_bufinfo);
7348348SEric.Yu@Sun.COM 					break;
7358348SEric.Yu@Sun.COM 				case SO_DEBUG:
7368348SEric.Yu@Sun.COM 				case SO_REUSEADDR:
7378348SEric.Yu@Sun.COM 				case SO_KEEPALIVE:
7388348SEric.Yu@Sun.COM 				case SO_DONTROUTE:
7398348SEric.Yu@Sun.COM 				case SO_BROADCAST:
7408348SEric.Yu@Sun.COM 				case SO_USELOOPBACK:
7418348SEric.Yu@Sun.COM 				case SO_OOBINLINE:
7428348SEric.Yu@Sun.COM 				case SO_DGRAM_ERRIND:
7438348SEric.Yu@Sun.COM 				case SO_SNDBUF:
7448348SEric.Yu@Sun.COM 				case SO_RCVBUF:
7458348SEric.Yu@Sun.COM 					error = 0;
7468348SEric.Yu@Sun.COM 					*((int32_t *)optval) = 0;
7478348SEric.Yu@Sun.COM 					*optlenp = sizeof (int32_t);
7488348SEric.Yu@Sun.COM 					break;
7498348SEric.Yu@Sun.COM 				default:
7508348SEric.Yu@Sun.COM 					break;
7518348SEric.Yu@Sun.COM 				}
7528348SEric.Yu@Sun.COM 			}
7538348SEric.Yu@Sun.COM 		}
7548348SEric.Yu@Sun.COM 	}
7558348SEric.Yu@Sun.COM 
7568348SEric.Yu@Sun.COM 	SO_UNBLOCK_FALLBACK(so);
7578348SEric.Yu@Sun.COM 	return (error);
7588348SEric.Yu@Sun.COM }
7598348SEric.Yu@Sun.COM 
7608348SEric.Yu@Sun.COM int
7618348SEric.Yu@Sun.COM so_setsockopt(struct sonode *so, int level, int option_name,
7628348SEric.Yu@Sun.COM     const void *optval, socklen_t optlen, struct cred *cr)
7638348SEric.Yu@Sun.COM {
7648348SEric.Yu@Sun.COM 	int error = 0;
7658612SAnders.Persson@Sun.COM 	struct timeval tl;
7668612SAnders.Persson@Sun.COM 	const void *opt = optval;
7678348SEric.Yu@Sun.COM 
7688348SEric.Yu@Sun.COM 	SO_BLOCK_FALLBACK(so,
7698348SEric.Yu@Sun.COM 	    SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr));
7708348SEric.Yu@Sun.COM 
7718348SEric.Yu@Sun.COM 	/* X/Open requires this check */
7728348SEric.Yu@Sun.COM 	if (so->so_state & SS_CANTSENDMORE && !xnet_skip_checks) {
7738348SEric.Yu@Sun.COM 		SO_UNBLOCK_FALLBACK(so);
7748348SEric.Yu@Sun.COM 		if (xnet_check_print)
7758348SEric.Yu@Sun.COM 			printf("sockfs: X/Open setsockopt check => EINVAL\n");
7768348SEric.Yu@Sun.COM 		return (EINVAL);
7778348SEric.Yu@Sun.COM 	}
7788348SEric.Yu@Sun.COM 
7798465SEric.Yu@Sun.COM 	if (level == SOL_SOCKET) {
7808465SEric.Yu@Sun.COM 		switch (option_name) {
7818465SEric.Yu@Sun.COM 		case SO_RCVTIMEO:
7828465SEric.Yu@Sun.COM 		case SO_SNDTIMEO: {
7838586Sshenjian 			/*
7848586Sshenjian 			 * We pass down these two options to protocol in order
7858586Sshenjian 			 * to support some third part protocols which need to
7868586Sshenjian 			 * know them. For those protocols which don't care
7878586Sshenjian 			 * these two options, simply return 0.
7888586Sshenjian 			 */
7898465SEric.Yu@Sun.COM 			clock_t t_usec;
7908348SEric.Yu@Sun.COM 
7918575Sshenjian 			if (get_udatamodel() == DATAMODEL_NONE ||
7928575Sshenjian 			    get_udatamodel() == DATAMODEL_NATIVE) {
7938489Sshenjian 				if (optlen != sizeof (struct timeval)) {
7948489Sshenjian 					error = EINVAL;
7958489Sshenjian 					goto done;
7968489Sshenjian 				}
7978489Sshenjian 				bcopy((struct timeval *)optval, &tl,
7988489Sshenjian 				    sizeof (struct timeval));
7998489Sshenjian 			} else {
8008489Sshenjian 				if (optlen != sizeof (struct timeval32)) {
8018489Sshenjian 					error = EINVAL;
8028489Sshenjian 					goto done;
8038489Sshenjian 				}
8048489Sshenjian 				TIMEVAL32_TO_TIMEVAL(&tl,
8058489Sshenjian 				    (struct timeval32 *)optval);
8068465SEric.Yu@Sun.COM 			}
8078612SAnders.Persson@Sun.COM 			opt = &tl;
8088612SAnders.Persson@Sun.COM 			optlen = sizeof (tl);
8098489Sshenjian 			t_usec = tl.tv_sec * 1000 * 1000 + tl.tv_usec;
8108465SEric.Yu@Sun.COM 			mutex_enter(&so->so_lock);
8118465SEric.Yu@Sun.COM 			if (option_name == SO_RCVTIMEO)
8128465SEric.Yu@Sun.COM 				so->so_rcvtimeo = drv_usectohz(t_usec);
8138465SEric.Yu@Sun.COM 			else
8148465SEric.Yu@Sun.COM 				so->so_sndtimeo = drv_usectohz(t_usec);
8158465SEric.Yu@Sun.COM 			mutex_exit(&so->so_lock);
8168586Sshenjian 			break;
8178348SEric.Yu@Sun.COM 		}
8188465SEric.Yu@Sun.COM 		case SO_RCVBUF:
8198465SEric.Yu@Sun.COM 			/*
8208465SEric.Yu@Sun.COM 			 * XXX XPG 4.2 applications retrieve SO_RCVBUF from
8218465SEric.Yu@Sun.COM 			 * sockfs since the transport might adjust the value
8228465SEric.Yu@Sun.COM 			 * and not return exactly what was set by the
8238465SEric.Yu@Sun.COM 			 * application.
8248465SEric.Yu@Sun.COM 			 */
8258465SEric.Yu@Sun.COM 			so->so_xpg_rcvbuf = *(int32_t *)optval;
8268465SEric.Yu@Sun.COM 			break;
8278465SEric.Yu@Sun.COM 		}
8288348SEric.Yu@Sun.COM 	}
8298348SEric.Yu@Sun.COM 	error = (*so->so_downcalls->sd_setsockopt)
8308612SAnders.Persson@Sun.COM 	    (so->so_proto_handle, level, option_name, opt, optlen, cr);
8318489Sshenjian done:
8328348SEric.Yu@Sun.COM 	SO_UNBLOCK_FALLBACK(so);
8338348SEric.Yu@Sun.COM 	return (error);
8348348SEric.Yu@Sun.COM }
8358348SEric.Yu@Sun.COM 
8368348SEric.Yu@Sun.COM int
8378348SEric.Yu@Sun.COM so_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
8388348SEric.Yu@Sun.COM     struct cred *cr, int32_t *rvalp)
8398348SEric.Yu@Sun.COM {
8408348SEric.Yu@Sun.COM 	int error = 0;
8418348SEric.Yu@Sun.COM 
8428348SEric.Yu@Sun.COM 	SO_BLOCK_FALLBACK(so, SOP_IOCTL(so, cmd, arg, mode, cr, rvalp));
8438348SEric.Yu@Sun.COM 
8448348SEric.Yu@Sun.COM 	/*
8458348SEric.Yu@Sun.COM 	 * If there is a pending error, return error
8468348SEric.Yu@Sun.COM 	 * This can happen if a non blocking operation caused an error.
8478348SEric.Yu@Sun.COM 	 */
8488348SEric.Yu@Sun.COM 	if (so->so_error != 0) {
8498348SEric.Yu@Sun.COM 		mutex_enter(&so->so_lock);
8508348SEric.Yu@Sun.COM 		error = sogeterr(so, B_TRUE);
8518348SEric.Yu@Sun.COM 		mutex_exit(&so->so_lock);
8528348SEric.Yu@Sun.COM 		if (error != 0)
8538348SEric.Yu@Sun.COM 			goto done;
8548348SEric.Yu@Sun.COM 	}
8558348SEric.Yu@Sun.COM 
8568348SEric.Yu@Sun.COM 	/*
8578348SEric.Yu@Sun.COM 	 * calling strioc can result in the socket falling back to TPI,
8588348SEric.Yu@Sun.COM 	 * if that is supported.
8598348SEric.Yu@Sun.COM 	 */
8608348SEric.Yu@Sun.COM 	if ((error = socket_ioctl_common(so, cmd, arg, mode, cr, rvalp)) < 0 &&
8618348SEric.Yu@Sun.COM 	    (error = socket_strioc_common(so, cmd, arg, mode, cr, rvalp)) < 0) {
8628348SEric.Yu@Sun.COM 		error = (*so->so_downcalls->sd_ioctl)(so->so_proto_handle,
8638348SEric.Yu@Sun.COM 		    cmd, arg, mode, rvalp, cr);
8648348SEric.Yu@Sun.COM 	}
8658348SEric.Yu@Sun.COM 
8668348SEric.Yu@Sun.COM done:
8678348SEric.Yu@Sun.COM 	SO_UNBLOCK_FALLBACK(so);
8688348SEric.Yu@Sun.COM 
8698348SEric.Yu@Sun.COM 	return (error);
8708348SEric.Yu@Sun.COM }
8718348SEric.Yu@Sun.COM 
8728348SEric.Yu@Sun.COM int
8738348SEric.Yu@Sun.COM so_poll(struct sonode *so, short events, int anyyet, short *reventsp,
8748348SEric.Yu@Sun.COM     struct pollhead **phpp)
8758348SEric.Yu@Sun.COM {
8768348SEric.Yu@Sun.COM 	int state = so->so_state;
8778348SEric.Yu@Sun.COM 	*reventsp = 0;
8788348SEric.Yu@Sun.COM 
8798861SRao.Shoaib@Sun.COM 	/*
8808861SRao.Shoaib@Sun.COM 	 * In sockets the errors are represented as input/output events
8818861SRao.Shoaib@Sun.COM 	 */
8828348SEric.Yu@Sun.COM 	if (so->so_error != 0 &&
8838861SRao.Shoaib@Sun.COM 	    ((POLLIN|POLLRDNORM|POLLOUT) & events) != 0) {
8848348SEric.Yu@Sun.COM 		*reventsp = (POLLIN|POLLRDNORM|POLLOUT) & events;
8858348SEric.Yu@Sun.COM 		return (0);
8868348SEric.Yu@Sun.COM 	}
8878348SEric.Yu@Sun.COM 
8888348SEric.Yu@Sun.COM 	/*
8898861SRao.Shoaib@Sun.COM 	 * If the socket is in a state where it can send data
8908861SRao.Shoaib@Sun.COM 	 * turn on POLLWRBAND and POLLOUT events.
8918348SEric.Yu@Sun.COM 	 */
8928861SRao.Shoaib@Sun.COM 	if ((so->so_mode & SM_CONNREQUIRED) == 0 || (state & SS_ISCONNECTED)) {
8938861SRao.Shoaib@Sun.COM 		/*
8948861SRao.Shoaib@Sun.COM 		 * out of band data is allowed even if the connection
8958861SRao.Shoaib@Sun.COM 		 * is flow controlled
8968861SRao.Shoaib@Sun.COM 		 */
8978861SRao.Shoaib@Sun.COM 		*reventsp |= POLLWRBAND & events;
8988861SRao.Shoaib@Sun.COM 		if (!so->so_snd_qfull) {
8998861SRao.Shoaib@Sun.COM 			/*
9008861SRao.Shoaib@Sun.COM 			 * As long as there is buffer to send data
9018861SRao.Shoaib@Sun.COM 			 * turn on POLLOUT events
9028861SRao.Shoaib@Sun.COM 			 */
9038861SRao.Shoaib@Sun.COM 			*reventsp |= POLLOUT & events;
9048861SRao.Shoaib@Sun.COM 		}
9058348SEric.Yu@Sun.COM 	}
9068348SEric.Yu@Sun.COM 
9078348SEric.Yu@Sun.COM 	/*
9088348SEric.Yu@Sun.COM 	 * Turn on POLLIN whenever there is data on the receive queue,
9098348SEric.Yu@Sun.COM 	 * or the socket is in a state where no more data will be received.
9108348SEric.Yu@Sun.COM 	 * Also, if the socket is accepting connections, flip the bit if
9118348SEric.Yu@Sun.COM 	 * there is something on the queue.
9128427SAnders.Persson@Sun.COM 	 *
9138427SAnders.Persson@Sun.COM 	 * We do an initial check for events without holding locks. However,
9148427SAnders.Persson@Sun.COM 	 * if there are no event available, then we redo the check for POLLIN
9158427SAnders.Persson@Sun.COM 	 * events under the lock.
9168348SEric.Yu@Sun.COM 	 */
9178348SEric.Yu@Sun.COM 
9188348SEric.Yu@Sun.COM 	/* Pending connections */
9198348SEric.Yu@Sun.COM 	if (so->so_acceptq_len > 0)
9208348SEric.Yu@Sun.COM 		*reventsp |= (POLLIN|POLLRDNORM) & events;
9218348SEric.Yu@Sun.COM 
9228348SEric.Yu@Sun.COM 	/* Data */
9238348SEric.Yu@Sun.COM 	/* so_downcalls is null for sctp */
9248348SEric.Yu@Sun.COM 	if (so->so_downcalls != NULL && so->so_downcalls->sd_poll != NULL) {
9258348SEric.Yu@Sun.COM 		*reventsp |= (*so->so_downcalls->sd_poll)
9268348SEric.Yu@Sun.COM 		    (so->so_proto_handle, events & SO_PROTO_POLLEV, anyyet,
9278348SEric.Yu@Sun.COM 		    CRED()) & events;
9288348SEric.Yu@Sun.COM 		ASSERT((*reventsp & ~events) == 0);
9298348SEric.Yu@Sun.COM 		/* do not recheck events */
9308348SEric.Yu@Sun.COM 		events &= ~SO_PROTO_POLLEV;
9318348SEric.Yu@Sun.COM 	} else {
9328348SEric.Yu@Sun.COM 		if (SO_HAVE_DATA(so))
9338348SEric.Yu@Sun.COM 			*reventsp |= (POLLIN|POLLRDNORM) & events;
9348348SEric.Yu@Sun.COM 
9358348SEric.Yu@Sun.COM 		/* Urgent data */
9368861SRao.Shoaib@Sun.COM 		if ((state & SS_OOBPEND) != 0) {
9378861SRao.Shoaib@Sun.COM 			*reventsp |= (POLLRDBAND | POLLPRI) & events;
9388861SRao.Shoaib@Sun.COM 		}
9398348SEric.Yu@Sun.COM 	}
9408348SEric.Yu@Sun.COM 
9418348SEric.Yu@Sun.COM 	if (!*reventsp && !anyyet) {
9428348SEric.Yu@Sun.COM 		/* Check for read events again, but this time under lock */
9438348SEric.Yu@Sun.COM 		if (events & (POLLIN|POLLRDNORM)) {
9448348SEric.Yu@Sun.COM 			mutex_enter(&so->so_lock);
9458348SEric.Yu@Sun.COM 			if (SO_HAVE_DATA(so) || so->so_acceptq_len > 0) {
9468348SEric.Yu@Sun.COM 				mutex_exit(&so->so_lock);
9478348SEric.Yu@Sun.COM 				*reventsp |= (POLLIN|POLLRDNORM) & events;
9488348SEric.Yu@Sun.COM 				return (0);
9498348SEric.Yu@Sun.COM 			} else {
9508348SEric.Yu@Sun.COM 				so->so_pollev |= SO_POLLEV_IN;
9518348SEric.Yu@Sun.COM 				mutex_exit(&so->so_lock);
9528348SEric.Yu@Sun.COM 			}
9538348SEric.Yu@Sun.COM 		}
9548348SEric.Yu@Sun.COM 		*phpp = &so->so_poll_list;
9558348SEric.Yu@Sun.COM 	}
9568348SEric.Yu@Sun.COM 	return (0);
9578348SEric.Yu@Sun.COM }
9588348SEric.Yu@Sun.COM 
9598348SEric.Yu@Sun.COM /*
9608348SEric.Yu@Sun.COM  * Generic Upcalls
9618348SEric.Yu@Sun.COM  */
9628348SEric.Yu@Sun.COM void
9638348SEric.Yu@Sun.COM so_connected(sock_upper_handle_t sock_handle, sock_connid_t id,
9648348SEric.Yu@Sun.COM     cred_t *peer_cred, pid_t peer_cpid)
9658348SEric.Yu@Sun.COM {
9668348SEric.Yu@Sun.COM 	struct sonode *so = (struct sonode *)sock_handle;
9678348SEric.Yu@Sun.COM 
9688348SEric.Yu@Sun.COM 	mutex_enter(&so->so_lock);
9698348SEric.Yu@Sun.COM 	ASSERT(so->so_proto_handle != NULL);
9708348SEric.Yu@Sun.COM 
9718348SEric.Yu@Sun.COM 	if (peer_cred != NULL) {
9728348SEric.Yu@Sun.COM 		if (so->so_peercred != NULL)
9738348SEric.Yu@Sun.COM 			crfree(so->so_peercred);
9748348SEric.Yu@Sun.COM 		crhold(peer_cred);
9758348SEric.Yu@Sun.COM 		so->so_peercred = peer_cred;
9768348SEric.Yu@Sun.COM 		so->so_cpid = peer_cpid;
9778348SEric.Yu@Sun.COM 	}
9788348SEric.Yu@Sun.COM 
9798348SEric.Yu@Sun.COM 	so->so_proto_connid = id;
9808348SEric.Yu@Sun.COM 	soisconnected(so);
9818348SEric.Yu@Sun.COM 	/*
9828348SEric.Yu@Sun.COM 	 * Wake ones who're waiting for conn to become established.
9838348SEric.Yu@Sun.COM 	 */
9848348SEric.Yu@Sun.COM 	so_notify_connected(so);
9858348SEric.Yu@Sun.COM }
9868348SEric.Yu@Sun.COM 
9878348SEric.Yu@Sun.COM int
9888348SEric.Yu@Sun.COM so_disconnected(sock_upper_handle_t sock_handle, sock_connid_t id, int error)
9898348SEric.Yu@Sun.COM {
9908348SEric.Yu@Sun.COM 	struct sonode *so = (struct sonode *)sock_handle;
9918348SEric.Yu@Sun.COM 
9928348SEric.Yu@Sun.COM 	mutex_enter(&so->so_lock);
9938348SEric.Yu@Sun.COM 
9948348SEric.Yu@Sun.COM 	so->so_proto_connid = id;
9958348SEric.Yu@Sun.COM 	soisdisconnected(so, error);
9968348SEric.Yu@Sun.COM 	so_notify_disconnected(so, error);
9978348SEric.Yu@Sun.COM 
9988348SEric.Yu@Sun.COM 	return (0);
9998348SEric.Yu@Sun.COM }
10008348SEric.Yu@Sun.COM 
10018348SEric.Yu@Sun.COM void
10028348SEric.Yu@Sun.COM so_opctl(sock_upper_handle_t sock_handle, sock_opctl_action_t action,
10038348SEric.Yu@Sun.COM     uintptr_t arg)
10048348SEric.Yu@Sun.COM {
10058348SEric.Yu@Sun.COM 	struct sonode *so = (struct sonode *)sock_handle;
10068348SEric.Yu@Sun.COM 
10078348SEric.Yu@Sun.COM 	switch (action) {
10088348SEric.Yu@Sun.COM 	case SOCK_OPCTL_SHUT_SEND:
10098348SEric.Yu@Sun.COM 		mutex_enter(&so->so_lock);
10108348SEric.Yu@Sun.COM 		socantsendmore(so);
10118348SEric.Yu@Sun.COM 		so_notify_disconnecting(so);
10128348SEric.Yu@Sun.COM 		break;
10138348SEric.Yu@Sun.COM 	case SOCK_OPCTL_SHUT_RECV: {
10148348SEric.Yu@Sun.COM 		mutex_enter(&so->so_lock);
10158348SEric.Yu@Sun.COM 		socantrcvmore(so);
10168348SEric.Yu@Sun.COM 		so_notify_eof(so);
10178348SEric.Yu@Sun.COM 		break;
10188348SEric.Yu@Sun.COM 	}
10198348SEric.Yu@Sun.COM 	case SOCK_OPCTL_ENAB_ACCEPT:
10208348SEric.Yu@Sun.COM 		mutex_enter(&so->so_lock);
10218348SEric.Yu@Sun.COM 		so->so_state |= SS_ACCEPTCONN;
10228348SEric.Yu@Sun.COM 		so->so_backlog = (unsigned int)arg;
10238348SEric.Yu@Sun.COM 		mutex_exit(&so->so_lock);
10248348SEric.Yu@Sun.COM 		break;
10258348SEric.Yu@Sun.COM 	default:
10268348SEric.Yu@Sun.COM 		ASSERT(0);
10278348SEric.Yu@Sun.COM 		break;
10288348SEric.Yu@Sun.COM 	}
10298348SEric.Yu@Sun.COM }
10308348SEric.Yu@Sun.COM 
10318348SEric.Yu@Sun.COM void
10328348SEric.Yu@Sun.COM so_txq_full(sock_upper_handle_t sock_handle, boolean_t qfull)
10338348SEric.Yu@Sun.COM {
10348348SEric.Yu@Sun.COM 	struct sonode *so = (struct sonode *)sock_handle;
10358348SEric.Yu@Sun.COM 
10368348SEric.Yu@Sun.COM 	if (qfull) {
10378348SEric.Yu@Sun.COM 		so_snd_qfull(so);
10388348SEric.Yu@Sun.COM 	} else {
10398348SEric.Yu@Sun.COM 		so_snd_qnotfull(so);
10408348SEric.Yu@Sun.COM 		mutex_enter(&so->so_lock);
10418348SEric.Yu@Sun.COM 		so_notify_writable(so);
10428348SEric.Yu@Sun.COM 	}
10438348SEric.Yu@Sun.COM }
10448348SEric.Yu@Sun.COM 
10458348SEric.Yu@Sun.COM sock_upper_handle_t
10468348SEric.Yu@Sun.COM so_newconn(sock_upper_handle_t parenthandle,
10478348SEric.Yu@Sun.COM     sock_lower_handle_t proto_handle, sock_downcalls_t *sock_downcalls,
10488348SEric.Yu@Sun.COM     struct cred *peer_cred, pid_t peer_cpid, sock_upcalls_t **sock_upcallsp)
10498348SEric.Yu@Sun.COM {
10508348SEric.Yu@Sun.COM 	struct sonode	*so = (struct sonode *)parenthandle;
10518348SEric.Yu@Sun.COM 	struct sonode	*nso;
10528348SEric.Yu@Sun.COM 	int error;
10538348SEric.Yu@Sun.COM 
10548348SEric.Yu@Sun.COM 	ASSERT(proto_handle != NULL);
10558348SEric.Yu@Sun.COM 
10568348SEric.Yu@Sun.COM 	if ((so->so_state & SS_ACCEPTCONN) == 0 ||
10578348SEric.Yu@Sun.COM 	    so->so_acceptq_len >= so->so_backlog)
10588348SEric.Yu@Sun.COM 		return (NULL);
10598348SEric.Yu@Sun.COM 
10608348SEric.Yu@Sun.COM 	nso = socket_newconn(so, proto_handle, sock_downcalls, SOCKET_NOSLEEP,
10618348SEric.Yu@Sun.COM 	    &error);
10628348SEric.Yu@Sun.COM 	if (nso == NULL)
10638348SEric.Yu@Sun.COM 		return (NULL);
10648348SEric.Yu@Sun.COM 
10658348SEric.Yu@Sun.COM 	if (peer_cred != NULL) {
10668348SEric.Yu@Sun.COM 		crhold(peer_cred);
10678348SEric.Yu@Sun.COM 		nso->so_peercred = peer_cred;
10688348SEric.Yu@Sun.COM 		nso->so_cpid = peer_cpid;
10698348SEric.Yu@Sun.COM 	}
10708348SEric.Yu@Sun.COM 
10718820SAnders.Persson@Sun.COM 	/*
10728820SAnders.Persson@Sun.COM 	 * The new socket (nso), proto_handle and sock_upcallsp are all
10738820SAnders.Persson@Sun.COM 	 * valid at this point. But as soon as nso is placed in the accept
10748820SAnders.Persson@Sun.COM 	 * queue that can no longer be assumed (since an accept() thread may
10758820SAnders.Persson@Sun.COM 	 * pull it off the queue and close the socket).
10768820SAnders.Persson@Sun.COM 	 */
10778820SAnders.Persson@Sun.COM 	*sock_upcallsp = &so_upcalls;
10788820SAnders.Persson@Sun.COM 
10798348SEric.Yu@Sun.COM 	(void) so_acceptq_enqueue(so, nso);
10808820SAnders.Persson@Sun.COM 
10818348SEric.Yu@Sun.COM 	mutex_enter(&so->so_lock);
10828348SEric.Yu@Sun.COM 	so_notify_newconn(so);
10838348SEric.Yu@Sun.COM 
10848348SEric.Yu@Sun.COM 	return ((sock_upper_handle_t)nso);
10858348SEric.Yu@Sun.COM }
10868348SEric.Yu@Sun.COM 
10878348SEric.Yu@Sun.COM void
10888348SEric.Yu@Sun.COM so_set_prop(sock_upper_handle_t sock_handle, struct sock_proto_props *soppp)
10898348SEric.Yu@Sun.COM {
10908348SEric.Yu@Sun.COM 	struct sonode *so;
10918348SEric.Yu@Sun.COM 
10928348SEric.Yu@Sun.COM 	so = (struct sonode *)sock_handle;
10938348SEric.Yu@Sun.COM 
10948348SEric.Yu@Sun.COM 	mutex_enter(&so->so_lock);
10958348SEric.Yu@Sun.COM 
10968348SEric.Yu@Sun.COM 	if (soppp->sopp_flags & SOCKOPT_MAXBLK)
10978348SEric.Yu@Sun.COM 		so->so_proto_props.sopp_maxblk = soppp->sopp_maxblk;
10988348SEric.Yu@Sun.COM 	if (soppp->sopp_flags & SOCKOPT_WROFF)
10998348SEric.Yu@Sun.COM 		so->so_proto_props.sopp_wroff = soppp->sopp_wroff;
11008348SEric.Yu@Sun.COM 	if (soppp->sopp_flags & SOCKOPT_TAIL)
11018348SEric.Yu@Sun.COM 		so->so_proto_props.sopp_tail = soppp->sopp_tail;
11028348SEric.Yu@Sun.COM 	if (soppp->sopp_flags & SOCKOPT_RCVHIWAT)
11038348SEric.Yu@Sun.COM 		so->so_proto_props.sopp_rxhiwat = soppp->sopp_rxhiwat;
11048348SEric.Yu@Sun.COM 	if (soppp->sopp_flags & SOCKOPT_RCVLOWAT)
11058348SEric.Yu@Sun.COM 		so->so_proto_props.sopp_rxlowat = soppp->sopp_rxlowat;
11068348SEric.Yu@Sun.COM 	if (soppp->sopp_flags & SOCKOPT_MAXPSZ)
11078348SEric.Yu@Sun.COM 		so->so_proto_props.sopp_maxpsz = soppp->sopp_maxpsz;
11088348SEric.Yu@Sun.COM 	if (soppp->sopp_flags & SOCKOPT_MINPSZ)
11098348SEric.Yu@Sun.COM 		so->so_proto_props.sopp_minpsz = soppp->sopp_minpsz;
11108348SEric.Yu@Sun.COM 	if (soppp->sopp_flags & SOCKOPT_ZCOPY) {
11118348SEric.Yu@Sun.COM 		if (soppp->sopp_zcopyflag & ZCVMSAFE) {
11128348SEric.Yu@Sun.COM 			so->so_proto_props.sopp_zcopyflag |= STZCVMSAFE;
11138348SEric.Yu@Sun.COM 			so->so_proto_props.sopp_zcopyflag &= ~STZCVMUNSAFE;
11148348SEric.Yu@Sun.COM 		} else if (soppp->sopp_zcopyflag & ZCVMUNSAFE) {
11158348SEric.Yu@Sun.COM 			so->so_proto_props.sopp_zcopyflag |= STZCVMUNSAFE;
11168348SEric.Yu@Sun.COM 			so->so_proto_props.sopp_zcopyflag &= ~STZCVMSAFE;
11178348SEric.Yu@Sun.COM 		}
11188348SEric.Yu@Sun.COM 
11198348SEric.Yu@Sun.COM 		if (soppp->sopp_zcopyflag & COPYCACHED) {
11208348SEric.Yu@Sun.COM 			so->so_proto_props.sopp_zcopyflag |= STRCOPYCACHED;
11218348SEric.Yu@Sun.COM 		}
11228348SEric.Yu@Sun.COM 	}
11238348SEric.Yu@Sun.COM 	if (soppp->sopp_flags & SOCKOPT_OOBINLINE)
11248348SEric.Yu@Sun.COM 		so->so_proto_props.sopp_oobinline = soppp->sopp_oobinline;
11258348SEric.Yu@Sun.COM 	if (soppp->sopp_flags & SOCKOPT_RCVTIMER)
11268348SEric.Yu@Sun.COM 		so->so_proto_props.sopp_rcvtimer = soppp->sopp_rcvtimer;
11278348SEric.Yu@Sun.COM 	if (soppp->sopp_flags & SOCKOPT_RCVTHRESH)
11288348SEric.Yu@Sun.COM 		so->so_proto_props.sopp_rcvthresh = soppp->sopp_rcvthresh;
11298348SEric.Yu@Sun.COM 	if (soppp->sopp_flags & SOCKOPT_MAXADDRLEN)
11308348SEric.Yu@Sun.COM 		so->so_proto_props.sopp_maxaddrlen = soppp->sopp_maxaddrlen;
113110103SAnders.Persson@Sun.COM 	if (soppp->sopp_flags & SOCKOPT_LOOPBACK)
113210103SAnders.Persson@Sun.COM 		so->so_proto_props.sopp_loopback = soppp->sopp_loopback;
11338348SEric.Yu@Sun.COM 
11348348SEric.Yu@Sun.COM 	mutex_exit(&so->so_lock);
11358348SEric.Yu@Sun.COM 
11368348SEric.Yu@Sun.COM #ifdef DEBUG
11378348SEric.Yu@Sun.COM 	soppp->sopp_flags &= ~(SOCKOPT_MAXBLK | SOCKOPT_WROFF | SOCKOPT_TAIL |
11388348SEric.Yu@Sun.COM 	    SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | SOCKOPT_MAXPSZ |
11398348SEric.Yu@Sun.COM 	    SOCKOPT_ZCOPY | SOCKOPT_OOBINLINE | SOCKOPT_RCVTIMER |
114010103SAnders.Persson@Sun.COM 	    SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ |
114110103SAnders.Persson@Sun.COM 	    SOCKOPT_LOOPBACK);
11428348SEric.Yu@Sun.COM 	ASSERT(soppp->sopp_flags == 0);
11438348SEric.Yu@Sun.COM #endif
11448348SEric.Yu@Sun.COM }
11458348SEric.Yu@Sun.COM 
11468348SEric.Yu@Sun.COM /* ARGSUSED */
11478348SEric.Yu@Sun.COM ssize_t
11488348SEric.Yu@Sun.COM so_queue_msg(sock_upper_handle_t sock_handle, mblk_t *mp,
11498348SEric.Yu@Sun.COM     size_t msg_size, int flags, int *errorp,  boolean_t *force_pushp)
11508348SEric.Yu@Sun.COM {
11518348SEric.Yu@Sun.COM 	struct sonode *so = (struct sonode *)sock_handle;
11528348SEric.Yu@Sun.COM 	boolean_t force_push = B_TRUE;
11538348SEric.Yu@Sun.COM 	int space_left;
11548348SEric.Yu@Sun.COM 	sodirect_t *sodp = so->so_direct;
11558348SEric.Yu@Sun.COM 
11568348SEric.Yu@Sun.COM 	ASSERT(errorp != NULL);
11578348SEric.Yu@Sun.COM 	*errorp = 0;
11588348SEric.Yu@Sun.COM 	if (mp == NULL) {
11598348SEric.Yu@Sun.COM 		if (msg_size > 0) {
11608348SEric.Yu@Sun.COM 			ASSERT(so->so_downcalls->sd_recv_uio != NULL);
11618348SEric.Yu@Sun.COM 			mutex_enter(&so->so_lock);
11628348SEric.Yu@Sun.COM 			/* the notify functions will drop the lock */
11638348SEric.Yu@Sun.COM 			if (flags & MSG_OOB)
11648348SEric.Yu@Sun.COM 				so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
11658348SEric.Yu@Sun.COM 			else
11668348SEric.Yu@Sun.COM 				so_notify_data(so, msg_size);
11678348SEric.Yu@Sun.COM 			return (0);
11688348SEric.Yu@Sun.COM 		}
11698348SEric.Yu@Sun.COM 		/*
11708348SEric.Yu@Sun.COM 		 * recv space check
11718348SEric.Yu@Sun.COM 		 */
11728348SEric.Yu@Sun.COM 		mutex_enter(&so->so_lock);
11738348SEric.Yu@Sun.COM 		space_left = so->so_rcvbuf - so->so_rcv_queued;
11748348SEric.Yu@Sun.COM 		if (space_left <= 0) {
11758348SEric.Yu@Sun.COM 			so->so_flowctrld = B_TRUE;
11768348SEric.Yu@Sun.COM 			*errorp = ENOSPC;
11778348SEric.Yu@Sun.COM 			space_left = -1;
11788348SEric.Yu@Sun.COM 		}
11798348SEric.Yu@Sun.COM 		goto done_unlock;
11808348SEric.Yu@Sun.COM 	}
11818348SEric.Yu@Sun.COM 
11828348SEric.Yu@Sun.COM 	ASSERT(mp->b_next == NULL);
11838348SEric.Yu@Sun.COM 	ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_PROTO);
11848348SEric.Yu@Sun.COM 	ASSERT(msg_size == msgdsize(mp));
11858348SEric.Yu@Sun.COM 
11868348SEric.Yu@Sun.COM 	if (flags & MSG_OOB) {
11878348SEric.Yu@Sun.COM 		so_queue_oob(sock_handle, mp, msg_size);
11888348SEric.Yu@Sun.COM 		return (0);
11898348SEric.Yu@Sun.COM 	}
11908348SEric.Yu@Sun.COM 
11918348SEric.Yu@Sun.COM 	if (force_pushp != NULL)
11928348SEric.Yu@Sun.COM 		force_push = *force_pushp;
11938348SEric.Yu@Sun.COM 
11948348SEric.Yu@Sun.COM 	if (DB_TYPE(mp) == M_PROTO && !__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
11958348SEric.Yu@Sun.COM 		/* The read pointer is not aligned correctly for TPI */
11968348SEric.Yu@Sun.COM 		zcmn_err(getzoneid(), CE_WARN,
11978348SEric.Yu@Sun.COM 		    "sockfs: Unaligned TPI message received. rptr = %p\n",
11988348SEric.Yu@Sun.COM 		    (void *)mp->b_rptr);
11998348SEric.Yu@Sun.COM 		freemsg(mp);
12009491SAnders.Persson@Sun.COM 		mutex_enter(&so->so_lock);
12019491SAnders.Persson@Sun.COM 		if (sodp != NULL)
12029491SAnders.Persson@Sun.COM 			SOD_UIOAFINI(sodp);
12039491SAnders.Persson@Sun.COM 		mutex_exit(&so->so_lock);
12048348SEric.Yu@Sun.COM 
12058348SEric.Yu@Sun.COM 		return (so->so_rcvbuf - so->so_rcv_queued);
12068348SEric.Yu@Sun.COM 	}
12078348SEric.Yu@Sun.COM 
12088348SEric.Yu@Sun.COM 	mutex_enter(&so->so_lock);
12098963SAnders.Persson@Sun.COM 	if (so->so_state & (SS_FALLBACK_DRAIN | SS_FALLBACK_COMP)) {
12109491SAnders.Persson@Sun.COM 		if (sodp != NULL)
12119491SAnders.Persson@Sun.COM 			SOD_DISABLE(sodp);
12128348SEric.Yu@Sun.COM 		mutex_exit(&so->so_lock);
12138348SEric.Yu@Sun.COM 		*errorp = EOPNOTSUPP;
12148348SEric.Yu@Sun.COM 		return (-1);
12158348SEric.Yu@Sun.COM 	}
12168348SEric.Yu@Sun.COM 	if (so->so_state & SS_CANTRCVMORE) {
12178348SEric.Yu@Sun.COM 		freemsg(mp);
12189491SAnders.Persson@Sun.COM 		if (sodp != NULL)
12199491SAnders.Persson@Sun.COM 			SOD_DISABLE(sodp);
12208348SEric.Yu@Sun.COM 		mutex_exit(&so->so_lock);
12218348SEric.Yu@Sun.COM 		return (0);
12228348SEric.Yu@Sun.COM 	}
12238348SEric.Yu@Sun.COM 
12248348SEric.Yu@Sun.COM 	/* process the mblk via I/OAT if capable */
12259491SAnders.Persson@Sun.COM 	if (sodp != NULL && sodp->sod_enabled) {
12268348SEric.Yu@Sun.COM 		if (DB_TYPE(mp) == M_DATA) {
12279491SAnders.Persson@Sun.COM 			sod_uioa_mblk_init(sodp, mp, msg_size);
12288348SEric.Yu@Sun.COM 		} else {
12298348SEric.Yu@Sun.COM 			SOD_UIOAFINI(sodp);
12308348SEric.Yu@Sun.COM 		}
12318348SEric.Yu@Sun.COM 	}
12328348SEric.Yu@Sun.COM 
12338348SEric.Yu@Sun.COM 	if (mp->b_next == NULL) {
12348348SEric.Yu@Sun.COM 		so_enqueue_msg(so, mp, msg_size);
12358348SEric.Yu@Sun.COM 	} else {
12368348SEric.Yu@Sun.COM 		do {
12378348SEric.Yu@Sun.COM 			mblk_t *nmp;
12388348SEric.Yu@Sun.COM 
12398348SEric.Yu@Sun.COM 			if ((nmp = mp->b_next) != NULL) {
12408348SEric.Yu@Sun.COM 				mp->b_next = NULL;
12418348SEric.Yu@Sun.COM 			}
12428348SEric.Yu@Sun.COM 			so_enqueue_msg(so, mp, msgdsize(mp));
12438348SEric.Yu@Sun.COM 			mp = nmp;
12448348SEric.Yu@Sun.COM 		} while (mp != NULL);
12458348SEric.Yu@Sun.COM 	}
12468348SEric.Yu@Sun.COM 
12478348SEric.Yu@Sun.COM 	space_left = so->so_rcvbuf - so->so_rcv_queued;
12488348SEric.Yu@Sun.COM 	if (space_left <= 0) {
12498348SEric.Yu@Sun.COM 		so->so_flowctrld = B_TRUE;
12508348SEric.Yu@Sun.COM 		*errorp = ENOSPC;
12518348SEric.Yu@Sun.COM 		space_left = -1;
12528348SEric.Yu@Sun.COM 	}
12538348SEric.Yu@Sun.COM 
12548348SEric.Yu@Sun.COM 	if (force_push || so->so_rcv_queued >= so->so_rcv_thresh ||
12559491SAnders.Persson@Sun.COM 	    so->so_rcv_queued >= so->so_rcv_wanted) {
12568348SEric.Yu@Sun.COM 		SOCKET_TIMER_CANCEL(so);
12578348SEric.Yu@Sun.COM 		/*
12588348SEric.Yu@Sun.COM 		 * so_notify_data will release the lock
12598348SEric.Yu@Sun.COM 		 */
12608348SEric.Yu@Sun.COM 		so_notify_data(so, so->so_rcv_queued);
12618348SEric.Yu@Sun.COM 
12628348SEric.Yu@Sun.COM 		if (force_pushp != NULL)
12638348SEric.Yu@Sun.COM 			*force_pushp = B_TRUE;
12648348SEric.Yu@Sun.COM 		goto done;
12658348SEric.Yu@Sun.COM 	} else if (so->so_rcv_timer_tid == 0) {
12668348SEric.Yu@Sun.COM 		/* Make sure the recv push timer is running */
12678348SEric.Yu@Sun.COM 		SOCKET_TIMER_START(so);
12688348SEric.Yu@Sun.COM 	}
12698348SEric.Yu@Sun.COM 
12708348SEric.Yu@Sun.COM done_unlock:
12718348SEric.Yu@Sun.COM 	mutex_exit(&so->so_lock);
12728348SEric.Yu@Sun.COM done:
12738348SEric.Yu@Sun.COM 	return (space_left);
12748348SEric.Yu@Sun.COM }
12758348SEric.Yu@Sun.COM 
12768348SEric.Yu@Sun.COM /*
12778348SEric.Yu@Sun.COM  * Set the offset of where the oob data is relative to the bytes in
12788348SEric.Yu@Sun.COM  * queued. Also generate SIGURG
12798348SEric.Yu@Sun.COM  */
12808348SEric.Yu@Sun.COM void
12818348SEric.Yu@Sun.COM so_signal_oob(sock_upper_handle_t sock_handle, ssize_t offset)
12828348SEric.Yu@Sun.COM {
12838348SEric.Yu@Sun.COM 	struct sonode *so;
12848348SEric.Yu@Sun.COM 
12858348SEric.Yu@Sun.COM 	ASSERT(offset >= 0);
12868348SEric.Yu@Sun.COM 	so = (struct sonode *)sock_handle;
12878348SEric.Yu@Sun.COM 	mutex_enter(&so->so_lock);
12889491SAnders.Persson@Sun.COM 	if (so->so_direct != NULL)
12899491SAnders.Persson@Sun.COM 		SOD_UIOAFINI(so->so_direct);
12908348SEric.Yu@Sun.COM 
12918348SEric.Yu@Sun.COM 	/*
12928348SEric.Yu@Sun.COM 	 * New urgent data on the way so forget about any old
12938348SEric.Yu@Sun.COM 	 * urgent data.
12948348SEric.Yu@Sun.COM 	 */
12958348SEric.Yu@Sun.COM 	so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);
12968348SEric.Yu@Sun.COM 
12978348SEric.Yu@Sun.COM 	/*
12988348SEric.Yu@Sun.COM 	 * Record that urgent data is pending.
12998348SEric.Yu@Sun.COM 	 */
13008348SEric.Yu@Sun.COM 	so->so_state |= SS_OOBPEND;
13018348SEric.Yu@Sun.COM 
13028348SEric.Yu@Sun.COM 	if (so->so_oobmsg != NULL) {
13038348SEric.Yu@Sun.COM 		dprintso(so, 1, ("sock: discarding old oob\n"));
13048348SEric.Yu@Sun.COM 		freemsg(so->so_oobmsg);
13058348SEric.Yu@Sun.COM 		so->so_oobmsg = NULL;
13068348SEric.Yu@Sun.COM 	}
13078348SEric.Yu@Sun.COM 
13088348SEric.Yu@Sun.COM 	/*
13098348SEric.Yu@Sun.COM 	 * set the offset where the urgent byte is
13108348SEric.Yu@Sun.COM 	 */
13118348SEric.Yu@Sun.COM 	so->so_oobmark = so->so_rcv_queued + offset;
13128348SEric.Yu@Sun.COM 	if (so->so_oobmark == 0)
13138348SEric.Yu@Sun.COM 		so->so_state |= SS_RCVATMARK;
13148348SEric.Yu@Sun.COM 	else
13158348SEric.Yu@Sun.COM 		so->so_state &= ~SS_RCVATMARK;
13168348SEric.Yu@Sun.COM 
13178348SEric.Yu@Sun.COM 	so_notify_oobsig(so);
13188348SEric.Yu@Sun.COM }
13198348SEric.Yu@Sun.COM 
13208348SEric.Yu@Sun.COM /*
13218348SEric.Yu@Sun.COM  * Queue the OOB byte
13228348SEric.Yu@Sun.COM  */
13238348SEric.Yu@Sun.COM static void
13248348SEric.Yu@Sun.COM so_queue_oob(sock_upper_handle_t sock_handle, mblk_t *mp, size_t len)
13258348SEric.Yu@Sun.COM {
13268348SEric.Yu@Sun.COM 	struct sonode *so;
13278348SEric.Yu@Sun.COM 
13288348SEric.Yu@Sun.COM 	so = (struct sonode *)sock_handle;
13298348SEric.Yu@Sun.COM 	mutex_enter(&so->so_lock);
13309491SAnders.Persson@Sun.COM 	if (so->so_direct != NULL)
13319491SAnders.Persson@Sun.COM 		SOD_UIOAFINI(so->so_direct);
13328348SEric.Yu@Sun.COM 
13338348SEric.Yu@Sun.COM 	ASSERT(mp != NULL);
13348348SEric.Yu@Sun.COM 	if (!IS_SO_OOB_INLINE(so)) {
13358348SEric.Yu@Sun.COM 		so->so_oobmsg = mp;
13368348SEric.Yu@Sun.COM 		so->so_state |= SS_HAVEOOBDATA;
13378348SEric.Yu@Sun.COM 	} else {
13388348SEric.Yu@Sun.COM 		so_enqueue_msg(so, mp, len);
13398348SEric.Yu@Sun.COM 	}
13408348SEric.Yu@Sun.COM 
13418348SEric.Yu@Sun.COM 	so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
13428348SEric.Yu@Sun.COM }
13438348SEric.Yu@Sun.COM 
13448348SEric.Yu@Sun.COM int
13458348SEric.Yu@Sun.COM so_close(struct sonode *so, int flag, struct cred *cr)
13468348SEric.Yu@Sun.COM {
13478348SEric.Yu@Sun.COM 	int error;
13488348SEric.Yu@Sun.COM 
13498348SEric.Yu@Sun.COM 	error = (*so->so_downcalls->sd_close)(so->so_proto_handle, flag, cr);
13508348SEric.Yu@Sun.COM 
13518348SEric.Yu@Sun.COM 	/*
13528348SEric.Yu@Sun.COM 	 * At this point there will be no more upcalls from the protocol
13538348SEric.Yu@Sun.COM 	 */
13548348SEric.Yu@Sun.COM 	mutex_enter(&so->so_lock);
13558399SRao.Shoaib@Sun.COM 
13568399SRao.Shoaib@Sun.COM 	ASSERT(so_verify_oobstate(so));
13578399SRao.Shoaib@Sun.COM 
13588348SEric.Yu@Sun.COM 	so_rcv_flush(so);
13598348SEric.Yu@Sun.COM 	mutex_exit(&so->so_lock);
13608348SEric.Yu@Sun.COM 
13618348SEric.Yu@Sun.COM 	return (error);
13628348SEric.Yu@Sun.COM }
13638348SEric.Yu@Sun.COM 
13648348SEric.Yu@Sun.COM void
13658348SEric.Yu@Sun.COM so_zcopy_notify(sock_upper_handle_t sock_handle)
13668348SEric.Yu@Sun.COM {
13678348SEric.Yu@Sun.COM 	struct sonode *so = (struct sonode *)sock_handle;
13688348SEric.Yu@Sun.COM 
13698348SEric.Yu@Sun.COM 	mutex_enter(&so->so_lock);
13708348SEric.Yu@Sun.COM 	so->so_copyflag |= STZCNOTIFY;
13718348SEric.Yu@Sun.COM 	cv_broadcast(&so->so_copy_cv);
13728348SEric.Yu@Sun.COM 	mutex_exit(&so->so_lock);
13738348SEric.Yu@Sun.COM }
13748348SEric.Yu@Sun.COM 
13758348SEric.Yu@Sun.COM void
13768348SEric.Yu@Sun.COM so_set_error(sock_upper_handle_t sock_handle, int error)
13778348SEric.Yu@Sun.COM {
13788348SEric.Yu@Sun.COM 	struct sonode *so = (struct sonode *)sock_handle;
13798348SEric.Yu@Sun.COM 
13808348SEric.Yu@Sun.COM 	mutex_enter(&so->so_lock);
13818348SEric.Yu@Sun.COM 
13828348SEric.Yu@Sun.COM 	soseterror(so, error);
13838348SEric.Yu@Sun.COM 
13848348SEric.Yu@Sun.COM 	so_notify_error(so);
13858348SEric.Yu@Sun.COM }
13868348SEric.Yu@Sun.COM 
13878348SEric.Yu@Sun.COM /*
13888348SEric.Yu@Sun.COM  * so_recvmsg - read data from the socket
13898348SEric.Yu@Sun.COM  *
13908348SEric.Yu@Sun.COM  * There are two ways of obtaining data; either we ask the protocol to
13918348SEric.Yu@Sun.COM  * copy directly into the supplied buffer, or we copy data from the
13928348SEric.Yu@Sun.COM  * sonode's receive queue. The decision which one to use depends on
13938348SEric.Yu@Sun.COM  * whether the protocol has a sd_recv_uio down call.
13948348SEric.Yu@Sun.COM  */
13958348SEric.Yu@Sun.COM int
13968348SEric.Yu@Sun.COM so_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
13978348SEric.Yu@Sun.COM     struct cred *cr)
13988348SEric.Yu@Sun.COM {
13998348SEric.Yu@Sun.COM 	rval_t 		rval;
14008348SEric.Yu@Sun.COM 	int 		flags = 0;
14018348SEric.Yu@Sun.COM 	t_uscalar_t	controllen, namelen;
14028348SEric.Yu@Sun.COM 	int 		error = 0;
14038348SEric.Yu@Sun.COM 	int ret;
14048348SEric.Yu@Sun.COM 	mblk_t		*mctlp = NULL;
14058348SEric.Yu@Sun.COM 	union T_primitives *tpr;
14068348SEric.Yu@Sun.COM 	void		*control;
14078348SEric.Yu@Sun.COM 	ssize_t		saved_resid;
14088348SEric.Yu@Sun.COM 	struct uio	*suiop;
14098348SEric.Yu@Sun.COM 
14108348SEric.Yu@Sun.COM 	SO_BLOCK_FALLBACK(so, SOP_RECVMSG(so, msg, uiop, cr));
14118348SEric.Yu@Sun.COM 
14128348SEric.Yu@Sun.COM 	if ((so->so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 &&
14138348SEric.Yu@Sun.COM 	    (so->so_mode & SM_CONNREQUIRED)) {
14148348SEric.Yu@Sun.COM 		SO_UNBLOCK_FALLBACK(so);
14158348SEric.Yu@Sun.COM 		return (ENOTCONN);
14168348SEric.Yu@Sun.COM 	}
14178348SEric.Yu@Sun.COM 
14188348SEric.Yu@Sun.COM 	if (msg->msg_flags & MSG_PEEK)
14198348SEric.Yu@Sun.COM 		msg->msg_flags &= ~MSG_WAITALL;
14208348SEric.Yu@Sun.COM 
14218348SEric.Yu@Sun.COM 	if (so->so_mode & SM_ATOMIC)
14228348SEric.Yu@Sun.COM 		msg->msg_flags |= MSG_TRUNC;
14238348SEric.Yu@Sun.COM 
14248348SEric.Yu@Sun.COM 	if (msg->msg_flags & MSG_OOB) {
14258348SEric.Yu@Sun.COM 		if ((so->so_mode & SM_EXDATA) == 0) {
14268348SEric.Yu@Sun.COM 			error = EOPNOTSUPP;
14278348SEric.Yu@Sun.COM 		} else if (so->so_downcalls->sd_recv_uio != NULL) {
14288348SEric.Yu@Sun.COM 			error = (*so->so_downcalls->sd_recv_uio)
14298348SEric.Yu@Sun.COM 			    (so->so_proto_handle, uiop, msg, cr);
14308348SEric.Yu@Sun.COM 		} else {
14318348SEric.Yu@Sun.COM 			error = sorecvoob(so, msg, uiop, msg->msg_flags,
14328348SEric.Yu@Sun.COM 			    IS_SO_OOB_INLINE(so));
14338348SEric.Yu@Sun.COM 		}
14348348SEric.Yu@Sun.COM 		SO_UNBLOCK_FALLBACK(so);
14358348SEric.Yu@Sun.COM 		return (error);
14368348SEric.Yu@Sun.COM 	}
14378348SEric.Yu@Sun.COM 
14388348SEric.Yu@Sun.COM 	/*
14398348SEric.Yu@Sun.COM 	 * If the protocol has the recv down call, then pass the request
14408348SEric.Yu@Sun.COM 	 * down.
14418348SEric.Yu@Sun.COM 	 */
14428348SEric.Yu@Sun.COM 	if (so->so_downcalls->sd_recv_uio != NULL) {
14438348SEric.Yu@Sun.COM 		error = (*so->so_downcalls->sd_recv_uio)
14448348SEric.Yu@Sun.COM 		    (so->so_proto_handle, uiop, msg, cr);
14458348SEric.Yu@Sun.COM 		SO_UNBLOCK_FALLBACK(so);
14468348SEric.Yu@Sun.COM 		return (error);
14478348SEric.Yu@Sun.COM 	}
14488348SEric.Yu@Sun.COM 
14498348SEric.Yu@Sun.COM 	/*
14508348SEric.Yu@Sun.COM 	 * Reading data from the socket buffer
14518348SEric.Yu@Sun.COM 	 */
14528348SEric.Yu@Sun.COM 	flags = msg->msg_flags;
14538348SEric.Yu@Sun.COM 	msg->msg_flags = 0;
14548348SEric.Yu@Sun.COM 
14558348SEric.Yu@Sun.COM 	/*
14568348SEric.Yu@Sun.COM 	 * Set msg_controllen and msg_namelen to zero here to make it
14578348SEric.Yu@Sun.COM 	 * simpler in the cases that no control or name is returned.
14588348SEric.Yu@Sun.COM 	 */
14598348SEric.Yu@Sun.COM 	controllen = msg->msg_controllen;
14608348SEric.Yu@Sun.COM 	namelen = msg->msg_namelen;
14618348SEric.Yu@Sun.COM 	msg->msg_controllen = 0;
14628348SEric.Yu@Sun.COM 	msg->msg_namelen = 0;
14638348SEric.Yu@Sun.COM 
14648348SEric.Yu@Sun.COM 	mutex_enter(&so->so_lock);
14658348SEric.Yu@Sun.COM 	/* Set SOREADLOCKED */
14668348SEric.Yu@Sun.COM 	error = so_lock_read_intr(so,
14678348SEric.Yu@Sun.COM 	    uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0));
14688348SEric.Yu@Sun.COM 	mutex_exit(&so->so_lock);
14698348SEric.Yu@Sun.COM 	if (error) {
14708348SEric.Yu@Sun.COM 		SO_UNBLOCK_FALLBACK(so);
14718348SEric.Yu@Sun.COM 		return (error);
14728348SEric.Yu@Sun.COM 	}
14738348SEric.Yu@Sun.COM 
14748348SEric.Yu@Sun.COM 	suiop = sod_rcv_init(so, flags, &uiop);
14758348SEric.Yu@Sun.COM retry:
14768348SEric.Yu@Sun.COM 	saved_resid = uiop->uio_resid;
14778348SEric.Yu@Sun.COM 	error = so_dequeue_msg(so, &mctlp, uiop, &rval, flags);
14788348SEric.Yu@Sun.COM 	if (error != 0) {
14798348SEric.Yu@Sun.COM 		goto out;
14808348SEric.Yu@Sun.COM 	}
14818348SEric.Yu@Sun.COM 	/*
14828348SEric.Yu@Sun.COM 	 * For datagrams the MOREDATA flag is used to set MSG_TRUNC.
14838348SEric.Yu@Sun.COM 	 * For non-datagrams MOREDATA is used to set MSG_EOR.
14848348SEric.Yu@Sun.COM 	 */
14858348SEric.Yu@Sun.COM 	ASSERT(!(rval.r_val1 & MORECTL));
14868348SEric.Yu@Sun.COM 	if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC))
14878348SEric.Yu@Sun.COM 		msg->msg_flags |= MSG_TRUNC;
14888348SEric.Yu@Sun.COM 	if (mctlp == NULL) {
14898348SEric.Yu@Sun.COM 		dprintso(so, 1, ("so_recvmsg: got M_DATA\n"));
14908348SEric.Yu@Sun.COM 
14918348SEric.Yu@Sun.COM 		mutex_enter(&so->so_lock);
14928348SEric.Yu@Sun.COM 		/* Set MSG_EOR based on MOREDATA */
14938348SEric.Yu@Sun.COM 		if (!(rval.r_val1 & MOREDATA)) {
14948348SEric.Yu@Sun.COM 			if (so->so_state & SS_SAVEDEOR) {
14958348SEric.Yu@Sun.COM 				msg->msg_flags |= MSG_EOR;
14968348SEric.Yu@Sun.COM 				so->so_state &= ~SS_SAVEDEOR;
14978348SEric.Yu@Sun.COM 			}
14988348SEric.Yu@Sun.COM 		}
14998348SEric.Yu@Sun.COM 		/*
15008348SEric.Yu@Sun.COM 		 * If some data was received (i.e. not EOF) and the
15018348SEric.Yu@Sun.COM 		 * read/recv* has not been satisfied wait for some more.
15028348SEric.Yu@Sun.COM 		 */
15038348SEric.Yu@Sun.COM 		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
15048348SEric.Yu@Sun.COM 		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
15058348SEric.Yu@Sun.COM 			mutex_exit(&so->so_lock);
15069752SAnders.Persson@Sun.COM 			flags |= MSG_NOMARK;
15078348SEric.Yu@Sun.COM 			goto retry;
15088348SEric.Yu@Sun.COM 		}
15098348SEric.Yu@Sun.COM 
15108348SEric.Yu@Sun.COM 		goto out_locked;
15118348SEric.Yu@Sun.COM 	}
15129752SAnders.Persson@Sun.COM 	/* so_queue_msg has already verified length and alignment */
15138348SEric.Yu@Sun.COM 	tpr = (union T_primitives *)mctlp->b_rptr;
15148348SEric.Yu@Sun.COM 	dprintso(so, 1, ("so_recvmsg: type %d\n", tpr->type));
15158348SEric.Yu@Sun.COM 	switch (tpr->type) {
15168348SEric.Yu@Sun.COM 	case T_DATA_IND: {
15178348SEric.Yu@Sun.COM 		/*
15188348SEric.Yu@Sun.COM 		 * Set msg_flags to MSG_EOR based on
15198348SEric.Yu@Sun.COM 		 * MORE_flag and MOREDATA.
15208348SEric.Yu@Sun.COM 		 */
15218348SEric.Yu@Sun.COM 		mutex_enter(&so->so_lock);
15228348SEric.Yu@Sun.COM 		so->so_state &= ~SS_SAVEDEOR;
15238348SEric.Yu@Sun.COM 		if (!(tpr->data_ind.MORE_flag & 1)) {
15248348SEric.Yu@Sun.COM 			if (!(rval.r_val1 & MOREDATA))
15258348SEric.Yu@Sun.COM 				msg->msg_flags |= MSG_EOR;
15268348SEric.Yu@Sun.COM 			else
15278348SEric.Yu@Sun.COM 				so->so_state |= SS_SAVEDEOR;
15288348SEric.Yu@Sun.COM 		}
15298348SEric.Yu@Sun.COM 		freemsg(mctlp);
15308348SEric.Yu@Sun.COM 		/*
15318348SEric.Yu@Sun.COM 		 * If some data was received (i.e. not EOF) and the
15328348SEric.Yu@Sun.COM 		 * read/recv* has not been satisfied wait for some more.
15338348SEric.Yu@Sun.COM 		 */
15348348SEric.Yu@Sun.COM 		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
15358348SEric.Yu@Sun.COM 		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
15368348SEric.Yu@Sun.COM 			mutex_exit(&so->so_lock);
15379752SAnders.Persson@Sun.COM 			flags |= MSG_NOMARK;
15388348SEric.Yu@Sun.COM 			goto retry;
15398348SEric.Yu@Sun.COM 		}
15408348SEric.Yu@Sun.COM 		goto out_locked;
15418348SEric.Yu@Sun.COM 	}
15428348SEric.Yu@Sun.COM 	case T_UNITDATA_IND: {
15438348SEric.Yu@Sun.COM 		void *addr;
15448348SEric.Yu@Sun.COM 		t_uscalar_t addrlen;
15458348SEric.Yu@Sun.COM 		void *abuf;
15468348SEric.Yu@Sun.COM 		t_uscalar_t optlen;
15478348SEric.Yu@Sun.COM 		void *opt;
15488348SEric.Yu@Sun.COM 
15498348SEric.Yu@Sun.COM 		if (namelen != 0) {
15508348SEric.Yu@Sun.COM 			/* Caller wants source address */
15518348SEric.Yu@Sun.COM 			addrlen = tpr->unitdata_ind.SRC_length;
15528348SEric.Yu@Sun.COM 			addr = sogetoff(mctlp, tpr->unitdata_ind.SRC_offset,
15538348SEric.Yu@Sun.COM 			    addrlen, 1);
15548348SEric.Yu@Sun.COM 			if (addr == NULL) {
15558348SEric.Yu@Sun.COM 				freemsg(mctlp);
15568348SEric.Yu@Sun.COM 				error = EPROTO;
15578348SEric.Yu@Sun.COM 				eprintsoline(so, error);
15588348SEric.Yu@Sun.COM 				goto out;
15598348SEric.Yu@Sun.COM 			}
15608348SEric.Yu@Sun.COM 			ASSERT(so->so_family != AF_UNIX);
15618348SEric.Yu@Sun.COM 		}
15628348SEric.Yu@Sun.COM 		optlen = tpr->unitdata_ind.OPT_length;
15638348SEric.Yu@Sun.COM 		if (optlen != 0) {
15648348SEric.Yu@Sun.COM 			t_uscalar_t ncontrollen;
15658348SEric.Yu@Sun.COM 
15668348SEric.Yu@Sun.COM 			/*
15678348SEric.Yu@Sun.COM 			 * Extract any source address option.
15688348SEric.Yu@Sun.COM 			 * Determine how large cmsg buffer is needed.
15698348SEric.Yu@Sun.COM 			 */
15708348SEric.Yu@Sun.COM 			opt = sogetoff(mctlp, tpr->unitdata_ind.OPT_offset,
15718348SEric.Yu@Sun.COM 			    optlen, __TPI_ALIGN_SIZE);
15728348SEric.Yu@Sun.COM 
15738348SEric.Yu@Sun.COM 			if (opt == NULL) {
15748348SEric.Yu@Sun.COM 				freemsg(mctlp);
15758348SEric.Yu@Sun.COM 				error = EPROTO;
15768348SEric.Yu@Sun.COM 				eprintsoline(so, error);
15778348SEric.Yu@Sun.COM 				goto out;
15788348SEric.Yu@Sun.COM 			}
15798348SEric.Yu@Sun.COM 			if (so->so_family == AF_UNIX)
15808348SEric.Yu@Sun.COM 				so_getopt_srcaddr(opt, optlen, &addr, &addrlen);
15818348SEric.Yu@Sun.COM 			ncontrollen = so_cmsglen(mctlp, opt, optlen,
15828348SEric.Yu@Sun.COM 			    !(flags & MSG_XPG4_2));
15838348SEric.Yu@Sun.COM 			if (controllen != 0)
15848348SEric.Yu@Sun.COM 				controllen = ncontrollen;
15858348SEric.Yu@Sun.COM 			else if (ncontrollen != 0)
15868348SEric.Yu@Sun.COM 				msg->msg_flags |= MSG_CTRUNC;
15878348SEric.Yu@Sun.COM 		} else {
15888348SEric.Yu@Sun.COM 			controllen = 0;
15898348SEric.Yu@Sun.COM 		}
15908348SEric.Yu@Sun.COM 
15918348SEric.Yu@Sun.COM 		if (namelen != 0) {
15928348SEric.Yu@Sun.COM 			/*
15938348SEric.Yu@Sun.COM 			 * Return address to caller.
15948348SEric.Yu@Sun.COM 			 * Caller handles truncation if length
15958348SEric.Yu@Sun.COM 			 * exceeds msg_namelen.
15968348SEric.Yu@Sun.COM 			 * NOTE: AF_UNIX NUL termination is ensured by
15978348SEric.Yu@Sun.COM 			 * the sender's copyin_name().
15988348SEric.Yu@Sun.COM 			 */
15998348SEric.Yu@Sun.COM 			abuf = kmem_alloc(addrlen, KM_SLEEP);
16008348SEric.Yu@Sun.COM 
16018348SEric.Yu@Sun.COM 			bcopy(addr, abuf, addrlen);
16028348SEric.Yu@Sun.COM 			msg->msg_name = abuf;
16038348SEric.Yu@Sun.COM 			msg->msg_namelen = addrlen;
16048348SEric.Yu@Sun.COM 		}
16058348SEric.Yu@Sun.COM 
16068348SEric.Yu@Sun.COM 		if (controllen != 0) {
16078348SEric.Yu@Sun.COM 			/*
16088348SEric.Yu@Sun.COM 			 * Return control msg to caller.
16098348SEric.Yu@Sun.COM 			 * Caller handles truncation if length
16108348SEric.Yu@Sun.COM 			 * exceeds msg_controllen.
16118348SEric.Yu@Sun.COM 			 */
16128348SEric.Yu@Sun.COM 			control = kmem_zalloc(controllen, KM_SLEEP);
16138348SEric.Yu@Sun.COM 
16148348SEric.Yu@Sun.COM 			error = so_opt2cmsg(mctlp, opt, optlen,
16158348SEric.Yu@Sun.COM 			    !(flags & MSG_XPG4_2), control, controllen);
16168348SEric.Yu@Sun.COM 			if (error) {
16178348SEric.Yu@Sun.COM 				freemsg(mctlp);
16188348SEric.Yu@Sun.COM 				if (msg->msg_namelen != 0)
16198348SEric.Yu@Sun.COM 					kmem_free(msg->msg_name,
16208348SEric.Yu@Sun.COM 					    msg->msg_namelen);
16218348SEric.Yu@Sun.COM 				kmem_free(control, controllen);
16228348SEric.Yu@Sun.COM 				eprintsoline(so, error);
16238348SEric.Yu@Sun.COM 				goto out;
16248348SEric.Yu@Sun.COM 			}
16258348SEric.Yu@Sun.COM 			msg->msg_control = control;
16268348SEric.Yu@Sun.COM 			msg->msg_controllen = controllen;
16278348SEric.Yu@Sun.COM 		}
16288348SEric.Yu@Sun.COM 
16298348SEric.Yu@Sun.COM 		freemsg(mctlp);
16308348SEric.Yu@Sun.COM 		goto out;
16318348SEric.Yu@Sun.COM 	}
16328348SEric.Yu@Sun.COM 	case T_OPTDATA_IND: {
16338348SEric.Yu@Sun.COM 		struct T_optdata_req *tdr;
16348348SEric.Yu@Sun.COM 		void *opt;
16358348SEric.Yu@Sun.COM 		t_uscalar_t optlen;
16368348SEric.Yu@Sun.COM 
16378348SEric.Yu@Sun.COM 		tdr = (struct T_optdata_req *)mctlp->b_rptr;
16388348SEric.Yu@Sun.COM 		optlen = tdr->OPT_length;
16398348SEric.Yu@Sun.COM 		if (optlen != 0) {
16408348SEric.Yu@Sun.COM 			t_uscalar_t ncontrollen;
16418348SEric.Yu@Sun.COM 			/*
16428348SEric.Yu@Sun.COM 			 * Determine how large cmsg buffer is needed.
16438348SEric.Yu@Sun.COM 			 */
16448348SEric.Yu@Sun.COM 			opt = sogetoff(mctlp,
16458348SEric.Yu@Sun.COM 			    tpr->optdata_ind.OPT_offset, optlen,
16468348SEric.Yu@Sun.COM 			    __TPI_ALIGN_SIZE);
16478348SEric.Yu@Sun.COM 
16488348SEric.Yu@Sun.COM 			if (opt == NULL) {
16498348SEric.Yu@Sun.COM 				freemsg(mctlp);
16508348SEric.Yu@Sun.COM 				error = EPROTO;
16518348SEric.Yu@Sun.COM 				eprintsoline(so, error);
16528348SEric.Yu@Sun.COM 				goto out;
16538348SEric.Yu@Sun.COM 			}
16548348SEric.Yu@Sun.COM 
16558348SEric.Yu@Sun.COM 			ncontrollen = so_cmsglen(mctlp, opt, optlen,
16568348SEric.Yu@Sun.COM 			    !(flags & MSG_XPG4_2));
16578348SEric.Yu@Sun.COM 			if (controllen != 0)
16588348SEric.Yu@Sun.COM 				controllen = ncontrollen;
16598348SEric.Yu@Sun.COM 			else if (ncontrollen != 0)
16608348SEric.Yu@Sun.COM 				msg->msg_flags |= MSG_CTRUNC;
16618348SEric.Yu@Sun.COM 		} else {
16628348SEric.Yu@Sun.COM 			controllen = 0;
16638348SEric.Yu@Sun.COM 		}
16648348SEric.Yu@Sun.COM 
16658348SEric.Yu@Sun.COM 		if (controllen != 0) {
16668348SEric.Yu@Sun.COM 			/*
16678348SEric.Yu@Sun.COM 			 * Return control msg to caller.
16688348SEric.Yu@Sun.COM 			 * Caller handles truncation if length
16698348SEric.Yu@Sun.COM 			 * exceeds msg_controllen.
16708348SEric.Yu@Sun.COM 			 */
16718348SEric.Yu@Sun.COM 			control = kmem_zalloc(controllen, KM_SLEEP);
16728348SEric.Yu@Sun.COM 
16738348SEric.Yu@Sun.COM 			error = so_opt2cmsg(mctlp, opt, optlen,
16748348SEric.Yu@Sun.COM 			    !(flags & MSG_XPG4_2), control, controllen);
16758348SEric.Yu@Sun.COM 			if (error) {
16768348SEric.Yu@Sun.COM 				freemsg(mctlp);
16778348SEric.Yu@Sun.COM 				kmem_free(control, controllen);
16788348SEric.Yu@Sun.COM 				eprintsoline(so, error);
16798348SEric.Yu@Sun.COM 				goto out;
16808348SEric.Yu@Sun.COM 			}
16818348SEric.Yu@Sun.COM 			msg->msg_control = control;
16828348SEric.Yu@Sun.COM 			msg->msg_controllen = controllen;
16838348SEric.Yu@Sun.COM 		}
16848348SEric.Yu@Sun.COM 
16858348SEric.Yu@Sun.COM 		/*
16868348SEric.Yu@Sun.COM 		 * Set msg_flags to MSG_EOR based on
16878348SEric.Yu@Sun.COM 		 * DATA_flag and MOREDATA.
16888348SEric.Yu@Sun.COM 		 */
16898348SEric.Yu@Sun.COM 		mutex_enter(&so->so_lock);
16908348SEric.Yu@Sun.COM 		so->so_state &= ~SS_SAVEDEOR;
16918348SEric.Yu@Sun.COM 		if (!(tpr->data_ind.MORE_flag & 1)) {
16928348SEric.Yu@Sun.COM 			if (!(rval.r_val1 & MOREDATA))
16938348SEric.Yu@Sun.COM 				msg->msg_flags |= MSG_EOR;
16948348SEric.Yu@Sun.COM 			else
16958348SEric.Yu@Sun.COM 				so->so_state |= SS_SAVEDEOR;
16968348SEric.Yu@Sun.COM 		}
16978348SEric.Yu@Sun.COM 		freemsg(mctlp);
16988348SEric.Yu@Sun.COM 		/*
16998348SEric.Yu@Sun.COM 		 * If some data was received (i.e. not EOF) and the
17008348SEric.Yu@Sun.COM 		 * read/recv* has not been satisfied wait for some more.
17018348SEric.Yu@Sun.COM 		 * Not possible to wait if control info was received.
17028348SEric.Yu@Sun.COM 		 */
17038348SEric.Yu@Sun.COM 		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
17048348SEric.Yu@Sun.COM 		    controllen == 0 &&
17058348SEric.Yu@Sun.COM 		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
17068348SEric.Yu@Sun.COM 			mutex_exit(&so->so_lock);
17079752SAnders.Persson@Sun.COM 			flags |= MSG_NOMARK;
17088348SEric.Yu@Sun.COM 			goto retry;
17098348SEric.Yu@Sun.COM 		}
17108348SEric.Yu@Sun.COM 		goto out_locked;
17118348SEric.Yu@Sun.COM 	}
17128348SEric.Yu@Sun.COM 	default:
17138348SEric.Yu@Sun.COM 		cmn_err(CE_CONT, "so_recvmsg bad type %x \n",
17148348SEric.Yu@Sun.COM 		    tpr->type);
17158348SEric.Yu@Sun.COM 		freemsg(mctlp);
17168348SEric.Yu@Sun.COM 		error = EPROTO;
17178348SEric.Yu@Sun.COM 		ASSERT(0);
17188348SEric.Yu@Sun.COM 	}
17198348SEric.Yu@Sun.COM out:
17208348SEric.Yu@Sun.COM 	mutex_enter(&so->so_lock);
17218348SEric.Yu@Sun.COM out_locked:
17228348SEric.Yu@Sun.COM 	ret = sod_rcv_done(so, suiop, uiop);
17238348SEric.Yu@Sun.COM 	if (ret != 0 && error == 0)
17248348SEric.Yu@Sun.COM 		error = ret;
17258348SEric.Yu@Sun.COM 
17268348SEric.Yu@Sun.COM 	so_unlock_read(so);	/* Clear SOREADLOCKED */
17278348SEric.Yu@Sun.COM 	mutex_exit(&so->so_lock);
17288348SEric.Yu@Sun.COM 
17298348SEric.Yu@Sun.COM 	SO_UNBLOCK_FALLBACK(so);
17308348SEric.Yu@Sun.COM 
17318348SEric.Yu@Sun.COM 	return (error);
17328348SEric.Yu@Sun.COM }
17338348SEric.Yu@Sun.COM 
17348348SEric.Yu@Sun.COM sonodeops_t so_sonodeops = {
17358348SEric.Yu@Sun.COM 	so_init,		/* sop_init	*/
17368348SEric.Yu@Sun.COM 	so_accept,		/* sop_accept   */
17378348SEric.Yu@Sun.COM 	so_bind,		/* sop_bind	*/
17388348SEric.Yu@Sun.COM 	so_listen,		/* sop_listen   */
17398348SEric.Yu@Sun.COM 	so_connect,		/* sop_connect  */
17408348SEric.Yu@Sun.COM 	so_recvmsg,		/* sop_recvmsg  */
17418348SEric.Yu@Sun.COM 	so_sendmsg,		/* sop_sendmsg  */
17428348SEric.Yu@Sun.COM 	so_sendmblk,		/* sop_sendmblk */
17438348SEric.Yu@Sun.COM 	so_getpeername,		/* sop_getpeername */
17448348SEric.Yu@Sun.COM 	so_getsockname,		/* sop_getsockname */
17458348SEric.Yu@Sun.COM 	so_shutdown,		/* sop_shutdown */
17468348SEric.Yu@Sun.COM 	so_getsockopt,		/* sop_getsockopt */
17478348SEric.Yu@Sun.COM 	so_setsockopt,		/* sop_setsockopt */
17488348SEric.Yu@Sun.COM 	so_ioctl,		/* sop_ioctl    */
17498348SEric.Yu@Sun.COM 	so_poll,		/* sop_poll	*/
17508348SEric.Yu@Sun.COM 	so_close,		/* sop_close */
17518348SEric.Yu@Sun.COM };
17528348SEric.Yu@Sun.COM 
17538348SEric.Yu@Sun.COM sock_upcalls_t so_upcalls = {
17548348SEric.Yu@Sun.COM 	so_newconn,
17558348SEric.Yu@Sun.COM 	so_connected,
17568348SEric.Yu@Sun.COM 	so_disconnected,
17578348SEric.Yu@Sun.COM 	so_opctl,
17588348SEric.Yu@Sun.COM 	so_queue_msg,
17598348SEric.Yu@Sun.COM 	so_set_prop,
17608348SEric.Yu@Sun.COM 	so_txq_full,
17618348SEric.Yu@Sun.COM 	so_signal_oob,
17628348SEric.Yu@Sun.COM 	so_zcopy_notify,
17638348SEric.Yu@Sun.COM 	so_set_error
17648348SEric.Yu@Sun.COM };
1765