xref: /onnv-gate/usr/src/uts/common/fs/sockfs/sockcommon_sops.c (revision 8348:4137e18bfaf0)
1*8348SEric.Yu@Sun.COM /*
2*8348SEric.Yu@Sun.COM  * CDDL HEADER START
3*8348SEric.Yu@Sun.COM  *
4*8348SEric.Yu@Sun.COM  * The contents of this file are subject to the terms of the
5*8348SEric.Yu@Sun.COM  * Common Development and Distribution License (the "License").
6*8348SEric.Yu@Sun.COM  * You may not use this file except in compliance with the License.
7*8348SEric.Yu@Sun.COM  *
8*8348SEric.Yu@Sun.COM  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*8348SEric.Yu@Sun.COM  * or http://www.opensolaris.org/os/licensing.
10*8348SEric.Yu@Sun.COM  * See the License for the specific language governing permissions
11*8348SEric.Yu@Sun.COM  * and limitations under the License.
12*8348SEric.Yu@Sun.COM  *
13*8348SEric.Yu@Sun.COM  * When distributing Covered Code, include this CDDL HEADER in each
14*8348SEric.Yu@Sun.COM  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*8348SEric.Yu@Sun.COM  * If applicable, add the following below this CDDL HEADER, with the
16*8348SEric.Yu@Sun.COM  * fields enclosed by brackets "[]" replaced with your own identifying
17*8348SEric.Yu@Sun.COM  * information: Portions Copyright [yyyy] [name of copyright owner]
18*8348SEric.Yu@Sun.COM  *
19*8348SEric.Yu@Sun.COM  * CDDL HEADER END
20*8348SEric.Yu@Sun.COM  */
21*8348SEric.Yu@Sun.COM 
22*8348SEric.Yu@Sun.COM /*
23*8348SEric.Yu@Sun.COM  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24*8348SEric.Yu@Sun.COM  * Use is subject to license terms.
25*8348SEric.Yu@Sun.COM  */
26*8348SEric.Yu@Sun.COM 
27*8348SEric.Yu@Sun.COM #pragma ident	"@(#)sockcommon_sops.c	1.1	07/06/14 SMI"
28*8348SEric.Yu@Sun.COM 
29*8348SEric.Yu@Sun.COM #include <sys/types.h>
30*8348SEric.Yu@Sun.COM #include <sys/param.h>
31*8348SEric.Yu@Sun.COM #include <sys/systm.h>
32*8348SEric.Yu@Sun.COM #include <sys/sysmacros.h>
33*8348SEric.Yu@Sun.COM #include <sys/debug.h>
34*8348SEric.Yu@Sun.COM #include <sys/cmn_err.h>
35*8348SEric.Yu@Sun.COM 
36*8348SEric.Yu@Sun.COM #include <sys/stropts.h>
37*8348SEric.Yu@Sun.COM #include <sys/socket.h>
38*8348SEric.Yu@Sun.COM #include <sys/socketvar.h>
39*8348SEric.Yu@Sun.COM 
40*8348SEric.Yu@Sun.COM #define	_SUN_TPI_VERSION	2
41*8348SEric.Yu@Sun.COM #include <sys/tihdr.h>
42*8348SEric.Yu@Sun.COM #include <sys/sockio.h>
43*8348SEric.Yu@Sun.COM #include <sys/sodirect.h>
44*8348SEric.Yu@Sun.COM #include <sys/kmem_impl.h>
45*8348SEric.Yu@Sun.COM 
46*8348SEric.Yu@Sun.COM #include <sys/strsubr.h>
47*8348SEric.Yu@Sun.COM #include <sys/strsun.h>
48*8348SEric.Yu@Sun.COM #include <sys/ddi.h>
49*8348SEric.Yu@Sun.COM #include <netinet/in.h>
50*8348SEric.Yu@Sun.COM #include <inet/ip.h>
51*8348SEric.Yu@Sun.COM 
52*8348SEric.Yu@Sun.COM #include <fs/sockfs/sockcommon.h>
53*8348SEric.Yu@Sun.COM 
54*8348SEric.Yu@Sun.COM #include <sys/socket_proto.h>
55*8348SEric.Yu@Sun.COM 
56*8348SEric.Yu@Sun.COM #include <fs/sockfs/socktpi_impl.h>
57*8348SEric.Yu@Sun.COM #include <sys/tihdr.h>
58*8348SEric.Yu@Sun.COM #include <fs/sockfs/nl7c.h>
59*8348SEric.Yu@Sun.COM #include <inet/kssl/ksslapi.h>
60*8348SEric.Yu@Sun.COM 
61*8348SEric.Yu@Sun.COM 
62*8348SEric.Yu@Sun.COM extern int xnet_skip_checks;
63*8348SEric.Yu@Sun.COM extern int xnet_check_print;
64*8348SEric.Yu@Sun.COM 
65*8348SEric.Yu@Sun.COM static void so_queue_oob(sock_upper_handle_t, mblk_t *, size_t);
66*8348SEric.Yu@Sun.COM 
67*8348SEric.Yu@Sun.COM 
/*
 * Placeholder accept operation: every argument is ignored and the call
 * always fails with EOPNOTSUPP.
 */
/*ARGSUSED*/
int
so_accept_notsupp(struct sonode *lso, int fflag,
    struct cred *cr, struct sonode **nsop)
{
	return (EOPNOTSUPP);
}
75*8348SEric.Yu@Sun.COM 
/*
 * Placeholder listen operation: every argument is ignored and the call
 * always fails with EOPNOTSUPP.
 */
/*ARGSUSED*/
int
so_listen_notsupp(struct sonode *so, int backlog, struct cred *cr)
{
	return (EOPNOTSUPP);
}
82*8348SEric.Yu@Sun.COM 
/*
 * Placeholder getsockname operation: every argument is ignored (neither
 * sa nor len is written) and the call always fails with EOPNOTSUPP.
 */
/*ARGSUSED*/
int
so_getsockname_notsupp(struct sonode *so, struct sockaddr *sa,
    socklen_t *len, struct cred *cr)
{
	return (EOPNOTSUPP);
}
90*8348SEric.Yu@Sun.COM 
91*8348SEric.Yu@Sun.COM /*ARGSUSED*/
92*8348SEric.Yu@Sun.COM int
93*8348SEric.Yu@Sun.COM so_getpeername_notsupp(struct sonode *so, struct sockaddr *addr,
94*8348SEric.Yu@Sun.COM     socklen_t *addrlen, boolean_t accept, struct cred *cr)
95*8348SEric.Yu@Sun.COM {
96*8348SEric.Yu@Sun.COM 	return (EOPNOTSUPP);
97*8348SEric.Yu@Sun.COM }
98*8348SEric.Yu@Sun.COM 
/*
 * Placeholder shutdown operation: every argument is ignored and the call
 * always fails with EOPNOTSUPP.
 */
/*ARGSUSED*/
int
so_shutdown_notsupp(struct sonode *so, int how, struct cred *cr)
{
	return (EOPNOTSUPP);
}
105*8348SEric.Yu@Sun.COM 
106*8348SEric.Yu@Sun.COM /*ARGSUSED*/
107*8348SEric.Yu@Sun.COM int
108*8348SEric.Yu@Sun.COM so_sendmblk_notsupp(struct sonode *so, struct msghdr *msg, int fflag,
109*8348SEric.Yu@Sun.COM     struct cred *cr, mblk_t **mpp)
110*8348SEric.Yu@Sun.COM {
111*8348SEric.Yu@Sun.COM 	return (EOPNOTSUPP);
112*8348SEric.Yu@Sun.COM }
113*8348SEric.Yu@Sun.COM 
114*8348SEric.Yu@Sun.COM /*
115*8348SEric.Yu@Sun.COM  * Generic Socket Ops
116*8348SEric.Yu@Sun.COM  */
117*8348SEric.Yu@Sun.COM 
/*
 * Generic init operation.  Passes so, pso, flags and cr on to
 * socket_init_common() and returns whatever that routine returns.
 */
/* ARGSUSED */
int
so_init(struct sonode *so, struct sonode *pso, struct cred *cr, int flags)
{
	int rval;

	rval = socket_init_common(so, pso, flags, cr);

	return (rval);
}
124*8348SEric.Yu@Sun.COM 
125*8348SEric.Yu@Sun.COM int
126*8348SEric.Yu@Sun.COM so_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
127*8348SEric.Yu@Sun.COM     int flags, struct cred *cr)
128*8348SEric.Yu@Sun.COM {
129*8348SEric.Yu@Sun.COM 	int error;
130*8348SEric.Yu@Sun.COM 
131*8348SEric.Yu@Sun.COM 	SO_BLOCK_FALLBACK(so, SOP_BIND(so, name, namelen, flags, cr));
132*8348SEric.Yu@Sun.COM 
133*8348SEric.Yu@Sun.COM 	ASSERT(flags == _SOBIND_XPG4_2 || flags == _SOBIND_SOCKBSD);
134*8348SEric.Yu@Sun.COM 
135*8348SEric.Yu@Sun.COM 	/* X/Open requires this check */
136*8348SEric.Yu@Sun.COM 	if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
137*8348SEric.Yu@Sun.COM 		if (xnet_check_print) {
138*8348SEric.Yu@Sun.COM 			printf("sockfs: X/Open bind state check "
139*8348SEric.Yu@Sun.COM 			    "caused EINVAL\n");
140*8348SEric.Yu@Sun.COM 		}
141*8348SEric.Yu@Sun.COM 		error = EINVAL;
142*8348SEric.Yu@Sun.COM 		goto done;
143*8348SEric.Yu@Sun.COM 	}
144*8348SEric.Yu@Sun.COM 
145*8348SEric.Yu@Sun.COM 	/*
146*8348SEric.Yu@Sun.COM 	 * a bind to a NULL address is interpreted as unbind. So just
147*8348SEric.Yu@Sun.COM 	 * do the downcall.
148*8348SEric.Yu@Sun.COM 	 */
149*8348SEric.Yu@Sun.COM 	if (name == NULL)
150*8348SEric.Yu@Sun.COM 		goto dobind;
151*8348SEric.Yu@Sun.COM 
152*8348SEric.Yu@Sun.COM 	switch (so->so_family) {
153*8348SEric.Yu@Sun.COM 	case AF_INET:
154*8348SEric.Yu@Sun.COM 		if ((size_t)namelen != sizeof (sin_t)) {
155*8348SEric.Yu@Sun.COM 			error = name->sa_family != so->so_family ?
156*8348SEric.Yu@Sun.COM 			    EAFNOSUPPORT : EINVAL;
157*8348SEric.Yu@Sun.COM 			eprintsoline(so, error);
158*8348SEric.Yu@Sun.COM 			goto done;
159*8348SEric.Yu@Sun.COM 		}
160*8348SEric.Yu@Sun.COM 
161*8348SEric.Yu@Sun.COM 		if ((flags & _SOBIND_XPG4_2) &&
162*8348SEric.Yu@Sun.COM 		    (name->sa_family != so->so_family)) {
163*8348SEric.Yu@Sun.COM 			/*
164*8348SEric.Yu@Sun.COM 			 * This check has to be made for X/Open
165*8348SEric.Yu@Sun.COM 			 * sockets however application failures have
166*8348SEric.Yu@Sun.COM 			 * been observed when it is applied to
167*8348SEric.Yu@Sun.COM 			 * all sockets.
168*8348SEric.Yu@Sun.COM 			 */
169*8348SEric.Yu@Sun.COM 			error = EAFNOSUPPORT;
170*8348SEric.Yu@Sun.COM 			eprintsoline(so, error);
171*8348SEric.Yu@Sun.COM 			goto done;
172*8348SEric.Yu@Sun.COM 		}
173*8348SEric.Yu@Sun.COM 		/*
174*8348SEric.Yu@Sun.COM 		 * Force a zero sa_family to match so_family.
175*8348SEric.Yu@Sun.COM 		 *
176*8348SEric.Yu@Sun.COM 		 * Some programs like inetd(1M) don't set the
177*8348SEric.Yu@Sun.COM 		 * family field. Other programs leave
178*8348SEric.Yu@Sun.COM 		 * sin_family set to garbage - SunOS 4.X does
179*8348SEric.Yu@Sun.COM 		 * not check the family field on a bind.
180*8348SEric.Yu@Sun.COM 		 * We use the family field that
181*8348SEric.Yu@Sun.COM 		 * was passed in to the socket() call.
182*8348SEric.Yu@Sun.COM 		 */
183*8348SEric.Yu@Sun.COM 		name->sa_family = so->so_family;
184*8348SEric.Yu@Sun.COM 		break;
185*8348SEric.Yu@Sun.COM 
186*8348SEric.Yu@Sun.COM 	case AF_INET6: {
187*8348SEric.Yu@Sun.COM #ifdef DEBUG
188*8348SEric.Yu@Sun.COM 		sin6_t *sin6 = (sin6_t *)name;
189*8348SEric.Yu@Sun.COM #endif
190*8348SEric.Yu@Sun.COM 		if ((size_t)namelen != sizeof (sin6_t)) {
191*8348SEric.Yu@Sun.COM 			error = name->sa_family != so->so_family ?
192*8348SEric.Yu@Sun.COM 			    EAFNOSUPPORT : EINVAL;
193*8348SEric.Yu@Sun.COM 			eprintsoline(so, error);
194*8348SEric.Yu@Sun.COM 			goto done;
195*8348SEric.Yu@Sun.COM 		}
196*8348SEric.Yu@Sun.COM 
197*8348SEric.Yu@Sun.COM 		if (name->sa_family != so->so_family) {
198*8348SEric.Yu@Sun.COM 			/*
199*8348SEric.Yu@Sun.COM 			 * With IPv6 we require the family to match
200*8348SEric.Yu@Sun.COM 			 * unlike in IPv4.
201*8348SEric.Yu@Sun.COM 			 */
202*8348SEric.Yu@Sun.COM 			error = EAFNOSUPPORT;
203*8348SEric.Yu@Sun.COM 			eprintsoline(so, error);
204*8348SEric.Yu@Sun.COM 			goto done;
205*8348SEric.Yu@Sun.COM 		}
206*8348SEric.Yu@Sun.COM #ifdef DEBUG
207*8348SEric.Yu@Sun.COM 		/*
208*8348SEric.Yu@Sun.COM 		 * Verify that apps don't forget to clear
209*8348SEric.Yu@Sun.COM 		 * sin6_scope_id etc
210*8348SEric.Yu@Sun.COM 		 */
211*8348SEric.Yu@Sun.COM 		if (sin6->sin6_scope_id != 0 &&
212*8348SEric.Yu@Sun.COM 		    !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
213*8348SEric.Yu@Sun.COM 			zcmn_err(getzoneid(), CE_WARN,
214*8348SEric.Yu@Sun.COM 			    "bind with uninitialized sin6_scope_id "
215*8348SEric.Yu@Sun.COM 			    "(%d) on socket. Pid = %d\n",
216*8348SEric.Yu@Sun.COM 			    (int)sin6->sin6_scope_id,
217*8348SEric.Yu@Sun.COM 			    (int)curproc->p_pid);
218*8348SEric.Yu@Sun.COM 		}
219*8348SEric.Yu@Sun.COM 		if (sin6->__sin6_src_id != 0) {
220*8348SEric.Yu@Sun.COM 			zcmn_err(getzoneid(), CE_WARN,
221*8348SEric.Yu@Sun.COM 			    "bind with uninitialized __sin6_src_id "
222*8348SEric.Yu@Sun.COM 			    "(%d) on socket. Pid = %d\n",
223*8348SEric.Yu@Sun.COM 			    (int)sin6->__sin6_src_id,
224*8348SEric.Yu@Sun.COM 			    (int)curproc->p_pid);
225*8348SEric.Yu@Sun.COM 		}
226*8348SEric.Yu@Sun.COM #endif /* DEBUG */
227*8348SEric.Yu@Sun.COM 
228*8348SEric.Yu@Sun.COM 		break;
229*8348SEric.Yu@Sun.COM 	}
230*8348SEric.Yu@Sun.COM 	default:
231*8348SEric.Yu@Sun.COM 		/* Just pass the request to the protocol */
232*8348SEric.Yu@Sun.COM 		goto dobind;
233*8348SEric.Yu@Sun.COM 	}
234*8348SEric.Yu@Sun.COM 
235*8348SEric.Yu@Sun.COM 	/*
236*8348SEric.Yu@Sun.COM 	 * First we check if either NCA or KSSL has been enabled for
237*8348SEric.Yu@Sun.COM 	 * the requested address, and if so, we fall back to TPI.
238*8348SEric.Yu@Sun.COM 	 * If neither of those two services are enabled, then we just
239*8348SEric.Yu@Sun.COM 	 * pass the request to the protocol.
240*8348SEric.Yu@Sun.COM 	 *
241*8348SEric.Yu@Sun.COM 	 * Note that KSSL can only be enabled on a socket if NCA is NOT
242*8348SEric.Yu@Sun.COM 	 * enabled for that socket, hence the else-statement below.
243*8348SEric.Yu@Sun.COM 	 */
244*8348SEric.Yu@Sun.COM 	if (nl7c_enabled && ((so->so_family == AF_INET ||
245*8348SEric.Yu@Sun.COM 	    so->so_family == AF_INET6) &&
246*8348SEric.Yu@Sun.COM 	    nl7c_lookup_addr(name, namelen) != NULL)) {
247*8348SEric.Yu@Sun.COM 		/*
248*8348SEric.Yu@Sun.COM 		 * NL7C is not supported in non-global zones,
249*8348SEric.Yu@Sun.COM 		 * we enforce this restriction here.
250*8348SEric.Yu@Sun.COM 		 */
251*8348SEric.Yu@Sun.COM 		if (so->so_zoneid == GLOBAL_ZONEID) {
252*8348SEric.Yu@Sun.COM 			/* NCA should be used, so fall back to TPI */
253*8348SEric.Yu@Sun.COM 			error = so_tpi_fallback(so, cr);
254*8348SEric.Yu@Sun.COM 			SO_UNBLOCK_FALLBACK(so);
255*8348SEric.Yu@Sun.COM 			if (error)
256*8348SEric.Yu@Sun.COM 				return (error);
257*8348SEric.Yu@Sun.COM 			else
258*8348SEric.Yu@Sun.COM 				return (SOP_BIND(so, name, namelen, flags, cr));
259*8348SEric.Yu@Sun.COM 		}
260*8348SEric.Yu@Sun.COM 	} else if (so->so_type == SOCK_STREAM) {
261*8348SEric.Yu@Sun.COM 		/* Check if KSSL has been configured for this address */
262*8348SEric.Yu@Sun.COM 		kssl_ent_t ent;
263*8348SEric.Yu@Sun.COM 		kssl_endpt_type_t type;
264*8348SEric.Yu@Sun.COM 		struct T_bind_req bind_req;
265*8348SEric.Yu@Sun.COM 		mblk_t *mp;
266*8348SEric.Yu@Sun.COM 
267*8348SEric.Yu@Sun.COM 		/*
268*8348SEric.Yu@Sun.COM 		 * TODO: Check with KSSL team if we could add a function call
269*8348SEric.Yu@Sun.COM 		 * that only queries whether KSSL is enabled for the given
270*8348SEric.Yu@Sun.COM 		 * address.
271*8348SEric.Yu@Sun.COM 		 */
272*8348SEric.Yu@Sun.COM 		bind_req.PRIM_type = T_BIND_REQ;
273*8348SEric.Yu@Sun.COM 		bind_req.ADDR_length = namelen;
274*8348SEric.Yu@Sun.COM 		bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req);
275*8348SEric.Yu@Sun.COM 		mp = soallocproto2(&bind_req, sizeof (bind_req),
276*8348SEric.Yu@Sun.COM 		    name, namelen, 0, _ALLOC_SLEEP);
277*8348SEric.Yu@Sun.COM 
278*8348SEric.Yu@Sun.COM 		type = kssl_check_proxy(mp, so, &ent);
279*8348SEric.Yu@Sun.COM 		freemsg(mp);
280*8348SEric.Yu@Sun.COM 
281*8348SEric.Yu@Sun.COM 		if (type != KSSL_NO_PROXY) {
282*8348SEric.Yu@Sun.COM 			/*
283*8348SEric.Yu@Sun.COM 			 * KSSL has been configured for this address, so
284*8348SEric.Yu@Sun.COM 			 * we must fall back to TPI.
285*8348SEric.Yu@Sun.COM 			 */
286*8348SEric.Yu@Sun.COM 			kssl_release_ent(ent, so, type);
287*8348SEric.Yu@Sun.COM 			error = so_tpi_fallback(so, cr);
288*8348SEric.Yu@Sun.COM 			SO_UNBLOCK_FALLBACK(so);
289*8348SEric.Yu@Sun.COM 			if (error)
290*8348SEric.Yu@Sun.COM 				return (error);
291*8348SEric.Yu@Sun.COM 			else
292*8348SEric.Yu@Sun.COM 				return (SOP_BIND(so, name, namelen, flags, cr));
293*8348SEric.Yu@Sun.COM 		}
294*8348SEric.Yu@Sun.COM 	}
295*8348SEric.Yu@Sun.COM 
296*8348SEric.Yu@Sun.COM dobind:
297*8348SEric.Yu@Sun.COM 	error = (*so->so_downcalls->sd_bind)
298*8348SEric.Yu@Sun.COM 	    (so->so_proto_handle, name, namelen, cr);
299*8348SEric.Yu@Sun.COM done:
300*8348SEric.Yu@Sun.COM 	SO_UNBLOCK_FALLBACK(so);
301*8348SEric.Yu@Sun.COM 
302*8348SEric.Yu@Sun.COM 	return (error);
303*8348SEric.Yu@Sun.COM }
304*8348SEric.Yu@Sun.COM 
305*8348SEric.Yu@Sun.COM int
306*8348SEric.Yu@Sun.COM so_listen(struct sonode *so, int backlog, struct cred *cr)
307*8348SEric.Yu@Sun.COM {
308*8348SEric.Yu@Sun.COM 	int	error = 0;
309*8348SEric.Yu@Sun.COM 
310*8348SEric.Yu@Sun.COM 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
311*8348SEric.Yu@Sun.COM 	SO_BLOCK_FALLBACK(so, SOP_LISTEN(so, backlog, cr));
312*8348SEric.Yu@Sun.COM 
313*8348SEric.Yu@Sun.COM 	error = (*so->so_downcalls->sd_listen)(so->so_proto_handle, backlog,
314*8348SEric.Yu@Sun.COM 	    cr);
315*8348SEric.Yu@Sun.COM 
316*8348SEric.Yu@Sun.COM 	SO_UNBLOCK_FALLBACK(so);
317*8348SEric.Yu@Sun.COM 
318*8348SEric.Yu@Sun.COM 	return (error);
319*8348SEric.Yu@Sun.COM }
320*8348SEric.Yu@Sun.COM 
321*8348SEric.Yu@Sun.COM 
322*8348SEric.Yu@Sun.COM int
323*8348SEric.Yu@Sun.COM so_connect(struct sonode *so, const struct sockaddr *name,
324*8348SEric.Yu@Sun.COM     socklen_t namelen, int fflag, int flags, struct cred *cr)
325*8348SEric.Yu@Sun.COM {
326*8348SEric.Yu@Sun.COM 	int error = 0;
327*8348SEric.Yu@Sun.COM 	sock_connid_t id;
328*8348SEric.Yu@Sun.COM 
329*8348SEric.Yu@Sun.COM 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
330*8348SEric.Yu@Sun.COM 	SO_BLOCK_FALLBACK(so, SOP_CONNECT(so, name, namelen, fflag, flags, cr));
331*8348SEric.Yu@Sun.COM 
332*8348SEric.Yu@Sun.COM 	/*
333*8348SEric.Yu@Sun.COM 	 * If there is a pending error, return error
334*8348SEric.Yu@Sun.COM 	 * This can happen if a non blocking operation caused an error.
335*8348SEric.Yu@Sun.COM 	 */
336*8348SEric.Yu@Sun.COM 
337*8348SEric.Yu@Sun.COM 	if (so->so_error != 0) {
338*8348SEric.Yu@Sun.COM 		mutex_enter(&so->so_lock);
339*8348SEric.Yu@Sun.COM 		error = sogeterr(so, B_TRUE);
340*8348SEric.Yu@Sun.COM 		mutex_exit(&so->so_lock);
341*8348SEric.Yu@Sun.COM 		if (error != 0)
342*8348SEric.Yu@Sun.COM 			goto done;
343*8348SEric.Yu@Sun.COM 	}
344*8348SEric.Yu@Sun.COM 
345*8348SEric.Yu@Sun.COM 	error = (*so->so_downcalls->sd_connect)(so->so_proto_handle,
346*8348SEric.Yu@Sun.COM 	    name, namelen, &id, cr);
347*8348SEric.Yu@Sun.COM 
348*8348SEric.Yu@Sun.COM 	if (error == EINPROGRESS)
349*8348SEric.Yu@Sun.COM 		error = so_wait_connected(so, fflag & (FNONBLOCK|FNDELAY), id);
350*8348SEric.Yu@Sun.COM 
351*8348SEric.Yu@Sun.COM done:
352*8348SEric.Yu@Sun.COM 	SO_UNBLOCK_FALLBACK(so);
353*8348SEric.Yu@Sun.COM 	return (error);
354*8348SEric.Yu@Sun.COM }
355*8348SEric.Yu@Sun.COM 
356*8348SEric.Yu@Sun.COM /*ARGSUSED*/
357*8348SEric.Yu@Sun.COM int
358*8348SEric.Yu@Sun.COM so_accept(struct sonode *so, int fflag, struct cred *cr, struct sonode **nsop)
359*8348SEric.Yu@Sun.COM {
360*8348SEric.Yu@Sun.COM 	int error = 0;
361*8348SEric.Yu@Sun.COM 	struct sonode *nso;
362*8348SEric.Yu@Sun.COM 
363*8348SEric.Yu@Sun.COM 	*nsop = NULL;
364*8348SEric.Yu@Sun.COM 
365*8348SEric.Yu@Sun.COM 	SO_BLOCK_FALLBACK(so, SOP_ACCEPT(so, fflag, cr, nsop));
366*8348SEric.Yu@Sun.COM 	if ((so->so_state & SS_ACCEPTCONN) == 0) {
367*8348SEric.Yu@Sun.COM 		SO_UNBLOCK_FALLBACK(so);
368*8348SEric.Yu@Sun.COM 		return ((so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) ?
369*8348SEric.Yu@Sun.COM 		    EOPNOTSUPP : EINVAL);
370*8348SEric.Yu@Sun.COM 	}
371*8348SEric.Yu@Sun.COM 
372*8348SEric.Yu@Sun.COM 	if ((error = so_acceptq_dequeue(so, (fflag & (FNONBLOCK|FNDELAY)),
373*8348SEric.Yu@Sun.COM 	    &nso)) == 0) {
374*8348SEric.Yu@Sun.COM 		ASSERT(nso != NULL);
375*8348SEric.Yu@Sun.COM 
376*8348SEric.Yu@Sun.COM 		/* finish the accept */
377*8348SEric.Yu@Sun.COM 		error = (*so->so_downcalls->sd_accept)(so->so_proto_handle,
378*8348SEric.Yu@Sun.COM 		    nso->so_proto_handle, (sock_upper_handle_t)nso, cr);
379*8348SEric.Yu@Sun.COM 		if (error != 0) {
380*8348SEric.Yu@Sun.COM 			(void) socket_close(nso, 0, cr);
381*8348SEric.Yu@Sun.COM 			socket_destroy(nso);
382*8348SEric.Yu@Sun.COM 		} else {
383*8348SEric.Yu@Sun.COM 			*nsop = nso;
384*8348SEric.Yu@Sun.COM 		}
385*8348SEric.Yu@Sun.COM 	}
386*8348SEric.Yu@Sun.COM 
387*8348SEric.Yu@Sun.COM 	SO_UNBLOCK_FALLBACK(so);
388*8348SEric.Yu@Sun.COM 	return (error);
389*8348SEric.Yu@Sun.COM }
390*8348SEric.Yu@Sun.COM 
391*8348SEric.Yu@Sun.COM int
392*8348SEric.Yu@Sun.COM so_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
393*8348SEric.Yu@Sun.COM     struct cred *cr)
394*8348SEric.Yu@Sun.COM {
395*8348SEric.Yu@Sun.COM 	int error, flags;
396*8348SEric.Yu@Sun.COM 	boolean_t dontblock;
397*8348SEric.Yu@Sun.COM 	ssize_t orig_resid;
398*8348SEric.Yu@Sun.COM 	mblk_t  *mp;
399*8348SEric.Yu@Sun.COM 
400*8348SEric.Yu@Sun.COM 	SO_BLOCK_FALLBACK(so, SOP_SENDMSG(so, msg, uiop, cr));
401*8348SEric.Yu@Sun.COM 
402*8348SEric.Yu@Sun.COM 	flags = msg->msg_flags;
403*8348SEric.Yu@Sun.COM 	error = 0;
404*8348SEric.Yu@Sun.COM 	dontblock = (flags & MSG_DONTWAIT) ||
405*8348SEric.Yu@Sun.COM 	    (uiop->uio_fmode & (FNONBLOCK|FNDELAY));
406*8348SEric.Yu@Sun.COM 
407*8348SEric.Yu@Sun.COM 	if (!(flags & MSG_XPG4_2) && msg->msg_controllen != 0) {
408*8348SEric.Yu@Sun.COM 		/*
409*8348SEric.Yu@Sun.COM 		 * Old way of passing fd's is not supported
410*8348SEric.Yu@Sun.COM 		 */
411*8348SEric.Yu@Sun.COM 		SO_UNBLOCK_FALLBACK(so);
412*8348SEric.Yu@Sun.COM 		return (EOPNOTSUPP);
413*8348SEric.Yu@Sun.COM 	}
414*8348SEric.Yu@Sun.COM 
415*8348SEric.Yu@Sun.COM 	if ((so->so_mode & SM_ATOMIC) &&
416*8348SEric.Yu@Sun.COM 	    uiop->uio_resid > so->so_proto_props.sopp_maxpsz &&
417*8348SEric.Yu@Sun.COM 	    so->so_proto_props.sopp_maxpsz != -1) {
418*8348SEric.Yu@Sun.COM 		SO_UNBLOCK_FALLBACK(so);
419*8348SEric.Yu@Sun.COM 		return (EMSGSIZE);
420*8348SEric.Yu@Sun.COM 	}
421*8348SEric.Yu@Sun.COM 
422*8348SEric.Yu@Sun.COM 	/*
423*8348SEric.Yu@Sun.COM 	 * For atomic sends we will only do one iteration.
424*8348SEric.Yu@Sun.COM 	 */
425*8348SEric.Yu@Sun.COM 	do {
426*8348SEric.Yu@Sun.COM 		if (so->so_state & SS_CANTSENDMORE) {
427*8348SEric.Yu@Sun.COM 			error = EPIPE;
428*8348SEric.Yu@Sun.COM 			break;
429*8348SEric.Yu@Sun.COM 		}
430*8348SEric.Yu@Sun.COM 
431*8348SEric.Yu@Sun.COM 		if (so->so_error != 0) {
432*8348SEric.Yu@Sun.COM 			mutex_enter(&so->so_lock);
433*8348SEric.Yu@Sun.COM 			error = sogeterr(so, B_TRUE);
434*8348SEric.Yu@Sun.COM 			mutex_exit(&so->so_lock);
435*8348SEric.Yu@Sun.COM 			if (error != 0)
436*8348SEric.Yu@Sun.COM 				break;
437*8348SEric.Yu@Sun.COM 		}
438*8348SEric.Yu@Sun.COM 
439*8348SEric.Yu@Sun.COM 		/*
440*8348SEric.Yu@Sun.COM 		 * Send down OOB messages even if the send path is being
441*8348SEric.Yu@Sun.COM 		 * flow controlled (assuming the protocol supports OOB data).
442*8348SEric.Yu@Sun.COM 		 */
443*8348SEric.Yu@Sun.COM 		if (flags & MSG_OOB) {
444*8348SEric.Yu@Sun.COM 			if ((so->so_mode & SM_EXDATA) == 0) {
445*8348SEric.Yu@Sun.COM 				error = EOPNOTSUPP;
446*8348SEric.Yu@Sun.COM 				break;
447*8348SEric.Yu@Sun.COM 			}
448*8348SEric.Yu@Sun.COM 		} else if (so->so_snd_qfull) {
449*8348SEric.Yu@Sun.COM 			/*
450*8348SEric.Yu@Sun.COM 			 * Need to wait until the protocol is ready to receive
451*8348SEric.Yu@Sun.COM 			 * more data for transmission.
452*8348SEric.Yu@Sun.COM 			 */
453*8348SEric.Yu@Sun.COM 			if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
454*8348SEric.Yu@Sun.COM 				break;
455*8348SEric.Yu@Sun.COM 		}
456*8348SEric.Yu@Sun.COM 
457*8348SEric.Yu@Sun.COM 		/*
458*8348SEric.Yu@Sun.COM 		 * Time to send data to the protocol. We either copy the
459*8348SEric.Yu@Sun.COM 		 * data into mblks or pass the uio directly to the protocol.
460*8348SEric.Yu@Sun.COM 		 * We decide what to do based on the available down calls.
461*8348SEric.Yu@Sun.COM 		 */
462*8348SEric.Yu@Sun.COM 		if (so->so_downcalls->sd_send_uio != NULL) {
463*8348SEric.Yu@Sun.COM 			error = (*so->so_downcalls->sd_send_uio)
464*8348SEric.Yu@Sun.COM 			    (so->so_proto_handle, uiop, msg, cr);
465*8348SEric.Yu@Sun.COM 			if (error != 0)
466*8348SEric.Yu@Sun.COM 				break;
467*8348SEric.Yu@Sun.COM 		} else {
468*8348SEric.Yu@Sun.COM 			/* save the resid in case of failure */
469*8348SEric.Yu@Sun.COM 			orig_resid = uiop->uio_resid;
470*8348SEric.Yu@Sun.COM 
471*8348SEric.Yu@Sun.COM 			if ((mp = socopyinuio(uiop,
472*8348SEric.Yu@Sun.COM 			    so->so_proto_props.sopp_maxpsz,
473*8348SEric.Yu@Sun.COM 			    so->so_proto_props.sopp_wroff,
474*8348SEric.Yu@Sun.COM 			    so->so_proto_props.sopp_maxblk,
475*8348SEric.Yu@Sun.COM 			    so->so_proto_props.sopp_tail, &error)) == NULL) {
476*8348SEric.Yu@Sun.COM 				break;
477*8348SEric.Yu@Sun.COM 			}
478*8348SEric.Yu@Sun.COM 			ASSERT(uiop->uio_resid >= 0);
479*8348SEric.Yu@Sun.COM 
480*8348SEric.Yu@Sun.COM 			error = (*so->so_downcalls->sd_send)
481*8348SEric.Yu@Sun.COM 			    (so->so_proto_handle, mp, msg, cr);
482*8348SEric.Yu@Sun.COM 			if (error != 0) {
483*8348SEric.Yu@Sun.COM 				/*
484*8348SEric.Yu@Sun.COM 				 * The send failed. We do not have to free the
485*8348SEric.Yu@Sun.COM 				 * mblks, because that is the protocol's
486*8348SEric.Yu@Sun.COM 				 * responsibility. However, uio_resid must
487*8348SEric.Yu@Sun.COM 				 * remain accurate, so adjust that here.
488*8348SEric.Yu@Sun.COM 				 */
489*8348SEric.Yu@Sun.COM 				uiop->uio_resid = orig_resid;
490*8348SEric.Yu@Sun.COM 					break;
491*8348SEric.Yu@Sun.COM 			}
492*8348SEric.Yu@Sun.COM 		}
493*8348SEric.Yu@Sun.COM 	} while (uiop->uio_resid > 0);
494*8348SEric.Yu@Sun.COM 
495*8348SEric.Yu@Sun.COM 	SO_UNBLOCK_FALLBACK(so);
496*8348SEric.Yu@Sun.COM 
497*8348SEric.Yu@Sun.COM 	return (error);
498*8348SEric.Yu@Sun.COM }
499*8348SEric.Yu@Sun.COM 
500*8348SEric.Yu@Sun.COM int
501*8348SEric.Yu@Sun.COM so_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag,
502*8348SEric.Yu@Sun.COM     struct cred *cr, mblk_t **mpp)
503*8348SEric.Yu@Sun.COM {
504*8348SEric.Yu@Sun.COM 	int error;
505*8348SEric.Yu@Sun.COM 	boolean_t dontblock;
506*8348SEric.Yu@Sun.COM 	size_t size;
507*8348SEric.Yu@Sun.COM 	mblk_t *mp = *mpp;
508*8348SEric.Yu@Sun.COM 
509*8348SEric.Yu@Sun.COM 	SO_BLOCK_FALLBACK(so, SOP_SENDMBLK(so, msg, fflag, cr, mpp));
510*8348SEric.Yu@Sun.COM 
511*8348SEric.Yu@Sun.COM 	error = 0;
512*8348SEric.Yu@Sun.COM 	dontblock = (msg->msg_flags & MSG_DONTWAIT) ||
513*8348SEric.Yu@Sun.COM 	    (fflag & (FNONBLOCK|FNDELAY));
514*8348SEric.Yu@Sun.COM 	size = msgdsize(mp);
515*8348SEric.Yu@Sun.COM 
516*8348SEric.Yu@Sun.COM 	if (so->so_downcalls->sd_send == NULL) {
517*8348SEric.Yu@Sun.COM 		SO_UNBLOCK_FALLBACK(so);
518*8348SEric.Yu@Sun.COM 		return (EOPNOTSUPP);
519*8348SEric.Yu@Sun.COM 	}
520*8348SEric.Yu@Sun.COM 
521*8348SEric.Yu@Sun.COM 	if ((so->so_mode & SM_ATOMIC) &&
522*8348SEric.Yu@Sun.COM 	    size > so->so_proto_props.sopp_maxpsz &&
523*8348SEric.Yu@Sun.COM 	    so->so_proto_props.sopp_maxpsz != -1) {
524*8348SEric.Yu@Sun.COM 		SO_UNBLOCK_FALLBACK(so);
525*8348SEric.Yu@Sun.COM 		return (EMSGSIZE);
526*8348SEric.Yu@Sun.COM 	}
527*8348SEric.Yu@Sun.COM 
528*8348SEric.Yu@Sun.COM 	while (mp != NULL) {
529*8348SEric.Yu@Sun.COM 		mblk_t *nmp, *last_mblk;
530*8348SEric.Yu@Sun.COM 		size_t mlen;
531*8348SEric.Yu@Sun.COM 
532*8348SEric.Yu@Sun.COM 		if (so->so_state & SS_CANTSENDMORE) {
533*8348SEric.Yu@Sun.COM 			error = EPIPE;
534*8348SEric.Yu@Sun.COM 			break;
535*8348SEric.Yu@Sun.COM 		}
536*8348SEric.Yu@Sun.COM 		if (so->so_error != 0) {
537*8348SEric.Yu@Sun.COM 			mutex_enter(&so->so_lock);
538*8348SEric.Yu@Sun.COM 			error = sogeterr(so, B_TRUE);
539*8348SEric.Yu@Sun.COM 			mutex_exit(&so->so_lock);
540*8348SEric.Yu@Sun.COM 			if (error != 0)
541*8348SEric.Yu@Sun.COM 				break;
542*8348SEric.Yu@Sun.COM 		}
543*8348SEric.Yu@Sun.COM 		if (so->so_snd_qfull) {
544*8348SEric.Yu@Sun.COM 			/*
545*8348SEric.Yu@Sun.COM 			 * Need to wait until the protocol is ready to receive
546*8348SEric.Yu@Sun.COM 			 * more data for transmission.
547*8348SEric.Yu@Sun.COM 			 */
548*8348SEric.Yu@Sun.COM 			if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
549*8348SEric.Yu@Sun.COM 				break;
550*8348SEric.Yu@Sun.COM 		}
551*8348SEric.Yu@Sun.COM 
552*8348SEric.Yu@Sun.COM 		/*
553*8348SEric.Yu@Sun.COM 		 * We only allow so_maxpsz of data to be sent down to
554*8348SEric.Yu@Sun.COM 		 * the protocol at time.
555*8348SEric.Yu@Sun.COM 		 */
556*8348SEric.Yu@Sun.COM 		mlen = MBLKL(mp);
557*8348SEric.Yu@Sun.COM 		nmp = mp->b_cont;
558*8348SEric.Yu@Sun.COM 		last_mblk = mp;
559*8348SEric.Yu@Sun.COM 		while (nmp != NULL) {
560*8348SEric.Yu@Sun.COM 			mlen += MBLKL(nmp);
561*8348SEric.Yu@Sun.COM 			if (mlen > so->so_proto_props.sopp_maxpsz) {
562*8348SEric.Yu@Sun.COM 				last_mblk->b_cont = NULL;
563*8348SEric.Yu@Sun.COM 				break;
564*8348SEric.Yu@Sun.COM 			}
565*8348SEric.Yu@Sun.COM 			last_mblk = nmp;
566*8348SEric.Yu@Sun.COM 			nmp = nmp->b_cont;
567*8348SEric.Yu@Sun.COM 		}
568*8348SEric.Yu@Sun.COM 
569*8348SEric.Yu@Sun.COM 		error = (*so->so_downcalls->sd_send)
570*8348SEric.Yu@Sun.COM 		    (so->so_proto_handle, mp, msg, cr);
571*8348SEric.Yu@Sun.COM 		if (error != 0) {
572*8348SEric.Yu@Sun.COM 			/*
573*8348SEric.Yu@Sun.COM 			 * The send failed. The protocol will free the mblks
574*8348SEric.Yu@Sun.COM 			 * that were sent down. Let the caller deal with the
575*8348SEric.Yu@Sun.COM 			 * rest.
576*8348SEric.Yu@Sun.COM 			 */
577*8348SEric.Yu@Sun.COM 			*mpp = nmp;
578*8348SEric.Yu@Sun.COM 			break;
579*8348SEric.Yu@Sun.COM 		}
580*8348SEric.Yu@Sun.COM 
581*8348SEric.Yu@Sun.COM 		*mpp = mp = nmp;
582*8348SEric.Yu@Sun.COM 	}
583*8348SEric.Yu@Sun.COM 
584*8348SEric.Yu@Sun.COM 	SO_UNBLOCK_FALLBACK(so);
585*8348SEric.Yu@Sun.COM 
586*8348SEric.Yu@Sun.COM 	return (error);
587*8348SEric.Yu@Sun.COM }
588*8348SEric.Yu@Sun.COM 
589*8348SEric.Yu@Sun.COM int
590*8348SEric.Yu@Sun.COM so_shutdown(struct sonode *so, int how, struct cred *cr)
591*8348SEric.Yu@Sun.COM {
592*8348SEric.Yu@Sun.COM 	int error;
593*8348SEric.Yu@Sun.COM 
594*8348SEric.Yu@Sun.COM 	SO_BLOCK_FALLBACK(so, SOP_SHUTDOWN(so, how, cr));
595*8348SEric.Yu@Sun.COM 
596*8348SEric.Yu@Sun.COM 	/*
597*8348SEric.Yu@Sun.COM 	 * SunOS 4.X has no check for datagram sockets.
598*8348SEric.Yu@Sun.COM 	 * 5.X checks that it is connected (ENOTCONN)
599*8348SEric.Yu@Sun.COM 	 * X/Open requires that we check the connected state.
600*8348SEric.Yu@Sun.COM 	 */
601*8348SEric.Yu@Sun.COM 	if (!(so->so_state & SS_ISCONNECTED)) {
602*8348SEric.Yu@Sun.COM 		if (!xnet_skip_checks) {
603*8348SEric.Yu@Sun.COM 			error = ENOTCONN;
604*8348SEric.Yu@Sun.COM 			if (xnet_check_print) {
605*8348SEric.Yu@Sun.COM 				printf("sockfs: X/Open shutdown check "
606*8348SEric.Yu@Sun.COM 				    "caused ENOTCONN\n");
607*8348SEric.Yu@Sun.COM 			}
608*8348SEric.Yu@Sun.COM 		}
609*8348SEric.Yu@Sun.COM 		goto done;
610*8348SEric.Yu@Sun.COM 	}
611*8348SEric.Yu@Sun.COM 
612*8348SEric.Yu@Sun.COM 	error = ((*so->so_downcalls->sd_shutdown)(so->so_proto_handle,
613*8348SEric.Yu@Sun.COM 	    how, cr));
614*8348SEric.Yu@Sun.COM 
615*8348SEric.Yu@Sun.COM 	/*
616*8348SEric.Yu@Sun.COM 	 * Protocol agreed to shutdown. We need to flush the
617*8348SEric.Yu@Sun.COM 	 * receive buffer if the receive side is being shutdown.
618*8348SEric.Yu@Sun.COM 	 */
619*8348SEric.Yu@Sun.COM 	if (error == 0 && how != SHUT_WR) {
620*8348SEric.Yu@Sun.COM 		mutex_enter(&so->so_lock);
621*8348SEric.Yu@Sun.COM 		/* wait for active reader to finish */
622*8348SEric.Yu@Sun.COM 		(void) so_lock_read(so, 0);
623*8348SEric.Yu@Sun.COM 
624*8348SEric.Yu@Sun.COM 		so_rcv_flush(so);
625*8348SEric.Yu@Sun.COM 
626*8348SEric.Yu@Sun.COM 		so_unlock_read(so);
627*8348SEric.Yu@Sun.COM 		mutex_exit(&so->so_lock);
628*8348SEric.Yu@Sun.COM 	}
629*8348SEric.Yu@Sun.COM 
630*8348SEric.Yu@Sun.COM done:
631*8348SEric.Yu@Sun.COM 	SO_UNBLOCK_FALLBACK(so);
632*8348SEric.Yu@Sun.COM 	return (error);
633*8348SEric.Yu@Sun.COM }
634*8348SEric.Yu@Sun.COM 
635*8348SEric.Yu@Sun.COM int
636*8348SEric.Yu@Sun.COM so_getsockname(struct sonode *so, struct sockaddr *addr,
637*8348SEric.Yu@Sun.COM     socklen_t *addrlen, struct cred *cr)
638*8348SEric.Yu@Sun.COM {
639*8348SEric.Yu@Sun.COM 	int error;
640*8348SEric.Yu@Sun.COM 
641*8348SEric.Yu@Sun.COM 	SO_BLOCK_FALLBACK(so, SOP_GETSOCKNAME(so, addr, addrlen, cr));
642*8348SEric.Yu@Sun.COM 
643*8348SEric.Yu@Sun.COM 	error = (*so->so_downcalls->sd_getsockname)
644*8348SEric.Yu@Sun.COM 	    (so->so_proto_handle, addr, addrlen, cr);
645*8348SEric.Yu@Sun.COM 
646*8348SEric.Yu@Sun.COM 	SO_UNBLOCK_FALLBACK(so);
647*8348SEric.Yu@Sun.COM 	return (error);
648*8348SEric.Yu@Sun.COM }
649*8348SEric.Yu@Sun.COM 
650*8348SEric.Yu@Sun.COM int
651*8348SEric.Yu@Sun.COM so_getpeername(struct sonode *so, struct sockaddr *addr,
652*8348SEric.Yu@Sun.COM     socklen_t *addrlen, boolean_t accept, struct cred *cr)
653*8348SEric.Yu@Sun.COM {
654*8348SEric.Yu@Sun.COM 	int error;
655*8348SEric.Yu@Sun.COM 
656*8348SEric.Yu@Sun.COM 	SO_BLOCK_FALLBACK(so, SOP_GETPEERNAME(so, addr, addrlen, accept, cr));
657*8348SEric.Yu@Sun.COM 
658*8348SEric.Yu@Sun.COM 	if (accept) {
659*8348SEric.Yu@Sun.COM 		error = (*so->so_downcalls->sd_getpeername)
660*8348SEric.Yu@Sun.COM 		    (so->so_proto_handle, addr, addrlen, cr);
661*8348SEric.Yu@Sun.COM 	} else if (!(so->so_state & SS_ISCONNECTED)) {
662*8348SEric.Yu@Sun.COM 		error = ENOTCONN;
663*8348SEric.Yu@Sun.COM 	} else if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
664*8348SEric.Yu@Sun.COM 		/* Added this check for X/Open */
665*8348SEric.Yu@Sun.COM 		error = EINVAL;
666*8348SEric.Yu@Sun.COM 		if (xnet_check_print) {
667*8348SEric.Yu@Sun.COM 			printf("sockfs: X/Open getpeername check => EINVAL\n");
668*8348SEric.Yu@Sun.COM 		}
669*8348SEric.Yu@Sun.COM 	} else {
670*8348SEric.Yu@Sun.COM 		error = (*so->so_downcalls->sd_getpeername)
671*8348SEric.Yu@Sun.COM 		    (so->so_proto_handle, addr, addrlen, cr);
672*8348SEric.Yu@Sun.COM 	}
673*8348SEric.Yu@Sun.COM 
674*8348SEric.Yu@Sun.COM 	SO_UNBLOCK_FALLBACK(so);
675*8348SEric.Yu@Sun.COM 	return (error);
676*8348SEric.Yu@Sun.COM }
677*8348SEric.Yu@Sun.COM 
678*8348SEric.Yu@Sun.COM int
679*8348SEric.Yu@Sun.COM so_getsockopt(struct sonode *so, int level, int option_name,
680*8348SEric.Yu@Sun.COM     void *optval, socklen_t *optlenp, int flags, struct cred *cr)
681*8348SEric.Yu@Sun.COM {
682*8348SEric.Yu@Sun.COM 	int error = 0;
683*8348SEric.Yu@Sun.COM 
684*8348SEric.Yu@Sun.COM 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
685*8348SEric.Yu@Sun.COM 	SO_BLOCK_FALLBACK(so,
686*8348SEric.Yu@Sun.COM 	    SOP_GETSOCKOPT(so, level, option_name, optval, optlenp, flags, cr));
687*8348SEric.Yu@Sun.COM 
688*8348SEric.Yu@Sun.COM 	error = socket_getopt_common(so, level, option_name, optval,
689*8348SEric.Yu@Sun.COM 	    optlenp);
690*8348SEric.Yu@Sun.COM 	if (error < 0) {
691*8348SEric.Yu@Sun.COM 		error = (*so->so_downcalls->sd_getsockopt)
692*8348SEric.Yu@Sun.COM 		    (so->so_proto_handle, level, option_name, optval, optlenp,
693*8348SEric.Yu@Sun.COM 		    cr);
694*8348SEric.Yu@Sun.COM 		if (error ==  ENOPROTOOPT) {
695*8348SEric.Yu@Sun.COM 			if (level == SOL_SOCKET) {
696*8348SEric.Yu@Sun.COM 				/*
697*8348SEric.Yu@Sun.COM 				 * If a protocol does not support a particular
698*8348SEric.Yu@Sun.COM 				 * socket option, set can fail (not allowed)
699*8348SEric.Yu@Sun.COM 				 * but get can not fail. This is the previous
700*8348SEric.Yu@Sun.COM 				 * sockfs bahvior.
701*8348SEric.Yu@Sun.COM 				 */
702*8348SEric.Yu@Sun.COM 				switch (option_name) {
703*8348SEric.Yu@Sun.COM 				case SO_LINGER:
704*8348SEric.Yu@Sun.COM 					if (*optlenp < (t_uscalar_t)
705*8348SEric.Yu@Sun.COM 					    sizeof (struct linger)) {
706*8348SEric.Yu@Sun.COM 						error = EINVAL;
707*8348SEric.Yu@Sun.COM 						break;
708*8348SEric.Yu@Sun.COM 					}
709*8348SEric.Yu@Sun.COM 					error = 0;
710*8348SEric.Yu@Sun.COM 					bzero(optval, sizeof (struct linger));
711*8348SEric.Yu@Sun.COM 					*optlenp = sizeof (struct linger);
712*8348SEric.Yu@Sun.COM 					break;
713*8348SEric.Yu@Sun.COM 				case SO_RCVTIMEO:
714*8348SEric.Yu@Sun.COM 				case SO_SNDTIMEO:
715*8348SEric.Yu@Sun.COM 					if (*optlenp < (t_uscalar_t)
716*8348SEric.Yu@Sun.COM 					    sizeof (struct timeval)) {
717*8348SEric.Yu@Sun.COM 						error = EINVAL;
718*8348SEric.Yu@Sun.COM 						break;
719*8348SEric.Yu@Sun.COM 					}
720*8348SEric.Yu@Sun.COM 					error = 0;
721*8348SEric.Yu@Sun.COM 					bzero(optval, sizeof (struct timeval));
722*8348SEric.Yu@Sun.COM 					*optlenp = sizeof (struct timeval);
723*8348SEric.Yu@Sun.COM 					break;
724*8348SEric.Yu@Sun.COM 				case SO_SND_BUFINFO:
725*8348SEric.Yu@Sun.COM 					if (*optlenp < (t_uscalar_t)
726*8348SEric.Yu@Sun.COM 					    sizeof (struct so_snd_bufinfo)) {
727*8348SEric.Yu@Sun.COM 						error = EINVAL;
728*8348SEric.Yu@Sun.COM 						break;
729*8348SEric.Yu@Sun.COM 					}
730*8348SEric.Yu@Sun.COM 					error = 0;
731*8348SEric.Yu@Sun.COM 					bzero(optval,
732*8348SEric.Yu@Sun.COM 					    sizeof (struct so_snd_bufinfo));
733*8348SEric.Yu@Sun.COM 					*optlenp =
734*8348SEric.Yu@Sun.COM 					    sizeof (struct so_snd_bufinfo);
735*8348SEric.Yu@Sun.COM 					break;
736*8348SEric.Yu@Sun.COM 				case SO_DEBUG:
737*8348SEric.Yu@Sun.COM 				case SO_REUSEADDR:
738*8348SEric.Yu@Sun.COM 				case SO_KEEPALIVE:
739*8348SEric.Yu@Sun.COM 				case SO_DONTROUTE:
740*8348SEric.Yu@Sun.COM 				case SO_BROADCAST:
741*8348SEric.Yu@Sun.COM 				case SO_USELOOPBACK:
742*8348SEric.Yu@Sun.COM 				case SO_OOBINLINE:
743*8348SEric.Yu@Sun.COM 				case SO_DGRAM_ERRIND:
744*8348SEric.Yu@Sun.COM 				case SO_SNDBUF:
745*8348SEric.Yu@Sun.COM 				case SO_RCVBUF:
746*8348SEric.Yu@Sun.COM 					error = 0;
747*8348SEric.Yu@Sun.COM 					*((int32_t *)optval) = 0;
748*8348SEric.Yu@Sun.COM 					*optlenp = sizeof (int32_t);
749*8348SEric.Yu@Sun.COM 					break;
750*8348SEric.Yu@Sun.COM 				default:
751*8348SEric.Yu@Sun.COM 					break;
752*8348SEric.Yu@Sun.COM 				}
753*8348SEric.Yu@Sun.COM 			}
754*8348SEric.Yu@Sun.COM 		}
755*8348SEric.Yu@Sun.COM 	}
756*8348SEric.Yu@Sun.COM 
757*8348SEric.Yu@Sun.COM 	SO_UNBLOCK_FALLBACK(so);
758*8348SEric.Yu@Sun.COM 	return (error);
759*8348SEric.Yu@Sun.COM }
760*8348SEric.Yu@Sun.COM 
761*8348SEric.Yu@Sun.COM int
762*8348SEric.Yu@Sun.COM so_setsockopt(struct sonode *so, int level, int option_name,
763*8348SEric.Yu@Sun.COM     const void *optval, socklen_t optlen, struct cred *cr)
764*8348SEric.Yu@Sun.COM {
765*8348SEric.Yu@Sun.COM 	int error = 0;
766*8348SEric.Yu@Sun.COM 
767*8348SEric.Yu@Sun.COM 	SO_BLOCK_FALLBACK(so,
768*8348SEric.Yu@Sun.COM 	    SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr));
769*8348SEric.Yu@Sun.COM 
770*8348SEric.Yu@Sun.COM 	/* X/Open requires this check */
771*8348SEric.Yu@Sun.COM 	if (so->so_state & SS_CANTSENDMORE && !xnet_skip_checks) {
772*8348SEric.Yu@Sun.COM 		SO_UNBLOCK_FALLBACK(so);
773*8348SEric.Yu@Sun.COM 		if (xnet_check_print)
774*8348SEric.Yu@Sun.COM 			printf("sockfs: X/Open setsockopt check => EINVAL\n");
775*8348SEric.Yu@Sun.COM 		return (EINVAL);
776*8348SEric.Yu@Sun.COM 	}
777*8348SEric.Yu@Sun.COM 
778*8348SEric.Yu@Sun.COM 	if (level == SOL_SOCKET &&
779*8348SEric.Yu@Sun.COM 	    ((option_name == SO_RCVTIMEO) || (option_name == SO_SNDTIMEO))) {
780*8348SEric.Yu@Sun.COM 		struct timeval *tl = (struct timeval *)optval;
781*8348SEric.Yu@Sun.COM 		clock_t t_usec;
782*8348SEric.Yu@Sun.COM 
783*8348SEric.Yu@Sun.COM 		if (optlen != (t_uscalar_t)sizeof (struct timeval)) {
784*8348SEric.Yu@Sun.COM 			SO_UNBLOCK_FALLBACK(so);
785*8348SEric.Yu@Sun.COM 			return (EINVAL);
786*8348SEric.Yu@Sun.COM 		}
787*8348SEric.Yu@Sun.COM 		t_usec = tl->tv_sec * 1000 * 1000 + tl->tv_usec;
788*8348SEric.Yu@Sun.COM 		mutex_enter(&so->so_lock);
789*8348SEric.Yu@Sun.COM 		if (option_name == SO_RCVTIMEO)
790*8348SEric.Yu@Sun.COM 			so->so_rcvtimeo = drv_usectohz(t_usec);
791*8348SEric.Yu@Sun.COM 		else
792*8348SEric.Yu@Sun.COM 			so->so_sndtimeo = drv_usectohz(t_usec);
793*8348SEric.Yu@Sun.COM 		mutex_exit(&so->so_lock);
794*8348SEric.Yu@Sun.COM 		SO_UNBLOCK_FALLBACK(so);
795*8348SEric.Yu@Sun.COM 		return (0);
796*8348SEric.Yu@Sun.COM 	}
797*8348SEric.Yu@Sun.COM 	error = (*so->so_downcalls->sd_setsockopt)
798*8348SEric.Yu@Sun.COM 	    (so->so_proto_handle, level, option_name, optval, optlen, cr);
799*8348SEric.Yu@Sun.COM 
800*8348SEric.Yu@Sun.COM 	SO_UNBLOCK_FALLBACK(so);
801*8348SEric.Yu@Sun.COM 	return (error);
802*8348SEric.Yu@Sun.COM }
803*8348SEric.Yu@Sun.COM 
804*8348SEric.Yu@Sun.COM int
805*8348SEric.Yu@Sun.COM so_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
806*8348SEric.Yu@Sun.COM     struct cred *cr, int32_t *rvalp)
807*8348SEric.Yu@Sun.COM {
808*8348SEric.Yu@Sun.COM 	int error = 0;
809*8348SEric.Yu@Sun.COM 
810*8348SEric.Yu@Sun.COM 	SO_BLOCK_FALLBACK(so, SOP_IOCTL(so, cmd, arg, mode, cr, rvalp));
811*8348SEric.Yu@Sun.COM 
812*8348SEric.Yu@Sun.COM 	/*
813*8348SEric.Yu@Sun.COM 	 * If there is a pending error, return error
814*8348SEric.Yu@Sun.COM 	 * This can happen if a non blocking operation caused an error.
815*8348SEric.Yu@Sun.COM 	 */
816*8348SEric.Yu@Sun.COM 	if (so->so_error != 0) {
817*8348SEric.Yu@Sun.COM 		mutex_enter(&so->so_lock);
818*8348SEric.Yu@Sun.COM 		error = sogeterr(so, B_TRUE);
819*8348SEric.Yu@Sun.COM 		mutex_exit(&so->so_lock);
820*8348SEric.Yu@Sun.COM 		if (error != 0)
821*8348SEric.Yu@Sun.COM 			goto done;
822*8348SEric.Yu@Sun.COM 	}
823*8348SEric.Yu@Sun.COM 
824*8348SEric.Yu@Sun.COM 	/*
825*8348SEric.Yu@Sun.COM 	 * calling strioc can result in the socket falling back to TPI,
826*8348SEric.Yu@Sun.COM 	 * if that is supported.
827*8348SEric.Yu@Sun.COM 	 */
828*8348SEric.Yu@Sun.COM 	if ((error = socket_ioctl_common(so, cmd, arg, mode, cr, rvalp)) < 0 &&
829*8348SEric.Yu@Sun.COM 	    (error = socket_strioc_common(so, cmd, arg, mode, cr, rvalp)) < 0) {
830*8348SEric.Yu@Sun.COM 		error = (*so->so_downcalls->sd_ioctl)(so->so_proto_handle,
831*8348SEric.Yu@Sun.COM 		    cmd, arg, mode, rvalp, cr);
832*8348SEric.Yu@Sun.COM 	}
833*8348SEric.Yu@Sun.COM 
834*8348SEric.Yu@Sun.COM done:
835*8348SEric.Yu@Sun.COM 	SO_UNBLOCK_FALLBACK(so);
836*8348SEric.Yu@Sun.COM 
837*8348SEric.Yu@Sun.COM 	return (error);
838*8348SEric.Yu@Sun.COM }
839*8348SEric.Yu@Sun.COM 
840*8348SEric.Yu@Sun.COM int
841*8348SEric.Yu@Sun.COM so_poll(struct sonode *so, short events, int anyyet, short *reventsp,
842*8348SEric.Yu@Sun.COM     struct pollhead **phpp)
843*8348SEric.Yu@Sun.COM {
844*8348SEric.Yu@Sun.COM 	int state = so->so_state;
845*8348SEric.Yu@Sun.COM 	*reventsp = 0;
846*8348SEric.Yu@Sun.COM 
847*8348SEric.Yu@Sun.COM 	if (so->so_error != 0 &&
848*8348SEric.Yu@Sun.COM 	    ((POLLIN|POLLRDNORM|POLLOUT) & events)  != 0) {
849*8348SEric.Yu@Sun.COM 		*reventsp = (POLLIN|POLLRDNORM|POLLOUT) & events;
850*8348SEric.Yu@Sun.COM 		return (0);
851*8348SEric.Yu@Sun.COM 	}
852*8348SEric.Yu@Sun.COM 
853*8348SEric.Yu@Sun.COM 	/*
854*8348SEric.Yu@Sun.COM 	 * As long as there is buffer to send data, and the socket is
855*8348SEric.Yu@Sun.COM 	 * in a state where it can send data (i.e., connected for
856*8348SEric.Yu@Sun.COM 	 * connection oriented protocols), then turn on POLLOUT events
857*8348SEric.Yu@Sun.COM 	 */
858*8348SEric.Yu@Sun.COM 	if (!so->so_snd_qfull && ((so->so_mode & SM_CONNREQUIRED) == 0 ||
859*8348SEric.Yu@Sun.COM 	    state & SS_ISCONNECTED)) {
860*8348SEric.Yu@Sun.COM 		*reventsp |= POLLOUT & events;
861*8348SEric.Yu@Sun.COM 	}
862*8348SEric.Yu@Sun.COM 
863*8348SEric.Yu@Sun.COM 	/*
864*8348SEric.Yu@Sun.COM 	 * Turn on POLLIN whenever there is data on the receive queue,
865*8348SEric.Yu@Sun.COM 	 * or the socket is in a state where no more data will be received.
866*8348SEric.Yu@Sun.COM 	 * Also, if the socket is accepting connections, flip the bit if
867*8348SEric.Yu@Sun.COM 	 * there is something on the queue.
868*8348SEric.Yu@Sun.COM 	 */
869*8348SEric.Yu@Sun.COM 
870*8348SEric.Yu@Sun.COM 	/* Pending connections */
871*8348SEric.Yu@Sun.COM 	if (so->so_acceptq_len > 0)
872*8348SEric.Yu@Sun.COM 		*reventsp |= (POLLIN|POLLRDNORM) & events;
873*8348SEric.Yu@Sun.COM 
874*8348SEric.Yu@Sun.COM 	/* Data */
875*8348SEric.Yu@Sun.COM 	/* so_downcalls is null for sctp */
876*8348SEric.Yu@Sun.COM 	if (so->so_downcalls != NULL && so->so_downcalls->sd_poll != NULL) {
877*8348SEric.Yu@Sun.COM 		*reventsp |= (*so->so_downcalls->sd_poll)
878*8348SEric.Yu@Sun.COM 		    (so->so_proto_handle, events & SO_PROTO_POLLEV, anyyet,
879*8348SEric.Yu@Sun.COM 		    CRED()) & events;
880*8348SEric.Yu@Sun.COM 		ASSERT((*reventsp & ~events) == 0);
881*8348SEric.Yu@Sun.COM 		/* do not recheck events */
882*8348SEric.Yu@Sun.COM 		events &= ~SO_PROTO_POLLEV;
883*8348SEric.Yu@Sun.COM 	} else {
884*8348SEric.Yu@Sun.COM 		if (SO_HAVE_DATA(so))
885*8348SEric.Yu@Sun.COM 			*reventsp |= (POLLIN|POLLRDNORM) & events;
886*8348SEric.Yu@Sun.COM 
887*8348SEric.Yu@Sun.COM 		/* Urgent data */
888*8348SEric.Yu@Sun.COM 		if ((state & SS_OOBPEND) != 0)
889*8348SEric.Yu@Sun.COM 			*reventsp |= (POLLRDBAND) & events;
890*8348SEric.Yu@Sun.COM 	}
891*8348SEric.Yu@Sun.COM 
892*8348SEric.Yu@Sun.COM 	if (!*reventsp && !anyyet) {
893*8348SEric.Yu@Sun.COM 		/* Check for read events again, but this time under lock */
894*8348SEric.Yu@Sun.COM 		if (events & (POLLIN|POLLRDNORM)) {
895*8348SEric.Yu@Sun.COM 			mutex_enter(&so->so_lock);
896*8348SEric.Yu@Sun.COM 			if (SO_HAVE_DATA(so) || so->so_acceptq_len > 0) {
897*8348SEric.Yu@Sun.COM 				mutex_exit(&so->so_lock);
898*8348SEric.Yu@Sun.COM 				*reventsp |= (POLLIN|POLLRDNORM) & events;
899*8348SEric.Yu@Sun.COM 				return (0);
900*8348SEric.Yu@Sun.COM 			} else {
901*8348SEric.Yu@Sun.COM 				so->so_pollev |= SO_POLLEV_IN;
902*8348SEric.Yu@Sun.COM 				mutex_exit(&so->so_lock);
903*8348SEric.Yu@Sun.COM 			}
904*8348SEric.Yu@Sun.COM 		}
905*8348SEric.Yu@Sun.COM 		*phpp = &so->so_poll_list;
906*8348SEric.Yu@Sun.COM 	}
907*8348SEric.Yu@Sun.COM 	return (0);
908*8348SEric.Yu@Sun.COM }
909*8348SEric.Yu@Sun.COM 
910*8348SEric.Yu@Sun.COM /*
911*8348SEric.Yu@Sun.COM  * Generic Upcalls
912*8348SEric.Yu@Sun.COM  */
913*8348SEric.Yu@Sun.COM void
914*8348SEric.Yu@Sun.COM so_connected(sock_upper_handle_t sock_handle, sock_connid_t id,
915*8348SEric.Yu@Sun.COM     cred_t *peer_cred, pid_t peer_cpid)
916*8348SEric.Yu@Sun.COM {
917*8348SEric.Yu@Sun.COM 	struct sonode *so = (struct sonode *)sock_handle;
918*8348SEric.Yu@Sun.COM 
919*8348SEric.Yu@Sun.COM 	mutex_enter(&so->so_lock);
920*8348SEric.Yu@Sun.COM 	ASSERT(so->so_proto_handle != NULL);
921*8348SEric.Yu@Sun.COM 
922*8348SEric.Yu@Sun.COM 	if (peer_cred != NULL) {
923*8348SEric.Yu@Sun.COM 		if (so->so_peercred != NULL)
924*8348SEric.Yu@Sun.COM 			crfree(so->so_peercred);
925*8348SEric.Yu@Sun.COM 		crhold(peer_cred);
926*8348SEric.Yu@Sun.COM 		so->so_peercred = peer_cred;
927*8348SEric.Yu@Sun.COM 		so->so_cpid = peer_cpid;
928*8348SEric.Yu@Sun.COM 	}
929*8348SEric.Yu@Sun.COM 
930*8348SEric.Yu@Sun.COM 	so->so_proto_connid = id;
931*8348SEric.Yu@Sun.COM 	soisconnected(so);
932*8348SEric.Yu@Sun.COM 	/*
933*8348SEric.Yu@Sun.COM 	 * Wake ones who're waiting for conn to become established.
934*8348SEric.Yu@Sun.COM 	 */
935*8348SEric.Yu@Sun.COM 	so_notify_connected(so);
936*8348SEric.Yu@Sun.COM }
937*8348SEric.Yu@Sun.COM 
938*8348SEric.Yu@Sun.COM int
939*8348SEric.Yu@Sun.COM so_disconnected(sock_upper_handle_t sock_handle, sock_connid_t id, int error)
940*8348SEric.Yu@Sun.COM {
941*8348SEric.Yu@Sun.COM 	struct sonode *so = (struct sonode *)sock_handle;
942*8348SEric.Yu@Sun.COM 
943*8348SEric.Yu@Sun.COM 	mutex_enter(&so->so_lock);
944*8348SEric.Yu@Sun.COM 
945*8348SEric.Yu@Sun.COM 	so->so_proto_connid = id;
946*8348SEric.Yu@Sun.COM 	soisdisconnected(so, error);
947*8348SEric.Yu@Sun.COM 	so_notify_disconnected(so, error);
948*8348SEric.Yu@Sun.COM 
949*8348SEric.Yu@Sun.COM 	return (0);
950*8348SEric.Yu@Sun.COM }
951*8348SEric.Yu@Sun.COM 
952*8348SEric.Yu@Sun.COM void
953*8348SEric.Yu@Sun.COM so_opctl(sock_upper_handle_t sock_handle, sock_opctl_action_t action,
954*8348SEric.Yu@Sun.COM     uintptr_t arg)
955*8348SEric.Yu@Sun.COM {
956*8348SEric.Yu@Sun.COM 	struct sonode *so = (struct sonode *)sock_handle;
957*8348SEric.Yu@Sun.COM 
958*8348SEric.Yu@Sun.COM 	switch (action) {
959*8348SEric.Yu@Sun.COM 	case SOCK_OPCTL_SHUT_SEND:
960*8348SEric.Yu@Sun.COM 		mutex_enter(&so->so_lock);
961*8348SEric.Yu@Sun.COM 		socantsendmore(so);
962*8348SEric.Yu@Sun.COM 		so_notify_disconnecting(so);
963*8348SEric.Yu@Sun.COM 		break;
964*8348SEric.Yu@Sun.COM 	case SOCK_OPCTL_SHUT_RECV: {
965*8348SEric.Yu@Sun.COM 		mutex_enter(&so->so_lock);
966*8348SEric.Yu@Sun.COM 		socantrcvmore(so);
967*8348SEric.Yu@Sun.COM 		so_notify_eof(so);
968*8348SEric.Yu@Sun.COM 		break;
969*8348SEric.Yu@Sun.COM 	}
970*8348SEric.Yu@Sun.COM 	case SOCK_OPCTL_ENAB_ACCEPT:
971*8348SEric.Yu@Sun.COM 		mutex_enter(&so->so_lock);
972*8348SEric.Yu@Sun.COM 		so->so_state |= SS_ACCEPTCONN;
973*8348SEric.Yu@Sun.COM 		so->so_backlog = (unsigned int)arg;
974*8348SEric.Yu@Sun.COM 		mutex_exit(&so->so_lock);
975*8348SEric.Yu@Sun.COM 		break;
976*8348SEric.Yu@Sun.COM 	default:
977*8348SEric.Yu@Sun.COM 		ASSERT(0);
978*8348SEric.Yu@Sun.COM 		break;
979*8348SEric.Yu@Sun.COM 	}
980*8348SEric.Yu@Sun.COM }
981*8348SEric.Yu@Sun.COM 
982*8348SEric.Yu@Sun.COM void
983*8348SEric.Yu@Sun.COM so_txq_full(sock_upper_handle_t sock_handle, boolean_t qfull)
984*8348SEric.Yu@Sun.COM {
985*8348SEric.Yu@Sun.COM 	struct sonode *so = (struct sonode *)sock_handle;
986*8348SEric.Yu@Sun.COM 
987*8348SEric.Yu@Sun.COM 	if (qfull) {
988*8348SEric.Yu@Sun.COM 		so_snd_qfull(so);
989*8348SEric.Yu@Sun.COM 	} else {
990*8348SEric.Yu@Sun.COM 		so_snd_qnotfull(so);
991*8348SEric.Yu@Sun.COM 		mutex_enter(&so->so_lock);
992*8348SEric.Yu@Sun.COM 		so_notify_writable(so);
993*8348SEric.Yu@Sun.COM 	}
994*8348SEric.Yu@Sun.COM }
995*8348SEric.Yu@Sun.COM 
996*8348SEric.Yu@Sun.COM sock_upper_handle_t
997*8348SEric.Yu@Sun.COM so_newconn(sock_upper_handle_t parenthandle,
998*8348SEric.Yu@Sun.COM     sock_lower_handle_t proto_handle, sock_downcalls_t *sock_downcalls,
999*8348SEric.Yu@Sun.COM     struct cred *peer_cred, pid_t peer_cpid, sock_upcalls_t **sock_upcallsp)
1000*8348SEric.Yu@Sun.COM {
1001*8348SEric.Yu@Sun.COM 	struct sonode	*so = (struct sonode *)parenthandle;
1002*8348SEric.Yu@Sun.COM 	struct sonode	*nso;
1003*8348SEric.Yu@Sun.COM 	int error;
1004*8348SEric.Yu@Sun.COM 
1005*8348SEric.Yu@Sun.COM 	ASSERT(proto_handle != NULL);
1006*8348SEric.Yu@Sun.COM 
1007*8348SEric.Yu@Sun.COM 	if ((so->so_state & SS_ACCEPTCONN) == 0 ||
1008*8348SEric.Yu@Sun.COM 	    so->so_acceptq_len >= so->so_backlog)
1009*8348SEric.Yu@Sun.COM 		return (NULL);
1010*8348SEric.Yu@Sun.COM 
1011*8348SEric.Yu@Sun.COM 	nso = socket_newconn(so, proto_handle, sock_downcalls, SOCKET_NOSLEEP,
1012*8348SEric.Yu@Sun.COM 	    &error);
1013*8348SEric.Yu@Sun.COM 	if (nso == NULL)
1014*8348SEric.Yu@Sun.COM 		return (NULL);
1015*8348SEric.Yu@Sun.COM 
1016*8348SEric.Yu@Sun.COM 	if (peer_cred != NULL) {
1017*8348SEric.Yu@Sun.COM 		crhold(peer_cred);
1018*8348SEric.Yu@Sun.COM 		nso->so_peercred = peer_cred;
1019*8348SEric.Yu@Sun.COM 		nso->so_cpid = peer_cpid;
1020*8348SEric.Yu@Sun.COM 	}
1021*8348SEric.Yu@Sun.COM 
1022*8348SEric.Yu@Sun.COM 	(void) so_acceptq_enqueue(so, nso);
1023*8348SEric.Yu@Sun.COM 	mutex_enter(&so->so_lock);
1024*8348SEric.Yu@Sun.COM 	so_notify_newconn(so);
1025*8348SEric.Yu@Sun.COM 
1026*8348SEric.Yu@Sun.COM 	*sock_upcallsp = &so_upcalls;
1027*8348SEric.Yu@Sun.COM 
1028*8348SEric.Yu@Sun.COM 	return ((sock_upper_handle_t)nso);
1029*8348SEric.Yu@Sun.COM }
1030*8348SEric.Yu@Sun.COM 
1031*8348SEric.Yu@Sun.COM void
1032*8348SEric.Yu@Sun.COM so_set_prop(sock_upper_handle_t sock_handle, struct sock_proto_props *soppp)
1033*8348SEric.Yu@Sun.COM {
1034*8348SEric.Yu@Sun.COM 	struct sonode *so;
1035*8348SEric.Yu@Sun.COM 
1036*8348SEric.Yu@Sun.COM 	so = (struct sonode *)sock_handle;
1037*8348SEric.Yu@Sun.COM 
1038*8348SEric.Yu@Sun.COM 	mutex_enter(&so->so_lock);
1039*8348SEric.Yu@Sun.COM 
1040*8348SEric.Yu@Sun.COM 	if (soppp->sopp_flags & SOCKOPT_MAXBLK)
1041*8348SEric.Yu@Sun.COM 		so->so_proto_props.sopp_maxblk = soppp->sopp_maxblk;
1042*8348SEric.Yu@Sun.COM 	if (soppp->sopp_flags & SOCKOPT_WROFF)
1043*8348SEric.Yu@Sun.COM 		so->so_proto_props.sopp_wroff = soppp->sopp_wroff;
1044*8348SEric.Yu@Sun.COM 	if (soppp->sopp_flags & SOCKOPT_TAIL)
1045*8348SEric.Yu@Sun.COM 		so->so_proto_props.sopp_tail = soppp->sopp_tail;
1046*8348SEric.Yu@Sun.COM 	if (soppp->sopp_flags & SOCKOPT_RCVHIWAT)
1047*8348SEric.Yu@Sun.COM 		so->so_proto_props.sopp_rxhiwat = soppp->sopp_rxhiwat;
1048*8348SEric.Yu@Sun.COM 	if (soppp->sopp_flags & SOCKOPT_RCVLOWAT)
1049*8348SEric.Yu@Sun.COM 		so->so_proto_props.sopp_rxlowat = soppp->sopp_rxlowat;
1050*8348SEric.Yu@Sun.COM 	if (soppp->sopp_flags & SOCKOPT_MAXPSZ)
1051*8348SEric.Yu@Sun.COM 		so->so_proto_props.sopp_maxpsz = soppp->sopp_maxpsz;
1052*8348SEric.Yu@Sun.COM 	if (soppp->sopp_flags & SOCKOPT_MINPSZ)
1053*8348SEric.Yu@Sun.COM 		so->so_proto_props.sopp_minpsz = soppp->sopp_minpsz;
1054*8348SEric.Yu@Sun.COM 	if (soppp->sopp_flags & SOCKOPT_ZCOPY) {
1055*8348SEric.Yu@Sun.COM 		if (soppp->sopp_zcopyflag & ZCVMSAFE) {
1056*8348SEric.Yu@Sun.COM 			so->so_proto_props.sopp_zcopyflag |= STZCVMSAFE;
1057*8348SEric.Yu@Sun.COM 			so->so_proto_props.sopp_zcopyflag &= ~STZCVMUNSAFE;
1058*8348SEric.Yu@Sun.COM 		} else if (soppp->sopp_zcopyflag & ZCVMUNSAFE) {
1059*8348SEric.Yu@Sun.COM 			so->so_proto_props.sopp_zcopyflag |= STZCVMUNSAFE;
1060*8348SEric.Yu@Sun.COM 			so->so_proto_props.sopp_zcopyflag &= ~STZCVMSAFE;
1061*8348SEric.Yu@Sun.COM 		}
1062*8348SEric.Yu@Sun.COM 
1063*8348SEric.Yu@Sun.COM 		if (soppp->sopp_zcopyflag & COPYCACHED) {
1064*8348SEric.Yu@Sun.COM 			so->so_proto_props.sopp_zcopyflag |= STRCOPYCACHED;
1065*8348SEric.Yu@Sun.COM 		}
1066*8348SEric.Yu@Sun.COM 	}
1067*8348SEric.Yu@Sun.COM 	if (soppp->sopp_flags & SOCKOPT_OOBINLINE)
1068*8348SEric.Yu@Sun.COM 		so->so_proto_props.sopp_oobinline = soppp->sopp_oobinline;
1069*8348SEric.Yu@Sun.COM 	if (soppp->sopp_flags & SOCKOPT_RCVTIMER)
1070*8348SEric.Yu@Sun.COM 		so->so_proto_props.sopp_rcvtimer = soppp->sopp_rcvtimer;
1071*8348SEric.Yu@Sun.COM 	if (soppp->sopp_flags & SOCKOPT_RCVTHRESH)
1072*8348SEric.Yu@Sun.COM 		so->so_proto_props.sopp_rcvthresh = soppp->sopp_rcvthresh;
1073*8348SEric.Yu@Sun.COM 	if (soppp->sopp_flags & SOCKOPT_MAXADDRLEN)
1074*8348SEric.Yu@Sun.COM 		so->so_proto_props.sopp_maxaddrlen = soppp->sopp_maxaddrlen;
1075*8348SEric.Yu@Sun.COM 
1076*8348SEric.Yu@Sun.COM 	mutex_exit(&so->so_lock);
1077*8348SEric.Yu@Sun.COM 
1078*8348SEric.Yu@Sun.COM #ifdef DEBUG
1079*8348SEric.Yu@Sun.COM 	soppp->sopp_flags &= ~(SOCKOPT_MAXBLK | SOCKOPT_WROFF | SOCKOPT_TAIL |
1080*8348SEric.Yu@Sun.COM 	    SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | SOCKOPT_MAXPSZ |
1081*8348SEric.Yu@Sun.COM 	    SOCKOPT_ZCOPY | SOCKOPT_OOBINLINE | SOCKOPT_RCVTIMER |
1082*8348SEric.Yu@Sun.COM 	    SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ);
1083*8348SEric.Yu@Sun.COM 	ASSERT(soppp->sopp_flags == 0);
1084*8348SEric.Yu@Sun.COM #endif
1085*8348SEric.Yu@Sun.COM }
1086*8348SEric.Yu@Sun.COM 
1087*8348SEric.Yu@Sun.COM /* ARGSUSED */
1088*8348SEric.Yu@Sun.COM ssize_t
1089*8348SEric.Yu@Sun.COM so_queue_msg(sock_upper_handle_t sock_handle, mblk_t *mp,
1090*8348SEric.Yu@Sun.COM     size_t msg_size, int flags, int *errorp,  boolean_t *force_pushp)
1091*8348SEric.Yu@Sun.COM {
1092*8348SEric.Yu@Sun.COM 	struct sonode *so = (struct sonode *)sock_handle;
1093*8348SEric.Yu@Sun.COM 	boolean_t force_push = B_TRUE;
1094*8348SEric.Yu@Sun.COM 	int space_left;
1095*8348SEric.Yu@Sun.COM 	sodirect_t *sodp = so->so_direct;
1096*8348SEric.Yu@Sun.COM 
1097*8348SEric.Yu@Sun.COM 	ASSERT(errorp != NULL);
1098*8348SEric.Yu@Sun.COM 	*errorp = 0;
1099*8348SEric.Yu@Sun.COM 	if (mp == NULL) {
1100*8348SEric.Yu@Sun.COM 		if (msg_size > 0) {
1101*8348SEric.Yu@Sun.COM 			ASSERT(so->so_downcalls->sd_recv_uio != NULL);
1102*8348SEric.Yu@Sun.COM 			mutex_enter(&so->so_lock);
1103*8348SEric.Yu@Sun.COM 			/* the notify functions will drop the lock */
1104*8348SEric.Yu@Sun.COM 			if (flags & MSG_OOB)
1105*8348SEric.Yu@Sun.COM 				so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
1106*8348SEric.Yu@Sun.COM 			else
1107*8348SEric.Yu@Sun.COM 				so_notify_data(so, msg_size);
1108*8348SEric.Yu@Sun.COM 			return (0);
1109*8348SEric.Yu@Sun.COM 		}
1110*8348SEric.Yu@Sun.COM 		/*
1111*8348SEric.Yu@Sun.COM 		 * recv space check
1112*8348SEric.Yu@Sun.COM 		 */
1113*8348SEric.Yu@Sun.COM 		mutex_enter(&so->so_lock);
1114*8348SEric.Yu@Sun.COM 		space_left = so->so_rcvbuf - so->so_rcv_queued;
1115*8348SEric.Yu@Sun.COM 		if (space_left <= 0) {
1116*8348SEric.Yu@Sun.COM 			so->so_flowctrld = B_TRUE;
1117*8348SEric.Yu@Sun.COM 			*errorp = ENOSPC;
1118*8348SEric.Yu@Sun.COM 			space_left = -1;
1119*8348SEric.Yu@Sun.COM 		}
1120*8348SEric.Yu@Sun.COM 		goto done_unlock;
1121*8348SEric.Yu@Sun.COM 	}
1122*8348SEric.Yu@Sun.COM 
1123*8348SEric.Yu@Sun.COM 	ASSERT(mp->b_next == NULL);
1124*8348SEric.Yu@Sun.COM 	ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_PROTO);
1125*8348SEric.Yu@Sun.COM 	ASSERT(msg_size == msgdsize(mp));
1126*8348SEric.Yu@Sun.COM 
1127*8348SEric.Yu@Sun.COM 	if (flags & MSG_OOB) {
1128*8348SEric.Yu@Sun.COM 		so_queue_oob(sock_handle, mp, msg_size);
1129*8348SEric.Yu@Sun.COM 		return (0);
1130*8348SEric.Yu@Sun.COM 	}
1131*8348SEric.Yu@Sun.COM 
1132*8348SEric.Yu@Sun.COM 	if (force_pushp != NULL)
1133*8348SEric.Yu@Sun.COM 		force_push = *force_pushp;
1134*8348SEric.Yu@Sun.COM 
1135*8348SEric.Yu@Sun.COM 	if (DB_TYPE(mp) == M_PROTO && !__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
1136*8348SEric.Yu@Sun.COM 		/* The read pointer is not aligned correctly for TPI */
1137*8348SEric.Yu@Sun.COM 		zcmn_err(getzoneid(), CE_WARN,
1138*8348SEric.Yu@Sun.COM 		    "sockfs: Unaligned TPI message received. rptr = %p\n",
1139*8348SEric.Yu@Sun.COM 		    (void *)mp->b_rptr);
1140*8348SEric.Yu@Sun.COM 		freemsg(mp);
1141*8348SEric.Yu@Sun.COM 		mutex_enter(sodp->sod_lockp);
1142*8348SEric.Yu@Sun.COM 		SOD_UIOAFINI(sodp);
1143*8348SEric.Yu@Sun.COM 		mutex_exit(sodp->sod_lockp);
1144*8348SEric.Yu@Sun.COM 
1145*8348SEric.Yu@Sun.COM 		return (so->so_rcvbuf - so->so_rcv_queued);
1146*8348SEric.Yu@Sun.COM 	}
1147*8348SEric.Yu@Sun.COM 
1148*8348SEric.Yu@Sun.COM 	mutex_enter(&so->so_lock);
1149*8348SEric.Yu@Sun.COM 	if (so->so_state & (SS_FALLBACK_PENDING | SS_FALLBACK_COMP)) {
1150*8348SEric.Yu@Sun.COM 		SOD_DISABLE(sodp);
1151*8348SEric.Yu@Sun.COM 		mutex_exit(&so->so_lock);
1152*8348SEric.Yu@Sun.COM 		*errorp = EOPNOTSUPP;
1153*8348SEric.Yu@Sun.COM 		return (-1);
1154*8348SEric.Yu@Sun.COM 	}
1155*8348SEric.Yu@Sun.COM 	if (so->so_state & SS_CANTRCVMORE) {
1156*8348SEric.Yu@Sun.COM 		freemsg(mp);
1157*8348SEric.Yu@Sun.COM 		SOD_DISABLE(sodp);
1158*8348SEric.Yu@Sun.COM 		mutex_exit(&so->so_lock);
1159*8348SEric.Yu@Sun.COM 		return (0);
1160*8348SEric.Yu@Sun.COM 	}
1161*8348SEric.Yu@Sun.COM 
1162*8348SEric.Yu@Sun.COM 	/* process the mblk via I/OAT if capable */
1163*8348SEric.Yu@Sun.COM 	if (sodp != NULL && (sodp->sod_state & SOD_ENABLED)) {
1164*8348SEric.Yu@Sun.COM 		if (DB_TYPE(mp) == M_DATA) {
1165*8348SEric.Yu@Sun.COM 			(void) sod_uioa_mblk_init(sodp, mp, msg_size);
1166*8348SEric.Yu@Sun.COM 		} else {
1167*8348SEric.Yu@Sun.COM 			SOD_UIOAFINI(sodp);
1168*8348SEric.Yu@Sun.COM 		}
1169*8348SEric.Yu@Sun.COM 	}
1170*8348SEric.Yu@Sun.COM 
1171*8348SEric.Yu@Sun.COM 	if (mp->b_next == NULL) {
1172*8348SEric.Yu@Sun.COM 		so_enqueue_msg(so, mp, msg_size);
1173*8348SEric.Yu@Sun.COM 	} else {
1174*8348SEric.Yu@Sun.COM 		do {
1175*8348SEric.Yu@Sun.COM 			mblk_t *nmp;
1176*8348SEric.Yu@Sun.COM 
1177*8348SEric.Yu@Sun.COM 			if ((nmp = mp->b_next) != NULL) {
1178*8348SEric.Yu@Sun.COM 				mp->b_next = NULL;
1179*8348SEric.Yu@Sun.COM 			}
1180*8348SEric.Yu@Sun.COM 			so_enqueue_msg(so, mp, msgdsize(mp));
1181*8348SEric.Yu@Sun.COM 			mp = nmp;
1182*8348SEric.Yu@Sun.COM 		} while (mp != NULL);
1183*8348SEric.Yu@Sun.COM 	}
1184*8348SEric.Yu@Sun.COM 
1185*8348SEric.Yu@Sun.COM 	space_left = so->so_rcvbuf - so->so_rcv_queued;
1186*8348SEric.Yu@Sun.COM 	if (space_left <= 0) {
1187*8348SEric.Yu@Sun.COM 		so->so_flowctrld = B_TRUE;
1188*8348SEric.Yu@Sun.COM 		*errorp = ENOSPC;
1189*8348SEric.Yu@Sun.COM 		space_left = -1;
1190*8348SEric.Yu@Sun.COM 	}
1191*8348SEric.Yu@Sun.COM 
1192*8348SEric.Yu@Sun.COM 	if (force_push || so->so_rcv_queued >= so->so_rcv_thresh ||
1193*8348SEric.Yu@Sun.COM 	    so->so_rcv_queued >= so->so_rcv_wanted ||
1194*8348SEric.Yu@Sun.COM 	    (sodp != NULL && so->so_rcv_queued >= sodp->sod_want)) {
1195*8348SEric.Yu@Sun.COM 		SOCKET_TIMER_CANCEL(so);
1196*8348SEric.Yu@Sun.COM 		/*
1197*8348SEric.Yu@Sun.COM 		 * so_notify_data will release the lock
1198*8348SEric.Yu@Sun.COM 		 */
1199*8348SEric.Yu@Sun.COM 		so_notify_data(so, so->so_rcv_queued);
1200*8348SEric.Yu@Sun.COM 
1201*8348SEric.Yu@Sun.COM 		if (force_pushp != NULL)
1202*8348SEric.Yu@Sun.COM 			*force_pushp = B_TRUE;
1203*8348SEric.Yu@Sun.COM 		goto done;
1204*8348SEric.Yu@Sun.COM 	} else if (so->so_rcv_timer_tid == 0) {
1205*8348SEric.Yu@Sun.COM 		/* Make sure the recv push timer is running */
1206*8348SEric.Yu@Sun.COM 		SOCKET_TIMER_START(so);
1207*8348SEric.Yu@Sun.COM 	}
1208*8348SEric.Yu@Sun.COM 
1209*8348SEric.Yu@Sun.COM done_unlock:
1210*8348SEric.Yu@Sun.COM 	mutex_exit(&so->so_lock);
1211*8348SEric.Yu@Sun.COM done:
1212*8348SEric.Yu@Sun.COM 	return (space_left);
1213*8348SEric.Yu@Sun.COM }
1214*8348SEric.Yu@Sun.COM 
1215*8348SEric.Yu@Sun.COM /*
1216*8348SEric.Yu@Sun.COM  * Set the offset of where the oob data is relative to the bytes in
1217*8348SEric.Yu@Sun.COM  * queued. Also generate SIGURG
1218*8348SEric.Yu@Sun.COM  */
1219*8348SEric.Yu@Sun.COM void
1220*8348SEric.Yu@Sun.COM so_signal_oob(sock_upper_handle_t sock_handle, ssize_t offset)
1221*8348SEric.Yu@Sun.COM {
1222*8348SEric.Yu@Sun.COM 	struct sonode *so;
1223*8348SEric.Yu@Sun.COM 
1224*8348SEric.Yu@Sun.COM 	ASSERT(offset >= 0);
1225*8348SEric.Yu@Sun.COM 	so = (struct sonode *)sock_handle;
1226*8348SEric.Yu@Sun.COM 	mutex_enter(&so->so_lock);
1227*8348SEric.Yu@Sun.COM 	SOD_UIOAFINI(so->so_direct);
1228*8348SEric.Yu@Sun.COM 
1229*8348SEric.Yu@Sun.COM 	/*
1230*8348SEric.Yu@Sun.COM 	 * New urgent data on the way so forget about any old
1231*8348SEric.Yu@Sun.COM 	 * urgent data.
1232*8348SEric.Yu@Sun.COM 	 */
1233*8348SEric.Yu@Sun.COM 	so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);
1234*8348SEric.Yu@Sun.COM 
1235*8348SEric.Yu@Sun.COM 	/*
1236*8348SEric.Yu@Sun.COM 	 * Record that urgent data is pending.
1237*8348SEric.Yu@Sun.COM 	 */
1238*8348SEric.Yu@Sun.COM 	so->so_state |= SS_OOBPEND;
1239*8348SEric.Yu@Sun.COM 
1240*8348SEric.Yu@Sun.COM 	if (so->so_oobmsg != NULL) {
1241*8348SEric.Yu@Sun.COM 		dprintso(so, 1, ("sock: discarding old oob\n"));
1242*8348SEric.Yu@Sun.COM 		freemsg(so->so_oobmsg);
1243*8348SEric.Yu@Sun.COM 		so->so_oobmsg = NULL;
1244*8348SEric.Yu@Sun.COM 	}
1245*8348SEric.Yu@Sun.COM 
1246*8348SEric.Yu@Sun.COM 	/*
1247*8348SEric.Yu@Sun.COM 	 * set the offset where the urgent byte is
1248*8348SEric.Yu@Sun.COM 	 */
1249*8348SEric.Yu@Sun.COM 	so->so_oobmark = so->so_rcv_queued + offset;
1250*8348SEric.Yu@Sun.COM 	if (so->so_oobmark == 0)
1251*8348SEric.Yu@Sun.COM 		so->so_state |= SS_RCVATMARK;
1252*8348SEric.Yu@Sun.COM 	else
1253*8348SEric.Yu@Sun.COM 		so->so_state &= ~SS_RCVATMARK;
1254*8348SEric.Yu@Sun.COM 
1255*8348SEric.Yu@Sun.COM 	so_notify_oobsig(so);
1256*8348SEric.Yu@Sun.COM }
1257*8348SEric.Yu@Sun.COM 
1258*8348SEric.Yu@Sun.COM /*
1259*8348SEric.Yu@Sun.COM  * Queue the OOB byte
1260*8348SEric.Yu@Sun.COM  */
1261*8348SEric.Yu@Sun.COM static void
1262*8348SEric.Yu@Sun.COM so_queue_oob(sock_upper_handle_t sock_handle, mblk_t *mp, size_t len)
1263*8348SEric.Yu@Sun.COM {
1264*8348SEric.Yu@Sun.COM 	struct sonode *so;
1265*8348SEric.Yu@Sun.COM 
1266*8348SEric.Yu@Sun.COM 	so = (struct sonode *)sock_handle;
1267*8348SEric.Yu@Sun.COM 	mutex_enter(&so->so_lock);
1268*8348SEric.Yu@Sun.COM 	SOD_UIOAFINI(so->so_direct);
1269*8348SEric.Yu@Sun.COM 
1270*8348SEric.Yu@Sun.COM 	ASSERT(mp != NULL);
1271*8348SEric.Yu@Sun.COM 	if (!IS_SO_OOB_INLINE(so)) {
1272*8348SEric.Yu@Sun.COM 		so->so_oobmsg = mp;
1273*8348SEric.Yu@Sun.COM 		so->so_state |= SS_HAVEOOBDATA;
1274*8348SEric.Yu@Sun.COM 	} else {
1275*8348SEric.Yu@Sun.COM 		so_enqueue_msg(so, mp, len);
1276*8348SEric.Yu@Sun.COM 	}
1277*8348SEric.Yu@Sun.COM 
1278*8348SEric.Yu@Sun.COM 	so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
1279*8348SEric.Yu@Sun.COM }
1280*8348SEric.Yu@Sun.COM 
1281*8348SEric.Yu@Sun.COM int
1282*8348SEric.Yu@Sun.COM so_close(struct sonode *so, int flag, struct cred *cr)
1283*8348SEric.Yu@Sun.COM {
1284*8348SEric.Yu@Sun.COM 	int error;
1285*8348SEric.Yu@Sun.COM 
1286*8348SEric.Yu@Sun.COM 	error = (*so->so_downcalls->sd_close)(so->so_proto_handle, flag, cr);
1287*8348SEric.Yu@Sun.COM 
1288*8348SEric.Yu@Sun.COM 	/*
1289*8348SEric.Yu@Sun.COM 	 * At this point there will be no more upcalls from the protocol
1290*8348SEric.Yu@Sun.COM 	 */
1291*8348SEric.Yu@Sun.COM 	mutex_enter(&so->so_lock);
1292*8348SEric.Yu@Sun.COM 	so_rcv_flush(so);
1293*8348SEric.Yu@Sun.COM 	mutex_exit(&so->so_lock);
1294*8348SEric.Yu@Sun.COM 
1295*8348SEric.Yu@Sun.COM 	return (error);
1296*8348SEric.Yu@Sun.COM }
1297*8348SEric.Yu@Sun.COM 
1298*8348SEric.Yu@Sun.COM void
1299*8348SEric.Yu@Sun.COM so_zcopy_notify(sock_upper_handle_t sock_handle)
1300*8348SEric.Yu@Sun.COM {
1301*8348SEric.Yu@Sun.COM 	struct sonode *so = (struct sonode *)sock_handle;
1302*8348SEric.Yu@Sun.COM 
1303*8348SEric.Yu@Sun.COM 	mutex_enter(&so->so_lock);
1304*8348SEric.Yu@Sun.COM 	so->so_copyflag |= STZCNOTIFY;
1305*8348SEric.Yu@Sun.COM 	cv_broadcast(&so->so_copy_cv);
1306*8348SEric.Yu@Sun.COM 	mutex_exit(&so->so_lock);
1307*8348SEric.Yu@Sun.COM }
1308*8348SEric.Yu@Sun.COM 
1309*8348SEric.Yu@Sun.COM void
1310*8348SEric.Yu@Sun.COM so_set_error(sock_upper_handle_t sock_handle, int error)
1311*8348SEric.Yu@Sun.COM {
1312*8348SEric.Yu@Sun.COM 	struct sonode *so = (struct sonode *)sock_handle;
1313*8348SEric.Yu@Sun.COM 
1314*8348SEric.Yu@Sun.COM 	mutex_enter(&so->so_lock);
1315*8348SEric.Yu@Sun.COM 
1316*8348SEric.Yu@Sun.COM 	soseterror(so, error);
1317*8348SEric.Yu@Sun.COM 
1318*8348SEric.Yu@Sun.COM 	so_notify_error(so);
1319*8348SEric.Yu@Sun.COM }
1320*8348SEric.Yu@Sun.COM 
1321*8348SEric.Yu@Sun.COM /*
1322*8348SEric.Yu@Sun.COM  * so_recvmsg - read data from the socket
1323*8348SEric.Yu@Sun.COM  *
1324*8348SEric.Yu@Sun.COM  * There are two ways of obtaining data; either we ask the protocol to
1325*8348SEric.Yu@Sun.COM  * copy directly into the supplied buffer, or we copy data from the
1326*8348SEric.Yu@Sun.COM  * sonode's receive queue. The decision which one to use depends on
1327*8348SEric.Yu@Sun.COM  * whether the protocol has a sd_recv_uio down call.
 *
 * msg->msg_flags carries the caller's MSG_* request flags on entry. On
 * the receive-queue path it is cleared and then used to report
 * MSG_TRUNC, MSG_EOR and MSG_CTRUNC; msg_namelen and msg_controllen are
 * likewise cleared and only set again when a name or control buffer is
 * returned. Those buffers are allocated here with kmem_alloc() and
 * kmem_zalloc(). NOTE(review): presumably the caller frees them - confirm.
 *
 * Returns 0 on success, otherwise an errno value: ENOTCONN when the
 * socket mode has SM_CONNREQUIRED and the state has neither
 * SS_ISCONNECTED nor SS_CANTRCVMORE, EOPNOTSUPP for MSG_OOB on a socket
 * without SM_EXDATA, EPROTO for a malformed or unexpected control
 * message, or the error returned by the protocol down call or by one of
 * the helpers called below.
1328*8348SEric.Yu@Sun.COM  */
1329*8348SEric.Yu@Sun.COM int
1330*8348SEric.Yu@Sun.COM so_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
1331*8348SEric.Yu@Sun.COM     struct cred *cr)
1332*8348SEric.Yu@Sun.COM {
1333*8348SEric.Yu@Sun.COM 	rval_t 		rval;
1334*8348SEric.Yu@Sun.COM 	int 		flags = 0;
1335*8348SEric.Yu@Sun.COM 	t_uscalar_t	controllen, namelen;
1336*8348SEric.Yu@Sun.COM 	int 		error = 0;
1337*8348SEric.Yu@Sun.COM 	int ret;
1338*8348SEric.Yu@Sun.COM 	mblk_t		*mctlp = NULL;
1339*8348SEric.Yu@Sun.COM 	union T_primitives *tpr;
1340*8348SEric.Yu@Sun.COM 	void		*control;
1341*8348SEric.Yu@Sun.COM 	ssize_t		saved_resid;
1342*8348SEric.Yu@Sun.COM 	struct uio	*suiop;
1343*8348SEric.Yu@Sun.COM 
	/* Every return below is paired with SO_UNBLOCK_FALLBACK(). */
1344*8348SEric.Yu@Sun.COM 	SO_BLOCK_FALLBACK(so, SOP_RECVMSG(so, msg, uiop, cr));
1345*8348SEric.Yu@Sun.COM 
1346*8348SEric.Yu@Sun.COM 	if ((so->so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 &&
1347*8348SEric.Yu@Sun.COM 	    (so->so_mode & SM_CONNREQUIRED)) {
1348*8348SEric.Yu@Sun.COM 		SO_UNBLOCK_FALLBACK(so);
1349*8348SEric.Yu@Sun.COM 		return (ENOTCONN);
1350*8348SEric.Yu@Sun.COM 	}
1351*8348SEric.Yu@Sun.COM 
	/* MSG_WAITALL is ignored when peeking. */
1352*8348SEric.Yu@Sun.COM 	if (msg->msg_flags & MSG_PEEK)
1353*8348SEric.Yu@Sun.COM 		msg->msg_flags &= ~MSG_WAITALL;
1354*8348SEric.Yu@Sun.COM 
	/*
	 * NOTE(review): MSG_TRUNC is added to the request flags for
	 * SM_ATOMIC sockets; presumably it is consumed by the protocol
	 * or the dequeue code - confirm.
	 */
1355*8348SEric.Yu@Sun.COM 	if (so->so_mode & SM_ATOMIC)
1356*8348SEric.Yu@Sun.COM 		msg->msg_flags |= MSG_TRUNC;
1357*8348SEric.Yu@Sun.COM 
	/*
	 * Out-of-band requests are completed here and never reach the
	 * receive-queue code below.
	 */
1358*8348SEric.Yu@Sun.COM 	if (msg->msg_flags & MSG_OOB) {
1359*8348SEric.Yu@Sun.COM 		if ((so->so_mode & SM_EXDATA) == 0) {
1360*8348SEric.Yu@Sun.COM 			error = EOPNOTSUPP;
1361*8348SEric.Yu@Sun.COM 		} else if (so->so_downcalls->sd_recv_uio != NULL) {
1362*8348SEric.Yu@Sun.COM 			error = (*so->so_downcalls->sd_recv_uio)
1363*8348SEric.Yu@Sun.COM 			    (so->so_proto_handle, uiop, msg, cr);
1364*8348SEric.Yu@Sun.COM 		} else {
1365*8348SEric.Yu@Sun.COM 			error = sorecvoob(so, msg, uiop, msg->msg_flags,
1366*8348SEric.Yu@Sun.COM 			    IS_SO_OOB_INLINE(so));
1367*8348SEric.Yu@Sun.COM 		}
1368*8348SEric.Yu@Sun.COM 		SO_UNBLOCK_FALLBACK(so);
1369*8348SEric.Yu@Sun.COM 		return (error);
1370*8348SEric.Yu@Sun.COM 	}
1371*8348SEric.Yu@Sun.COM 
1372*8348SEric.Yu@Sun.COM 	/*
1373*8348SEric.Yu@Sun.COM 	 * If the protocol has the recv down call, then pass the request
1374*8348SEric.Yu@Sun.COM 	 * down.
1375*8348SEric.Yu@Sun.COM 	 */
1376*8348SEric.Yu@Sun.COM 	if (so->so_downcalls->sd_recv_uio != NULL) {
1377*8348SEric.Yu@Sun.COM 		error = (*so->so_downcalls->sd_recv_uio)
1378*8348SEric.Yu@Sun.COM 		    (so->so_proto_handle, uiop, msg, cr);
1379*8348SEric.Yu@Sun.COM 		SO_UNBLOCK_FALLBACK(so);
1380*8348SEric.Yu@Sun.COM 		return (error);
1381*8348SEric.Yu@Sun.COM 	}
1382*8348SEric.Yu@Sun.COM 
1383*8348SEric.Yu@Sun.COM 	/*
1384*8348SEric.Yu@Sun.COM 	 * Reading data from the socket buffer
	 *
	 * From here on "flags" holds the request flags and
	 * msg->msg_flags is used only for flags returned to the caller.
1385*8348SEric.Yu@Sun.COM 	 */
1386*8348SEric.Yu@Sun.COM 	flags = msg->msg_flags;
1387*8348SEric.Yu@Sun.COM 	msg->msg_flags = 0;
1388*8348SEric.Yu@Sun.COM 
1389*8348SEric.Yu@Sun.COM 	/*
1390*8348SEric.Yu@Sun.COM 	 * Set msg_controllen and msg_namelen to zero here to make it
1391*8348SEric.Yu@Sun.COM 	 * simpler in the cases that no control or name is returned.
1392*8348SEric.Yu@Sun.COM 	 */
1393*8348SEric.Yu@Sun.COM 	controllen = msg->msg_controllen;
1394*8348SEric.Yu@Sun.COM 	namelen = msg->msg_namelen;
1395*8348SEric.Yu@Sun.COM 	msg->msg_controllen = 0;
1396*8348SEric.Yu@Sun.COM 	msg->msg_namelen = 0;
1397*8348SEric.Yu@Sun.COM 
1398*8348SEric.Yu@Sun.COM 	mutex_enter(&so->so_lock);
1399*8348SEric.Yu@Sun.COM 	/* Set SOREADLOCKED */
1400*8348SEric.Yu@Sun.COM 	error = so_lock_read_intr(so,
1401*8348SEric.Yu@Sun.COM 	    uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0));
1402*8348SEric.Yu@Sun.COM 	mutex_exit(&so->so_lock);
1403*8348SEric.Yu@Sun.COM 	if (error) {
1404*8348SEric.Yu@Sun.COM 		SO_UNBLOCK_FALLBACK(so);
1405*8348SEric.Yu@Sun.COM 		return (error);
1406*8348SEric.Yu@Sun.COM 	}
1407*8348SEric.Yu@Sun.COM 
	/*
	 * sod_rcv_init() is given the address of uiop, so it can replace
	 * it; its return value is handed back to sod_rcv_done() on exit.
	 */
1408*8348SEric.Yu@Sun.COM 	suiop = sod_rcv_init(so, flags, &uiop);
1409*8348SEric.Yu@Sun.COM retry:
	/* Remember the starting resid to tell whether this pass moved data. */
1410*8348SEric.Yu@Sun.COM 	saved_resid = uiop->uio_resid;
1411*8348SEric.Yu@Sun.COM 	error = so_dequeue_msg(so, &mctlp, uiop, &rval, flags);
1412*8348SEric.Yu@Sun.COM 	if (error != 0) {
1413*8348SEric.Yu@Sun.COM 		goto out;
1414*8348SEric.Yu@Sun.COM 	}
1415*8348SEric.Yu@Sun.COM 	/*
1416*8348SEric.Yu@Sun.COM 	 * For datagrams the MOREDATA flag is used to set MSG_TRUNC.
1417*8348SEric.Yu@Sun.COM 	 * For non-datagrams MOREDATA is used to set MSG_EOR.
1418*8348SEric.Yu@Sun.COM 	 */
1419*8348SEric.Yu@Sun.COM 	ASSERT(!(rval.r_val1 & MORECTL));
1420*8348SEric.Yu@Sun.COM 	if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC))
1421*8348SEric.Yu@Sun.COM 		msg->msg_flags |= MSG_TRUNC;
1422*8348SEric.Yu@Sun.COM 	if (mctlp == NULL) {
1423*8348SEric.Yu@Sun.COM 		dprintso(so, 1, ("so_recvmsg: got M_DATA\n"));
1424*8348SEric.Yu@Sun.COM 
1425*8348SEric.Yu@Sun.COM 		mutex_enter(&so->so_lock);
1426*8348SEric.Yu@Sun.COM 		/* Set MSG_EOR based on MOREDATA */
1427*8348SEric.Yu@Sun.COM 		if (!(rval.r_val1 & MOREDATA)) {
1428*8348SEric.Yu@Sun.COM 			if (so->so_state & SS_SAVEDEOR) {
1429*8348SEric.Yu@Sun.COM 				msg->msg_flags |= MSG_EOR;
1430*8348SEric.Yu@Sun.COM 				so->so_state &= ~SS_SAVEDEOR;
1431*8348SEric.Yu@Sun.COM 			}
1432*8348SEric.Yu@Sun.COM 		}
1433*8348SEric.Yu@Sun.COM 		/*
1434*8348SEric.Yu@Sun.COM 		 * If some data was received (i.e. not EOF) and the
1435*8348SEric.Yu@Sun.COM 		 * read/recv* has not been satisfied wait for some more.
1436*8348SEric.Yu@Sun.COM 		 */
1437*8348SEric.Yu@Sun.COM 		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
1438*8348SEric.Yu@Sun.COM 		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
1439*8348SEric.Yu@Sun.COM 			mutex_exit(&so->so_lock);
1440*8348SEric.Yu@Sun.COM 			goto retry;
1441*8348SEric.Yu@Sun.COM 		}
1442*8348SEric.Yu@Sun.COM 
1443*8348SEric.Yu@Sun.COM 		goto out_locked;
1444*8348SEric.Yu@Sun.COM 	}
1445*8348SEric.Yu@Sun.COM 	/* strsock_proto has already verified length and alignment */
1446*8348SEric.Yu@Sun.COM 	tpr = (union T_primitives *)mctlp->b_rptr;
1447*8348SEric.Yu@Sun.COM 	dprintso(so, 1, ("so_recvmsg: type %d\n", tpr->type));
1448*8348SEric.Yu@Sun.COM 	switch (tpr->type) {
1449*8348SEric.Yu@Sun.COM 	case T_DATA_IND: {
1450*8348SEric.Yu@Sun.COM 		/*
1451*8348SEric.Yu@Sun.COM 		 * Set msg_flags to MSG_EOR based on
1452*8348SEric.Yu@Sun.COM 		 * MORE_flag and MOREDATA.
1453*8348SEric.Yu@Sun.COM 		 */
1454*8348SEric.Yu@Sun.COM 		mutex_enter(&so->so_lock);
1455*8348SEric.Yu@Sun.COM 		so->so_state &= ~SS_SAVEDEOR;
1456*8348SEric.Yu@Sun.COM 		if (!(tpr->data_ind.MORE_flag & 1)) {
1457*8348SEric.Yu@Sun.COM 			if (!(rval.r_val1 & MOREDATA))
1458*8348SEric.Yu@Sun.COM 				msg->msg_flags |= MSG_EOR;
1459*8348SEric.Yu@Sun.COM 			else
1460*8348SEric.Yu@Sun.COM 				so->so_state |= SS_SAVEDEOR;
1461*8348SEric.Yu@Sun.COM 		}
1462*8348SEric.Yu@Sun.COM 		freemsg(mctlp);
1463*8348SEric.Yu@Sun.COM 		/*
1464*8348SEric.Yu@Sun.COM 		 * If some data was received (i.e. not EOF) and the
1465*8348SEric.Yu@Sun.COM 		 * read/recv* has not been satisfied wait for some more.
1466*8348SEric.Yu@Sun.COM 		 */
1467*8348SEric.Yu@Sun.COM 		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
1468*8348SEric.Yu@Sun.COM 		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
1469*8348SEric.Yu@Sun.COM 			mutex_exit(&so->so_lock);
1470*8348SEric.Yu@Sun.COM 			goto retry;
1471*8348SEric.Yu@Sun.COM 		}
1472*8348SEric.Yu@Sun.COM 		goto out_locked;
1473*8348SEric.Yu@Sun.COM 	}
1474*8348SEric.Yu@Sun.COM 	case T_UNITDATA_IND: {
1475*8348SEric.Yu@Sun.COM 		void *addr;
1476*8348SEric.Yu@Sun.COM 		t_uscalar_t addrlen;
1477*8348SEric.Yu@Sun.COM 		void *abuf;
1478*8348SEric.Yu@Sun.COM 		t_uscalar_t optlen;
1479*8348SEric.Yu@Sun.COM 		void *opt;
1480*8348SEric.Yu@Sun.COM 
1481*8348SEric.Yu@Sun.COM 		if (namelen != 0) {
1482*8348SEric.Yu@Sun.COM 			/* Caller wants source address */
1483*8348SEric.Yu@Sun.COM 			addrlen = tpr->unitdata_ind.SRC_length;
1484*8348SEric.Yu@Sun.COM 			addr = sogetoff(mctlp, tpr->unitdata_ind.SRC_offset,
1485*8348SEric.Yu@Sun.COM 			    addrlen, 1);
1486*8348SEric.Yu@Sun.COM 			if (addr == NULL) {
1487*8348SEric.Yu@Sun.COM 				freemsg(mctlp);
1488*8348SEric.Yu@Sun.COM 				error = EPROTO;
1489*8348SEric.Yu@Sun.COM 				eprintsoline(so, error);
1490*8348SEric.Yu@Sun.COM 				goto out;
1491*8348SEric.Yu@Sun.COM 			}
1492*8348SEric.Yu@Sun.COM 			ASSERT(so->so_family != AF_UNIX);
1493*8348SEric.Yu@Sun.COM 		}
1494*8348SEric.Yu@Sun.COM 		optlen = tpr->unitdata_ind.OPT_length;
1495*8348SEric.Yu@Sun.COM 		if (optlen != 0) {
1496*8348SEric.Yu@Sun.COM 			t_uscalar_t ncontrollen;
1497*8348SEric.Yu@Sun.COM 
1498*8348SEric.Yu@Sun.COM 			/*
1499*8348SEric.Yu@Sun.COM 			 * Extract any source address option.
1500*8348SEric.Yu@Sun.COM 			 * Determine how large cmsg buffer is needed.
1501*8348SEric.Yu@Sun.COM 			 */
1502*8348SEric.Yu@Sun.COM 			opt = sogetoff(mctlp, tpr->unitdata_ind.OPT_offset,
1503*8348SEric.Yu@Sun.COM 			    optlen, __TPI_ALIGN_SIZE);
1504*8348SEric.Yu@Sun.COM 
1505*8348SEric.Yu@Sun.COM 			if (opt == NULL) {
1506*8348SEric.Yu@Sun.COM 				freemsg(mctlp);
1507*8348SEric.Yu@Sun.COM 				error = EPROTO;
1508*8348SEric.Yu@Sun.COM 				eprintsoline(so, error);
1509*8348SEric.Yu@Sun.COM 				goto out;
1510*8348SEric.Yu@Sun.COM 			}
1511*8348SEric.Yu@Sun.COM 			if (so->so_family == AF_UNIX)
1512*8348SEric.Yu@Sun.COM 				so_getopt_srcaddr(opt, optlen, &addr, &addrlen);
1513*8348SEric.Yu@Sun.COM 			ncontrollen = so_cmsglen(mctlp, opt, optlen,
1514*8348SEric.Yu@Sun.COM 			    !(flags & MSG_XPG4_2));
			/*
			 * A caller that supplied no control buffer gets
			 * MSG_CTRUNC instead of the control data.
			 */
1515*8348SEric.Yu@Sun.COM 			if (controllen != 0)
1516*8348SEric.Yu@Sun.COM 				controllen = ncontrollen;
1517*8348SEric.Yu@Sun.COM 			else if (ncontrollen != 0)
1518*8348SEric.Yu@Sun.COM 				msg->msg_flags |= MSG_CTRUNC;
1519*8348SEric.Yu@Sun.COM 		} else {
1520*8348SEric.Yu@Sun.COM 			controllen = 0;
1521*8348SEric.Yu@Sun.COM 		}
1522*8348SEric.Yu@Sun.COM 
1523*8348SEric.Yu@Sun.COM 		if (namelen != 0) {
1524*8348SEric.Yu@Sun.COM 			/*
1525*8348SEric.Yu@Sun.COM 			 * Return address to caller.
1526*8348SEric.Yu@Sun.COM 			 * Caller handles truncation if length
1527*8348SEric.Yu@Sun.COM 			 * exceeds msg_namelen.
1528*8348SEric.Yu@Sun.COM 			 * NOTE: AF_UNIX NUL termination is ensured by
1529*8348SEric.Yu@Sun.COM 			 * the sender's copyin_name().
1530*8348SEric.Yu@Sun.COM 			 */
1531*8348SEric.Yu@Sun.COM 			abuf = kmem_alloc(addrlen, KM_SLEEP);
1532*8348SEric.Yu@Sun.COM 
1533*8348SEric.Yu@Sun.COM 			bcopy(addr, abuf, addrlen);
1534*8348SEric.Yu@Sun.COM 			msg->msg_name = abuf;
1535*8348SEric.Yu@Sun.COM 			msg->msg_namelen = addrlen;
1536*8348SEric.Yu@Sun.COM 		}
1537*8348SEric.Yu@Sun.COM 
		/*
		 * controllen can only be non-zero here when optlen was
		 * non-zero above, so opt has been set before it is read.
		 */
1538*8348SEric.Yu@Sun.COM 		if (controllen != 0) {
1539*8348SEric.Yu@Sun.COM 			/*
1540*8348SEric.Yu@Sun.COM 			 * Return control msg to caller.
1541*8348SEric.Yu@Sun.COM 			 * Caller handles truncation if length
1542*8348SEric.Yu@Sun.COM 			 * exceeds msg_controllen.
1543*8348SEric.Yu@Sun.COM 			 */
1544*8348SEric.Yu@Sun.COM 			control = kmem_zalloc(controllen, KM_SLEEP);
1545*8348SEric.Yu@Sun.COM 
1546*8348SEric.Yu@Sun.COM 			error = so_opt2cmsg(mctlp, opt, optlen,
1547*8348SEric.Yu@Sun.COM 			    !(flags & MSG_XPG4_2), control, controllen);
1548*8348SEric.Yu@Sun.COM 			if (error) {
1549*8348SEric.Yu@Sun.COM 				freemsg(mctlp);
				/*
				 * NOTE(review): the name buffer is freed
				 * but msg_name and msg_namelen still refer
				 * to it on return - confirm callers ignore
				 * them when an error is returned.
				 */
1550*8348SEric.Yu@Sun.COM 				if (msg->msg_namelen != 0)
1551*8348SEric.Yu@Sun.COM 					kmem_free(msg->msg_name,
1552*8348SEric.Yu@Sun.COM 					    msg->msg_namelen);
1553*8348SEric.Yu@Sun.COM 				kmem_free(control, controllen);
1554*8348SEric.Yu@Sun.COM 				eprintsoline(so, error);
1555*8348SEric.Yu@Sun.COM 				goto out;
1556*8348SEric.Yu@Sun.COM 			}
1557*8348SEric.Yu@Sun.COM 			msg->msg_control = control;
1558*8348SEric.Yu@Sun.COM 			msg->msg_controllen = controllen;
1559*8348SEric.Yu@Sun.COM 		}
1560*8348SEric.Yu@Sun.COM 
1561*8348SEric.Yu@Sun.COM 		freemsg(mctlp);
1562*8348SEric.Yu@Sun.COM 		goto out;
1563*8348SEric.Yu@Sun.COM 	}
1564*8348SEric.Yu@Sun.COM 	case T_OPTDATA_IND: {
1565*8348SEric.Yu@Sun.COM 		struct T_optdata_req *tdr;
1566*8348SEric.Yu@Sun.COM 		void *opt;
1567*8348SEric.Yu@Sun.COM 		t_uscalar_t optlen;
1568*8348SEric.Yu@Sun.COM 
		/*
		 * NOTE(review): the message is read through
		 * struct T_optdata_req here and through
		 * data_ind.MORE_flag further down, rather than through
		 * optdata_ind throughout; presumably the layouts
		 * coincide - confirm.
		 */
1569*8348SEric.Yu@Sun.COM 		tdr = (struct T_optdata_req *)mctlp->b_rptr;
1570*8348SEric.Yu@Sun.COM 		optlen = tdr->OPT_length;
1571*8348SEric.Yu@Sun.COM 		if (optlen != 0) {
1572*8348SEric.Yu@Sun.COM 			t_uscalar_t ncontrollen;
1573*8348SEric.Yu@Sun.COM 			/*
1574*8348SEric.Yu@Sun.COM 			 * Determine how large cmsg buffer is needed.
1575*8348SEric.Yu@Sun.COM 			 */
1576*8348SEric.Yu@Sun.COM 			opt = sogetoff(mctlp,
1577*8348SEric.Yu@Sun.COM 			    tpr->optdata_ind.OPT_offset, optlen,
1578*8348SEric.Yu@Sun.COM 			    __TPI_ALIGN_SIZE);
1579*8348SEric.Yu@Sun.COM 
1580*8348SEric.Yu@Sun.COM 			if (opt == NULL) {
1581*8348SEric.Yu@Sun.COM 				freemsg(mctlp);
1582*8348SEric.Yu@Sun.COM 				error = EPROTO;
1583*8348SEric.Yu@Sun.COM 				eprintsoline(so, error);
1584*8348SEric.Yu@Sun.COM 				goto out;
1585*8348SEric.Yu@Sun.COM 			}
1586*8348SEric.Yu@Sun.COM 
1587*8348SEric.Yu@Sun.COM 			ncontrollen = so_cmsglen(mctlp, opt, optlen,
1588*8348SEric.Yu@Sun.COM 			    !(flags & MSG_XPG4_2));
1589*8348SEric.Yu@Sun.COM 			if (controllen != 0)
1590*8348SEric.Yu@Sun.COM 				controllen = ncontrollen;
1591*8348SEric.Yu@Sun.COM 			else if (ncontrollen != 0)
1592*8348SEric.Yu@Sun.COM 				msg->msg_flags |= MSG_CTRUNC;
1593*8348SEric.Yu@Sun.COM 		} else {
1594*8348SEric.Yu@Sun.COM 			controllen = 0;
1595*8348SEric.Yu@Sun.COM 		}
1596*8348SEric.Yu@Sun.COM 
1597*8348SEric.Yu@Sun.COM 		if (controllen != 0) {
1598*8348SEric.Yu@Sun.COM 			/*
1599*8348SEric.Yu@Sun.COM 			 * Return control msg to caller.
1600*8348SEric.Yu@Sun.COM 			 * Caller handles truncation if length
1601*8348SEric.Yu@Sun.COM 			 * exceeds msg_controllen.
1602*8348SEric.Yu@Sun.COM 			 */
1603*8348SEric.Yu@Sun.COM 			control = kmem_zalloc(controllen, KM_SLEEP);
1604*8348SEric.Yu@Sun.COM 
1605*8348SEric.Yu@Sun.COM 			error = so_opt2cmsg(mctlp, opt, optlen,
1606*8348SEric.Yu@Sun.COM 			    !(flags & MSG_XPG4_2), control, controllen);
1607*8348SEric.Yu@Sun.COM 			if (error) {
1608*8348SEric.Yu@Sun.COM 				freemsg(mctlp);
1609*8348SEric.Yu@Sun.COM 				kmem_free(control, controllen);
1610*8348SEric.Yu@Sun.COM 				eprintsoline(so, error);
1611*8348SEric.Yu@Sun.COM 				goto out;
1612*8348SEric.Yu@Sun.COM 			}
1613*8348SEric.Yu@Sun.COM 			msg->msg_control = control;
1614*8348SEric.Yu@Sun.COM 			msg->msg_controllen = controllen;
1615*8348SEric.Yu@Sun.COM 		}
1616*8348SEric.Yu@Sun.COM 
1617*8348SEric.Yu@Sun.COM 		/*
1618*8348SEric.Yu@Sun.COM 		 * Set msg_flags to MSG_EOR based on
1619*8348SEric.Yu@Sun.COM 		 * DATA_flag and MOREDATA.
1620*8348SEric.Yu@Sun.COM 		 */
1621*8348SEric.Yu@Sun.COM 		mutex_enter(&so->so_lock);
1622*8348SEric.Yu@Sun.COM 		so->so_state &= ~SS_SAVEDEOR;
1623*8348SEric.Yu@Sun.COM 		if (!(tpr->data_ind.MORE_flag & 1)) {
1624*8348SEric.Yu@Sun.COM 			if (!(rval.r_val1 & MOREDATA))
1625*8348SEric.Yu@Sun.COM 				msg->msg_flags |= MSG_EOR;
1626*8348SEric.Yu@Sun.COM 			else
1627*8348SEric.Yu@Sun.COM 				so->so_state |= SS_SAVEDEOR;
1628*8348SEric.Yu@Sun.COM 		}
1629*8348SEric.Yu@Sun.COM 		freemsg(mctlp);
1630*8348SEric.Yu@Sun.COM 		/*
1631*8348SEric.Yu@Sun.COM 		 * If some data was received (i.e. not EOF) and the
1632*8348SEric.Yu@Sun.COM 		 * read/recv* has not been satisfied wait for some more.
1633*8348SEric.Yu@Sun.COM 		 * Not possible to wait if control info was received.
1634*8348SEric.Yu@Sun.COM 		 */
1635*8348SEric.Yu@Sun.COM 		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
1636*8348SEric.Yu@Sun.COM 		    controllen == 0 &&
1637*8348SEric.Yu@Sun.COM 		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
1638*8348SEric.Yu@Sun.COM 			mutex_exit(&so->so_lock);
1639*8348SEric.Yu@Sun.COM 			goto retry;
1640*8348SEric.Yu@Sun.COM 		}
1641*8348SEric.Yu@Sun.COM 		goto out_locked;
1642*8348SEric.Yu@Sun.COM 	}
1643*8348SEric.Yu@Sun.COM 	default:
		/*
		 * Unexpected primitive: log it, drop the message and
		 * fall into "out" with EPROTO.
		 */
1644*8348SEric.Yu@Sun.COM 		cmn_err(CE_CONT, "so_recvmsg bad type %x \n",
1645*8348SEric.Yu@Sun.COM 		    tpr->type);
1646*8348SEric.Yu@Sun.COM 		freemsg(mctlp);
1647*8348SEric.Yu@Sun.COM 		error = EPROTO;
1648*8348SEric.Yu@Sun.COM 		ASSERT(0);
1649*8348SEric.Yu@Sun.COM 	}
1650*8348SEric.Yu@Sun.COM out:
	/* Paths arriving at "out" do not hold so_lock; take it here. */
1651*8348SEric.Yu@Sun.COM 	mutex_enter(&so->so_lock);
1652*8348SEric.Yu@Sun.COM out_locked:
	/* Paths that jump to "out_locked" already hold so_lock. */
1653*8348SEric.Yu@Sun.COM 	/* The sod_lockp points to the sonode so_lock */
1654*8348SEric.Yu@Sun.COM 	ret = sod_rcv_done(so, suiop, uiop);
	/* An earlier error takes precedence over one from sod_rcv_done(). */
1655*8348SEric.Yu@Sun.COM 	if (ret != 0 && error == 0)
1656*8348SEric.Yu@Sun.COM 		error = ret;
1657*8348SEric.Yu@Sun.COM 
1658*8348SEric.Yu@Sun.COM 	so_unlock_read(so);	/* Clear SOREADLOCKED */
1659*8348SEric.Yu@Sun.COM 	mutex_exit(&so->so_lock);
1660*8348SEric.Yu@Sun.COM 
1661*8348SEric.Yu@Sun.COM 	SO_UNBLOCK_FALLBACK(so);
1662*8348SEric.Yu@Sun.COM 
1663*8348SEric.Yu@Sun.COM 	return (error);
1664*8348SEric.Yu@Sun.COM }
1665*8348SEric.Yu@Sun.COM 
/*
 * Socket operations vector built from the so_* routines in this file.
 * The initializer is positional: each entry must stay in the order of
 * the sonodeops_t members named in the trailing comments.
 */
1666*8348SEric.Yu@Sun.COM sonodeops_t so_sonodeops = {
1667*8348SEric.Yu@Sun.COM 	so_init,		/* sop_init	*/
1668*8348SEric.Yu@Sun.COM 	so_accept,		/* sop_accept   */
1669*8348SEric.Yu@Sun.COM 	so_bind,		/* sop_bind	*/
1670*8348SEric.Yu@Sun.COM 	so_listen,		/* sop_listen   */
1671*8348SEric.Yu@Sun.COM 	so_connect,		/* sop_connect  */
1672*8348SEric.Yu@Sun.COM 	so_recvmsg,		/* sop_recvmsg  */
1673*8348SEric.Yu@Sun.COM 	so_sendmsg,		/* sop_sendmsg  */
1674*8348SEric.Yu@Sun.COM 	so_sendmblk,		/* sop_sendmblk */
1675*8348SEric.Yu@Sun.COM 	so_getpeername,		/* sop_getpeername */
1676*8348SEric.Yu@Sun.COM 	so_getsockname,		/* sop_getsockname */
1677*8348SEric.Yu@Sun.COM 	so_shutdown,		/* sop_shutdown */
1678*8348SEric.Yu@Sun.COM 	so_getsockopt,		/* sop_getsockopt */
1679*8348SEric.Yu@Sun.COM 	so_setsockopt,		/* sop_setsockopt */
1680*8348SEric.Yu@Sun.COM 	so_ioctl,		/* sop_ioctl    */
1681*8348SEric.Yu@Sun.COM 	so_poll,		/* sop_poll	*/
1682*8348SEric.Yu@Sun.COM 	so_close,		/* sop_close */
1683*8348SEric.Yu@Sun.COM };
1684*8348SEric.Yu@Sun.COM 
/*
 * Upcall vector made of the so_* upcall routines in this file.
 * The initializer is positional, so the entries must stay in the order
 * of the sock_upcalls_t members.
 * NOTE(review): presumably this is the table handed to protocols so
 * they can call back into sockfs - confirm against sock_upcalls_t.
 */
1685*8348SEric.Yu@Sun.COM sock_upcalls_t so_upcalls = {
1686*8348SEric.Yu@Sun.COM 	so_newconn,
1687*8348SEric.Yu@Sun.COM 	so_connected,
1688*8348SEric.Yu@Sun.COM 	so_disconnected,
1689*8348SEric.Yu@Sun.COM 	so_opctl,
1690*8348SEric.Yu@Sun.COM 	so_queue_msg,
1691*8348SEric.Yu@Sun.COM 	so_set_prop,
1692*8348SEric.Yu@Sun.COM 	so_txq_full,
1693*8348SEric.Yu@Sun.COM 	so_signal_oob,
1694*8348SEric.Yu@Sun.COM 	so_zcopy_notify,
1695*8348SEric.Yu@Sun.COM 	so_set_error
1696*8348SEric.Yu@Sun.COM };
1697