xref: /onnv-gate/usr/src/uts/common/fs/sockfs/socktpi.c (revision 0:68f95e015346)
1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  * CDDL HEADER START
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*0Sstevel@tonic-gate  * with the License.
8*0Sstevel@tonic-gate  *
9*0Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate  * and limitations under the License.
13*0Sstevel@tonic-gate  *
14*0Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate  *
20*0Sstevel@tonic-gate  * CDDL HEADER END
21*0Sstevel@tonic-gate  */
22*0Sstevel@tonic-gate /*
23*0Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*0Sstevel@tonic-gate  * Use is subject to license terms.
25*0Sstevel@tonic-gate  */
26*0Sstevel@tonic-gate 
27*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*0Sstevel@tonic-gate 
29*0Sstevel@tonic-gate #include <sys/types.h>
30*0Sstevel@tonic-gate #include <sys/t_lock.h>
31*0Sstevel@tonic-gate #include <sys/param.h>
32*0Sstevel@tonic-gate #include <sys/systm.h>
33*0Sstevel@tonic-gate #include <sys/buf.h>
34*0Sstevel@tonic-gate #include <sys/conf.h>
35*0Sstevel@tonic-gate #include <sys/cred.h>
36*0Sstevel@tonic-gate #include <sys/kmem.h>
37*0Sstevel@tonic-gate #include <sys/sysmacros.h>
38*0Sstevel@tonic-gate #include <sys/vfs.h>
39*0Sstevel@tonic-gate #include <sys/vnode.h>
40*0Sstevel@tonic-gate #include <sys/debug.h>
41*0Sstevel@tonic-gate #include <sys/errno.h>
42*0Sstevel@tonic-gate #include <sys/time.h>
43*0Sstevel@tonic-gate #include <sys/file.h>
44*0Sstevel@tonic-gate #include <sys/open.h>
45*0Sstevel@tonic-gate #include <sys/user.h>
46*0Sstevel@tonic-gate #include <sys/termios.h>
47*0Sstevel@tonic-gate #include <sys/stream.h>
48*0Sstevel@tonic-gate #include <sys/strsubr.h>
49*0Sstevel@tonic-gate #include <sys/strsun.h>
50*0Sstevel@tonic-gate #include <sys/ddi.h>
51*0Sstevel@tonic-gate #include <sys/esunddi.h>
52*0Sstevel@tonic-gate #include <sys/flock.h>
53*0Sstevel@tonic-gate #include <sys/modctl.h>
54*0Sstevel@tonic-gate #include <sys/vtrace.h>
55*0Sstevel@tonic-gate #include <sys/cmn_err.h>
56*0Sstevel@tonic-gate #include <sys/pathname.h>
57*0Sstevel@tonic-gate 
58*0Sstevel@tonic-gate #include <sys/socket.h>
59*0Sstevel@tonic-gate #include <sys/socketvar.h>
60*0Sstevel@tonic-gate #include <netinet/in.h>
61*0Sstevel@tonic-gate #include <sys/un.h>
62*0Sstevel@tonic-gate #include <sys/strsun.h>
63*0Sstevel@tonic-gate 
64*0Sstevel@tonic-gate #include <sys/tiuser.h>
65*0Sstevel@tonic-gate #define	_SUN_TPI_VERSION	2
66*0Sstevel@tonic-gate #include <sys/tihdr.h>
67*0Sstevel@tonic-gate #include <sys/timod.h>		/* TI_GETMYNAME, TI_GETPEERNAME */
68*0Sstevel@tonic-gate 
69*0Sstevel@tonic-gate #include <c2/audit.h>
70*0Sstevel@tonic-gate 
71*0Sstevel@tonic-gate #include <inet/common.h>
72*0Sstevel@tonic-gate #include <inet/ip.h>
73*0Sstevel@tonic-gate #include <inet/ip6.h>
74*0Sstevel@tonic-gate #include <inet/tcp.h>
75*0Sstevel@tonic-gate 
76*0Sstevel@tonic-gate #include <fs/sockfs/nl7c.h>
77*0Sstevel@tonic-gate #include <sys/zone.h>
78*0Sstevel@tonic-gate 
79*0Sstevel@tonic-gate /*
80*0Sstevel@tonic-gate  * Possible failures when memory can't be allocated. The documented behavior:
81*0Sstevel@tonic-gate  *
82*0Sstevel@tonic-gate  * 		5.5:			4.X:		XNET:
83*0Sstevel@tonic-gate  * accept:	ENOMEM/ENOSR/EINTR	- (EINTR)	ENOMEM/ENOBUFS/ENOSR/
84*0Sstevel@tonic-gate  *							EINTR
85*0Sstevel@tonic-gate  *	(4.X does not document EINTR but returns it)
86*0Sstevel@tonic-gate  * bind:	ENOSR			-		ENOBUFS/ENOSR
87*0Sstevel@tonic-gate  * connect: 	EINTR			EINTR		ENOBUFS/ENOSR/EINTR
88*0Sstevel@tonic-gate  * getpeername:	ENOMEM/ENOSR		ENOBUFS (-)	ENOBUFS/ENOSR
89*0Sstevel@tonic-gate  * getsockname:	ENOMEM/ENOSR		ENOBUFS (-)	ENOBUFS/ENOSR
90*0Sstevel@tonic-gate  *	(4.X getpeername and getsockname do not fail in practice)
91*0Sstevel@tonic-gate  * getsockopt:	ENOMEM/ENOSR		-		ENOBUFS/ENOSR
92*0Sstevel@tonic-gate  * listen:	-			-		ENOBUFS
93*0Sstevel@tonic-gate  * recv:	ENOMEM/ENOSR/EINTR	EINTR		ENOBUFS/ENOMEM/ENOSR/
94*0Sstevel@tonic-gate  *							EINTR
95*0Sstevel@tonic-gate  * send:	ENOMEM/ENOSR/EINTR	ENOBUFS/EINTR	ENOBUFS/ENOMEM/ENOSR/
96*0Sstevel@tonic-gate  *							EINTR
97*0Sstevel@tonic-gate  * setsockopt:	ENOMEM/ENOSR		-		ENOBUFS/ENOMEM/ENOSR
98*0Sstevel@tonic-gate  * shutdown:	ENOMEM/ENOSR		-		ENOBUFS/ENOSR
99*0Sstevel@tonic-gate  * socket:	ENOMEM/ENOSR		ENOBUFS		ENOBUFS/ENOMEM/ENOSR
100*0Sstevel@tonic-gate  * socketpair:	ENOMEM/ENOSR		-		ENOBUFS/ENOMEM/ENOSR
101*0Sstevel@tonic-gate  *
102*0Sstevel@tonic-gate  * Resolution. When allocation fails:
103*0Sstevel@tonic-gate  *	recv: return EINTR
104*0Sstevel@tonic-gate  *	send: return EINTR
105*0Sstevel@tonic-gate  *	connect, accept: EINTR
106*0Sstevel@tonic-gate  *	bind, listen, shutdown (unbind, unix_close, disconnect): sleep
107*0Sstevel@tonic-gate  *	socket, socketpair: ENOBUFS
108*0Sstevel@tonic-gate  *	getpeername, getsockname: sleep
109*0Sstevel@tonic-gate  *	getsockopt, setsockopt: sleep
110*0Sstevel@tonic-gate  */
111*0Sstevel@tonic-gate 
112*0Sstevel@tonic-gate #ifdef SOCK_TEST
113*0Sstevel@tonic-gate /*
114*0Sstevel@tonic-gate  * Variables that make sockfs do something other than the standard TPI
115*0Sstevel@tonic-gate  * for the AF_INET transports.
 * In SOCK_TEST builds these are writable global ints; otherwise they are
 * fixed by the #define constants in the #else branch below.
116*0Sstevel@tonic-gate  *
117*0Sstevel@tonic-gate  * solisten_tpi_tcp:
118*0Sstevel@tonic-gate  *	TCP can handle a O_T_BIND_REQ with an increased backlog even though
119*0Sstevel@tonic-gate  *	the transport is already bound. This is needed to avoid losing the
120*0Sstevel@tonic-gate  *	port number should listen() do a T_UNBIND_REQ followed by a
121*0Sstevel@tonic-gate  *	O_T_BIND_REQ.
122*0Sstevel@tonic-gate  *
123*0Sstevel@tonic-gate  * soconnect_tpi_udp:
124*0Sstevel@tonic-gate  *	UDP and ICMP can handle a T_CONN_REQ.
125*0Sstevel@tonic-gate  *	This is needed to make the sequence of connect(), getsockname()
126*0Sstevel@tonic-gate  *	return the local IP address used to send packets to the connected-to
127*0Sstevel@tonic-gate  *	destination.
128*0Sstevel@tonic-gate  *
129*0Sstevel@tonic-gate  * soconnect_tpi_tcp:
130*0Sstevel@tonic-gate  *	TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ.
131*0Sstevel@tonic-gate  *	Set this to non-zero to send TPI conformant messages to TCP in this
132*0Sstevel@tonic-gate  *	respect. This is a performance optimization.
133*0Sstevel@tonic-gate  *
134*0Sstevel@tonic-gate  * soaccept_tpi_tcp:
135*0Sstevel@tonic-gate  *	TCP can handle a T_CONN_REQ without the acceptor being bound.
136*0Sstevel@tonic-gate  *	This is a performance optimization that has been picked up in XTI.
137*0Sstevel@tonic-gate  *
138*0Sstevel@tonic-gate  * soaccept_tpi_multioptions:
139*0Sstevel@tonic-gate  *	When inheriting SOL_SOCKET options from the listener to the accepting
140*0Sstevel@tonic-gate  *	socket send them as a single message for AF_INET{,6}.
141*0Sstevel@tonic-gate  */
142*0Sstevel@tonic-gate int solisten_tpi_tcp = 0;
143*0Sstevel@tonic-gate int soconnect_tpi_udp = 0;
144*0Sstevel@tonic-gate int soconnect_tpi_tcp = 0;
145*0Sstevel@tonic-gate int soaccept_tpi_tcp = 0;
146*0Sstevel@tonic-gate int soaccept_tpi_multioptions = 1;
147*0Sstevel@tonic-gate #else /* SOCK_TEST */
148*0Sstevel@tonic-gate #define	soconnect_tpi_tcp	0
149*0Sstevel@tonic-gate #define	soconnect_tpi_udp	0
150*0Sstevel@tonic-gate #define	solisten_tpi_tcp	0
151*0Sstevel@tonic-gate #define	soaccept_tpi_tcp	0
152*0Sstevel@tonic-gate #define	soaccept_tpi_multioptions	1
153*0Sstevel@tonic-gate #endif /* SOCK_TEST */
154*0Sstevel@tonic-gate 
155*0Sstevel@tonic-gate #ifdef SOCK_TEST
/*
 * Test-only variables, declared only in SOCK_TEST builds.
 * NOTE(review): their definitions and meaning are not visible in this
 * part of the file - confirm before relying on them.
 */
156*0Sstevel@tonic-gate extern int do_useracc;
157*0Sstevel@tonic-gate extern clock_t sock_test_timelimit;
158*0Sstevel@tonic-gate #endif /* SOCK_TEST */
159*0Sstevel@tonic-gate 
160*0Sstevel@tonic-gate /*
161*0Sstevel@tonic-gate  * Some X/Open added checks might have to be backed out to keep SunOS 4.X
162*0Sstevel@tonic-gate  * applications working. Turn on this flag to disable these checks.
 *
 * xnet_skip_checks:
 *	Non-zero disables the X/Open state checks (e.g. the bind check on
 *	SS_CANTSENDMORE in sotpi_bindlisten()).
 * xnet_check_print:
 *	Non-zero makes sockfs print a message when such a check causes a
 *	call to fail.
 * xnet_truncate_print:
 *	NOTE(review): not referenced in the visible part of this file;
 *	presumably the analogous switch for truncation messages - confirm.
163*0Sstevel@tonic-gate  */
164*0Sstevel@tonic-gate int xnet_skip_checks = 0;
165*0Sstevel@tonic-gate int xnet_check_print = 0;
166*0Sstevel@tonic-gate int xnet_truncate_print = 0;
167*0Sstevel@tonic-gate 
/*
 * Signal helpers declared extern here rather than through a header.
 * NOTE(review): their semantics are not visible in this part of the file.
 */
168*0Sstevel@tonic-gate extern	void sigintr(k_sigset_t *, int);
169*0Sstevel@tonic-gate extern	void sigunintr(k_sigset_t *);
170*0Sstevel@tonic-gate 
/* nl7c_* entry points; presumably provided by the NL7C code - confirm. */
171*0Sstevel@tonic-gate extern	void *nl7c_lookup_addr(void *, t_uscalar_t);
172*0Sstevel@tonic-gate extern	void *nl7c_add_addr(void *, t_uscalar_t);
173*0Sstevel@tonic-gate extern	void nl7c_listener_addr(void *, queue_t *);
174*0Sstevel@tonic-gate 
/* Forward declaration: sotpi_bindlisten() calls this before it is defined. */
175*0Sstevel@tonic-gate static int	sotpi_unbind(struct sonode *, int);
176*0Sstevel@tonic-gate 
177*0Sstevel@tonic-gate /* TPI sockfs sonode operations (file-local; installed in sotpi_sonodeops) */
178*0Sstevel@tonic-gate static int	sotpi_accept(struct sonode *, int, struct sonode **);
179*0Sstevel@tonic-gate static int	sotpi_bind(struct sonode *, struct sockaddr *, socklen_t,
180*0Sstevel@tonic-gate 		    int);
181*0Sstevel@tonic-gate static int	sotpi_connect(struct sonode *, const struct sockaddr *,
182*0Sstevel@tonic-gate 		    socklen_t, int, int);
183*0Sstevel@tonic-gate static int	sotpi_listen(struct sonode *, int);
184*0Sstevel@tonic-gate static int	sotpi_sendmsg(struct sonode *, struct nmsghdr *,
185*0Sstevel@tonic-gate 		    struct uio *);
186*0Sstevel@tonic-gate static int	sotpi_shutdown(struct sonode *, int);
187*0Sstevel@tonic-gate static int	sotpi_getsockname(struct sonode *);
188*0Sstevel@tonic-gate 
/*
 * Operations vector for TPI sockets.
 *
 * The initializer is positional: each entry must stay in the slot named by
 * its trailing comment (sop_accept, sop_bind, ...).  sotpi_recvmsg,
 * sotpi_getpeername, sotpi_getsockopt and sotpi_setsockopt have no static
 * prototype above; presumably they are declared in an included header -
 * confirm.
 */
189*0Sstevel@tonic-gate sonodeops_t sotpi_sonodeops = {
190*0Sstevel@tonic-gate 	sotpi_accept,		/* sop_accept		*/
191*0Sstevel@tonic-gate 	sotpi_bind,		/* sop_bind		*/
192*0Sstevel@tonic-gate 	sotpi_listen,		/* sop_listen		*/
193*0Sstevel@tonic-gate 	sotpi_connect,		/* sop_connect		*/
194*0Sstevel@tonic-gate 	sotpi_recvmsg,		/* sop_recvmsg		*/
195*0Sstevel@tonic-gate 	sotpi_sendmsg,		/* sop_sendmsg		*/
196*0Sstevel@tonic-gate 	sotpi_getpeername,	/* sop_getpeername	*/
197*0Sstevel@tonic-gate 	sotpi_getsockname,	/* sop_getsockname	*/
198*0Sstevel@tonic-gate 	sotpi_shutdown,		/* sop_shutdown		*/
199*0Sstevel@tonic-gate 	sotpi_getsockopt,	/* sop_getsockopt	*/
200*0Sstevel@tonic-gate 	sotpi_setsockopt	/* sop_setsockopt	*/
201*0Sstevel@tonic-gate };
202*0Sstevel@tonic-gate 
203*0Sstevel@tonic-gate /*
204*0Sstevel@tonic-gate  * Common create code for socket and accept. If tso is set the values
205*0Sstevel@tonic-gate  * from that node is used instead of issuing a T_INFO_REQ.
206*0Sstevel@tonic-gate  *
207*0Sstevel@tonic-gate  * Assumes that the caller has a VN_HOLD on accessvp.
208*0Sstevel@tonic-gate  * The VN_RELE will occur either when sotpi_create() fails or when
209*0Sstevel@tonic-gate  * the returned sonode is freed.
210*0Sstevel@tonic-gate  */
211*0Sstevel@tonic-gate struct sonode *
212*0Sstevel@tonic-gate sotpi_create(vnode_t *accessvp, int domain, int type, int protocol, int version,
213*0Sstevel@tonic-gate     struct sonode *tso, int *errorp)
214*0Sstevel@tonic-gate {
215*0Sstevel@tonic-gate 	struct sonode	*so;
216*0Sstevel@tonic-gate 	vnode_t		*vp;
217*0Sstevel@tonic-gate 	int		flags, error;
218*0Sstevel@tonic-gate 
219*0Sstevel@tonic-gate 	ASSERT(accessvp != NULL);
220*0Sstevel@tonic-gate 	vp = makesockvp(accessvp, domain, type, protocol);
221*0Sstevel@tonic-gate 	ASSERT(vp != NULL);
222*0Sstevel@tonic-gate 	so = VTOSO(vp);
223*0Sstevel@tonic-gate 
224*0Sstevel@tonic-gate 	flags = FREAD|FWRITE;
225*0Sstevel@tonic-gate 	if (tso != NULL) {
226*0Sstevel@tonic-gate 		if ((tso->so_state & (SS_TCP_FAST_ACCEPT)) != 0) {
227*0Sstevel@tonic-gate 			flags |= SO_ACCEPTOR|SO_SOCKSTR;
228*0Sstevel@tonic-gate 			so->so_state |= SS_TCP_FAST_ACCEPT;
229*0Sstevel@tonic-gate 		}
230*0Sstevel@tonic-gate 	} else {
231*0Sstevel@tonic-gate 		if ((so->so_type == SOCK_STREAM) &&
232*0Sstevel@tonic-gate 		    (so->so_family == AF_INET || so->so_family == AF_INET6)) {
233*0Sstevel@tonic-gate 			flags |= SO_SOCKSTR;
234*0Sstevel@tonic-gate 			so->so_state |= SS_TCP_FAST_ACCEPT;
235*0Sstevel@tonic-gate 		}
236*0Sstevel@tonic-gate 	}
237*0Sstevel@tonic-gate 
238*0Sstevel@tonic-gate 	/*
239*0Sstevel@tonic-gate 	 * Tell local transport that it is talking to sockets.
240*0Sstevel@tonic-gate 	 */
241*0Sstevel@tonic-gate 	if (so->so_family == AF_UNIX) {
242*0Sstevel@tonic-gate 		flags |= SO_SOCKSTR;
243*0Sstevel@tonic-gate 	}
244*0Sstevel@tonic-gate 
245*0Sstevel@tonic-gate 	if (error = socktpi_open(&vp, flags, CRED())) {
246*0Sstevel@tonic-gate 		VN_RELE(vp);
247*0Sstevel@tonic-gate 		*errorp = error;
248*0Sstevel@tonic-gate 		return (NULL);
249*0Sstevel@tonic-gate 	}
250*0Sstevel@tonic-gate 
251*0Sstevel@tonic-gate 	if (error = so_strinit(so, tso)) {
252*0Sstevel@tonic-gate 		(void) VOP_CLOSE(vp, 0, 1, 0, CRED());
253*0Sstevel@tonic-gate 		VN_RELE(vp);
254*0Sstevel@tonic-gate 		*errorp = error;
255*0Sstevel@tonic-gate 		return (NULL);
256*0Sstevel@tonic-gate 	}
257*0Sstevel@tonic-gate 
258*0Sstevel@tonic-gate 	if (version == SOV_DEFAULT)
259*0Sstevel@tonic-gate 		version = so_default_version;
260*0Sstevel@tonic-gate 
261*0Sstevel@tonic-gate 	so->so_version = (short)version;
262*0Sstevel@tonic-gate 	return (so);
263*0Sstevel@tonic-gate }
264*0Sstevel@tonic-gate 
265*0Sstevel@tonic-gate /*
266*0Sstevel@tonic-gate  * Bind the socket to an unspecified address in sockfs only.
267*0Sstevel@tonic-gate  * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't
268*0Sstevel@tonic-gate  * required in all cases.
269*0Sstevel@tonic-gate  */
270*0Sstevel@tonic-gate static void
271*0Sstevel@tonic-gate so_automatic_bind(struct sonode *so)
272*0Sstevel@tonic-gate {
273*0Sstevel@tonic-gate 	ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6);
274*0Sstevel@tonic-gate 
275*0Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
276*0Sstevel@tonic-gate 	ASSERT(!(so->so_state & SS_ISBOUND));
277*0Sstevel@tonic-gate 	ASSERT(so->so_unbind_mp);
278*0Sstevel@tonic-gate 
279*0Sstevel@tonic-gate 	ASSERT(so->so_laddr_len <= so->so_laddr_maxlen);
280*0Sstevel@tonic-gate 	bzero(so->so_laddr_sa, so->so_laddr_len);
281*0Sstevel@tonic-gate 	so->so_laddr_sa->sa_family = so->so_family;
282*0Sstevel@tonic-gate 	so->so_state |= SS_ISBOUND;
283*0Sstevel@tonic-gate }
284*0Sstevel@tonic-gate 
285*0Sstevel@tonic-gate 
286*0Sstevel@tonic-gate /*
287*0Sstevel@tonic-gate  * bind the socket.
288*0Sstevel@tonic-gate  *
289*0Sstevel@tonic-gate  * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2
290*0Sstevel@tonic-gate  * are passed in we allow rebinding. Note that for backwards compatibility
291*0Sstevel@tonic-gate  * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind.
292*0Sstevel@tonic-gate  * Thus the rebinding code is currently not executed.
293*0Sstevel@tonic-gate  *
294*0Sstevel@tonic-gate  * The constraints for rebinding are:
295*0Sstevel@tonic-gate  * - it is a SOCK_DGRAM, or
296*0Sstevel@tonic-gate  * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected
297*0Sstevel@tonic-gate  *   and no listen() has been done.
298*0Sstevel@tonic-gate  * This rebinding code was added based on some language in the XNET book
299*0Sstevel@tonic-gate  * about not returning EINVAL if the protocol allows rebinding. However,
300*0Sstevel@tonic-gate  * this language is not present in the Posix socket draft. Thus maybe the
301*0Sstevel@tonic-gate  * rebinding logic should be deleted from the source.
302*0Sstevel@tonic-gate  *
303*0Sstevel@tonic-gate  * A null "name" can be used to unbind the socket if:
304*0Sstevel@tonic-gate  * - it is a SOCK_DGRAM, or
305*0Sstevel@tonic-gate  * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected
306*0Sstevel@tonic-gate  *   and no listen() has been done.
307*0Sstevel@tonic-gate  */
308*0Sstevel@tonic-gate static int
309*0Sstevel@tonic-gate sotpi_bindlisten(struct sonode *so, struct sockaddr *name,
310*0Sstevel@tonic-gate     socklen_t namelen, int backlog, int flags)
311*0Sstevel@tonic-gate {
312*0Sstevel@tonic-gate 	struct T_bind_req	bind_req;
313*0Sstevel@tonic-gate 	struct T_bind_ack	*bind_ack;
314*0Sstevel@tonic-gate 	int			error = 0;
315*0Sstevel@tonic-gate 	mblk_t			*mp;
316*0Sstevel@tonic-gate 	void			*addr;
317*0Sstevel@tonic-gate 	t_uscalar_t		addrlen;
318*0Sstevel@tonic-gate 	int			unbind_on_err = 1;
319*0Sstevel@tonic-gate 	boolean_t		clear_acceptconn_on_err = B_FALSE;
320*0Sstevel@tonic-gate 	boolean_t		restore_backlog_on_err = B_FALSE;
321*0Sstevel@tonic-gate 	int			save_so_backlog;
322*0Sstevel@tonic-gate 	t_scalar_t		PRIM_type = O_T_BIND_REQ;
323*0Sstevel@tonic-gate 	boolean_t		tcp_udp_xport;
324*0Sstevel@tonic-gate 	void			*nl7c = NULL;
325*0Sstevel@tonic-gate 
326*0Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n",
327*0Sstevel@tonic-gate 		so, name, namelen, backlog, flags,
328*0Sstevel@tonic-gate 		pr_state(so->so_state, so->so_mode)));
329*0Sstevel@tonic-gate 
330*0Sstevel@tonic-gate 	tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM;
331*0Sstevel@tonic-gate 
332*0Sstevel@tonic-gate 	if (!(flags & _SOBIND_LOCK_HELD)) {
333*0Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
334*0Sstevel@tonic-gate 		so_lock_single(so);	/* Set SOLOCKED */
335*0Sstevel@tonic-gate 	} else {
336*0Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(&so->so_lock));
337*0Sstevel@tonic-gate 		ASSERT(so->so_flag & SOLOCKED);
338*0Sstevel@tonic-gate 	}
339*0Sstevel@tonic-gate 
340*0Sstevel@tonic-gate 	/*
341*0Sstevel@tonic-gate 	 * Make sure that there is a preallocated unbind_req message
342*0Sstevel@tonic-gate 	 * before binding. This message is allocated when the socket is
343*0Sstevel@tonic-gate 	 * created but it might have been consumed.
344*0Sstevel@tonic-gate 	 */
345*0Sstevel@tonic-gate 	if (so->so_unbind_mp == NULL) {
346*0Sstevel@tonic-gate 		dprintso(so, 1, ("sobind: allocating unbind_req\n"));
347*0Sstevel@tonic-gate 		/* NOTE: holding so_lock while sleeping */
348*0Sstevel@tonic-gate 		so->so_unbind_mp =
349*0Sstevel@tonic-gate 		    soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP);
350*0Sstevel@tonic-gate 	}
351*0Sstevel@tonic-gate 
352*0Sstevel@tonic-gate 	if (flags & _SOBIND_REBIND) {
353*0Sstevel@tonic-gate 		/*
354*0Sstevel@tonic-gate 		 * Called from solisten after doing an sotpi_unbind() or
355*0Sstevel@tonic-gate 		 * potentially without the unbind (latter for AF_INET{,6}).
356*0Sstevel@tonic-gate 		 */
357*0Sstevel@tonic-gate 		ASSERT(name == NULL && namelen == 0);
358*0Sstevel@tonic-gate 
359*0Sstevel@tonic-gate 		if (so->so_family == AF_UNIX) {
360*0Sstevel@tonic-gate 			ASSERT(so->so_ux_bound_vp);
361*0Sstevel@tonic-gate 			addr = &so->so_ux_laddr;
362*0Sstevel@tonic-gate 			addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr);
363*0Sstevel@tonic-gate 			dprintso(so, 1,
364*0Sstevel@tonic-gate 			("sobind rebind UNIX: addrlen %d, addr 0x%p, vp %p\n",
365*0Sstevel@tonic-gate 			    addrlen,
366*0Sstevel@tonic-gate 			    ((struct so_ux_addr *)addr)->soua_vp,
367*0Sstevel@tonic-gate 			    so->so_ux_bound_vp));
368*0Sstevel@tonic-gate 		} else {
369*0Sstevel@tonic-gate 			addr = so->so_laddr_sa;
370*0Sstevel@tonic-gate 			addrlen = (t_uscalar_t)so->so_laddr_len;
371*0Sstevel@tonic-gate 		}
372*0Sstevel@tonic-gate 	} else if (flags & _SOBIND_UNSPEC) {
373*0Sstevel@tonic-gate 		ASSERT(name == NULL && namelen == 0);
374*0Sstevel@tonic-gate 
375*0Sstevel@tonic-gate 		/*
376*0Sstevel@tonic-gate 		 * The caller checked SS_ISBOUND but not necessarily
377*0Sstevel@tonic-gate 		 * under so_lock
378*0Sstevel@tonic-gate 		 */
379*0Sstevel@tonic-gate 		if (so->so_state & SS_ISBOUND) {
380*0Sstevel@tonic-gate 			/* No error */
381*0Sstevel@tonic-gate 			goto done;
382*0Sstevel@tonic-gate 		}
383*0Sstevel@tonic-gate 
384*0Sstevel@tonic-gate 		/* Set an initial local address */
385*0Sstevel@tonic-gate 		switch (so->so_family) {
386*0Sstevel@tonic-gate 		case AF_UNIX:
387*0Sstevel@tonic-gate 			/*
388*0Sstevel@tonic-gate 			 * Use an address with same size as struct sockaddr
389*0Sstevel@tonic-gate 			 * just like BSD.
390*0Sstevel@tonic-gate 			 */
391*0Sstevel@tonic-gate 			so->so_laddr_len =
392*0Sstevel@tonic-gate 				(socklen_t)sizeof (struct sockaddr);
393*0Sstevel@tonic-gate 			ASSERT(so->so_laddr_len <= so->so_laddr_maxlen);
394*0Sstevel@tonic-gate 			bzero(so->so_laddr_sa, so->so_laddr_len);
395*0Sstevel@tonic-gate 			so->so_laddr_sa->sa_family = so->so_family;
396*0Sstevel@tonic-gate 
397*0Sstevel@tonic-gate 			/*
398*0Sstevel@tonic-gate 			 * Pass down an address with the implicit bind
399*0Sstevel@tonic-gate 			 * magic number and the rest all zeros.
400*0Sstevel@tonic-gate 			 * The transport will return a unique address.
401*0Sstevel@tonic-gate 			 */
402*0Sstevel@tonic-gate 			so->so_ux_laddr.soua_vp = NULL;
403*0Sstevel@tonic-gate 			so->so_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT;
404*0Sstevel@tonic-gate 			addr = &so->so_ux_laddr;
405*0Sstevel@tonic-gate 			addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr);
406*0Sstevel@tonic-gate 			break;
407*0Sstevel@tonic-gate 
408*0Sstevel@tonic-gate 		case AF_INET:
409*0Sstevel@tonic-gate 		case AF_INET6:
410*0Sstevel@tonic-gate 			/*
411*0Sstevel@tonic-gate 			 * An unspecified bind in TPI has a NULL address.
412*0Sstevel@tonic-gate 			 * Set the address in sockfs to have the sa_family.
413*0Sstevel@tonic-gate 			 */
414*0Sstevel@tonic-gate 			so->so_laddr_len = (so->so_family == AF_INET) ?
415*0Sstevel@tonic-gate 			    (socklen_t)sizeof (sin_t) :
416*0Sstevel@tonic-gate 			    (socklen_t)sizeof (sin6_t);
417*0Sstevel@tonic-gate 			ASSERT(so->so_laddr_len <= so->so_laddr_maxlen);
418*0Sstevel@tonic-gate 			bzero(so->so_laddr_sa, so->so_laddr_len);
419*0Sstevel@tonic-gate 			so->so_laddr_sa->sa_family = so->so_family;
420*0Sstevel@tonic-gate 			addr = NULL;
421*0Sstevel@tonic-gate 			addrlen = 0;
422*0Sstevel@tonic-gate 			break;
423*0Sstevel@tonic-gate 
424*0Sstevel@tonic-gate 		default:
425*0Sstevel@tonic-gate 			/*
426*0Sstevel@tonic-gate 			 * An unspecified bind in TPI has a NULL address.
427*0Sstevel@tonic-gate 			 * Set the address in sockfs to be zero length.
428*0Sstevel@tonic-gate 			 *
429*0Sstevel@tonic-gate 			 * Can not assume there is a sa_family for all
430*0Sstevel@tonic-gate 			 * protocol families. For example, AF_X25 does not
431*0Sstevel@tonic-gate 			 * have a family field.
432*0Sstevel@tonic-gate 			 */
433*0Sstevel@tonic-gate 			so->so_laddr_len = 0;	/* XXX correct? */
434*0Sstevel@tonic-gate 			bzero(so->so_laddr_sa, so->so_laddr_len);
435*0Sstevel@tonic-gate 			addr = NULL;
436*0Sstevel@tonic-gate 			addrlen = 0;
437*0Sstevel@tonic-gate 			break;
438*0Sstevel@tonic-gate 		}
439*0Sstevel@tonic-gate 
440*0Sstevel@tonic-gate 	} else {
441*0Sstevel@tonic-gate 		if (so->so_state & SS_ISBOUND) {
442*0Sstevel@tonic-gate 			/*
443*0Sstevel@tonic-gate 			 * If it is ok to rebind the socket, first unbind
444*0Sstevel@tonic-gate 			 * with the transport. A rebind to the NULL address
445*0Sstevel@tonic-gate 			 * is interpreted as an unbind.
446*0Sstevel@tonic-gate 			 * Note that a bind to NULL in BSD does unbind the
447*0Sstevel@tonic-gate 			 * socket but it fails with EINVAL.
448*0Sstevel@tonic-gate 			 * Note that regular sockets set SOV_SOCKBSD i.e.
449*0Sstevel@tonic-gate 			 * _SOBIND_SOCKBSD gets set here hence no type of
450*0Sstevel@tonic-gate 			 * socket does currently allow rebinding.
451*0Sstevel@tonic-gate 			 *
452*0Sstevel@tonic-gate 			 * If the name is NULL just do an unbind.
453*0Sstevel@tonic-gate 			 */
454*0Sstevel@tonic-gate 			if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) &&
455*0Sstevel@tonic-gate 			    name != NULL) {
456*0Sstevel@tonic-gate 				error = EINVAL;
457*0Sstevel@tonic-gate 				unbind_on_err = 0;
458*0Sstevel@tonic-gate 				eprintsoline(so, error);
459*0Sstevel@tonic-gate 				goto done;
460*0Sstevel@tonic-gate 			}
461*0Sstevel@tonic-gate 			if ((so->so_mode & SM_CONNREQUIRED) &&
462*0Sstevel@tonic-gate 			    (so->so_state & SS_CANTREBIND)) {
463*0Sstevel@tonic-gate 				error = EINVAL;
464*0Sstevel@tonic-gate 				unbind_on_err = 0;
465*0Sstevel@tonic-gate 				eprintsoline(so, error);
466*0Sstevel@tonic-gate 				goto done;
467*0Sstevel@tonic-gate 			}
468*0Sstevel@tonic-gate 			error = sotpi_unbind(so, 0);
469*0Sstevel@tonic-gate 			if (error) {
470*0Sstevel@tonic-gate 				eprintsoline(so, error);
471*0Sstevel@tonic-gate 				goto done;
472*0Sstevel@tonic-gate 			}
473*0Sstevel@tonic-gate 			ASSERT(!(so->so_state & SS_ISBOUND));
474*0Sstevel@tonic-gate 			if (name == NULL) {
475*0Sstevel@tonic-gate 				so->so_state &=
476*0Sstevel@tonic-gate 					~(SS_ISCONNECTED|SS_ISCONNECTING);
477*0Sstevel@tonic-gate 				goto done;
478*0Sstevel@tonic-gate 			}
479*0Sstevel@tonic-gate 		}
480*0Sstevel@tonic-gate 		/* X/Open requires this check */
481*0Sstevel@tonic-gate 		if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
482*0Sstevel@tonic-gate 			if (xnet_check_print) {
483*0Sstevel@tonic-gate 				printf("sockfs: X/Open bind state check "
484*0Sstevel@tonic-gate 				    "caused EINVAL\n");
485*0Sstevel@tonic-gate 			}
486*0Sstevel@tonic-gate 			error = EINVAL;
487*0Sstevel@tonic-gate 			goto done;
488*0Sstevel@tonic-gate 		}
489*0Sstevel@tonic-gate 
490*0Sstevel@tonic-gate 		switch (so->so_family) {
491*0Sstevel@tonic-gate 		case AF_UNIX:
492*0Sstevel@tonic-gate 			/*
493*0Sstevel@tonic-gate 			 * All AF_UNIX addresses are nul terminated
494*0Sstevel@tonic-gate 			 * when copied (copyin_name) in so the minimum
495*0Sstevel@tonic-gate 			 * length is 3 bytes.
496*0Sstevel@tonic-gate 			 */
497*0Sstevel@tonic-gate 			if (name == NULL ||
498*0Sstevel@tonic-gate 			    (ssize_t)namelen <= sizeof (short) + 1) {
499*0Sstevel@tonic-gate 				error = EISDIR;
500*0Sstevel@tonic-gate 				eprintsoline(so, error);
501*0Sstevel@tonic-gate 				goto done;
502*0Sstevel@tonic-gate 			}
503*0Sstevel@tonic-gate 			/*
504*0Sstevel@tonic-gate 			 * Verify so_family matches the bound family.
505*0Sstevel@tonic-gate 			 * BSD does not check this for AF_UNIX resulting
506*0Sstevel@tonic-gate 			 * in funny mknods.
507*0Sstevel@tonic-gate 			 */
508*0Sstevel@tonic-gate 			if (name->sa_family != so->so_family) {
509*0Sstevel@tonic-gate 				error = EAFNOSUPPORT;
510*0Sstevel@tonic-gate 				goto done;
511*0Sstevel@tonic-gate 			}
512*0Sstevel@tonic-gate 			break;
513*0Sstevel@tonic-gate 		case AF_INET:
514*0Sstevel@tonic-gate 			if (name == NULL) {
515*0Sstevel@tonic-gate 				error = EINVAL;
516*0Sstevel@tonic-gate 				eprintsoline(so, error);
517*0Sstevel@tonic-gate 				goto done;
518*0Sstevel@tonic-gate 			}
519*0Sstevel@tonic-gate 			if ((size_t)namelen != sizeof (sin_t)) {
520*0Sstevel@tonic-gate 				error = name->sa_family != so->so_family ?
521*0Sstevel@tonic-gate 				    EAFNOSUPPORT : EINVAL;
522*0Sstevel@tonic-gate 				eprintsoline(so, error);
523*0Sstevel@tonic-gate 				goto done;
524*0Sstevel@tonic-gate 			}
525*0Sstevel@tonic-gate 			if ((flags & _SOBIND_XPG4_2) &&
526*0Sstevel@tonic-gate 			    (name->sa_family != so->so_family)) {
527*0Sstevel@tonic-gate 				/*
528*0Sstevel@tonic-gate 				 * This check has to be made for X/Open
529*0Sstevel@tonic-gate 				 * sockets however application failures have
530*0Sstevel@tonic-gate 				 * been observed when it is applied to
531*0Sstevel@tonic-gate 				 * all sockets.
532*0Sstevel@tonic-gate 				 */
533*0Sstevel@tonic-gate 				error = EAFNOSUPPORT;
534*0Sstevel@tonic-gate 				eprintsoline(so, error);
535*0Sstevel@tonic-gate 				goto done;
536*0Sstevel@tonic-gate 			}
537*0Sstevel@tonic-gate 			/*
538*0Sstevel@tonic-gate 			 * Force a zero sa_family to match so_family.
539*0Sstevel@tonic-gate 			 *
540*0Sstevel@tonic-gate 			 * Some programs like inetd(1M) don't set the
541*0Sstevel@tonic-gate 			 * family field. Other programs leave
542*0Sstevel@tonic-gate 			 * sin_family set to garbage - SunOS 4.X does
543*0Sstevel@tonic-gate 			 * not check the family field on a bind.
544*0Sstevel@tonic-gate 			 * We use the family field that
545*0Sstevel@tonic-gate 			 * was passed in to the socket() call.
546*0Sstevel@tonic-gate 			 */
547*0Sstevel@tonic-gate 			name->sa_family = so->so_family;
548*0Sstevel@tonic-gate 			break;
549*0Sstevel@tonic-gate 
550*0Sstevel@tonic-gate 		case AF_INET6: {
551*0Sstevel@tonic-gate #ifdef DEBUG
552*0Sstevel@tonic-gate 			sin6_t *sin6 = (sin6_t *)name;
553*0Sstevel@tonic-gate #endif /* DEBUG */
554*0Sstevel@tonic-gate 
555*0Sstevel@tonic-gate 			if (name == NULL) {
556*0Sstevel@tonic-gate 				error = EINVAL;
557*0Sstevel@tonic-gate 				eprintsoline(so, error);
558*0Sstevel@tonic-gate 				goto done;
559*0Sstevel@tonic-gate 			}
560*0Sstevel@tonic-gate 			if ((size_t)namelen != sizeof (sin6_t)) {
561*0Sstevel@tonic-gate 				error = name->sa_family != so->so_family ?
562*0Sstevel@tonic-gate 				    EAFNOSUPPORT : EINVAL;
563*0Sstevel@tonic-gate 				eprintsoline(so, error);
564*0Sstevel@tonic-gate 				goto done;
565*0Sstevel@tonic-gate 			}
566*0Sstevel@tonic-gate 			if (name->sa_family != so->so_family) {
567*0Sstevel@tonic-gate 				/*
568*0Sstevel@tonic-gate 				 * With IPv6 we require the family to match
569*0Sstevel@tonic-gate 				 * unlike in IPv4.
570*0Sstevel@tonic-gate 				 */
571*0Sstevel@tonic-gate 				error = EAFNOSUPPORT;
572*0Sstevel@tonic-gate 				eprintsoline(so, error);
573*0Sstevel@tonic-gate 				goto done;
574*0Sstevel@tonic-gate 			}
575*0Sstevel@tonic-gate #ifdef DEBUG
576*0Sstevel@tonic-gate 			/*
577*0Sstevel@tonic-gate 			 * Verify that apps don't forget to clear
578*0Sstevel@tonic-gate 			 * sin6_scope_id etc
579*0Sstevel@tonic-gate 			 */
580*0Sstevel@tonic-gate 			if (sin6->sin6_scope_id != 0 &&
581*0Sstevel@tonic-gate 			    !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
582*0Sstevel@tonic-gate 				cmn_err(CE_WARN,
583*0Sstevel@tonic-gate 				    "bind with uninitialized sin6_scope_id "
584*0Sstevel@tonic-gate 				    "(%d) on socket. Pid = %d\n",
585*0Sstevel@tonic-gate 				    (int)sin6->sin6_scope_id,
586*0Sstevel@tonic-gate 				    (int)curproc->p_pid);
587*0Sstevel@tonic-gate 			}
588*0Sstevel@tonic-gate 			if (sin6->__sin6_src_id != 0) {
589*0Sstevel@tonic-gate 				cmn_err(CE_WARN,
590*0Sstevel@tonic-gate 				    "bind with uninitialized __sin6_src_id "
591*0Sstevel@tonic-gate 				    "(%d) on socket. Pid = %d\n",
592*0Sstevel@tonic-gate 				    (int)sin6->__sin6_src_id,
593*0Sstevel@tonic-gate 				    (int)curproc->p_pid);
594*0Sstevel@tonic-gate 			}
595*0Sstevel@tonic-gate #endif /* DEBUG */
596*0Sstevel@tonic-gate 			break;
597*0Sstevel@tonic-gate 		}
598*0Sstevel@tonic-gate 		default:
599*0Sstevel@tonic-gate 			/*
600*0Sstevel@tonic-gate 			 * Don't do any length or sa_family check to allow
601*0Sstevel@tonic-gate 			 * non-sockaddr style addresses.
602*0Sstevel@tonic-gate 			 */
603*0Sstevel@tonic-gate 			if (name == NULL) {
604*0Sstevel@tonic-gate 				error = EINVAL;
605*0Sstevel@tonic-gate 				eprintsoline(so, error);
606*0Sstevel@tonic-gate 				goto done;
607*0Sstevel@tonic-gate 			}
608*0Sstevel@tonic-gate 			break;
609*0Sstevel@tonic-gate 		}
610*0Sstevel@tonic-gate 
611*0Sstevel@tonic-gate 		if (namelen > (t_uscalar_t)so->so_laddr_maxlen) {
612*0Sstevel@tonic-gate 			error = ENAMETOOLONG;
613*0Sstevel@tonic-gate 			eprintsoline(so, error);
614*0Sstevel@tonic-gate 			goto done;
615*0Sstevel@tonic-gate 		}
616*0Sstevel@tonic-gate 		/*
617*0Sstevel@tonic-gate 		 * Save local address.
618*0Sstevel@tonic-gate 		 */
619*0Sstevel@tonic-gate 		so->so_laddr_len = (socklen_t)namelen;
620*0Sstevel@tonic-gate 		ASSERT(so->so_laddr_len <= so->so_laddr_maxlen);
621*0Sstevel@tonic-gate 		bcopy(name, so->so_laddr_sa, namelen);
622*0Sstevel@tonic-gate 
623*0Sstevel@tonic-gate 		addr = so->so_laddr_sa;
624*0Sstevel@tonic-gate 		addrlen = (t_uscalar_t)so->so_laddr_len;
625*0Sstevel@tonic-gate 		switch (so->so_family) {
626*0Sstevel@tonic-gate 		case AF_INET6:
627*0Sstevel@tonic-gate 		case AF_INET:
628*0Sstevel@tonic-gate 			break;
629*0Sstevel@tonic-gate 		case AF_UNIX: {
630*0Sstevel@tonic-gate 			struct sockaddr_un *soun =
631*0Sstevel@tonic-gate 				(struct sockaddr_un *)so->so_laddr_sa;
632*0Sstevel@tonic-gate 			struct vnode *vp;
633*0Sstevel@tonic-gate 			struct vattr vattr;
634*0Sstevel@tonic-gate 
635*0Sstevel@tonic-gate 			ASSERT(so->so_ux_bound_vp == NULL);
636*0Sstevel@tonic-gate 			/*
637*0Sstevel@tonic-gate 			 * Create vnode for the specified path name.
638*0Sstevel@tonic-gate 			 * Keep vnode held with a reference in so_ux_bound_vp.
639*0Sstevel@tonic-gate 			 * Use the vnode pointer as the address used in the
640*0Sstevel@tonic-gate 			 * bind with the transport.
641*0Sstevel@tonic-gate 			 *
642*0Sstevel@tonic-gate 			 * Use the same mode as in BSD. In particular this does
643*0Sstevel@tonic-gate 			 * not observe the umask.
644*0Sstevel@tonic-gate 			 */
645*0Sstevel@tonic-gate 			/* MAXPATHLEN + soun_family + nul termination */
646*0Sstevel@tonic-gate 			if (so->so_laddr_len >
647*0Sstevel@tonic-gate 			    (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) {
648*0Sstevel@tonic-gate 				error = ENAMETOOLONG;
649*0Sstevel@tonic-gate 				eprintsoline(so, error);
650*0Sstevel@tonic-gate 				goto done;
651*0Sstevel@tonic-gate 			}
652*0Sstevel@tonic-gate 			vattr.va_type = VSOCK;
653*0Sstevel@tonic-gate 			vattr.va_mode = 0777 & ~u.u_cmask;
654*0Sstevel@tonic-gate 			vattr.va_mask = AT_TYPE|AT_MODE;
655*0Sstevel@tonic-gate 			/* NOTE: holding so_lock */
656*0Sstevel@tonic-gate 			error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr,
657*0Sstevel@tonic-gate 						EXCL, 0, &vp, CRMKNOD, 0, 0);
658*0Sstevel@tonic-gate 			if (error) {
659*0Sstevel@tonic-gate 				if (error == EEXIST)
660*0Sstevel@tonic-gate 					error = EADDRINUSE;
661*0Sstevel@tonic-gate 				eprintsoline(so, error);
662*0Sstevel@tonic-gate 				goto done;
663*0Sstevel@tonic-gate 			}
664*0Sstevel@tonic-gate 			/*
665*0Sstevel@tonic-gate 			 * Establish pointer from the underlying filesystem
666*0Sstevel@tonic-gate 			 * vnode to the socket node.
667*0Sstevel@tonic-gate 			 * so_ux_bound_vp and v_stream->sd_vnode form the
668*0Sstevel@tonic-gate 			 * cross-linkage between the underlying filesystem
669*0Sstevel@tonic-gate 			 * node and the socket node.
670*0Sstevel@tonic-gate 			 */
671*0Sstevel@tonic-gate 			ASSERT(SOTOV(so)->v_stream);
672*0Sstevel@tonic-gate 			mutex_enter(&vp->v_lock);
673*0Sstevel@tonic-gate 			vp->v_stream = SOTOV(so)->v_stream;
674*0Sstevel@tonic-gate 			so->so_ux_bound_vp = vp;
675*0Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
676*0Sstevel@tonic-gate 
677*0Sstevel@tonic-gate 			/*
678*0Sstevel@tonic-gate 			 * Use the vnode pointer value as a unique address
679*0Sstevel@tonic-gate 			 * (together with the magic number to avoid conflicts
680*0Sstevel@tonic-gate 			 * with implicit binds) in the transport provider.
681*0Sstevel@tonic-gate 			 */
682*0Sstevel@tonic-gate 			so->so_ux_laddr.soua_vp = (void *)so->so_ux_bound_vp;
683*0Sstevel@tonic-gate 			so->so_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT;
684*0Sstevel@tonic-gate 			addr = &so->so_ux_laddr;
685*0Sstevel@tonic-gate 			addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr);
686*0Sstevel@tonic-gate 			dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n",
687*0Sstevel@tonic-gate 			    addrlen,
688*0Sstevel@tonic-gate 			    ((struct so_ux_addr *)addr)->soua_vp));
689*0Sstevel@tonic-gate 			break;
690*0Sstevel@tonic-gate 		}
691*0Sstevel@tonic-gate 		} /* end switch (so->so_family) */
692*0Sstevel@tonic-gate 	}
693*0Sstevel@tonic-gate 
694*0Sstevel@tonic-gate 	/*
695*0Sstevel@tonic-gate 	 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since
696*0Sstevel@tonic-gate 	 * the transport can start passing up T_CONN_IND messages
697*0Sstevel@tonic-gate 	 * as soon as it receives the bind req and strsock_proto()
698*0Sstevel@tonic-gate 	 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs.
699*0Sstevel@tonic-gate 	 */
700*0Sstevel@tonic-gate 	if (flags & _SOBIND_LISTEN) {
701*0Sstevel@tonic-gate 		if ((so->so_state & SS_ACCEPTCONN) == 0)
702*0Sstevel@tonic-gate 			clear_acceptconn_on_err = B_TRUE;
703*0Sstevel@tonic-gate 		save_so_backlog = so->so_backlog;
704*0Sstevel@tonic-gate 		restore_backlog_on_err = B_TRUE;
705*0Sstevel@tonic-gate 		so->so_state |= SS_ACCEPTCONN;
706*0Sstevel@tonic-gate 		so->so_backlog = backlog;
707*0Sstevel@tonic-gate 	}
708*0Sstevel@tonic-gate 
709*0Sstevel@tonic-gate 	/*
710*0Sstevel@tonic-gate 	 * If NL7C addr(s) have been configured check for addr/port match,
711*0Sstevel@tonic-gate 	 * or if an implicit NL7C socket via AF_NCA mark socket as NL7C.
712*0Sstevel@tonic-gate 	 *
713*0Sstevel@tonic-gate 	 * NL7C supports the TCP transport only so check AF_INET and AF_INET6
714*0Sstevel@tonic-gate 	 * family sockets only. If match mark as such.
715*0Sstevel@tonic-gate 	 */
716*0Sstevel@tonic-gate 	if ((nl7c_enabled && addr != NULL &&
717*0Sstevel@tonic-gate 	    (so->so_family == AF_INET || so->so_family == AF_INET6) &&
718*0Sstevel@tonic-gate 	    (nl7c = nl7c_lookup_addr(addr, addrlen))) ||
719*0Sstevel@tonic-gate 	    so->so_nl7c_flags == NL7C_AF_NCA) {
720*0Sstevel@tonic-gate 		/*
721*0Sstevel@tonic-gate 		 * NL7C is not supported in non-global zones,
722*0Sstevel@tonic-gate 		 * we enforce this restriction here.
723*0Sstevel@tonic-gate 		 */
724*0Sstevel@tonic-gate 		if (so->so_zoneid == GLOBAL_ZONEID) {
725*0Sstevel@tonic-gate 			/* An NL7C socket, mark it */
726*0Sstevel@tonic-gate 			so->so_nl7c_flags |= NL7C_ENABLED;
727*0Sstevel@tonic-gate 		} else
728*0Sstevel@tonic-gate 			nl7c = NULL;
729*0Sstevel@tonic-gate 	}
730*0Sstevel@tonic-gate 	/*
731*0Sstevel@tonic-gate 	 * We send a T_BIND_REQ for TCP/UDP since we know it supports it,
732*0Sstevel@tonic-gate 	 * for other transports we will send in a O_T_BIND_REQ.
733*0Sstevel@tonic-gate 	 */
734*0Sstevel@tonic-gate 	if (tcp_udp_xport &&
735*0Sstevel@tonic-gate 	    (so->so_family == AF_INET || so->so_family == AF_INET6))
736*0Sstevel@tonic-gate 		PRIM_type = T_BIND_REQ;
737*0Sstevel@tonic-gate 
738*0Sstevel@tonic-gate 	bind_req.PRIM_type = PRIM_type;
739*0Sstevel@tonic-gate 	bind_req.ADDR_length = addrlen;
740*0Sstevel@tonic-gate 	bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req);
741*0Sstevel@tonic-gate 	bind_req.CONIND_number = backlog;
742*0Sstevel@tonic-gate 	/* NOTE: holding so_lock while sleeping */
743*0Sstevel@tonic-gate 	mp = soallocproto2(&bind_req, sizeof (bind_req),
744*0Sstevel@tonic-gate 				addr, addrlen, 0, _ALLOC_SLEEP);
745*0Sstevel@tonic-gate 	so->so_state &= ~SS_LADDR_VALID;
746*0Sstevel@tonic-gate 	/* Done using so_laddr_sa - can drop the lock */
747*0Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
748*0Sstevel@tonic-gate 
749*0Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
750*0Sstevel@tonic-gate 			MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
751*0Sstevel@tonic-gate 	if (error) {
752*0Sstevel@tonic-gate 		eprintsoline(so, error);
753*0Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
754*0Sstevel@tonic-gate 		goto done;
755*0Sstevel@tonic-gate 	}
756*0Sstevel@tonic-gate 
757*0Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
758*0Sstevel@tonic-gate 	error = sowaitprim(so, PRIM_type, T_BIND_ACK,
759*0Sstevel@tonic-gate 	    (t_uscalar_t)sizeof (*bind_ack), &mp, 0);
760*0Sstevel@tonic-gate 	if (error) {
761*0Sstevel@tonic-gate 		eprintsoline(so, error);
762*0Sstevel@tonic-gate 		goto done;
763*0Sstevel@tonic-gate 	}
764*0Sstevel@tonic-gate 	ASSERT(mp);
765*0Sstevel@tonic-gate 	/*
766*0Sstevel@tonic-gate 	 * Even if some TPI message (e.g. T_DISCON_IND) was received in
767*0Sstevel@tonic-gate 	 * strsock_proto while the lock was dropped above, the bind
768*0Sstevel@tonic-gate 	 * is allowed to complete.
769*0Sstevel@tonic-gate 	 */
770*0Sstevel@tonic-gate 
771*0Sstevel@tonic-gate 	/* Mark as bound. This will be undone if we detect errors below. */
772*0Sstevel@tonic-gate 	if (flags & _SOBIND_NOXLATE) {
773*0Sstevel@tonic-gate 		ASSERT(so->so_family == AF_UNIX);
774*0Sstevel@tonic-gate 		so->so_state |= SS_FADDR_NOXLATE;
775*0Sstevel@tonic-gate 	}
776*0Sstevel@tonic-gate 	ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND));
777*0Sstevel@tonic-gate 	so->so_state |= SS_ISBOUND;
778*0Sstevel@tonic-gate 	ASSERT(so->so_unbind_mp);
779*0Sstevel@tonic-gate 
780*0Sstevel@tonic-gate 	/* note that we've already set SS_ACCEPTCONN above */
781*0Sstevel@tonic-gate 
782*0Sstevel@tonic-gate 	/*
783*0Sstevel@tonic-gate 	 * Recompute addrlen - an unspecified bind sent down an
784*0Sstevel@tonic-gate 	 * address of length zero but we expect the appropriate length
785*0Sstevel@tonic-gate 	 * in return.
786*0Sstevel@tonic-gate 	 */
787*0Sstevel@tonic-gate 	addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ?
788*0Sstevel@tonic-gate 	    sizeof (so->so_ux_laddr) : so->so_laddr_len);
789*0Sstevel@tonic-gate 
790*0Sstevel@tonic-gate 	bind_ack = (struct T_bind_ack *)mp->b_rptr;
791*0Sstevel@tonic-gate 	/*
792*0Sstevel@tonic-gate 	 * The alignment restriction is really too strict but
793*0Sstevel@tonic-gate 	 * we want enough alignment to inspect the fields of
794*0Sstevel@tonic-gate 	 * a sockaddr_in.
795*0Sstevel@tonic-gate 	 */
796*0Sstevel@tonic-gate 	addr = sogetoff(mp, bind_ack->ADDR_offset,
797*0Sstevel@tonic-gate 			bind_ack->ADDR_length,
798*0Sstevel@tonic-gate 			__TPI_ALIGN_SIZE);
799*0Sstevel@tonic-gate 	if (addr == NULL) {
800*0Sstevel@tonic-gate 		freemsg(mp);
801*0Sstevel@tonic-gate 		error = EPROTO;
802*0Sstevel@tonic-gate 		eprintsoline(so, error);
803*0Sstevel@tonic-gate 		goto done;
804*0Sstevel@tonic-gate 	}
805*0Sstevel@tonic-gate 	if (!(flags & _SOBIND_UNSPEC)) {
806*0Sstevel@tonic-gate 		/*
807*0Sstevel@tonic-gate 		 * Verify that the transport didn't return something we
808*0Sstevel@tonic-gate 		 * did not want e.g. an address other than what we asked for.
809*0Sstevel@tonic-gate 		 *
810*0Sstevel@tonic-gate 		 * NOTE: These checks would go away if/when we switch to
811*0Sstevel@tonic-gate 		 * using the new TPI (in which the transport would fail
812*0Sstevel@tonic-gate 		 * the request instead of assigning a different address).
813*0Sstevel@tonic-gate 		 *
814*0Sstevel@tonic-gate 		 * NOTE2: For protocols that we don't know (i.e. any
815*0Sstevel@tonic-gate 		 * other than AF_INET6, AF_INET and AF_UNIX), we
816*0Sstevel@tonic-gate 		 * cannot know if the transport should be expected to
817*0Sstevel@tonic-gate 		 * return the same address as that requested.
818*0Sstevel@tonic-gate 		 *
819*0Sstevel@tonic-gate 		 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send
820*0Sstevel@tonic-gate 		 * down a T_BIND_REQ. We use O_T_BIND_REQ for others.
821*0Sstevel@tonic-gate 		 *
822*0Sstevel@tonic-gate 		 * For example, in the case of netatalk it may be
823*0Sstevel@tonic-gate 		 * inappropriate for the transport to return the
824*0Sstevel@tonic-gate 		 * requested address (as it may have allocated a local
825*0Sstevel@tonic-gate 		 * port number in behaviour similar to that of an
826*0Sstevel@tonic-gate 		 * AF_INET bind request with a port number of zero).
827*0Sstevel@tonic-gate 		 *
828*0Sstevel@tonic-gate 		 * Given the definition of O_T_BIND_REQ, where the
829*0Sstevel@tonic-gate 		 * transport may bind to an address other than the
830*0Sstevel@tonic-gate 		 * requested address, it's not possible to determine
831*0Sstevel@tonic-gate 		 * whether a returned address that differs from the
832*0Sstevel@tonic-gate 		 * requested address is a reason to fail (because the
833*0Sstevel@tonic-gate 		 * requested address was not available) or succeed
834*0Sstevel@tonic-gate 		 * (because the transport allocated an appropriate
835*0Sstevel@tonic-gate 		 * address and/or port).
836*0Sstevel@tonic-gate 		 *
837*0Sstevel@tonic-gate 		 * sockfs currently requires that the transport return
838*0Sstevel@tonic-gate 		 * the requested address in the T_BIND_ACK, unless
839*0Sstevel@tonic-gate 		 * there is code here to allow for any discrepancy.
840*0Sstevel@tonic-gate 		 * Such code exists for AF_INET and AF_INET6.
841*0Sstevel@tonic-gate 		 *
842*0Sstevel@tonic-gate 		 * Netatalk chooses to return the requested address
843*0Sstevel@tonic-gate 		 * rather than the (correct) allocated address.  This
844*0Sstevel@tonic-gate 		 * means that netatalk violates the TPI specification
845*0Sstevel@tonic-gate 		 * (and would not function correctly if used from a
846*0Sstevel@tonic-gate 		 * TLI application), but it does mean that it works
847*0Sstevel@tonic-gate 		 * with sockfs.
848*0Sstevel@tonic-gate 		 *
849*0Sstevel@tonic-gate 		 * As noted above, using the newer XTI bind primitive
850*0Sstevel@tonic-gate 		 * (T_BIND_REQ) in preference to O_T_BIND_REQ would
851*0Sstevel@tonic-gate 		 * allow sockfs to be more sure about whether or not
852*0Sstevel@tonic-gate 		 * the bind request had succeeded (as transports are
853*0Sstevel@tonic-gate 		 * not permitted to bind to a different address than
854*0Sstevel@tonic-gate 		 * that requested - they must return failure).
855*0Sstevel@tonic-gate 		 * Unfortunately, support for T_BIND_REQ may not be
856*0Sstevel@tonic-gate 		 * present in all transport implementations (netatalk,
857*0Sstevel@tonic-gate 		 * for example, doesn't have it), making the
858*0Sstevel@tonic-gate 		 * transition difficult.
859*0Sstevel@tonic-gate 		 */
860*0Sstevel@tonic-gate 		if (bind_ack->ADDR_length != addrlen) {
861*0Sstevel@tonic-gate 			/* Assumes that the requested address was in use */
862*0Sstevel@tonic-gate 			freemsg(mp);
863*0Sstevel@tonic-gate 			error = EADDRINUSE;
864*0Sstevel@tonic-gate 			eprintsoline(so, error);
865*0Sstevel@tonic-gate 			goto done;
866*0Sstevel@tonic-gate 		}
867*0Sstevel@tonic-gate 
868*0Sstevel@tonic-gate 		switch (so->so_family) {
869*0Sstevel@tonic-gate 		case AF_INET6:
870*0Sstevel@tonic-gate 		case AF_INET: {
871*0Sstevel@tonic-gate 			sin_t *rname, *aname;
872*0Sstevel@tonic-gate 
873*0Sstevel@tonic-gate 			rname = (sin_t *)addr;
874*0Sstevel@tonic-gate 			aname = (sin_t *)so->so_laddr_sa;
875*0Sstevel@tonic-gate 
876*0Sstevel@tonic-gate 			/*
877*0Sstevel@tonic-gate 			 * Take advantage of the alignment
878*0Sstevel@tonic-gate 			 * of sin_port and sin6_port which fall
879*0Sstevel@tonic-gate 			 * in the same place in their data structures.
880*0Sstevel@tonic-gate 			 * Just use sin_port for either address family.
881*0Sstevel@tonic-gate 			 *
882*0Sstevel@tonic-gate 			 * This may become a problem if (heaven forbid)
883*0Sstevel@tonic-gate 			 * there's a separate ipv6port_reserved... :-P
884*0Sstevel@tonic-gate 			 *
885*0Sstevel@tonic-gate 			 * Binding to port 0 has the semantics of letting
886*0Sstevel@tonic-gate 			 * the transport bind to any port.
887*0Sstevel@tonic-gate 			 *
888*0Sstevel@tonic-gate 			 * If the transport is TCP or UDP since we had sent
889*0Sstevel@tonic-gate 			 * a T_BIND_REQ we would not get a port other than
890*0Sstevel@tonic-gate 			 * what we asked for.
891*0Sstevel@tonic-gate 			 */
892*0Sstevel@tonic-gate 			if (tcp_udp_xport) {
893*0Sstevel@tonic-gate 				/*
894*0Sstevel@tonic-gate 				 * Pick up the new port number if we bound to
895*0Sstevel@tonic-gate 				 * port 0.
896*0Sstevel@tonic-gate 				 */
897*0Sstevel@tonic-gate 				if (aname->sin_port == 0)
898*0Sstevel@tonic-gate 					aname->sin_port = rname->sin_port;
899*0Sstevel@tonic-gate 				so->so_state |= SS_LADDR_VALID;
900*0Sstevel@tonic-gate 				break;
901*0Sstevel@tonic-gate 			}
902*0Sstevel@tonic-gate 			if (aname->sin_port != 0 &&
903*0Sstevel@tonic-gate 			    aname->sin_port != rname->sin_port) {
904*0Sstevel@tonic-gate 				freemsg(mp);
905*0Sstevel@tonic-gate 				error = EADDRINUSE;
906*0Sstevel@tonic-gate 				eprintsoline(so, error);
907*0Sstevel@tonic-gate 				goto done;
908*0Sstevel@tonic-gate 			}
909*0Sstevel@tonic-gate 			/*
910*0Sstevel@tonic-gate 			 * Pick up the new port number if we bound to port 0.
911*0Sstevel@tonic-gate 			 */
912*0Sstevel@tonic-gate 			aname->sin_port = rname->sin_port;
913*0Sstevel@tonic-gate 
914*0Sstevel@tonic-gate 			/*
915*0Sstevel@tonic-gate 			 * Unfortunately, addresses aren't _quite_ the same.
916*0Sstevel@tonic-gate 			 */
917*0Sstevel@tonic-gate 			if (so->so_family == AF_INET) {
918*0Sstevel@tonic-gate 				if (aname->sin_addr.s_addr !=
919*0Sstevel@tonic-gate 				    rname->sin_addr.s_addr) {
920*0Sstevel@tonic-gate 					freemsg(mp);
921*0Sstevel@tonic-gate 					error = EADDRNOTAVAIL;
922*0Sstevel@tonic-gate 					eprintsoline(so, error);
923*0Sstevel@tonic-gate 					goto done;
924*0Sstevel@tonic-gate 				}
925*0Sstevel@tonic-gate 			} else {
926*0Sstevel@tonic-gate 				sin6_t *rname6 = (sin6_t *)rname;
927*0Sstevel@tonic-gate 				sin6_t *aname6 = (sin6_t *)aname;
928*0Sstevel@tonic-gate 
929*0Sstevel@tonic-gate 				if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr,
930*0Sstevel@tonic-gate 				    &rname6->sin6_addr)) {
931*0Sstevel@tonic-gate 					freemsg(mp);
932*0Sstevel@tonic-gate 					error = EADDRNOTAVAIL;
933*0Sstevel@tonic-gate 					eprintsoline(so, error);
934*0Sstevel@tonic-gate 					goto done;
935*0Sstevel@tonic-gate 				}
936*0Sstevel@tonic-gate 			}
937*0Sstevel@tonic-gate 			break;
938*0Sstevel@tonic-gate 		}
939*0Sstevel@tonic-gate 		case AF_UNIX:
940*0Sstevel@tonic-gate 			if (bcmp(addr, &so->so_ux_laddr, addrlen) != 0) {
941*0Sstevel@tonic-gate 				freemsg(mp);
942*0Sstevel@tonic-gate 				error = EADDRINUSE;
943*0Sstevel@tonic-gate 				eprintsoline(so, error);
944*0Sstevel@tonic-gate 				eprintso(so,
945*0Sstevel@tonic-gate 					("addrlen %d, addr 0x%x, vp %p\n",
946*0Sstevel@tonic-gate 					addrlen, *((int *)addr),
947*0Sstevel@tonic-gate 					so->so_ux_bound_vp));
948*0Sstevel@tonic-gate 				goto done;
949*0Sstevel@tonic-gate 			}
950*0Sstevel@tonic-gate 			so->so_state |= SS_LADDR_VALID;
951*0Sstevel@tonic-gate 			break;
952*0Sstevel@tonic-gate 		default:
953*0Sstevel@tonic-gate 			/*
954*0Sstevel@tonic-gate 			 * NOTE: This assumes that addresses can be
955*0Sstevel@tonic-gate 			 * byte-compared for equivalence.
956*0Sstevel@tonic-gate 			 */
957*0Sstevel@tonic-gate 			if (bcmp(addr, so->so_laddr_sa, addrlen) != 0) {
958*0Sstevel@tonic-gate 				freemsg(mp);
959*0Sstevel@tonic-gate 				error = EADDRINUSE;
960*0Sstevel@tonic-gate 				eprintsoline(so, error);
961*0Sstevel@tonic-gate 				goto done;
962*0Sstevel@tonic-gate 			}
963*0Sstevel@tonic-gate 			/*
964*0Sstevel@tonic-gate 			 * Don't mark SS_LADDR_VALID, as we cannot be
965*0Sstevel@tonic-gate 			 * sure that the returned address is the real
966*0Sstevel@tonic-gate 			 * bound address when talking to an unknown
967*0Sstevel@tonic-gate 			 * transport.
968*0Sstevel@tonic-gate 			 */
969*0Sstevel@tonic-gate 			break;
970*0Sstevel@tonic-gate 		}
971*0Sstevel@tonic-gate 	} else {
972*0Sstevel@tonic-gate 		/*
973*0Sstevel@tonic-gate 		 * Save the returned address for getsockname.
974*0Sstevel@tonic-gate 		 * Needed for unspecific bind unless transport supports
975*0Sstevel@tonic-gate 		 * the TI_GETMYNAME ioctl.
976*0Sstevel@tonic-gate 		 * Do this for AF_INET{,6} even though they do, as
977*0Sstevel@tonic-gate 		 * caching info here is much better performance than
978*0Sstevel@tonic-gate 		 * a TPI/STREAMS trip to the transport for getsockname.
979*0Sstevel@tonic-gate 		 * Any which can't for some reason _must_ _not_ set
980*0Sstevel@tonic-gate 		 * LADDR_VALID here for the caching version of getsockname
981*0Sstevel@tonic-gate 		 * to not break;
982*0Sstevel@tonic-gate 		 */
983*0Sstevel@tonic-gate 		switch (so->so_family) {
984*0Sstevel@tonic-gate 		case AF_UNIX:
985*0Sstevel@tonic-gate 			/*
986*0Sstevel@tonic-gate 			 * Record the address bound with the transport
987*0Sstevel@tonic-gate 			 * for use by socketpair.
988*0Sstevel@tonic-gate 			 */
989*0Sstevel@tonic-gate 			bcopy(addr, &so->so_ux_laddr, addrlen);
990*0Sstevel@tonic-gate 			so->so_state |= SS_LADDR_VALID;
991*0Sstevel@tonic-gate 			break;
992*0Sstevel@tonic-gate 		case AF_INET:
993*0Sstevel@tonic-gate 		case AF_INET6:
994*0Sstevel@tonic-gate 			ASSERT(so->so_laddr_len <= so->so_laddr_maxlen);
995*0Sstevel@tonic-gate 			bcopy(addr, so->so_laddr_sa, so->so_laddr_len);
996*0Sstevel@tonic-gate 			so->so_state |= SS_LADDR_VALID;
997*0Sstevel@tonic-gate 			break;
998*0Sstevel@tonic-gate 		default:
999*0Sstevel@tonic-gate 			/*
1000*0Sstevel@tonic-gate 			 * Don't mark SS_LADDR_VALID, as we cannot be
1001*0Sstevel@tonic-gate 			 * sure that the returned address is the real
1002*0Sstevel@tonic-gate 			 * bound address when talking to an unknown
1003*0Sstevel@tonic-gate 			 * transport.
1004*0Sstevel@tonic-gate 			 */
1005*0Sstevel@tonic-gate 			break;
1006*0Sstevel@tonic-gate 		}
1007*0Sstevel@tonic-gate 	}
1008*0Sstevel@tonic-gate 
1009*0Sstevel@tonic-gate 	if (nl7c == NULL && (so->so_nl7c_flags & NL7C_AF_NCA) &&
1010*0Sstevel@tonic-gate 	    (so->so_nl7c_flags & NL7C_ENABLED)) {
1011*0Sstevel@tonic-gate 		/*
1012*0Sstevel@tonic-gate 		 * Was an AF_NCA bind() so add it to the addr list for
1013*0Sstevel@tonic-gate 		 * reporting purposes.
1014*0Sstevel@tonic-gate 		 */
1015*0Sstevel@tonic-gate 		nl7c = nl7c_add_addr(addr, addrlen);
1016*0Sstevel@tonic-gate 	}
1017*0Sstevel@tonic-gate 	if (nl7c != NULL) {
1018*0Sstevel@tonic-gate 		nl7c_listener_addr(nl7c, strvp2wq(SOTOV(so)));
1019*0Sstevel@tonic-gate 	}
1020*0Sstevel@tonic-gate 
1021*0Sstevel@tonic-gate 	freemsg(mp);
1022*0Sstevel@tonic-gate 
1023*0Sstevel@tonic-gate done:
1024*0Sstevel@tonic-gate 	if (error) {
1025*0Sstevel@tonic-gate 		/* reset state & backlog to values held on entry */
1026*0Sstevel@tonic-gate 		if (clear_acceptconn_on_err == B_TRUE)
1027*0Sstevel@tonic-gate 			so->so_state &= ~SS_ACCEPTCONN;
1028*0Sstevel@tonic-gate 		if (restore_backlog_on_err == B_TRUE)
1029*0Sstevel@tonic-gate 			so->so_backlog = save_so_backlog;
1030*0Sstevel@tonic-gate 
1031*0Sstevel@tonic-gate 		if (unbind_on_err && so->so_state & SS_ISBOUND) {
1032*0Sstevel@tonic-gate 			int err;
1033*0Sstevel@tonic-gate 
1034*0Sstevel@tonic-gate 			err = sotpi_unbind(so, 0);
1035*0Sstevel@tonic-gate 			/* LINTED - statement has no consequent: if */
1036*0Sstevel@tonic-gate 			if (err) {
1037*0Sstevel@tonic-gate 				eprintsoline(so, error);
1038*0Sstevel@tonic-gate 			} else {
1039*0Sstevel@tonic-gate 				ASSERT(!(so->so_state & SS_ISBOUND));
1040*0Sstevel@tonic-gate 			}
1041*0Sstevel@tonic-gate 		}
1042*0Sstevel@tonic-gate 	}
1043*0Sstevel@tonic-gate 	if (!(flags & _SOBIND_LOCK_HELD)) {
1044*0Sstevel@tonic-gate 		so_unlock_single(so, SOLOCKED);
1045*0Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
1046*0Sstevel@tonic-gate 	} else {
1047*0Sstevel@tonic-gate 		/* If the caller held the lock don't release it here */
1048*0Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(&so->so_lock));
1049*0Sstevel@tonic-gate 		ASSERT(so->so_flag & SOLOCKED);
1050*0Sstevel@tonic-gate 	}
1051*0Sstevel@tonic-gate 	return (error);
1052*0Sstevel@tonic-gate }
1053*0Sstevel@tonic-gate 
1054*0Sstevel@tonic-gate /* bind the socket */
1055*0Sstevel@tonic-gate int
1056*0Sstevel@tonic-gate sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
1057*0Sstevel@tonic-gate     int flags)
1058*0Sstevel@tonic-gate {
1059*0Sstevel@tonic-gate 	if ((flags & _SOBIND_SOCKETPAIR) == 0)
1060*0Sstevel@tonic-gate 		return (sotpi_bindlisten(so, name, namelen, 0, flags));
1061*0Sstevel@tonic-gate 
1062*0Sstevel@tonic-gate 	flags &= ~_SOBIND_SOCKETPAIR;
1063*0Sstevel@tonic-gate 	return (sotpi_bindlisten(so, name, namelen, 1, flags));
1064*0Sstevel@tonic-gate }
1065*0Sstevel@tonic-gate 
1066*0Sstevel@tonic-gate /*
1067*0Sstevel@tonic-gate  * Unbind a socket - used when bind() fails, when bind() specifies a NULL
1068*0Sstevel@tonic-gate  * address, or when listen needs to unbind and bind.
1069*0Sstevel@tonic-gate  * If the _SOUNBIND_REBIND flag is specified the addresses are retained
1070*0Sstevel@tonic-gate  * so that a sobind can pick them up.
1071*0Sstevel@tonic-gate  */
1072*0Sstevel@tonic-gate static int
1073*0Sstevel@tonic-gate sotpi_unbind(struct sonode *so, int flags)
1074*0Sstevel@tonic-gate {
1075*0Sstevel@tonic-gate 	struct T_unbind_req	unbind_req;
1076*0Sstevel@tonic-gate 	int			error = 0;
1077*0Sstevel@tonic-gate 	mblk_t			*mp;
1078*0Sstevel@tonic-gate 
1079*0Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n",
1080*0Sstevel@tonic-gate 			so, flags, pr_state(so->so_state, so->so_mode)));
1081*0Sstevel@tonic-gate 
1082*0Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
1083*0Sstevel@tonic-gate 	ASSERT(so->so_flag & SOLOCKED);
1084*0Sstevel@tonic-gate 
1085*0Sstevel@tonic-gate 	if (!(so->so_state & SS_ISBOUND)) {
1086*0Sstevel@tonic-gate 		error = EINVAL;
1087*0Sstevel@tonic-gate 		eprintsoline(so, error);
1088*0Sstevel@tonic-gate 		goto done;
1089*0Sstevel@tonic-gate 	}
1090*0Sstevel@tonic-gate 
1091*0Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
1092*0Sstevel@tonic-gate 
1093*0Sstevel@tonic-gate 	/*
1094*0Sstevel@tonic-gate 	 * Flush the read and write side (except stream head read queue)
1095*0Sstevel@tonic-gate 	 * and send down T_UNBIND_REQ.
1096*0Sstevel@tonic-gate 	 */
1097*0Sstevel@tonic-gate 	(void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW);
1098*0Sstevel@tonic-gate 
1099*0Sstevel@tonic-gate 	unbind_req.PRIM_type = T_UNBIND_REQ;
1100*0Sstevel@tonic-gate 	mp = soallocproto1(&unbind_req, sizeof (unbind_req),
1101*0Sstevel@tonic-gate 	    0, _ALLOC_SLEEP);
1102*0Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
1103*0Sstevel@tonic-gate 			MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
1104*0Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
1105*0Sstevel@tonic-gate 	if (error) {
1106*0Sstevel@tonic-gate 		eprintsoline(so, error);
1107*0Sstevel@tonic-gate 		goto done;
1108*0Sstevel@tonic-gate 	}
1109*0Sstevel@tonic-gate 
1110*0Sstevel@tonic-gate 	error = sowaitokack(so, T_UNBIND_REQ);
1111*0Sstevel@tonic-gate 	if (error) {
1112*0Sstevel@tonic-gate 		eprintsoline(so, error);
1113*0Sstevel@tonic-gate 		goto done;
1114*0Sstevel@tonic-gate 	}
1115*0Sstevel@tonic-gate 
1116*0Sstevel@tonic-gate 	/*
1117*0Sstevel@tonic-gate 	 * Even if some TPI message (e.g. T_DISCON_IND) was received in
1118*0Sstevel@tonic-gate 	 * strsock_proto while the lock was dropped above, the unbind
1119*0Sstevel@tonic-gate 	 * is allowed to complete.
1120*0Sstevel@tonic-gate 	 */
1121*0Sstevel@tonic-gate 	if (!(flags & _SOUNBIND_REBIND)) {
1122*0Sstevel@tonic-gate 		/*
1123*0Sstevel@tonic-gate 		 * Clear out bound address.
1124*0Sstevel@tonic-gate 		 */
1125*0Sstevel@tonic-gate 		vnode_t *vp;
1126*0Sstevel@tonic-gate 
1127*0Sstevel@tonic-gate 		if ((vp = so->so_ux_bound_vp) != NULL) {
1128*0Sstevel@tonic-gate 			ASSERT(vp->v_stream);
1129*0Sstevel@tonic-gate 			so->so_ux_bound_vp = NULL;
1130*0Sstevel@tonic-gate 			vn_rele_stream(vp);
1131*0Sstevel@tonic-gate 		}
1132*0Sstevel@tonic-gate 		/* Clear out address */
1133*0Sstevel@tonic-gate 		so->so_laddr_len = 0;
1134*0Sstevel@tonic-gate 	}
1135*0Sstevel@tonic-gate 	so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN|SS_LADDR_VALID);
1136*0Sstevel@tonic-gate done:
1137*0Sstevel@tonic-gate 	/* If the caller held the lock don't release it here */
1138*0Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
1139*0Sstevel@tonic-gate 	ASSERT(so->so_flag & SOLOCKED);
1140*0Sstevel@tonic-gate 
1141*0Sstevel@tonic-gate 	return (error);
1142*0Sstevel@tonic-gate }
1143*0Sstevel@tonic-gate 
1144*0Sstevel@tonic-gate /*
1145*0Sstevel@tonic-gate  * listen on the socket.
1146*0Sstevel@tonic-gate  * For TPI conforming transports this has to first unbind with the transport
1147*0Sstevel@tonic-gate  * and then bind again using the new backlog.
1148*0Sstevel@tonic-gate  */
1149*0Sstevel@tonic-gate int
1150*0Sstevel@tonic-gate sotpi_listen(struct sonode *so, int backlog)
1151*0Sstevel@tonic-gate {
1152*0Sstevel@tonic-gate 	int		error = 0;
1153*0Sstevel@tonic-gate 
1154*0Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n",
1155*0Sstevel@tonic-gate 		so, backlog, pr_state(so->so_state, so->so_mode)));
1156*0Sstevel@tonic-gate 
1157*0Sstevel@tonic-gate 	if (so->so_serv_type == T_CLTS)
1158*0Sstevel@tonic-gate 		return (EOPNOTSUPP);
1159*0Sstevel@tonic-gate 
1160*0Sstevel@tonic-gate 	/*
1161*0Sstevel@tonic-gate 	 * If the socket is ready to accept connections already, then
1162*0Sstevel@tonic-gate 	 * return without doing anything.  This avoids a problem where
1163*0Sstevel@tonic-gate 	 * a second listen() call fails if a connection is pending and
1164*0Sstevel@tonic-gate 	 * leaves the socket unbound. Only when we are not unbinding
1165*0Sstevel@tonic-gate 	 * with the transport can we safely increase the backlog.
1166*0Sstevel@tonic-gate 	 */
1167*0Sstevel@tonic-gate 	if (so->so_state & SS_ACCEPTCONN &&
1168*0Sstevel@tonic-gate 	    !((so->so_family == AF_INET || so->so_family == AF_INET6) &&
1169*0Sstevel@tonic-gate 		/*CONSTCOND*/
1170*0Sstevel@tonic-gate 		!solisten_tpi_tcp))
1171*0Sstevel@tonic-gate 		return (0);
1172*0Sstevel@tonic-gate 
1173*0Sstevel@tonic-gate 	if (so->so_state & SS_ISCONNECTED)
1174*0Sstevel@tonic-gate 		return (EINVAL);
1175*0Sstevel@tonic-gate 
1176*0Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
1177*0Sstevel@tonic-gate 	so_lock_single(so);	/* Set SOLOCKED */
1178*0Sstevel@tonic-gate 
1179*0Sstevel@tonic-gate 	if (backlog < 0)
1180*0Sstevel@tonic-gate 		backlog = 0;
1181*0Sstevel@tonic-gate 	/*
1182*0Sstevel@tonic-gate 	 * Use the same qlimit as in BSD. BSD checks the qlimit
1183*0Sstevel@tonic-gate 	 * before queuing the next connection implying that a
1184*0Sstevel@tonic-gate 	 * listen(sock, 0) allows one connection to be queued.
1185*0Sstevel@tonic-gate 	 * BSD also uses 1.5 times the requested backlog.
1186*0Sstevel@tonic-gate 	 *
1187*0Sstevel@tonic-gate 	 * XNS Issue 4 required a strict interpretation of the backlog.
1188*0Sstevel@tonic-gate 	 * This has been waived subsequently for Issue 4 and the change
1189*0Sstevel@tonic-gate 	 * incorporated in XNS Issue 5. So we aren't required to do
1190*0Sstevel@tonic-gate 	 * anything special for XPG apps.
1191*0Sstevel@tonic-gate 	 */
1192*0Sstevel@tonic-gate 	if (backlog >= (INT_MAX - 1) / 3)
1193*0Sstevel@tonic-gate 		backlog = INT_MAX;
1194*0Sstevel@tonic-gate 	else
1195*0Sstevel@tonic-gate 		backlog = backlog * 3 / 2 + 1;
1196*0Sstevel@tonic-gate 
1197*0Sstevel@tonic-gate 	/*
1198*0Sstevel@tonic-gate 	 * If the listen doesn't change the backlog we do nothing.
1199*0Sstevel@tonic-gate 	 * This avoids an EPROTO error from the transport.
1200*0Sstevel@tonic-gate 	 */
1201*0Sstevel@tonic-gate 	if ((so->so_state & SS_ACCEPTCONN) &&
1202*0Sstevel@tonic-gate 	    so->so_backlog == backlog)
1203*0Sstevel@tonic-gate 		goto done;
1204*0Sstevel@tonic-gate 
1205*0Sstevel@tonic-gate 	if (!(so->so_state & SS_ISBOUND)) {
1206*0Sstevel@tonic-gate 		/*
1207*0Sstevel@tonic-gate 		 * Must have been explicitly bound in the UNIX domain.
1208*0Sstevel@tonic-gate 		 */
1209*0Sstevel@tonic-gate 		if (so->so_family == AF_UNIX) {
1210*0Sstevel@tonic-gate 			error = EINVAL;
1211*0Sstevel@tonic-gate 			goto done;
1212*0Sstevel@tonic-gate 		}
1213*0Sstevel@tonic-gate 		error = sotpi_bindlisten(so, NULL, 0, backlog,
1214*0Sstevel@tonic-gate 			    _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN);
1215*0Sstevel@tonic-gate 	} else if (backlog > 0) {
1216*0Sstevel@tonic-gate 		/*
1217*0Sstevel@tonic-gate 		 * AF_INET{,6} hack to avoid losing the port.
1218*0Sstevel@tonic-gate 		 * Assumes that all AF_INET{,6} transports can handle a
1219*0Sstevel@tonic-gate 		 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI
1220*0Sstevel@tonic-gate 		 * has already bound thus it is possible to avoid the unbind.
1221*0Sstevel@tonic-gate 		 */
1222*0Sstevel@tonic-gate 		if (!((so->so_family == AF_INET || so->so_family == AF_INET6) &&
1223*0Sstevel@tonic-gate 		    /*CONSTCOND*/
1224*0Sstevel@tonic-gate 		    !solisten_tpi_tcp)) {
1225*0Sstevel@tonic-gate 			error = sotpi_unbind(so, _SOUNBIND_REBIND);
1226*0Sstevel@tonic-gate 			if (error)
1227*0Sstevel@tonic-gate 				goto done;
1228*0Sstevel@tonic-gate 		}
1229*0Sstevel@tonic-gate 		error = sotpi_bindlisten(so, NULL, 0, backlog,
1230*0Sstevel@tonic-gate 			    _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN);
1231*0Sstevel@tonic-gate 	} else {
1232*0Sstevel@tonic-gate 		so->so_state |= SS_ACCEPTCONN;
1233*0Sstevel@tonic-gate 		so->so_backlog = backlog;
1234*0Sstevel@tonic-gate 	}
1235*0Sstevel@tonic-gate 	if (error)
1236*0Sstevel@tonic-gate 		goto done;
1237*0Sstevel@tonic-gate 	ASSERT(so->so_state & SS_ACCEPTCONN);
1238*0Sstevel@tonic-gate done:
1239*0Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
1240*0Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
1241*0Sstevel@tonic-gate 	return (error);
1242*0Sstevel@tonic-gate }
1243*0Sstevel@tonic-gate 
1244*0Sstevel@tonic-gate /*
1245*0Sstevel@tonic-gate  * Disconnect either a specified seqno or all (-1).
1246*0Sstevel@tonic-gate  * The former is used on listening sockets only.
1247*0Sstevel@tonic-gate  *
1248*0Sstevel@tonic-gate  * When seqno == -1 sodisconnect could call sotpi_unbind. However,
1249*0Sstevel@tonic-gate  * the current use of sodisconnect(seqno == -1) is only for shutdown
1250*0Sstevel@tonic-gate  * so there is no point (and potentially incorrect) to unbind.
1251*0Sstevel@tonic-gate  */
1252*0Sstevel@tonic-gate int
1253*0Sstevel@tonic-gate sodisconnect(struct sonode *so, t_scalar_t seqno, int flags)
1254*0Sstevel@tonic-gate {
1255*0Sstevel@tonic-gate 	struct T_discon_req	discon_req;
1256*0Sstevel@tonic-gate 	int			error = 0;
1257*0Sstevel@tonic-gate 	mblk_t			*mp;
1258*0Sstevel@tonic-gate 
1259*0Sstevel@tonic-gate 	dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n",
1260*0Sstevel@tonic-gate 			so, seqno, flags, pr_state(so->so_state, so->so_mode)));
1261*0Sstevel@tonic-gate 
1262*0Sstevel@tonic-gate 	if (!(flags & _SODISCONNECT_LOCK_HELD)) {
1263*0Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
1264*0Sstevel@tonic-gate 		so_lock_single(so);	/* Set SOLOCKED */
1265*0Sstevel@tonic-gate 	} else {
1266*0Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(&so->so_lock));
1267*0Sstevel@tonic-gate 		ASSERT(so->so_flag & SOLOCKED);
1268*0Sstevel@tonic-gate 	}
1269*0Sstevel@tonic-gate 
1270*0Sstevel@tonic-gate 	if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) {
1271*0Sstevel@tonic-gate 		error = EINVAL;
1272*0Sstevel@tonic-gate 		eprintsoline(so, error);
1273*0Sstevel@tonic-gate 		goto done;
1274*0Sstevel@tonic-gate 	}
1275*0Sstevel@tonic-gate 
1276*0Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
1277*0Sstevel@tonic-gate 	/*
1278*0Sstevel@tonic-gate 	 * Flush the write side (unless this is a listener)
1279*0Sstevel@tonic-gate 	 * and then send down a T_DISCON_REQ.
1280*0Sstevel@tonic-gate 	 * (Don't flush on listener since it could flush {O_}T_CONN_RES
1281*0Sstevel@tonic-gate 	 * and other messages.)
1282*0Sstevel@tonic-gate 	 */
1283*0Sstevel@tonic-gate 	if (!(so->so_state & SS_ACCEPTCONN))
1284*0Sstevel@tonic-gate 		(void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW);
1285*0Sstevel@tonic-gate 
1286*0Sstevel@tonic-gate 	discon_req.PRIM_type = T_DISCON_REQ;
1287*0Sstevel@tonic-gate 	discon_req.SEQ_number = seqno;
1288*0Sstevel@tonic-gate 	mp = soallocproto1(&discon_req, sizeof (discon_req),
1289*0Sstevel@tonic-gate 	    0, _ALLOC_SLEEP);
1290*0Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
1291*0Sstevel@tonic-gate 			MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
1292*0Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
1293*0Sstevel@tonic-gate 	if (error) {
1294*0Sstevel@tonic-gate 		eprintsoline(so, error);
1295*0Sstevel@tonic-gate 		goto done;
1296*0Sstevel@tonic-gate 	}
1297*0Sstevel@tonic-gate 
1298*0Sstevel@tonic-gate 	error = sowaitokack(so, T_DISCON_REQ);
1299*0Sstevel@tonic-gate 	if (error) {
1300*0Sstevel@tonic-gate 		eprintsoline(so, error);
1301*0Sstevel@tonic-gate 		goto done;
1302*0Sstevel@tonic-gate 	}
1303*0Sstevel@tonic-gate 	/*
1304*0Sstevel@tonic-gate 	 * Even if some TPI message (e.g. T_DISCON_IND) was received in
1305*0Sstevel@tonic-gate 	 * strsock_proto while the lock was dropped above, the disconnect
1306*0Sstevel@tonic-gate 	 * is allowed to complete. However, it is not possible to
1307*0Sstevel@tonic-gate 	 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set.
1308*0Sstevel@tonic-gate 	 */
1309*0Sstevel@tonic-gate 	so->so_state &=
1310*0Sstevel@tonic-gate 	    ~(SS_ISCONNECTED|SS_ISCONNECTING|SS_LADDR_VALID|SS_FADDR_VALID);
1311*0Sstevel@tonic-gate done:
1312*0Sstevel@tonic-gate 	if (!(flags & _SODISCONNECT_LOCK_HELD)) {
1313*0Sstevel@tonic-gate 		so_unlock_single(so, SOLOCKED);
1314*0Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
1315*0Sstevel@tonic-gate 	} else {
1316*0Sstevel@tonic-gate 		/* If the caller held the lock don't release it here */
1317*0Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(&so->so_lock));
1318*0Sstevel@tonic-gate 		ASSERT(so->so_flag & SOLOCKED);
1319*0Sstevel@tonic-gate 	}
1320*0Sstevel@tonic-gate 	return (error);
1321*0Sstevel@tonic-gate }
1322*0Sstevel@tonic-gate 
1323*0Sstevel@tonic-gate int
1324*0Sstevel@tonic-gate sotpi_accept(struct sonode *so, int fflag, struct sonode **nsop)
1325*0Sstevel@tonic-gate {
1326*0Sstevel@tonic-gate 	struct T_conn_ind	*conn_ind;
1327*0Sstevel@tonic-gate 	struct T_conn_res	*conn_res;
1328*0Sstevel@tonic-gate 	int			error = 0;
1329*0Sstevel@tonic-gate 	mblk_t			*mp;
1330*0Sstevel@tonic-gate 	struct sonode		*nso;
1331*0Sstevel@tonic-gate 	vnode_t			*nvp;
1332*0Sstevel@tonic-gate 	void			*src;
1333*0Sstevel@tonic-gate 	t_uscalar_t		srclen;
1334*0Sstevel@tonic-gate 	void			*opt;
1335*0Sstevel@tonic-gate 	t_uscalar_t		optlen;
1336*0Sstevel@tonic-gate 	t_scalar_t		PRIM_type;
1337*0Sstevel@tonic-gate 	t_scalar_t		SEQ_number;
1338*0Sstevel@tonic-gate 
1339*0Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n",
1340*0Sstevel@tonic-gate 		so, fflag, nsop, pr_state(so->so_state, so->so_mode)));
1341*0Sstevel@tonic-gate 
1342*0Sstevel@tonic-gate 	/*
1343*0Sstevel@tonic-gate 	 * Defer single-threading the accepting socket until
1344*0Sstevel@tonic-gate 	 * the T_CONN_IND has been received and parsed and the
1345*0Sstevel@tonic-gate 	 * new sonode has been opened.
1346*0Sstevel@tonic-gate 	 */
1347*0Sstevel@tonic-gate 
1348*0Sstevel@tonic-gate 	/* Check that we are not already connected */
1349*0Sstevel@tonic-gate 	if ((so->so_state & SS_ACCEPTCONN) == 0)
1350*0Sstevel@tonic-gate 		goto conn_bad;
1351*0Sstevel@tonic-gate again:
1352*0Sstevel@tonic-gate 	if ((error = sowaitconnind(so, fflag, &mp)) != 0)
1353*0Sstevel@tonic-gate 		goto e_bad;
1354*0Sstevel@tonic-gate 
1355*0Sstevel@tonic-gate 	ASSERT(mp);
1356*0Sstevel@tonic-gate 	conn_ind = (struct T_conn_ind *)mp->b_rptr;
1357*0Sstevel@tonic-gate 	/*
1358*0Sstevel@tonic-gate 	 * Save SEQ_number for error paths.
1359*0Sstevel@tonic-gate 	 */
1360*0Sstevel@tonic-gate 	SEQ_number = conn_ind->SEQ_number;
1361*0Sstevel@tonic-gate 
1362*0Sstevel@tonic-gate 	srclen = conn_ind->SRC_length;
1363*0Sstevel@tonic-gate 	src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1);
1364*0Sstevel@tonic-gate 	if (src == NULL) {
1365*0Sstevel@tonic-gate 		error = EPROTO;
1366*0Sstevel@tonic-gate 		freemsg(mp);
1367*0Sstevel@tonic-gate 		eprintsoline(so, error);
1368*0Sstevel@tonic-gate 		goto disconnect_unlocked;
1369*0Sstevel@tonic-gate 	}
1370*0Sstevel@tonic-gate 	optlen = conn_ind->OPT_length;
1371*0Sstevel@tonic-gate 	switch (so->so_family) {
1372*0Sstevel@tonic-gate 	case AF_INET:
1373*0Sstevel@tonic-gate 	case AF_INET6:
1374*0Sstevel@tonic-gate 		if ((optlen == sizeof (intptr_t)) &&
1375*0Sstevel@tonic-gate 		    ((so->so_state & SS_TCP_FAST_ACCEPT) != 0)) {
1376*0Sstevel@tonic-gate 			bcopy(mp->b_rptr + conn_ind->OPT_offset,
1377*0Sstevel@tonic-gate 			    &opt, conn_ind->OPT_length);
1378*0Sstevel@tonic-gate 		} else {
1379*0Sstevel@tonic-gate 			/*
1380*0Sstevel@tonic-gate 			 * The transport (in this case TCP) hasn't sent up
1381*0Sstevel@tonic-gate 			 * a pointer to an instance for the accept fast-path.
1382*0Sstevel@tonic-gate 			 * Disable fast-path completely because the call to
1383*0Sstevel@tonic-gate 			 * sotpi_create() below would otherwise create an
1384*0Sstevel@tonic-gate 			 * incomplete TCP instance, which would lead to
1385*0Sstevel@tonic-gate 			 * problems when sockfs sends a normal T_CONN_RES
1386*0Sstevel@tonic-gate 			 * message down the new stream.
1387*0Sstevel@tonic-gate 			 */
1388*0Sstevel@tonic-gate 			so->so_state &= ~SS_TCP_FAST_ACCEPT;
1389*0Sstevel@tonic-gate 			opt = NULL;
1390*0Sstevel@tonic-gate 			optlen = 0;
1391*0Sstevel@tonic-gate 		}
1392*0Sstevel@tonic-gate 		break;
1393*0Sstevel@tonic-gate 	case AF_UNIX:
1394*0Sstevel@tonic-gate 	default:
1395*0Sstevel@tonic-gate 		if (optlen != 0) {
1396*0Sstevel@tonic-gate 			opt = sogetoff(mp, conn_ind->OPT_offset, optlen,
1397*0Sstevel@tonic-gate 			    __TPI_ALIGN_SIZE);
1398*0Sstevel@tonic-gate 			if (opt == NULL) {
1399*0Sstevel@tonic-gate 				error = EPROTO;
1400*0Sstevel@tonic-gate 				freemsg(mp);
1401*0Sstevel@tonic-gate 				eprintsoline(so, error);
1402*0Sstevel@tonic-gate 				goto disconnect_unlocked;
1403*0Sstevel@tonic-gate 			}
1404*0Sstevel@tonic-gate 		}
1405*0Sstevel@tonic-gate 		if (so->so_family == AF_UNIX) {
1406*0Sstevel@tonic-gate 			if (!(so->so_state & SS_FADDR_NOXLATE)) {
1407*0Sstevel@tonic-gate 				src = NULL;
1408*0Sstevel@tonic-gate 				srclen = 0;
1409*0Sstevel@tonic-gate 			}
1410*0Sstevel@tonic-gate 			/* Extract src address from options */
1411*0Sstevel@tonic-gate 			if (optlen != 0)
1412*0Sstevel@tonic-gate 				so_getopt_srcaddr(opt, optlen, &src, &srclen);
1413*0Sstevel@tonic-gate 		}
1414*0Sstevel@tonic-gate 		break;
1415*0Sstevel@tonic-gate 	}
1416*0Sstevel@tonic-gate 
1417*0Sstevel@tonic-gate 	/*
1418*0Sstevel@tonic-gate 	 * Create the new socket.
1419*0Sstevel@tonic-gate 	 */
1420*0Sstevel@tonic-gate 	VN_HOLD(so->so_accessvp);
1421*0Sstevel@tonic-gate 	nso = sotpi_create(so->so_accessvp, so->so_family, so->so_type,
1422*0Sstevel@tonic-gate 			so->so_protocol, so->so_version, so, &error);
1423*0Sstevel@tonic-gate 	if (nso == NULL) {
1424*0Sstevel@tonic-gate 		ASSERT(error != 0);
1425*0Sstevel@tonic-gate 		/*
1426*0Sstevel@tonic-gate 		 * Accept can not fail with ENOBUFS. sotpi_create
1427*0Sstevel@tonic-gate 		 * sleeps waiting for memory until a signal is caught
1428*0Sstevel@tonic-gate 		 * so return EINTR.
1429*0Sstevel@tonic-gate 		 */
1430*0Sstevel@tonic-gate 		freemsg(mp);
1431*0Sstevel@tonic-gate 		if (error == ENOBUFS)
1432*0Sstevel@tonic-gate 			error = EINTR;
1433*0Sstevel@tonic-gate 		goto e_disc_unl;
1434*0Sstevel@tonic-gate 	}
1435*0Sstevel@tonic-gate 	nvp = SOTOV(nso);
1436*0Sstevel@tonic-gate 
1437*0Sstevel@tonic-gate #ifdef DEBUG
1438*0Sstevel@tonic-gate 	/*
1439*0Sstevel@tonic-gate 	 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus
1440*0Sstevel@tonic-gate 	 * it's inherited early to allow debugging of the accept code itself.
1441*0Sstevel@tonic-gate 	 */
1442*0Sstevel@tonic-gate 	nso->so_options |= so->so_options & SO_DEBUG;
1443*0Sstevel@tonic-gate #endif /* DEBUG */
1444*0Sstevel@tonic-gate 
1445*0Sstevel@tonic-gate 	/*
1446*0Sstevel@tonic-gate 	 * Save the SRC address from the T_CONN_IND
1447*0Sstevel@tonic-gate 	 * for getpeername to work on AF_UNIX and on transports that do not
1448*0Sstevel@tonic-gate 	 * support TI_GETPEERNAME.
1449*0Sstevel@tonic-gate 	 *
1450*0Sstevel@tonic-gate 	 * NOTE: AF_UNIX NUL termination is ensured by the sender's
1451*0Sstevel@tonic-gate 	 * copyin_name().
1452*0Sstevel@tonic-gate 	 */
1453*0Sstevel@tonic-gate 	if (srclen > (t_uscalar_t)nso->so_faddr_maxlen) {
1454*0Sstevel@tonic-gate 		error = EINVAL;
1455*0Sstevel@tonic-gate 		freemsg(mp);
1456*0Sstevel@tonic-gate 		eprintsoline(so, error);
1457*0Sstevel@tonic-gate 		goto disconnect_vp_unlocked;
1458*0Sstevel@tonic-gate 	}
1459*0Sstevel@tonic-gate 	nso->so_faddr_len = (socklen_t)srclen;
1460*0Sstevel@tonic-gate 	ASSERT(so->so_faddr_len <= so->so_faddr_maxlen);
1461*0Sstevel@tonic-gate 	bcopy(src, nso->so_faddr_sa, srclen);
1462*0Sstevel@tonic-gate 	nso->so_state |= SS_FADDR_VALID;
1463*0Sstevel@tonic-gate 
1464*0Sstevel@tonic-gate 	if ((DB_REF(mp) > 1) || MBLKSIZE(mp) <
1465*0Sstevel@tonic-gate 	    (sizeof (struct T_conn_res) + sizeof (intptr_t))) {
1466*0Sstevel@tonic-gate 		cred_t *cr;
1467*0Sstevel@tonic-gate 
1468*0Sstevel@tonic-gate 		if ((cr = DB_CRED(mp)) != NULL) {
1469*0Sstevel@tonic-gate 			crhold(cr);
1470*0Sstevel@tonic-gate 			nso->so_peercred = cr;
1471*0Sstevel@tonic-gate 			nso->so_cpid = DB_CPID(mp);
1472*0Sstevel@tonic-gate 		}
1473*0Sstevel@tonic-gate 		freemsg(mp);
1474*0Sstevel@tonic-gate 
1475*0Sstevel@tonic-gate 		mp = soallocproto1(NULL, sizeof (struct T_conn_res) +
1476*0Sstevel@tonic-gate 		    sizeof (intptr_t), 0, _ALLOC_INTR);
1477*0Sstevel@tonic-gate 		if (mp == NULL) {
1478*0Sstevel@tonic-gate 			/*
1479*0Sstevel@tonic-gate 			 * Accept can not fail with ENOBUFS.
1480*0Sstevel@tonic-gate 			 * A signal was caught so return EINTR.
1481*0Sstevel@tonic-gate 			 */
1482*0Sstevel@tonic-gate 			error = EINTR;
1483*0Sstevel@tonic-gate 			eprintsoline(so, error);
1484*0Sstevel@tonic-gate 			goto disconnect_vp_unlocked;
1485*0Sstevel@tonic-gate 		}
1486*0Sstevel@tonic-gate 		conn_res = (struct T_conn_res *)mp->b_rptr;
1487*0Sstevel@tonic-gate 	} else {
1488*0Sstevel@tonic-gate 		nso->so_peercred = DB_CRED(mp);
1489*0Sstevel@tonic-gate 		nso->so_cpid = DB_CPID(mp);
1490*0Sstevel@tonic-gate 		DB_CRED(mp) = NULL;
1491*0Sstevel@tonic-gate 
1492*0Sstevel@tonic-gate 		mp->b_rptr = DB_BASE(mp);
1493*0Sstevel@tonic-gate 		conn_res = (struct T_conn_res *)mp->b_rptr;
1494*0Sstevel@tonic-gate 		mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res);
1495*0Sstevel@tonic-gate 	}
1496*0Sstevel@tonic-gate 
1497*0Sstevel@tonic-gate 	/*
1498*0Sstevel@tonic-gate 	 * New socket must be bound at least in sockfs and, except for AF_INET,
1499*0Sstevel@tonic-gate 	 * (or AF_INET6) it also has to be bound in the transport provider.
1500*0Sstevel@tonic-gate 	 * After accepting the connection on nso so_laddr_sa will be set to
1501*0Sstevel@tonic-gate 	 * contain the same address as the listener's local address
1502*0Sstevel@tonic-gate 	 * so the address we bind to isn't important.
1503*0Sstevel@tonic-gate 	 */
1504*0Sstevel@tonic-gate 	if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) &&
1505*0Sstevel@tonic-gate 	    /*CONSTCOND*/
1506*0Sstevel@tonic-gate 	    nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) {
1507*0Sstevel@tonic-gate 		/*
1508*0Sstevel@tonic-gate 		 * Optimization for AF_INET{,6} transports
1509*0Sstevel@tonic-gate 		 * that can handle a T_CONN_RES without being bound.
1510*0Sstevel@tonic-gate 		 */
1511*0Sstevel@tonic-gate 		mutex_enter(&nso->so_lock);
1512*0Sstevel@tonic-gate 		so_automatic_bind(nso);
1513*0Sstevel@tonic-gate 		mutex_exit(&nso->so_lock);
1514*0Sstevel@tonic-gate 	} else {
1515*0Sstevel@tonic-gate 		/* Perform NULL bind with the transport provider. */
1516*0Sstevel@tonic-gate 		if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC)) != 0) {
1517*0Sstevel@tonic-gate 			ASSERT(error != ENOBUFS);
1518*0Sstevel@tonic-gate 			freemsg(mp);
1519*0Sstevel@tonic-gate 			eprintsoline(nso, error);
1520*0Sstevel@tonic-gate 			goto disconnect_vp_unlocked;
1521*0Sstevel@tonic-gate 		}
1522*0Sstevel@tonic-gate 	}
1523*0Sstevel@tonic-gate 
1524*0Sstevel@tonic-gate 	/*
1525*0Sstevel@tonic-gate 	 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES
1526*0Sstevel@tonic-gate 	 * so that any data arriving on the new socket will cause the
1527*0Sstevel@tonic-gate 	 * appropriate signals to be delivered for the new socket.
1528*0Sstevel@tonic-gate 	 *
1529*0Sstevel@tonic-gate 	 * No other thread (except strsock_proto and strsock_misc)
1530*0Sstevel@tonic-gate 	 * can access the new socket thus we relax the locking.
1531*0Sstevel@tonic-gate 	 */
1532*0Sstevel@tonic-gate 	nso->so_pgrp = so->so_pgrp;
1533*0Sstevel@tonic-gate 	nso->so_state |= so->so_state & (SS_ASYNC|SS_FADDR_NOXLATE);
1534*0Sstevel@tonic-gate 
1535*0Sstevel@tonic-gate 	if (nso->so_pgrp != 0) {
1536*0Sstevel@tonic-gate 		if ((error = so_set_events(nso, nvp, CRED())) != 0) {
1537*0Sstevel@tonic-gate 			eprintsoline(nso, error);
1538*0Sstevel@tonic-gate 			error = 0;
1539*0Sstevel@tonic-gate 			nso->so_pgrp = 0;
1540*0Sstevel@tonic-gate 		}
1541*0Sstevel@tonic-gate 	}
1542*0Sstevel@tonic-gate 
1543*0Sstevel@tonic-gate 	/*
1544*0Sstevel@tonic-gate 	 * Make note of the socket level options. TCP and IP level options
1545*0Sstevel@tonic-gate 	 * are already inherited. We could do all this after accept is
1546*0Sstevel@tonic-gate 	 * successful but doing it here simplifies code and no harm done
1547*0Sstevel@tonic-gate 	 * for error case.
1548*0Sstevel@tonic-gate 	 */
1549*0Sstevel@tonic-gate 	nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE|
1550*0Sstevel@tonic-gate 	    SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK|
1551*0Sstevel@tonic-gate 	    SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER);
1552*0Sstevel@tonic-gate 	nso->so_sndbuf = so->so_sndbuf;
1553*0Sstevel@tonic-gate 	nso->so_rcvbuf = so->so_rcvbuf;
1554*0Sstevel@tonic-gate 	if (nso->so_options & SO_LINGER)
1555*0Sstevel@tonic-gate 		nso->so_linger = so->so_linger;
1556*0Sstevel@tonic-gate 
1557*0Sstevel@tonic-gate 	if ((so->so_state & SS_TCP_FAST_ACCEPT) != 0) {
1558*0Sstevel@tonic-gate 		mblk_t *ack_mp;
1559*0Sstevel@tonic-gate 
1560*0Sstevel@tonic-gate 		ASSERT(opt != NULL);
1561*0Sstevel@tonic-gate 
1562*0Sstevel@tonic-gate 		conn_res->OPT_length = optlen;
1563*0Sstevel@tonic-gate 		conn_res->OPT_offset = MBLKL(mp);
1564*0Sstevel@tonic-gate 		bcopy(&opt, mp->b_wptr, optlen);
1565*0Sstevel@tonic-gate 		mp->b_wptr += optlen;
1566*0Sstevel@tonic-gate 		conn_res->PRIM_type = T_CONN_RES;
1567*0Sstevel@tonic-gate 		conn_res->ACCEPTOR_id = 0;
1568*0Sstevel@tonic-gate 		PRIM_type = T_CONN_RES;
1569*0Sstevel@tonic-gate 
1570*0Sstevel@tonic-gate 		/* Send down the T_CONN_RES on acceptor STREAM */
1571*0Sstevel@tonic-gate 		error = kstrputmsg(SOTOV(nso), mp, NULL,
1572*0Sstevel@tonic-gate 		    0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
1573*0Sstevel@tonic-gate 		if (error) {
1574*0Sstevel@tonic-gate 			mutex_enter(&so->so_lock);
1575*0Sstevel@tonic-gate 			so_lock_single(so);
1576*0Sstevel@tonic-gate 			eprintsoline(so, error);
1577*0Sstevel@tonic-gate 			goto disconnect_vp;
1578*0Sstevel@tonic-gate 		}
1579*0Sstevel@tonic-gate 		mutex_enter(&nso->so_lock);
1580*0Sstevel@tonic-gate 		error = sowaitprim(nso, T_CONN_RES, T_OK_ACK,
1581*0Sstevel@tonic-gate 		    (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0);
1582*0Sstevel@tonic-gate 		if (error) {
1583*0Sstevel@tonic-gate 			mutex_exit(&nso->so_lock);
1584*0Sstevel@tonic-gate 			mutex_enter(&so->so_lock);
1585*0Sstevel@tonic-gate 			so_lock_single(so);
1586*0Sstevel@tonic-gate 			eprintsoline(so, error);
1587*0Sstevel@tonic-gate 			goto disconnect_vp;
1588*0Sstevel@tonic-gate 		}
1589*0Sstevel@tonic-gate 		if (nso->so_family == AF_INET) {
1590*0Sstevel@tonic-gate 			sin_t *sin;
1591*0Sstevel@tonic-gate 
1592*0Sstevel@tonic-gate 			sin = (sin_t *)(ack_mp->b_rptr +
1593*0Sstevel@tonic-gate 			    sizeof (struct T_ok_ack));
1594*0Sstevel@tonic-gate 			bcopy(sin, nso->so_laddr_sa, sizeof (sin_t));
1595*0Sstevel@tonic-gate 			nso->so_laddr_len = sizeof (sin_t);
1596*0Sstevel@tonic-gate 		} else {
1597*0Sstevel@tonic-gate 			sin6_t *sin6;
1598*0Sstevel@tonic-gate 
1599*0Sstevel@tonic-gate 			sin6 = (sin6_t *)(ack_mp->b_rptr +
1600*0Sstevel@tonic-gate 			    sizeof (struct T_ok_ack));
1601*0Sstevel@tonic-gate 			bcopy(sin6, nso->so_laddr_sa, sizeof (sin6_t));
1602*0Sstevel@tonic-gate 			nso->so_laddr_len = sizeof (sin6_t);
1603*0Sstevel@tonic-gate 		}
1604*0Sstevel@tonic-gate 		freemsg(ack_mp);
1605*0Sstevel@tonic-gate 
1606*0Sstevel@tonic-gate 		nso->so_state |= SS_ISCONNECTED | SS_LADDR_VALID;
1607*0Sstevel@tonic-gate 		nso->so_priv = opt;
1608*0Sstevel@tonic-gate 
1609*0Sstevel@tonic-gate 		if (so->so_nl7c_flags & NL7C_ENABLED) {
1610*0Sstevel@tonic-gate 			/*
1611*0Sstevel@tonic-gate 			 * An NL7C marked listen()er so the new socket
1612*0Sstevel@tonic-gate 			 * inherits the listen()er's NL7C state.
1613*0Sstevel@tonic-gate 			 *
1614*0Sstevel@tonic-gate 			 * When calling NL7C to process the new socket
1615*0Sstevel@tonic-gate 			 * pass the nonblocking i/o state of the listen
1616*0Sstevel@tonic-gate 			 * socket as this is the context we are in.
1617*0Sstevel@tonic-gate 			 */
1618*0Sstevel@tonic-gate 			nso->so_nl7c_flags = so->so_nl7c_flags;
1619*0Sstevel@tonic-gate 			if (nl7c_process(nso,
1620*0Sstevel@tonic-gate 			    (nso->so_state & (SS_NONBLOCK|SS_NDELAY)),
1621*0Sstevel@tonic-gate 			    (int)((tcp_t *)nso->so_priv)->tcp_mss)) {
1622*0Sstevel@tonic-gate 				/*
1623*0Sstevel@tonic-gate 				 * NL7C has completed processing on the
1624*0Sstevel@tonic-gate 				 * socket, close the socket and back to
1625*0Sstevel@tonic-gate 				 * the top to await the next T_CONN_IND.
1626*0Sstevel@tonic-gate 				 */
1627*0Sstevel@tonic-gate 				mutex_exit(&nso->so_lock);
1628*0Sstevel@tonic-gate 				(void) VOP_CLOSE(nvp, 0, 1, (offset_t)0,
1629*0Sstevel@tonic-gate 						CRED());
1630*0Sstevel@tonic-gate 				VN_RELE(nvp);
1631*0Sstevel@tonic-gate 				goto again;
1632*0Sstevel@tonic-gate 			}
1633*0Sstevel@tonic-gate 			/* Pass the new socket out */
1634*0Sstevel@tonic-gate 		}
1635*0Sstevel@tonic-gate 
1636*0Sstevel@tonic-gate 		mutex_exit(&nso->so_lock);
1637*0Sstevel@tonic-gate 
1638*0Sstevel@tonic-gate 		/*
1639*0Sstevel@tonic-gate 		 * Pass out new socket.
1640*0Sstevel@tonic-gate 		 */
1641*0Sstevel@tonic-gate 		if (nsop != NULL)
1642*0Sstevel@tonic-gate 			*nsop = nso;
1643*0Sstevel@tonic-gate 
1644*0Sstevel@tonic-gate 		return (0);
1645*0Sstevel@tonic-gate 	}
1646*0Sstevel@tonic-gate 
1647*0Sstevel@tonic-gate 	/*
1648*0Sstevel@tonic-gate 	 * Copy local address from listener.
1649*0Sstevel@tonic-gate 	 */
1650*0Sstevel@tonic-gate 	nso->so_laddr_len = so->so_laddr_len;
1651*0Sstevel@tonic-gate 	ASSERT(nso->so_laddr_len <= nso->so_laddr_maxlen);
1652*0Sstevel@tonic-gate 	bcopy(so->so_laddr_sa, nso->so_laddr_sa, nso->so_laddr_len);
1653*0Sstevel@tonic-gate 	nso->so_state |= SS_LADDR_VALID;
1654*0Sstevel@tonic-gate 
1655*0Sstevel@tonic-gate 	/*
1656*0Sstevel@tonic-gate 	 * This is the non-performance case for sockets (e.g. AF_UNIX sockets)
1657*0Sstevel@tonic-gate 	 * which don't support the FireEngine accept fast-path. It is also
1658*0Sstevel@tonic-gate 	 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd
1659*0Sstevel@tonic-gate 	 * again. Neither sockfs nor TCP attempt to find out if some other
1660*0Sstevel@tonic-gate 	 * random module has been inserted in between (in which case we
1661*0Sstevel@tonic-gate 	 * should follow TLI accept behaviour). We blindly assume the worst
1662*0Sstevel@tonic-gate 	 * case and revert back to old behaviour i.e. TCP will not send us
1663*0Sstevel@tonic-gate 	 * any option (eager) and the accept should happen on the listener
1664*0Sstevel@tonic-gate 	 * queue. Any queued T_conn_ind have already got their options removed
1665*0Sstevel@tonic-gate 	 * by so_sock2_stream() when "sockmod" was I_POP'd.
1666*0Sstevel@tonic-gate 	 */
1667*0Sstevel@tonic-gate 	/*
1668*0Sstevel@tonic-gate 	 * Fill in the {O_}T_CONN_RES before getting SOLOCKED.
1669*0Sstevel@tonic-gate 	 */
1670*0Sstevel@tonic-gate 	if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) {
1671*0Sstevel@tonic-gate #ifdef	_ILP32
1672*0Sstevel@tonic-gate 		queue_t	*q;
1673*0Sstevel@tonic-gate 
1674*0Sstevel@tonic-gate 		/*
1675*0Sstevel@tonic-gate 		 * Find read queue in driver
1676*0Sstevel@tonic-gate 		 * Can safely do this since we "own" nso/nvp.
1677*0Sstevel@tonic-gate 		 */
1678*0Sstevel@tonic-gate 		q = strvp2wq(nvp)->q_next;
1679*0Sstevel@tonic-gate 		while (SAMESTR(q))
1680*0Sstevel@tonic-gate 			q = q->q_next;
1681*0Sstevel@tonic-gate 		q = RD(q);
1682*0Sstevel@tonic-gate 		conn_res->ACCEPTOR_id = (t_uscalar_t)q;
1683*0Sstevel@tonic-gate #else
1684*0Sstevel@tonic-gate 		conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev);
1685*0Sstevel@tonic-gate #endif	/* _ILP32 */
1686*0Sstevel@tonic-gate 		conn_res->PRIM_type = O_T_CONN_RES;
1687*0Sstevel@tonic-gate 		PRIM_type = O_T_CONN_RES;
1688*0Sstevel@tonic-gate 	} else {
1689*0Sstevel@tonic-gate 		conn_res->ACCEPTOR_id = nso->so_acceptor_id;
1690*0Sstevel@tonic-gate 		conn_res->PRIM_type = T_CONN_RES;
1691*0Sstevel@tonic-gate 		PRIM_type = T_CONN_RES;
1692*0Sstevel@tonic-gate 	}
1693*0Sstevel@tonic-gate 	conn_res->SEQ_number = SEQ_number;
1694*0Sstevel@tonic-gate 	conn_res->OPT_length = 0;
1695*0Sstevel@tonic-gate 	conn_res->OPT_offset = 0;
1696*0Sstevel@tonic-gate 
1697*0Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
1698*0Sstevel@tonic-gate 	so_lock_single(so);	/* Set SOLOCKED */
1699*0Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
1700*0Sstevel@tonic-gate 
1701*0Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, NULL,
1702*0Sstevel@tonic-gate 	    0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
1703*0Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
1704*0Sstevel@tonic-gate 	if (error) {
1705*0Sstevel@tonic-gate 		eprintsoline(so, error);
1706*0Sstevel@tonic-gate 		goto disconnect_vp;
1707*0Sstevel@tonic-gate 	}
1708*0Sstevel@tonic-gate 	error = sowaitokack(so, PRIM_type);
1709*0Sstevel@tonic-gate 	if (error) {
1710*0Sstevel@tonic-gate 		eprintsoline(so, error);
1711*0Sstevel@tonic-gate 		goto disconnect_vp;
1712*0Sstevel@tonic-gate 	}
1713*0Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
1714*0Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
1715*0Sstevel@tonic-gate 
1716*0Sstevel@tonic-gate 	nso->so_state |= SS_ISCONNECTED;
1717*0Sstevel@tonic-gate 
1718*0Sstevel@tonic-gate 	/*
1719*0Sstevel@tonic-gate 	 * Pass out new socket.
1720*0Sstevel@tonic-gate 	 */
1721*0Sstevel@tonic-gate 	if (nsop != NULL)
1722*0Sstevel@tonic-gate 		*nsop = nso;
1723*0Sstevel@tonic-gate 
1724*0Sstevel@tonic-gate 	return (0);
1725*0Sstevel@tonic-gate 
1726*0Sstevel@tonic-gate 
1727*0Sstevel@tonic-gate eproto_disc_unl:
1728*0Sstevel@tonic-gate 	error = EPROTO;
1729*0Sstevel@tonic-gate e_disc_unl:
1730*0Sstevel@tonic-gate 	eprintsoline(so, error);
1731*0Sstevel@tonic-gate 	goto disconnect_unlocked;
1732*0Sstevel@tonic-gate 
1733*0Sstevel@tonic-gate pr_disc_vp_unl:
1734*0Sstevel@tonic-gate 	eprintsoline(so, error);
1735*0Sstevel@tonic-gate disconnect_vp_unlocked:
1736*0Sstevel@tonic-gate 	(void) VOP_CLOSE(nvp, 0, 1, 0, CRED());
1737*0Sstevel@tonic-gate 	VN_RELE(nvp);
1738*0Sstevel@tonic-gate disconnect_unlocked:
1739*0Sstevel@tonic-gate 	(void) sodisconnect(so, SEQ_number, 0);
1740*0Sstevel@tonic-gate 	return (error);
1741*0Sstevel@tonic-gate 
1742*0Sstevel@tonic-gate pr_disc_vp:
1743*0Sstevel@tonic-gate 	eprintsoline(so, error);
1744*0Sstevel@tonic-gate disconnect_vp:
1745*0Sstevel@tonic-gate 	(void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD);
1746*0Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
1747*0Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
1748*0Sstevel@tonic-gate 	(void) VOP_CLOSE(nvp, 0, 1, 0, CRED());
1749*0Sstevel@tonic-gate 	VN_RELE(nvp);
1750*0Sstevel@tonic-gate 	return (error);
1751*0Sstevel@tonic-gate 
1752*0Sstevel@tonic-gate conn_bad:	/* Note: SunOS 4/BSD unconditionally returns EINVAL here */
1753*0Sstevel@tonic-gate 	error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW)
1754*0Sstevel@tonic-gate 	    ? EOPNOTSUPP : EINVAL;
1755*0Sstevel@tonic-gate e_bad:
1756*0Sstevel@tonic-gate 	eprintsoline(so, error);
1757*0Sstevel@tonic-gate 	return (error);
1758*0Sstevel@tonic-gate }
1759*0Sstevel@tonic-gate 
1760*0Sstevel@tonic-gate /*
1761*0Sstevel@tonic-gate  * connect a socket.
1762*0Sstevel@tonic-gate  *
1763*0Sstevel@tonic-gate  * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to
1764*0Sstevel@tonic-gate  * unconnect (by specifying a null address).
1765*0Sstevel@tonic-gate  */
1766*0Sstevel@tonic-gate int
1767*0Sstevel@tonic-gate sotpi_connect(struct sonode *so,
1768*0Sstevel@tonic-gate 	const struct sockaddr *name,
1769*0Sstevel@tonic-gate 	socklen_t namelen,
1770*0Sstevel@tonic-gate 	int fflag,
1771*0Sstevel@tonic-gate 	int flags)
1772*0Sstevel@tonic-gate {
1773*0Sstevel@tonic-gate 	struct T_conn_req	conn_req;
1774*0Sstevel@tonic-gate 	int			error = 0;
1775*0Sstevel@tonic-gate 	mblk_t			*mp;
1776*0Sstevel@tonic-gate 	void			*src;
1777*0Sstevel@tonic-gate 	socklen_t		srclen;
1778*0Sstevel@tonic-gate 	void			*addr;
1779*0Sstevel@tonic-gate 	socklen_t		addrlen;
1780*0Sstevel@tonic-gate 	boolean_t		need_unlock;
1781*0Sstevel@tonic-gate 
1782*0Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n",
1783*0Sstevel@tonic-gate 		so, name, namelen, fflag, flags,
1784*0Sstevel@tonic-gate 		pr_state(so->so_state, so->so_mode)));
1785*0Sstevel@tonic-gate 
1786*0Sstevel@tonic-gate 	/*
1787*0Sstevel@tonic-gate 	 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to
1788*0Sstevel@tonic-gate 	 * avoid sleeping for memory with SOLOCKED held.
1789*0Sstevel@tonic-gate 	 * We know that the T_CONN_REQ can't be larger than 2 * so_faddr_maxlen
1790*0Sstevel@tonic-gate 	 * + sizeof (struct T_opthdr).
1791*0Sstevel@tonic-gate 	 * (the AF_UNIX so_ux_addr_xlate() does not make the address
1792*0Sstevel@tonic-gate 	 * exceed so_faddr_maxlen).
1793*0Sstevel@tonic-gate 	 */
1794*0Sstevel@tonic-gate 	mp = soallocproto(sizeof (struct T_conn_req) +
1795*0Sstevel@tonic-gate 	    2 * so->so_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR);
1796*0Sstevel@tonic-gate 	if (mp == NULL) {
1797*0Sstevel@tonic-gate 		/*
1798*0Sstevel@tonic-gate 		 * Connect can not fail with ENOBUFS. A signal was
1799*0Sstevel@tonic-gate 		 * caught so return EINTR.
1800*0Sstevel@tonic-gate 		 */
1801*0Sstevel@tonic-gate 		error = EINTR;
1802*0Sstevel@tonic-gate 		eprintsoline(so, error);
1803*0Sstevel@tonic-gate 		return (error);
1804*0Sstevel@tonic-gate 	}
1805*0Sstevel@tonic-gate 
1806*0Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
1807*0Sstevel@tonic-gate 	/*
1808*0Sstevel@tonic-gate 	 * Make sure that there is a preallocated unbind_req
1809*0Sstevel@tonic-gate 	 * message before any binding. This message allocated when
1810*0Sstevel@tonic-gate 	 * message before any binding. This message is allocated when
1811*0Sstevel@tonic-gate 	 * the socket is created but it might have been
1812*0Sstevel@tonic-gate 	 * consumed.
1813*0Sstevel@tonic-gate 	if (so->so_unbind_mp == NULL) {
1814*0Sstevel@tonic-gate 		dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n"));
1815*0Sstevel@tonic-gate 		/* NOTE: holding so_lock while sleeping */
1816*0Sstevel@tonic-gate 		so->so_unbind_mp =
1817*0Sstevel@tonic-gate 		    soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR);
1818*0Sstevel@tonic-gate 		if (so->so_unbind_mp == NULL) {
1819*0Sstevel@tonic-gate 			error = EINTR;
1820*0Sstevel@tonic-gate 			need_unlock = B_FALSE;
1821*0Sstevel@tonic-gate 			goto done;
1822*0Sstevel@tonic-gate 		}
1823*0Sstevel@tonic-gate 	}
1824*0Sstevel@tonic-gate 
1825*0Sstevel@tonic-gate 	so_lock_single(so);	/* Set SOLOCKED */
1826*0Sstevel@tonic-gate 	need_unlock = B_TRUE;
1827*0Sstevel@tonic-gate 
1828*0Sstevel@tonic-gate 	/*
1829*0Sstevel@tonic-gate 	 * Can't have done a listen before connecting.
1830*0Sstevel@tonic-gate 	 */
1831*0Sstevel@tonic-gate 	if (so->so_state & SS_ACCEPTCONN) {
1832*0Sstevel@tonic-gate 		error = EOPNOTSUPP;
1833*0Sstevel@tonic-gate 		goto done;
1834*0Sstevel@tonic-gate 	}
1835*0Sstevel@tonic-gate 
1836*0Sstevel@tonic-gate 	/*
1837*0Sstevel@tonic-gate 	 * Must be bound with the transport
1838*0Sstevel@tonic-gate 	 */
1839*0Sstevel@tonic-gate 	if (!(so->so_state & SS_ISBOUND)) {
1840*0Sstevel@tonic-gate 		if ((so->so_family == AF_INET || so->so_family == AF_INET6) &&
1841*0Sstevel@tonic-gate 		    /*CONSTCOND*/
1842*0Sstevel@tonic-gate 		    so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) {
1843*0Sstevel@tonic-gate 			/*
1844*0Sstevel@tonic-gate 			 * Optimization for AF_INET{,6} transports
1845*0Sstevel@tonic-gate 			 * that can handle a T_CONN_REQ without being bound.
1846*0Sstevel@tonic-gate 			 */
1847*0Sstevel@tonic-gate 			so_automatic_bind(so);
1848*0Sstevel@tonic-gate 		} else {
1849*0Sstevel@tonic-gate 			error = sotpi_bind(so, NULL, 0,
1850*0Sstevel@tonic-gate 			    _SOBIND_UNSPEC|_SOBIND_LOCK_HELD);
1851*0Sstevel@tonic-gate 			if (error)
1852*0Sstevel@tonic-gate 				goto done;
1853*0Sstevel@tonic-gate 		}
1854*0Sstevel@tonic-gate 		ASSERT(so->so_state & SS_ISBOUND);
1855*0Sstevel@tonic-gate 		flags |= _SOCONNECT_DID_BIND;
1856*0Sstevel@tonic-gate 	}
1857*0Sstevel@tonic-gate 
1858*0Sstevel@tonic-gate 	/*
1859*0Sstevel@tonic-gate 	 * Handle a connect to a name parameter of type AF_UNSPEC like a
1860*0Sstevel@tonic-gate 	 * connect to a null address. This is the portable method to
1861*0Sstevel@tonic-gate 	 * unconnect a socket.
1862*0Sstevel@tonic-gate 	 */
1863*0Sstevel@tonic-gate 	if ((namelen >= sizeof (sa_family_t)) &&
1864*0Sstevel@tonic-gate 	    (name->sa_family == AF_UNSPEC)) {
1865*0Sstevel@tonic-gate 		name = NULL;
1866*0Sstevel@tonic-gate 		namelen = 0;
1867*0Sstevel@tonic-gate 	}
1868*0Sstevel@tonic-gate 
1869*0Sstevel@tonic-gate 	/*
1870*0Sstevel@tonic-gate 	 * Check that we are not already connected.
1871*0Sstevel@tonic-gate 	 * A connection-oriented socket cannot be reconnected.
1872*0Sstevel@tonic-gate 	 * A connected connection-less socket can be
1873*0Sstevel@tonic-gate 	 * - connected to a different address by a subsequent connect
1874*0Sstevel@tonic-gate 	 * - "unconnected" by a connect to the NULL address
1875*0Sstevel@tonic-gate 	 */
1876*0Sstevel@tonic-gate 	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) {
1877*0Sstevel@tonic-gate 		ASSERT(!(flags & _SOCONNECT_DID_BIND));
1878*0Sstevel@tonic-gate 		if (so->so_mode & SM_CONNREQUIRED) {
1879*0Sstevel@tonic-gate 			/* Connection-oriented socket */
1880*0Sstevel@tonic-gate 			error = so->so_state & SS_ISCONNECTED ?
1881*0Sstevel@tonic-gate 			    EISCONN : EALREADY;
1882*0Sstevel@tonic-gate 			goto done;
1883*0Sstevel@tonic-gate 		}
1884*0Sstevel@tonic-gate 		/* Connection-less socket */
1885*0Sstevel@tonic-gate 		if (name == NULL) {
1886*0Sstevel@tonic-gate 			/*
1887*0Sstevel@tonic-gate 			 * Remove the connected state and clear SO_DGRAM_ERRIND
1888*0Sstevel@tonic-gate 			 * since it was set when the socket was connected.
1889*0Sstevel@tonic-gate 			 * If this is UDP also send down a T_DISCON_REQ.
1890*0Sstevel@tonic-gate 			 */
1891*0Sstevel@tonic-gate 			int val;
1892*0Sstevel@tonic-gate 
1893*0Sstevel@tonic-gate 			if ((so->so_family == AF_INET ||
1894*0Sstevel@tonic-gate 				so->so_family == AF_INET6) &&
1895*0Sstevel@tonic-gate 			    (so->so_type == SOCK_DGRAM ||
1896*0Sstevel@tonic-gate 				so->so_type == SOCK_RAW) &&
1897*0Sstevel@tonic-gate 			    /*CONSTCOND*/
1898*0Sstevel@tonic-gate 			    !soconnect_tpi_udp) {
1899*0Sstevel@tonic-gate 				/* XXX What about implicitly unbinding here? */
1900*0Sstevel@tonic-gate 				error = sodisconnect(so, -1,
1901*0Sstevel@tonic-gate 						_SODISCONNECT_LOCK_HELD);
1902*0Sstevel@tonic-gate 			} else {
1903*0Sstevel@tonic-gate 				so->so_state &=
1904*0Sstevel@tonic-gate 				    ~(SS_ISCONNECTED | SS_ISCONNECTING |
1905*0Sstevel@tonic-gate 				    SS_FADDR_VALID);
1906*0Sstevel@tonic-gate 				so->so_faddr_len = 0;
1907*0Sstevel@tonic-gate 			}
1908*0Sstevel@tonic-gate 
1909*0Sstevel@tonic-gate 			so_unlock_single(so, SOLOCKED);
1910*0Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
1911*0Sstevel@tonic-gate 
1912*0Sstevel@tonic-gate 			val = 0;
1913*0Sstevel@tonic-gate 			(void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND,
1914*0Sstevel@tonic-gate 					&val, (t_uscalar_t)sizeof (val));
1915*0Sstevel@tonic-gate 
1916*0Sstevel@tonic-gate 			mutex_enter(&so->so_lock);
1917*0Sstevel@tonic-gate 			so_lock_single(so);	/* Set SOLOCKED */
1918*0Sstevel@tonic-gate 			goto done;
1919*0Sstevel@tonic-gate 		}
1920*0Sstevel@tonic-gate 	}
1921*0Sstevel@tonic-gate 	ASSERT(so->so_state & SS_ISBOUND);
1922*0Sstevel@tonic-gate 
1923*0Sstevel@tonic-gate 	if (name == NULL || namelen == 0) {
1924*0Sstevel@tonic-gate 		error = EINVAL;
1925*0Sstevel@tonic-gate 		goto done;
1926*0Sstevel@tonic-gate 	}
1927*0Sstevel@tonic-gate 	/*
1928*0Sstevel@tonic-gate 	 * Mark the socket if so_faddr_sa represents the transport level
1929*0Sstevel@tonic-gate 	 * address.
1930*0Sstevel@tonic-gate 	 */
1931*0Sstevel@tonic-gate 	if (flags & _SOCONNECT_NOXLATE) {
1932*0Sstevel@tonic-gate 		struct sockaddr_ux	*soaddr_ux;
1933*0Sstevel@tonic-gate 
1934*0Sstevel@tonic-gate 		ASSERT(so->so_family == AF_UNIX);
1935*0Sstevel@tonic-gate 		if (namelen != sizeof (struct sockaddr_ux)) {
1936*0Sstevel@tonic-gate 			error = EINVAL;
1937*0Sstevel@tonic-gate 			goto done;
1938*0Sstevel@tonic-gate 		}
1939*0Sstevel@tonic-gate 		soaddr_ux = (struct sockaddr_ux *)name;
1940*0Sstevel@tonic-gate 		name = (struct sockaddr *)&soaddr_ux->sou_addr;
1941*0Sstevel@tonic-gate 		namelen = sizeof (soaddr_ux->sou_addr);
1942*0Sstevel@tonic-gate 		so->so_state |= SS_FADDR_NOXLATE;
1943*0Sstevel@tonic-gate 	}
1944*0Sstevel@tonic-gate 
1945*0Sstevel@tonic-gate 	/*
1946*0Sstevel@tonic-gate 	 * Length and family checks.
1947*0Sstevel@tonic-gate 	 */
1948*0Sstevel@tonic-gate 	error = so_addr_verify(so, name, namelen);
1949*0Sstevel@tonic-gate 	if (error)
1950*0Sstevel@tonic-gate 		goto bad;
1951*0Sstevel@tonic-gate 
1952*0Sstevel@tonic-gate 	/*
1953*0Sstevel@tonic-gate 	 * Save foreign address. Needed for AF_UNIX as well as
1954*0Sstevel@tonic-gate 	 * transport providers that do not support TI_GETPEERNAME.
1955*0Sstevel@tonic-gate 	 * Also used for cached foreign address for TCP and UDP.
1956*0Sstevel@tonic-gate 	 */
1957*0Sstevel@tonic-gate 	if (namelen > (t_uscalar_t)so->so_faddr_maxlen) {
1958*0Sstevel@tonic-gate 		error = EINVAL;
1959*0Sstevel@tonic-gate 		goto done;
1960*0Sstevel@tonic-gate 	}
1961*0Sstevel@tonic-gate 	so->so_faddr_len = (socklen_t)namelen;
1962*0Sstevel@tonic-gate 	ASSERT(so->so_faddr_len <= so->so_faddr_maxlen);
1963*0Sstevel@tonic-gate 	bcopy(name, so->so_faddr_sa, namelen);
1964*0Sstevel@tonic-gate 	so->so_state |= SS_FADDR_VALID;
1965*0Sstevel@tonic-gate 
1966*0Sstevel@tonic-gate 	if (so->so_family == AF_UNIX) {
1967*0Sstevel@tonic-gate 		if (so->so_state & SS_FADDR_NOXLATE) {
1968*0Sstevel@tonic-gate 			/*
1969*0Sstevel@tonic-gate 			 * Already have a transport internal address. Do not
1970*0Sstevel@tonic-gate 			 * pass any (transport internal) source address.
1971*0Sstevel@tonic-gate 			 */
1972*0Sstevel@tonic-gate 			addr = so->so_faddr_sa;
1973*0Sstevel@tonic-gate 			addrlen = (t_uscalar_t)so->so_faddr_len;
1974*0Sstevel@tonic-gate 			src = NULL;
1975*0Sstevel@tonic-gate 			srclen = 0;
1976*0Sstevel@tonic-gate 		} else {
1977*0Sstevel@tonic-gate 			/*
1978*0Sstevel@tonic-gate 			 * Pass the sockaddr_un source address as an option
1979*0Sstevel@tonic-gate 			 * and translate the remote address.
1980*0Sstevel@tonic-gate 			 * Holding so_lock thus so_laddr_sa can not change.
1981*0Sstevel@tonic-gate 			 */
1982*0Sstevel@tonic-gate 			src = so->so_laddr_sa;
1983*0Sstevel@tonic-gate 			srclen = (t_uscalar_t)so->so_laddr_len;
1984*0Sstevel@tonic-gate 			dprintso(so, 1,
1985*0Sstevel@tonic-gate 				("sotpi_connect UNIX: srclen %d, src %p\n",
1986*0Sstevel@tonic-gate 				srclen, src));
1987*0Sstevel@tonic-gate 			error = so_ux_addr_xlate(so,
1988*0Sstevel@tonic-gate 				so->so_faddr_sa, (socklen_t)so->so_faddr_len,
1989*0Sstevel@tonic-gate 				(flags & _SOCONNECT_XPG4_2),
1990*0Sstevel@tonic-gate 				&addr, &addrlen);
1991*0Sstevel@tonic-gate 			if (error)
1992*0Sstevel@tonic-gate 				goto bad;
1993*0Sstevel@tonic-gate 		}
1994*0Sstevel@tonic-gate 	} else {
1995*0Sstevel@tonic-gate 		addr = so->so_faddr_sa;
1996*0Sstevel@tonic-gate 		addrlen = (t_uscalar_t)so->so_faddr_len;
1997*0Sstevel@tonic-gate 		src = NULL;
1998*0Sstevel@tonic-gate 		srclen = 0;
1999*0Sstevel@tonic-gate 	}
2000*0Sstevel@tonic-gate 	/*
2001*0Sstevel@tonic-gate 	 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND
2002*0Sstevel@tonic-gate 	 * option which asks the transport provider to send T_UDERR_IND
2003*0Sstevel@tonic-gate 	 * messages. These T_UDERR_IND messages are used to return connected
2004*0Sstevel@tonic-gate 	 * style errors (e.g. ECONNRESET) for connected datagram sockets.
2005*0Sstevel@tonic-gate 	 *
2006*0Sstevel@tonic-gate 	 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets)
2007*0Sstevel@tonic-gate 	 * we send down a T_CONN_REQ. This is needed to let the
2008*0Sstevel@tonic-gate 	 * transport assign a local address that is consistent with
2009*0Sstevel@tonic-gate 	 * the remote address. Applications depend on a getsockname()
2010*0Sstevel@tonic-gate 	 * after a connect() to retrieve the "source" IP address for
2011*0Sstevel@tonic-gate 	 * the connected socket.  Invalidate the cached local address
2012*0Sstevel@tonic-gate 	 * to force getsockname() to enquire of the transport.
2013*0Sstevel@tonic-gate 	 */
2014*0Sstevel@tonic-gate 	if (!(so->so_mode & SM_CONNREQUIRED)) {
2015*0Sstevel@tonic-gate 		/*
2016*0Sstevel@tonic-gate 		 * Datagram socket.
2017*0Sstevel@tonic-gate 		 */
2018*0Sstevel@tonic-gate 		int32_t val;
2019*0Sstevel@tonic-gate 
2020*0Sstevel@tonic-gate 		so_unlock_single(so, SOLOCKED);
2021*0Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
2022*0Sstevel@tonic-gate 
2023*0Sstevel@tonic-gate 		val = 1;
2024*0Sstevel@tonic-gate 		(void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND,
2025*0Sstevel@tonic-gate 					&val, (t_uscalar_t)sizeof (val));
2026*0Sstevel@tonic-gate 
2027*0Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
2028*0Sstevel@tonic-gate 		so_lock_single(so);	/* Set SOLOCKED */
2029*0Sstevel@tonic-gate 		if ((so->so_family != AF_INET && so->so_family != AF_INET6) ||
2030*0Sstevel@tonic-gate 		    (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) ||
2031*0Sstevel@tonic-gate 		    soconnect_tpi_udp) {
2032*0Sstevel@tonic-gate 			soisconnected(so);
2033*0Sstevel@tonic-gate 			goto done;
2034*0Sstevel@tonic-gate 		}
2035*0Sstevel@tonic-gate 		/*
2036*0Sstevel@tonic-gate 		 * Send down T_CONN_REQ etc.
2037*0Sstevel@tonic-gate 		 * Clear fflag to avoid returning EWOULDBLOCK.
2038*0Sstevel@tonic-gate 		 */
2039*0Sstevel@tonic-gate 		fflag = 0;
2040*0Sstevel@tonic-gate 		ASSERT(so->so_family != AF_UNIX);
2041*0Sstevel@tonic-gate 		so->so_state &= ~SS_LADDR_VALID;
2042*0Sstevel@tonic-gate 	} else if (so->so_laddr_len != 0) {
2043*0Sstevel@tonic-gate 		/*
2044*0Sstevel@tonic-gate 		 * If the local address or port was "any" then it may be
2045*0Sstevel@tonic-gate 		 * changed by the transport as a result of the
2046*0Sstevel@tonic-gate 		 * connect.  Invalidate the cached version if we have one.
2047*0Sstevel@tonic-gate 		 */
2048*0Sstevel@tonic-gate 		switch (so->so_family) {
2049*0Sstevel@tonic-gate 		case AF_INET:
2050*0Sstevel@tonic-gate 			ASSERT(so->so_laddr_len == (socklen_t)sizeof (sin_t));
2051*0Sstevel@tonic-gate 			if (((sin_t *)so->so_laddr_sa)->sin_addr.s_addr ==
2052*0Sstevel@tonic-gate 			    INADDR_ANY ||
2053*0Sstevel@tonic-gate 			    ((sin_t *)so->so_laddr_sa)->sin_port == 0)
2054*0Sstevel@tonic-gate 				so->so_state &= ~SS_LADDR_VALID;
2055*0Sstevel@tonic-gate 			break;
2056*0Sstevel@tonic-gate 
2057*0Sstevel@tonic-gate 		case AF_INET6:
2058*0Sstevel@tonic-gate 			ASSERT(so->so_laddr_len == (socklen_t)sizeof (sin6_t));
2059*0Sstevel@tonic-gate 			if (IN6_IS_ADDR_UNSPECIFIED(
2060*0Sstevel@tonic-gate 			    &((sin6_t *)so->so_laddr_sa) ->sin6_addr) ||
2061*0Sstevel@tonic-gate 			    IN6_IS_ADDR_V4MAPPED_ANY(
2062*0Sstevel@tonic-gate 			    &((sin6_t *)so->so_laddr_sa)->sin6_addr) ||
2063*0Sstevel@tonic-gate 			    ((sin6_t *)so->so_laddr_sa)->sin6_port == 0)
2064*0Sstevel@tonic-gate 				    so->so_state &= ~SS_LADDR_VALID;
2065*0Sstevel@tonic-gate 			break;
2066*0Sstevel@tonic-gate 
2067*0Sstevel@tonic-gate 		default:
2068*0Sstevel@tonic-gate 			break;
2069*0Sstevel@tonic-gate 		}
2070*0Sstevel@tonic-gate 	}
2071*0Sstevel@tonic-gate 
2072*0Sstevel@tonic-gate 	/*
2073*0Sstevel@tonic-gate 	 * Check for failure of an earlier call
2074*0Sstevel@tonic-gate 	 */
2075*0Sstevel@tonic-gate 	if (so->so_error != 0)
2076*0Sstevel@tonic-gate 		goto so_bad;
2077*0Sstevel@tonic-gate 
2078*0Sstevel@tonic-gate 	/*
2079*0Sstevel@tonic-gate 	 * Send down T_CONN_REQ. Message was allocated above.
2080*0Sstevel@tonic-gate 	 */
2081*0Sstevel@tonic-gate 	conn_req.PRIM_type = T_CONN_REQ;
2082*0Sstevel@tonic-gate 	conn_req.DEST_length = addrlen;
2083*0Sstevel@tonic-gate 	conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req);
2084*0Sstevel@tonic-gate 	if (srclen == 0) {
2085*0Sstevel@tonic-gate 		conn_req.OPT_length = 0;
2086*0Sstevel@tonic-gate 		conn_req.OPT_offset = 0;
2087*0Sstevel@tonic-gate 		soappendmsg(mp, &conn_req, sizeof (conn_req));
2088*0Sstevel@tonic-gate 		soappendmsg(mp, addr, addrlen);
2089*0Sstevel@tonic-gate 	} else {
2090*0Sstevel@tonic-gate 		/*
2091*0Sstevel@tonic-gate 		 * There is a AF_UNIX sockaddr_un to include as a source
2092*0Sstevel@tonic-gate 		 * address option.
2093*0Sstevel@tonic-gate 		 */
2094*0Sstevel@tonic-gate 		struct T_opthdr toh;
2095*0Sstevel@tonic-gate 
2096*0Sstevel@tonic-gate 		toh.level = SOL_SOCKET;
2097*0Sstevel@tonic-gate 		toh.name = SO_SRCADDR;
2098*0Sstevel@tonic-gate 		toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr));
2099*0Sstevel@tonic-gate 		toh.status = 0;
2100*0Sstevel@tonic-gate 		conn_req.OPT_length =
2101*0Sstevel@tonic-gate 			(t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen));
2102*0Sstevel@tonic-gate 		conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) +
2103*0Sstevel@tonic-gate 			_TPI_ALIGN_TOPT(addrlen));
2104*0Sstevel@tonic-gate 
2105*0Sstevel@tonic-gate 		soappendmsg(mp, &conn_req, sizeof (conn_req));
2106*0Sstevel@tonic-gate 		soappendmsg(mp, addr, addrlen);
2107*0Sstevel@tonic-gate 		mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen;
2108*0Sstevel@tonic-gate 		soappendmsg(mp, &toh, sizeof (toh));
2109*0Sstevel@tonic-gate 		soappendmsg(mp, src, srclen);
2110*0Sstevel@tonic-gate 		mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen;
2111*0Sstevel@tonic-gate 		ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
2112*0Sstevel@tonic-gate 	}
2113*0Sstevel@tonic-gate 	/*
2114*0Sstevel@tonic-gate 	 * Set SS_ISCONNECTING before sending down the T_CONN_REQ
2115*0Sstevel@tonic-gate 	 * in order to have the right state when the T_CONN_CON shows up.
2116*0Sstevel@tonic-gate 	 */
2117*0Sstevel@tonic-gate 	soisconnecting(so);
2118*0Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
2119*0Sstevel@tonic-gate 
2120*0Sstevel@tonic-gate #ifdef C2_AUDIT
2121*0Sstevel@tonic-gate 	if (audit_active)
2122*0Sstevel@tonic-gate 		audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0);
2123*0Sstevel@tonic-gate #endif /* C2_AUDIT */
2124*0Sstevel@tonic-gate 
2125*0Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
2126*0Sstevel@tonic-gate 			MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
2127*0Sstevel@tonic-gate 	mp = NULL;
2128*0Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
2129*0Sstevel@tonic-gate 	if (error != 0)
2130*0Sstevel@tonic-gate 		goto bad;
2131*0Sstevel@tonic-gate 
2132*0Sstevel@tonic-gate 	if ((error = sowaitokack(so, T_CONN_REQ)) != 0)
2133*0Sstevel@tonic-gate 		goto bad;
2134*0Sstevel@tonic-gate 
2135*0Sstevel@tonic-gate 	/* Allow other threads to access the socket */
2136*0Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
2137*0Sstevel@tonic-gate 	need_unlock = B_FALSE;
2138*0Sstevel@tonic-gate 
2139*0Sstevel@tonic-gate 	/*
2140*0Sstevel@tonic-gate 	 * Wait until we get a T_CONN_CON or an error
2141*0Sstevel@tonic-gate 	 */
2142*0Sstevel@tonic-gate 	if ((error = sowaitconnected(so, fflag, 0)) != 0) {
2143*0Sstevel@tonic-gate 		so_lock_single(so);	/* Set SOLOCKED */
2144*0Sstevel@tonic-gate 		need_unlock = B_TRUE;
2145*0Sstevel@tonic-gate 	}
2146*0Sstevel@tonic-gate 
2147*0Sstevel@tonic-gate done:
2148*0Sstevel@tonic-gate 	freemsg(mp);
2149*0Sstevel@tonic-gate 	switch (error) {
2150*0Sstevel@tonic-gate 	case EINPROGRESS:
2151*0Sstevel@tonic-gate 	case EALREADY:
2152*0Sstevel@tonic-gate 	case EISCONN:
2153*0Sstevel@tonic-gate 	case EINTR:
2154*0Sstevel@tonic-gate 		/* Non-fatal errors */
2155*0Sstevel@tonic-gate 		so->so_state &= ~SS_LADDR_VALID;
2156*0Sstevel@tonic-gate 		/* FALLTHRU */
2157*0Sstevel@tonic-gate 	case 0:
2158*0Sstevel@tonic-gate 		break;
2159*0Sstevel@tonic-gate 
2160*0Sstevel@tonic-gate 	case EHOSTUNREACH:
2161*0Sstevel@tonic-gate 		if (flags & _SOCONNECT_XPG4_2) {
2162*0Sstevel@tonic-gate 			/*
2163*0Sstevel@tonic-gate 			 * X/Open specification contains a requirement that
2164*0Sstevel@tonic-gate 			 * ENETUNREACH be returned but does not require
2165*0Sstevel@tonic-gate 			 * EHOSTUNREACH. In order to keep the test suite
2166*0Sstevel@tonic-gate 			 * happy we mess with the errno here.
2167*0Sstevel@tonic-gate 			 */
2168*0Sstevel@tonic-gate 			error = ENETUNREACH;
2169*0Sstevel@tonic-gate 		}
2170*0Sstevel@tonic-gate 		/* FALLTHRU */
2171*0Sstevel@tonic-gate 
2172*0Sstevel@tonic-gate 	default:
2173*0Sstevel@tonic-gate 		ASSERT(need_unlock);
2174*0Sstevel@tonic-gate 		/*
2175*0Sstevel@tonic-gate 		 * Fatal errors: clear SS_ISCONNECTING in case it was set,
2176*0Sstevel@tonic-gate 		 * and invalidate local-address cache
2177*0Sstevel@tonic-gate 		 */
2178*0Sstevel@tonic-gate 		so->so_state &= ~(SS_ISCONNECTING | SS_LADDR_VALID);
2179*0Sstevel@tonic-gate 		/* A discon_ind might have already unbound us */
2180*0Sstevel@tonic-gate 		if ((flags & _SOCONNECT_DID_BIND) &&
2181*0Sstevel@tonic-gate 		    (so->so_state & SS_ISBOUND)) {
2182*0Sstevel@tonic-gate 			int err;
2183*0Sstevel@tonic-gate 
2184*0Sstevel@tonic-gate 			err = sotpi_unbind(so, 0);
2185*0Sstevel@tonic-gate 			/* LINTED - statement has no conseq */
2186*0Sstevel@tonic-gate 			if (err) {
2187*0Sstevel@tonic-gate 				eprintsoline(so, err);
2188*0Sstevel@tonic-gate 			}
2189*0Sstevel@tonic-gate 		}
2190*0Sstevel@tonic-gate 		break;
2191*0Sstevel@tonic-gate 	}
2192*0Sstevel@tonic-gate 	if (need_unlock)
2193*0Sstevel@tonic-gate 		so_unlock_single(so, SOLOCKED);
2194*0Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
2195*0Sstevel@tonic-gate 	return (error);
2196*0Sstevel@tonic-gate 
2197*0Sstevel@tonic-gate so_bad:	error = sogeterr(so);
2198*0Sstevel@tonic-gate bad:	eprintsoline(so, error);
2199*0Sstevel@tonic-gate 	goto done;
2200*0Sstevel@tonic-gate }
2201*0Sstevel@tonic-gate 
2202*0Sstevel@tonic-gate int
2203*0Sstevel@tonic-gate sotpi_shutdown(struct sonode *so, int how)
2204*0Sstevel@tonic-gate {
2205*0Sstevel@tonic-gate 	struct T_ordrel_req	ordrel_req;
2206*0Sstevel@tonic-gate 	mblk_t			*mp;
2207*0Sstevel@tonic-gate 	uint_t			old_state, state_change;
2208*0Sstevel@tonic-gate 	int			error = 0;
2209*0Sstevel@tonic-gate 
2210*0Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n",
2211*0Sstevel@tonic-gate 		so, how, pr_state(so->so_state, so->so_mode)));
2212*0Sstevel@tonic-gate 
2213*0Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
2214*0Sstevel@tonic-gate 	so_lock_single(so);	/* Set SOLOCKED */
2215*0Sstevel@tonic-gate 
2216*0Sstevel@tonic-gate 	/*
2217*0Sstevel@tonic-gate 	 * SunOS 4.X has no check for datagram sockets.
2218*0Sstevel@tonic-gate 	 * 5.X checks that it is connected (ENOTCONN)
2219*0Sstevel@tonic-gate 	 * X/Open requires that we check the connected state.
2220*0Sstevel@tonic-gate 	 */
2221*0Sstevel@tonic-gate 	if (!(so->so_state & SS_ISCONNECTED)) {
2222*0Sstevel@tonic-gate 		if (!xnet_skip_checks) {
2223*0Sstevel@tonic-gate 			error = ENOTCONN;
2224*0Sstevel@tonic-gate 			if (xnet_check_print) {
2225*0Sstevel@tonic-gate 				printf("sockfs: X/Open shutdown check "
2226*0Sstevel@tonic-gate 					"caused ENOTCONN\n");
2227*0Sstevel@tonic-gate 			}
2228*0Sstevel@tonic-gate 		}
2229*0Sstevel@tonic-gate 		goto done;
2230*0Sstevel@tonic-gate 	}
2231*0Sstevel@tonic-gate 	/*
2232*0Sstevel@tonic-gate 	 * Record the current state and then perform any state changes.
2233*0Sstevel@tonic-gate 	 * Then use the difference between the old and new states to
2234*0Sstevel@tonic-gate 	 * determine which messages need to be sent.
2235*0Sstevel@tonic-gate 	 * This prevents e.g. duplicate T_ORDREL_REQ when there are
2236*0Sstevel@tonic-gate 	 * duplicate calls to shutdown().
2237*0Sstevel@tonic-gate 	 */
2238*0Sstevel@tonic-gate 	old_state = so->so_state;
2239*0Sstevel@tonic-gate 
2240*0Sstevel@tonic-gate 	switch (how) {
2241*0Sstevel@tonic-gate 	case 0:
2242*0Sstevel@tonic-gate 		socantrcvmore(so);
2243*0Sstevel@tonic-gate 		break;
2244*0Sstevel@tonic-gate 	case 1:
2245*0Sstevel@tonic-gate 		socantsendmore(so);
2246*0Sstevel@tonic-gate 		break;
2247*0Sstevel@tonic-gate 	case 2:
2248*0Sstevel@tonic-gate 		socantsendmore(so);
2249*0Sstevel@tonic-gate 		socantrcvmore(so);
2250*0Sstevel@tonic-gate 		break;
2251*0Sstevel@tonic-gate 	default:
2252*0Sstevel@tonic-gate 		error = EINVAL;
2253*0Sstevel@tonic-gate 		goto done;
2254*0Sstevel@tonic-gate 	}
2255*0Sstevel@tonic-gate 
2256*0Sstevel@tonic-gate 	/*
2257*0Sstevel@tonic-gate 	 * Assumes that the SS_CANT* flags are never cleared in the above code.
2258*0Sstevel@tonic-gate 	 */
2259*0Sstevel@tonic-gate 	state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) -
2260*0Sstevel@tonic-gate 		(old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE));
2261*0Sstevel@tonic-gate 	ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0);
2262*0Sstevel@tonic-gate 
2263*0Sstevel@tonic-gate 	switch (state_change) {
2264*0Sstevel@tonic-gate 	case 0:
2265*0Sstevel@tonic-gate 		dprintso(so, 1,
2266*0Sstevel@tonic-gate 		    ("sotpi_shutdown: nothing to send in state 0x%x\n",
2267*0Sstevel@tonic-gate 		    so->so_state));
2268*0Sstevel@tonic-gate 		goto done;
2269*0Sstevel@tonic-gate 
2270*0Sstevel@tonic-gate 	case SS_CANTRCVMORE:
2271*0Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
2272*0Sstevel@tonic-gate 		strseteof(SOTOV(so), 1);
2273*0Sstevel@tonic-gate 		/*
2274*0Sstevel@tonic-gate 		 * strseteof takes care of read side wakeups,
2275*0Sstevel@tonic-gate 		 * pollwakeups, and signals.
2276*0Sstevel@tonic-gate 		 */
2277*0Sstevel@tonic-gate 		/*
2278*0Sstevel@tonic-gate 		 * Get the read lock before flushing data to avoid problems
2279*0Sstevel@tonic-gate 		 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg.
2280*0Sstevel@tonic-gate 		 */
2281*0Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
2282*0Sstevel@tonic-gate 		(void) so_lock_read(so, 0);	/* Set SOREADLOCKED */
2283*0Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
2284*0Sstevel@tonic-gate 
2285*0Sstevel@tonic-gate 		/* Flush read side queue */
2286*0Sstevel@tonic-gate 		strflushrq(SOTOV(so), FLUSHALL);
2287*0Sstevel@tonic-gate 
2288*0Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
2289*0Sstevel@tonic-gate 		so_unlock_read(so);		/* Clear SOREADLOCKED */
2290*0Sstevel@tonic-gate 		break;
2291*0Sstevel@tonic-gate 
2292*0Sstevel@tonic-gate 	case SS_CANTSENDMORE:
2293*0Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
2294*0Sstevel@tonic-gate 		strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2295*0Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
2296*0Sstevel@tonic-gate 		break;
2297*0Sstevel@tonic-gate 
2298*0Sstevel@tonic-gate 	case SS_CANTSENDMORE|SS_CANTRCVMORE:
2299*0Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
2300*0Sstevel@tonic-gate 		strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2301*0Sstevel@tonic-gate 		strseteof(SOTOV(so), 1);
2302*0Sstevel@tonic-gate 		/*
2303*0Sstevel@tonic-gate 		 * strseteof takes care of read side wakeups,
2304*0Sstevel@tonic-gate 		 * pollwakeups, and signals.
2305*0Sstevel@tonic-gate 		 */
2306*0Sstevel@tonic-gate 		/*
2307*0Sstevel@tonic-gate 		 * Get the read lock before flushing data to avoid problems
2308*0Sstevel@tonic-gate 		 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg.
2309*0Sstevel@tonic-gate 		 */
2310*0Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
2311*0Sstevel@tonic-gate 		(void) so_lock_read(so, 0);	/* Set SOREADLOCKED */
2312*0Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
2313*0Sstevel@tonic-gate 
2314*0Sstevel@tonic-gate 		/* Flush read side queue */
2315*0Sstevel@tonic-gate 		strflushrq(SOTOV(so), FLUSHALL);
2316*0Sstevel@tonic-gate 
2317*0Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
2318*0Sstevel@tonic-gate 		so_unlock_read(so);		/* Clear SOREADLOCKED */
2319*0Sstevel@tonic-gate 		break;
2320*0Sstevel@tonic-gate 	}
2321*0Sstevel@tonic-gate 
2322*0Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
2323*0Sstevel@tonic-gate 
2324*0Sstevel@tonic-gate 	/*
2325*0Sstevel@tonic-gate 	 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them
2326*0Sstevel@tonic-gate 	 * was set due to this call and the new state has both of them set:
2327*0Sstevel@tonic-gate 	 *	Send the AF_UNIX close indication
2328*0Sstevel@tonic-gate 	 *	For T_COTS send a discon_ind
2329*0Sstevel@tonic-gate 	 *
2330*0Sstevel@tonic-gate 	 * If cantsend was set due to this call:
2331*0Sstevel@tonic-gate 	 *	For T_COTSORD send an ordrel_ind
2332*0Sstevel@tonic-gate 	 *
2333*0Sstevel@tonic-gate 	 * Note that for T_CLTS there is no message sent here.
2334*0Sstevel@tonic-gate 	 */
2335*0Sstevel@tonic-gate 	if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) ==
2336*0Sstevel@tonic-gate 	    (SS_CANTRCVMORE|SS_CANTSENDMORE)) {
2337*0Sstevel@tonic-gate 		/*
2338*0Sstevel@tonic-gate 		 * For SunOS 4.X compatibility we tell the other end
2339*0Sstevel@tonic-gate 		 * that we are unable to receive at this point.
2340*0Sstevel@tonic-gate 		 */
2341*0Sstevel@tonic-gate 		if (so->so_family == AF_UNIX && so->so_serv_type != T_CLTS)
2342*0Sstevel@tonic-gate 			so_unix_close(so);
2343*0Sstevel@tonic-gate 
2344*0Sstevel@tonic-gate 		if (so->so_serv_type == T_COTS)
2345*0Sstevel@tonic-gate 			error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD);
2346*0Sstevel@tonic-gate 	}
2347*0Sstevel@tonic-gate 	if ((state_change & SS_CANTSENDMORE) &&
2348*0Sstevel@tonic-gate 	    (so->so_serv_type == T_COTS_ORD)) {
2349*0Sstevel@tonic-gate 		/* Send an orderly release */
2350*0Sstevel@tonic-gate 		ordrel_req.PRIM_type = T_ORDREL_REQ;
2351*0Sstevel@tonic-gate 
2352*0Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
2353*0Sstevel@tonic-gate 		mp = soallocproto1(&ordrel_req, sizeof (ordrel_req),
2354*0Sstevel@tonic-gate 		    0, _ALLOC_SLEEP);
2355*0Sstevel@tonic-gate 		/*
2356*0Sstevel@tonic-gate 		 * Send down the T_ORDREL_REQ even if there is flow control.
2357*0Sstevel@tonic-gate 		 * This prevents shutdown from blocking.
2358*0Sstevel@tonic-gate 		 * Note that there is no T_OK_ACK for ordrel_req.
2359*0Sstevel@tonic-gate 		 */
2360*0Sstevel@tonic-gate 		error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
2361*0Sstevel@tonic-gate 			MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
2362*0Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
2363*0Sstevel@tonic-gate 		if (error) {
2364*0Sstevel@tonic-gate 			eprintsoline(so, error);
2365*0Sstevel@tonic-gate 			goto done;
2366*0Sstevel@tonic-gate 		}
2367*0Sstevel@tonic-gate 	}
2368*0Sstevel@tonic-gate 
2369*0Sstevel@tonic-gate done:
2370*0Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
2371*0Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
2372*0Sstevel@tonic-gate 	return (error);
2373*0Sstevel@tonic-gate }
2374*0Sstevel@tonic-gate 
2375*0Sstevel@tonic-gate /*
2376*0Sstevel@tonic-gate  * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send
2377*0Sstevel@tonic-gate  * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer
2378*0Sstevel@tonic-gate  * that we have closed.
2379*0Sstevel@tonic-gate  * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length
2380*0Sstevel@tonic-gate  * T_UNITDATA_REQ containing the same option.
2381*0Sstevel@tonic-gate  *
2382*0Sstevel@tonic-gate  * For SOCK_DGRAM half-connections (somebody connected to this end
2383*0Sstevel@tonic-gate  * but this end is not connected) we don't know where to send any
2384*0Sstevel@tonic-gate  * SO_UNIX_CLOSE.
2385*0Sstevel@tonic-gate  *
2386*0Sstevel@tonic-gate  * We have to ignore stream head errors just in case there has been
2387*0Sstevel@tonic-gate  * a shutdown(output).
2388*0Sstevel@tonic-gate  * Ignore any flow control to try to get the message more quickly to the peer.
2389*0Sstevel@tonic-gate  * While locally ignoring flow control solves the problem when there
2390*0Sstevel@tonic-gate  * is only the loopback transport on the stream it would not provide
2391*0Sstevel@tonic-gate  * the correct AF_UNIX socket semantics when one or more modules have
2392*0Sstevel@tonic-gate  * been pushed.
2393*0Sstevel@tonic-gate  */
2394*0Sstevel@tonic-gate void
2395*0Sstevel@tonic-gate so_unix_close(struct sonode *so)
2396*0Sstevel@tonic-gate {
2397*0Sstevel@tonic-gate 	int		error;
2398*0Sstevel@tonic-gate 	struct T_opthdr	toh;
2399*0Sstevel@tonic-gate 	mblk_t		*mp;
2400*0Sstevel@tonic-gate 
2401*0Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
2402*0Sstevel@tonic-gate 
2403*0Sstevel@tonic-gate 	ASSERT(so->so_family == AF_UNIX);
2404*0Sstevel@tonic-gate 
2405*0Sstevel@tonic-gate 	if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) !=
2406*0Sstevel@tonic-gate 	    (SS_ISCONNECTED|SS_ISBOUND))
2407*0Sstevel@tonic-gate 		return;
2408*0Sstevel@tonic-gate 
2409*0Sstevel@tonic-gate 	dprintso(so, 1, ("so_unix_close(%p) %s\n",
2410*0Sstevel@tonic-gate 		so, pr_state(so->so_state, so->so_mode)));
2411*0Sstevel@tonic-gate 
2412*0Sstevel@tonic-gate 	toh.level = SOL_SOCKET;
2413*0Sstevel@tonic-gate 	toh.name = SO_UNIX_CLOSE;
2414*0Sstevel@tonic-gate 
2415*0Sstevel@tonic-gate 	/* zero length + header */
2416*0Sstevel@tonic-gate 	toh.len = (t_uscalar_t)sizeof (struct T_opthdr);
2417*0Sstevel@tonic-gate 	toh.status = 0;
2418*0Sstevel@tonic-gate 
2419*0Sstevel@tonic-gate 	if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) {
2420*0Sstevel@tonic-gate 		struct T_optdata_req tdr;
2421*0Sstevel@tonic-gate 
2422*0Sstevel@tonic-gate 		tdr.PRIM_type = T_OPTDATA_REQ;
2423*0Sstevel@tonic-gate 		tdr.DATA_flag = 0;
2424*0Sstevel@tonic-gate 
2425*0Sstevel@tonic-gate 		tdr.OPT_length = (t_scalar_t)sizeof (toh);
2426*0Sstevel@tonic-gate 		tdr.OPT_offset = (t_scalar_t)sizeof (tdr);
2427*0Sstevel@tonic-gate 
2428*0Sstevel@tonic-gate 		/* NOTE: holding so_lock while sleeping */
2429*0Sstevel@tonic-gate 		mp = soallocproto2(&tdr, sizeof (tdr),
2430*0Sstevel@tonic-gate 		    &toh, sizeof (toh), 0, _ALLOC_SLEEP);
2431*0Sstevel@tonic-gate 	} else {
2432*0Sstevel@tonic-gate 		struct T_unitdata_req	tudr;
2433*0Sstevel@tonic-gate 		void			*addr;
2434*0Sstevel@tonic-gate 		socklen_t		addrlen;
2435*0Sstevel@tonic-gate 		void			*src;
2436*0Sstevel@tonic-gate 		socklen_t		srclen;
2437*0Sstevel@tonic-gate 		struct T_opthdr		toh2;
2438*0Sstevel@tonic-gate 		t_scalar_t		size;
2439*0Sstevel@tonic-gate 
2440*0Sstevel@tonic-gate 		/* Connected DGRAM socket */
2441*0Sstevel@tonic-gate 
2442*0Sstevel@tonic-gate 		/*
2443*0Sstevel@tonic-gate 		 * For AF_UNIX the destination address is translated to
2444*0Sstevel@tonic-gate 		 * an internal name and the source address is passed as
2445*0Sstevel@tonic-gate 		 * an option.
2446*0Sstevel@tonic-gate 		 */
2447*0Sstevel@tonic-gate 		/*
2448*0Sstevel@tonic-gate 		 * Length and family checks.
2449*0Sstevel@tonic-gate 		 */
2450*0Sstevel@tonic-gate 		error = so_addr_verify(so, so->so_faddr_sa,
2451*0Sstevel@tonic-gate 					(t_uscalar_t)so->so_faddr_len);
2452*0Sstevel@tonic-gate 		if (error) {
2453*0Sstevel@tonic-gate 			eprintsoline(so, error);
2454*0Sstevel@tonic-gate 			return;
2455*0Sstevel@tonic-gate 		}
2456*0Sstevel@tonic-gate 		if (so->so_state & SS_FADDR_NOXLATE) {
2457*0Sstevel@tonic-gate 			/*
2458*0Sstevel@tonic-gate 			 * Already have a transport internal address. Do not
2459*0Sstevel@tonic-gate 			 * pass any (transport internal) source address.
2460*0Sstevel@tonic-gate 			 */
2461*0Sstevel@tonic-gate 			addr = so->so_faddr_sa;
2462*0Sstevel@tonic-gate 			addrlen = (t_uscalar_t)so->so_faddr_len;
2463*0Sstevel@tonic-gate 			src = NULL;
2464*0Sstevel@tonic-gate 			srclen = 0;
2465*0Sstevel@tonic-gate 		} else {
2466*0Sstevel@tonic-gate 			/*
2467*0Sstevel@tonic-gate 			 * Pass the sockaddr_un source address as an option
2468*0Sstevel@tonic-gate 			 * and translate the remote address.
2469*0Sstevel@tonic-gate 			 * Holding so_lock thus so_laddr_sa can not change.
2470*0Sstevel@tonic-gate 			 */
2471*0Sstevel@tonic-gate 			src = so->so_laddr_sa;
2472*0Sstevel@tonic-gate 			srclen = (socklen_t)so->so_laddr_len;
2473*0Sstevel@tonic-gate 			dprintso(so, 1,
2474*0Sstevel@tonic-gate 				("so_ux_close: srclen %d, src %p\n",
2475*0Sstevel@tonic-gate 				srclen, src));
2476*0Sstevel@tonic-gate 			error = so_ux_addr_xlate(so,
2477*0Sstevel@tonic-gate 				so->so_faddr_sa,
2478*0Sstevel@tonic-gate 				(socklen_t)so->so_faddr_len, 0,
2479*0Sstevel@tonic-gate 				&addr, &addrlen);
2480*0Sstevel@tonic-gate 			if (error) {
2481*0Sstevel@tonic-gate 				eprintsoline(so, error);
2482*0Sstevel@tonic-gate 				return;
2483*0Sstevel@tonic-gate 			}
2484*0Sstevel@tonic-gate 		}
2485*0Sstevel@tonic-gate 		tudr.PRIM_type = T_UNITDATA_REQ;
2486*0Sstevel@tonic-gate 		tudr.DEST_length = addrlen;
2487*0Sstevel@tonic-gate 		tudr.DEST_offset = (t_scalar_t)sizeof (tudr);
2488*0Sstevel@tonic-gate 		if (srclen == 0) {
2489*0Sstevel@tonic-gate 			tudr.OPT_length = (t_scalar_t)sizeof (toh);
2490*0Sstevel@tonic-gate 			tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) +
2491*0Sstevel@tonic-gate 				_TPI_ALIGN_TOPT(addrlen));
2492*0Sstevel@tonic-gate 
2493*0Sstevel@tonic-gate 			size = tudr.OPT_offset + tudr.OPT_length;
2494*0Sstevel@tonic-gate 			/* NOTE: holding so_lock while sleeping */
2495*0Sstevel@tonic-gate 			mp = soallocproto2(&tudr, sizeof (tudr),
2496*0Sstevel@tonic-gate 			    addr, addrlen, size, _ALLOC_SLEEP);
2497*0Sstevel@tonic-gate 			mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen);
2498*0Sstevel@tonic-gate 			soappendmsg(mp, &toh, sizeof (toh));
2499*0Sstevel@tonic-gate 		} else {
2500*0Sstevel@tonic-gate 			/*
2501*0Sstevel@tonic-gate 			 * There is a AF_UNIX sockaddr_un to include as a
2502*0Sstevel@tonic-gate 			 * source address option.
2503*0Sstevel@tonic-gate 			 */
2504*0Sstevel@tonic-gate 			tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) +
2505*0Sstevel@tonic-gate 			    _TPI_ALIGN_TOPT(srclen));
2506*0Sstevel@tonic-gate 			tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) +
2507*0Sstevel@tonic-gate 			    _TPI_ALIGN_TOPT(addrlen));
2508*0Sstevel@tonic-gate 
2509*0Sstevel@tonic-gate 			toh2.level = SOL_SOCKET;
2510*0Sstevel@tonic-gate 			toh2.name = SO_SRCADDR;
2511*0Sstevel@tonic-gate 			toh2.len = (t_uscalar_t)(srclen +
2512*0Sstevel@tonic-gate 					sizeof (struct T_opthdr));
2513*0Sstevel@tonic-gate 			toh2.status = 0;
2514*0Sstevel@tonic-gate 
2515*0Sstevel@tonic-gate 			size = tudr.OPT_offset + tudr.OPT_length;
2516*0Sstevel@tonic-gate 
2517*0Sstevel@tonic-gate 			/* NOTE: holding so_lock while sleeping */
2518*0Sstevel@tonic-gate 			mp = soallocproto2(&tudr, sizeof (tudr),
2519*0Sstevel@tonic-gate 			    addr, addrlen, size, _ALLOC_SLEEP);
2520*0Sstevel@tonic-gate 			mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen;
2521*0Sstevel@tonic-gate 			soappendmsg(mp, &toh, sizeof (toh));
2522*0Sstevel@tonic-gate 			soappendmsg(mp, &toh2, sizeof (toh2));
2523*0Sstevel@tonic-gate 			soappendmsg(mp, src, srclen);
2524*0Sstevel@tonic-gate 			mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen;
2525*0Sstevel@tonic-gate 		}
2526*0Sstevel@tonic-gate 		ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
2527*0Sstevel@tonic-gate 	}
2528*0Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
2529*0Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
2530*0Sstevel@tonic-gate 			MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
2531*0Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
2532*0Sstevel@tonic-gate }
2533*0Sstevel@tonic-gate 
2534*0Sstevel@tonic-gate /*
2535*0Sstevel@tonic-gate  * Handle recv* calls that set MSG_OOB or MSG_OOB together with MSG_PEEK.
2536*0Sstevel@tonic-gate  */
2537*0Sstevel@tonic-gate int
2538*0Sstevel@tonic-gate sorecvoob(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, int flags)
2539*0Sstevel@tonic-gate {
2540*0Sstevel@tonic-gate 	mblk_t		*mp, *nmp;
2541*0Sstevel@tonic-gate 	int		error;
2542*0Sstevel@tonic-gate 
2543*0Sstevel@tonic-gate 	dprintso(so, 1, ("sorecvoob(%p, %p, 0x%x)\n", so, msg, flags));
2544*0Sstevel@tonic-gate 
2545*0Sstevel@tonic-gate 	/*
2546*0Sstevel@tonic-gate 	 * There is never any oob data with addresses or control since
2547*0Sstevel@tonic-gate 	 * the T_EXDATA_IND does not carry any options.
2548*0Sstevel@tonic-gate 	 */
2549*0Sstevel@tonic-gate 	msg->msg_controllen = 0;
2550*0Sstevel@tonic-gate 	msg->msg_namelen = 0;
2551*0Sstevel@tonic-gate 
2552*0Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
2553*0Sstevel@tonic-gate 	ASSERT(so_verify_oobstate(so));
2554*0Sstevel@tonic-gate 	if ((so->so_options & SO_OOBINLINE) ||
2555*0Sstevel@tonic-gate 	    (so->so_state & (SS_OOBPEND|SS_HADOOBDATA)) != SS_OOBPEND) {
2556*0Sstevel@tonic-gate 		dprintso(so, 1, ("sorecvoob: inline or data consumed\n"));
2557*0Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
2558*0Sstevel@tonic-gate 		return (EINVAL);
2559*0Sstevel@tonic-gate 	}
2560*0Sstevel@tonic-gate 	if (!(so->so_state & SS_HAVEOOBDATA)) {
2561*0Sstevel@tonic-gate 		dprintso(so, 1, ("sorecvoob: no data yet\n"));
2562*0Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
2563*0Sstevel@tonic-gate 		return (EWOULDBLOCK);
2564*0Sstevel@tonic-gate 	}
2565*0Sstevel@tonic-gate 	ASSERT(so->so_oobmsg != NULL);
2566*0Sstevel@tonic-gate 	mp = so->so_oobmsg;
2567*0Sstevel@tonic-gate 	if (flags & MSG_PEEK) {
2568*0Sstevel@tonic-gate 		/*
2569*0Sstevel@tonic-gate 		 * Since recv* can not return ENOBUFS we can not use dupmsg.
2570*0Sstevel@tonic-gate 		 * Instead we revert to the consolidation private
2571*0Sstevel@tonic-gate 		 * allocb_wait plus bcopy.
2572*0Sstevel@tonic-gate 		 */
2573*0Sstevel@tonic-gate 		mblk_t *mp1;
2574*0Sstevel@tonic-gate 
2575*0Sstevel@tonic-gate 		mp1 = allocb_wait(msgdsize(mp), BPRI_MED, STR_NOSIG, NULL);
2576*0Sstevel@tonic-gate 		ASSERT(mp1);
2577*0Sstevel@tonic-gate 
2578*0Sstevel@tonic-gate 		while (mp != NULL) {
2579*0Sstevel@tonic-gate 			ssize_t size;
2580*0Sstevel@tonic-gate 
2581*0Sstevel@tonic-gate 			size = MBLKL(mp);
2582*0Sstevel@tonic-gate 			bcopy(mp->b_rptr, mp1->b_wptr, size);
2583*0Sstevel@tonic-gate 			mp1->b_wptr += size;
2584*0Sstevel@tonic-gate 			ASSERT(mp1->b_wptr <= mp1->b_datap->db_lim);
2585*0Sstevel@tonic-gate 			mp = mp->b_cont;
2586*0Sstevel@tonic-gate 		}
2587*0Sstevel@tonic-gate 		mp = mp1;
2588*0Sstevel@tonic-gate 	} else {
2589*0Sstevel@tonic-gate 		/*
2590*0Sstevel@tonic-gate 		 * Update the state indicating that the data has been consumed.
2591*0Sstevel@tonic-gate 		 * Keep SS_OOBPEND set until data is consumed past the mark.
2592*0Sstevel@tonic-gate 		 */
2593*0Sstevel@tonic-gate 		so->so_oobmsg = NULL;
2594*0Sstevel@tonic-gate 		so->so_state ^= SS_HAVEOOBDATA|SS_HADOOBDATA;
2595*0Sstevel@tonic-gate 	}
2596*0Sstevel@tonic-gate 	dprintso(so, 1,
2597*0Sstevel@tonic-gate 		("after recvoob(%p): counts %d/%d state %s\n",
2598*0Sstevel@tonic-gate 		so, so->so_oobsigcnt,
2599*0Sstevel@tonic-gate 		so->so_oobcnt, pr_state(so->so_state, so->so_mode)));
2600*0Sstevel@tonic-gate 	ASSERT(so_verify_oobstate(so));
2601*0Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
2602*0Sstevel@tonic-gate 
2603*0Sstevel@tonic-gate 	error = 0;
2604*0Sstevel@tonic-gate 	nmp = mp;
2605*0Sstevel@tonic-gate 	while (nmp != NULL && uiop->uio_resid > 0) {
2606*0Sstevel@tonic-gate 		ssize_t n = MBLKL(nmp);
2607*0Sstevel@tonic-gate 
2608*0Sstevel@tonic-gate 		n = MIN(n, uiop->uio_resid);
2609*0Sstevel@tonic-gate 		if (n > 0)
2610*0Sstevel@tonic-gate 			error = uiomove(nmp->b_rptr, n,
2611*0Sstevel@tonic-gate 					UIO_READ, uiop);
2612*0Sstevel@tonic-gate 		if (error)
2613*0Sstevel@tonic-gate 			break;
2614*0Sstevel@tonic-gate 		nmp = nmp->b_cont;
2615*0Sstevel@tonic-gate 	}
2616*0Sstevel@tonic-gate 	freemsg(mp);
2617*0Sstevel@tonic-gate 	return (error);
2618*0Sstevel@tonic-gate }
2619*0Sstevel@tonic-gate 
2620*0Sstevel@tonic-gate /*
2621*0Sstevel@tonic-gate  * Called by sotpi_recvmsg when reading a non-zero amount of data.
2622*0Sstevel@tonic-gate  * In addition, the caller typically verifies that there is some
2623*0Sstevel@tonic-gate  * potential state to clear by checking
2624*0Sstevel@tonic-gate  *	if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK))
2625*0Sstevel@tonic-gate  * before calling this routine.
2626*0Sstevel@tonic-gate  * Note that such a check can be made without holding so_lock since
2627*0Sstevel@tonic-gate  * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg
2628*0Sstevel@tonic-gate  * decrements so_oobsigcnt.
2629*0Sstevel@tonic-gate  *
2630*0Sstevel@tonic-gate  * When data is read *after* the point that all pending
2631*0Sstevel@tonic-gate  * oob data has been consumed the oob indication is cleared.
2632*0Sstevel@tonic-gate  *
2633*0Sstevel@tonic-gate  * This logic keeps select/poll returning POLLRDBAND and
2634*0Sstevel@tonic-gate  * SIOCATMARK returning true until we have read past
2635*0Sstevel@tonic-gate  * the mark.
2636*0Sstevel@tonic-gate  */
2637*0Sstevel@tonic-gate static void
2638*0Sstevel@tonic-gate sorecv_update_oobstate(struct sonode *so)
2639*0Sstevel@tonic-gate {
2640*0Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
2641*0Sstevel@tonic-gate 	ASSERT(so_verify_oobstate(so));
2642*0Sstevel@tonic-gate 	dprintso(so, 1,
2643*0Sstevel@tonic-gate 		("sorecv_update_oobstate: counts %d/%d state %s\n",
2644*0Sstevel@tonic-gate 		so->so_oobsigcnt,
2645*0Sstevel@tonic-gate 		so->so_oobcnt, pr_state(so->so_state, so->so_mode)));
2646*0Sstevel@tonic-gate 	if (so->so_oobsigcnt == 0) {
2647*0Sstevel@tonic-gate 		/* No more pending oob indications */
2648*0Sstevel@tonic-gate 		so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK);
2649*0Sstevel@tonic-gate 		freemsg(so->so_oobmsg);
2650*0Sstevel@tonic-gate 		so->so_oobmsg = NULL;
2651*0Sstevel@tonic-gate 	}
2652*0Sstevel@tonic-gate 	ASSERT(so_verify_oobstate(so));
2653*0Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
2654*0Sstevel@tonic-gate }
2655*0Sstevel@tonic-gate 
2656*0Sstevel@tonic-gate /*
2657*0Sstevel@tonic-gate  * Handle recv* calls for an so which has NL7C saved recv mblk_t(s).
2658*0Sstevel@tonic-gate  */
2659*0Sstevel@tonic-gate static int
2660*0Sstevel@tonic-gate nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp)
2661*0Sstevel@tonic-gate {
2662*0Sstevel@tonic-gate 	int	error = 0;
2663*0Sstevel@tonic-gate 	mblk_t *tmp = NULL;
2664*0Sstevel@tonic-gate 	mblk_t *pmp = NULL;
2665*0Sstevel@tonic-gate 	mblk_t *nmp = so->so_nl7c_rcv_mp;
2666*0Sstevel@tonic-gate 
2667*0Sstevel@tonic-gate 	ASSERT(nmp != NULL);
2668*0Sstevel@tonic-gate 
2669*0Sstevel@tonic-gate 	while (nmp != NULL && uiop->uio_resid > 0) {
2670*0Sstevel@tonic-gate 		ssize_t n;
2671*0Sstevel@tonic-gate 
2672*0Sstevel@tonic-gate 		if (DB_TYPE(nmp) == M_DATA) {
2673*0Sstevel@tonic-gate 			/*
2674*0Sstevel@tonic-gate 			 * We have some data, uiomove up to resid bytes.
2675*0Sstevel@tonic-gate 			 */
2676*0Sstevel@tonic-gate 			n = MIN(MBLKL(nmp), uiop->uio_resid);
2677*0Sstevel@tonic-gate 			if (n > 0)
2678*0Sstevel@tonic-gate 				error = uiomove(nmp->b_rptr, n, UIO_READ, uiop);
2679*0Sstevel@tonic-gate 			if (error)
2680*0Sstevel@tonic-gate 				break;
2681*0Sstevel@tonic-gate 			nmp->b_rptr += n;
2682*0Sstevel@tonic-gate 			if (nmp->b_rptr == nmp->b_wptr) {
2683*0Sstevel@tonic-gate 				pmp = nmp;
2684*0Sstevel@tonic-gate 				nmp = nmp->b_cont;
2685*0Sstevel@tonic-gate 			}
2686*0Sstevel@tonic-gate 		} else {
2687*0Sstevel@tonic-gate 			/*
2688*0Sstevel@tonic-gate 			 * We only handle data, save for caller to handle.
2689*0Sstevel@tonic-gate 			 */
2690*0Sstevel@tonic-gate 			if (pmp != NULL) {
2691*0Sstevel@tonic-gate 				pmp->b_cont = nmp->b_cont;
2692*0Sstevel@tonic-gate 			}
2693*0Sstevel@tonic-gate 			nmp->b_cont = NULL;
2694*0Sstevel@tonic-gate 			if (*rmp == NULL) {
2695*0Sstevel@tonic-gate 				*rmp = nmp;
2696*0Sstevel@tonic-gate 			} else {
2697*0Sstevel@tonic-gate 				tmp->b_next = nmp;
2698*0Sstevel@tonic-gate 			}
2699*0Sstevel@tonic-gate 			nmp = nmp->b_cont;
2700*0Sstevel@tonic-gate 			tmp = nmp;
2701*0Sstevel@tonic-gate 		}
2702*0Sstevel@tonic-gate 	}
2703*0Sstevel@tonic-gate 	if (pmp != NULL) {
2704*0Sstevel@tonic-gate 		/* Free any mblk_t(s) which we have consumed */
2705*0Sstevel@tonic-gate 		pmp->b_cont = NULL;
2706*0Sstevel@tonic-gate 		freemsg(so->so_nl7c_rcv_mp);
2707*0Sstevel@tonic-gate 	}
2708*0Sstevel@tonic-gate 	if ((so->so_nl7c_rcv_mp = nmp) == NULL) {
2709*0Sstevel@tonic-gate 		/* Last mblk_t so return the saved rval from kstrgetmsg() */
2710*0Sstevel@tonic-gate 		rp->r_vals = so->so_nl7c_rcv_rval;
2711*0Sstevel@tonic-gate 		so->so_nl7c_rcv_rval = 0;
2712*0Sstevel@tonic-gate 	} else {
2713*0Sstevel@tonic-gate 		/* More mblk_t(s) to process so no rval to return */
2714*0Sstevel@tonic-gate 		rp->r_vals = 0;
2715*0Sstevel@tonic-gate 	}
2716*0Sstevel@tonic-gate 	return (error);
2717*0Sstevel@tonic-gate }
2718*0Sstevel@tonic-gate 
2719*0Sstevel@tonic-gate /*
2720*0Sstevel@tonic-gate  * Receive the next message on the queue.
2721*0Sstevel@tonic-gate  * If msg_controllen is non-zero when called the caller is interested in
2722*0Sstevel@tonic-gate  * any received control info (options).
2723*0Sstevel@tonic-gate  * If msg_namelen is non-zero when called the caller is interested in
2724*0Sstevel@tonic-gate  * any received source address.
2725*0Sstevel@tonic-gate  * The routine returns with msg_control and msg_name pointing to
2726*0Sstevel@tonic-gate  * kmem_alloc'ed memory which the caller has to free.
2727*0Sstevel@tonic-gate  */
2728*0Sstevel@tonic-gate int
2729*0Sstevel@tonic-gate sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
2730*0Sstevel@tonic-gate {
2731*0Sstevel@tonic-gate 	union T_primitives	*tpr;
2732*0Sstevel@tonic-gate 	mblk_t			*mp;
2733*0Sstevel@tonic-gate 	uchar_t			pri;
2734*0Sstevel@tonic-gate 	int			pflag, opflag;
2735*0Sstevel@tonic-gate 	void			*control;
2736*0Sstevel@tonic-gate 	t_uscalar_t		controllen;
2737*0Sstevel@tonic-gate 	t_uscalar_t		namelen;
2738*0Sstevel@tonic-gate 	int			so_state = so->so_state; /* Snapshot */
2739*0Sstevel@tonic-gate 	ssize_t			saved_resid;
2740*0Sstevel@tonic-gate 	int			error;
2741*0Sstevel@tonic-gate 	rval_t			rval;
2742*0Sstevel@tonic-gate 	int			flags;
2743*0Sstevel@tonic-gate 	clock_t			timout;
2744*0Sstevel@tonic-gate 	int			first;
2745*0Sstevel@tonic-gate 
2746*0Sstevel@tonic-gate 	flags = msg->msg_flags;
2747*0Sstevel@tonic-gate 	msg->msg_flags = 0;
2748*0Sstevel@tonic-gate 
2749*0Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n",
2750*0Sstevel@tonic-gate 		so, msg, flags,
2751*0Sstevel@tonic-gate 		pr_state(so->so_state, so->so_mode), so->so_error));
2752*0Sstevel@tonic-gate 
2753*0Sstevel@tonic-gate 	/*
2754*0Sstevel@tonic-gate 	 * If we are not connected because we have never been connected
2755*0Sstevel@tonic-gate 	 * we return ENOTCONN. If we have been connected (but are no longer
2756*0Sstevel@tonic-gate 	 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return
2757*0Sstevel@tonic-gate 	 * the EOF.
2758*0Sstevel@tonic-gate 	 *
2759*0Sstevel@tonic-gate 	 * An alternative would be to post an ENOTCONN error in stream head
2760*0Sstevel@tonic-gate 	 * (read+write) and clear it when we're connected. However, that error
2761*0Sstevel@tonic-gate 	 * would cause incorrect poll/select behavior!
2762*0Sstevel@tonic-gate 	 */
2763*0Sstevel@tonic-gate 	if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 &&
2764*0Sstevel@tonic-gate 	    (so->so_mode & SM_CONNREQUIRED)) {
2765*0Sstevel@tonic-gate 		return (ENOTCONN);
2766*0Sstevel@tonic-gate 	}
2767*0Sstevel@tonic-gate 
2768*0Sstevel@tonic-gate 	/*
2769*0Sstevel@tonic-gate 	 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but
2770*0Sstevel@tonic-gate 	 * after checking that the read queue is empty) and returns zero.
2771*0Sstevel@tonic-gate 	 * This implementation will sleep (in kstrgetmsg) even if uio_resid
2772*0Sstevel@tonic-gate 	 * is zero.
2773*0Sstevel@tonic-gate 	 */
2774*0Sstevel@tonic-gate 
2775*0Sstevel@tonic-gate 	if (flags & MSG_OOB) {
2776*0Sstevel@tonic-gate 		/* Check that the transport supports OOB */
2777*0Sstevel@tonic-gate 		if (!(so->so_mode & SM_EXDATA))
2778*0Sstevel@tonic-gate 			return (EOPNOTSUPP);
2779*0Sstevel@tonic-gate 		return (sorecvoob(so, msg, uiop, flags));
2780*0Sstevel@tonic-gate 	}
2781*0Sstevel@tonic-gate 
2782*0Sstevel@tonic-gate 	/*
2783*0Sstevel@tonic-gate 	 * Set msg_controllen and msg_namelen to zero here to make it
2784*0Sstevel@tonic-gate 	 * simpler in the cases that no control or name is returned.
2785*0Sstevel@tonic-gate 	 */
2786*0Sstevel@tonic-gate 	controllen = msg->msg_controllen;
2787*0Sstevel@tonic-gate 	namelen = msg->msg_namelen;
2788*0Sstevel@tonic-gate 	msg->msg_controllen = 0;
2789*0Sstevel@tonic-gate 	msg->msg_namelen = 0;
2790*0Sstevel@tonic-gate 
2791*0Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n",
2792*0Sstevel@tonic-gate 		namelen, controllen));
2793*0Sstevel@tonic-gate 
2794*0Sstevel@tonic-gate 	/*
2795*0Sstevel@tonic-gate 	 * If an NL7C enabled socket and not waiting for write data.
2796*0Sstevel@tonic-gate 	 */
2797*0Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
2798*0Sstevel@tonic-gate 	if ((so->so_nl7c_flags & (NL7C_ENABLED|NL7C_WAITWRITE)) ==
2799*0Sstevel@tonic-gate 	    NL7C_ENABLED) {
2800*0Sstevel@tonic-gate 		if (so->so_nl7c_uri) {
2801*0Sstevel@tonic-gate 			/*
2802*0Sstevel@tonic-gate 			 * Close uri processing for a previous request.
2803*0Sstevel@tonic-gate 			 */
2804*0Sstevel@tonic-gate 			nl7c_close(so);
2805*0Sstevel@tonic-gate 		}
2806*0Sstevel@tonic-gate 		if (nl7c_process(so,
2807*0Sstevel@tonic-gate 		    (so->so_state & (SS_NONBLOCK|SS_NDELAY)),
2808*0Sstevel@tonic-gate 		    (int)((tcp_t *)so->so_priv)->tcp_mss)) {
2809*0Sstevel@tonic-gate 			/*
2810*0Sstevel@tonic-gate 			 * NL7C has completed processing on the socket,
2811*0Sstevel@tonic-gate 			 * clear the enabled bit as no further NL7C
2812*0Sstevel@tonic-gate 			 * processing will be needed.
2813*0Sstevel@tonic-gate 			 */
2814*0Sstevel@tonic-gate 			so->so_nl7c_flags = 0;
2815*0Sstevel@tonic-gate 		}
2816*0Sstevel@tonic-gate 	}
2817*0Sstevel@tonic-gate 
2818*0Sstevel@tonic-gate 	/*
2819*0Sstevel@tonic-gate 	 * Only one reader is allowed at any given time. This is needed
2820*0Sstevel@tonic-gate 	 * for T_EXDATA handling and, in the future, MSG_WAITALL.
2821*0Sstevel@tonic-gate 	 *
2822*0Sstevel@tonic-gate 	 * This is slightly different than BSD behavior in that it fails with
2823*0Sstevel@tonic-gate 	 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access
2824*0Sstevel@tonic-gate 	 * is single-threaded using sblock(), which is dropped while waiting
2825*0Sstevel@tonic-gate 	 * for data to appear. The difference shows up e.g. if one
2826*0Sstevel@tonic-gate 	 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor
2827*0Sstevel@tonic-gate 	 * does use nonblocking io and different threads are reading each
2828*0Sstevel@tonic-gate 	 * file descriptor. In BSD there would never be an EWOULDBLOCK error
2829*0Sstevel@tonic-gate 	 * in this case as long as the read queue doesn't get empty.
2830*0Sstevel@tonic-gate 	 * In this implementation the thread using nonblocking io can
2831*0Sstevel@tonic-gate 	 * get an EWOULDBLOCK error due to the blocking thread executing
2832*0Sstevel@tonic-gate 	 * e.g. in the uiomove in kstrgetmsg.
2833*0Sstevel@tonic-gate 	 * This difference is not believed to be significant.
2834*0Sstevel@tonic-gate 	 */
2835*0Sstevel@tonic-gate 	error = so_lock_read_intr(so, uiop->uio_fmode);	/* Set SOREADLOCKED */
2836*0Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
2837*0Sstevel@tonic-gate 	if (error)
2838*0Sstevel@tonic-gate 		return (error);
2839*0Sstevel@tonic-gate 
2840*0Sstevel@tonic-gate 	/*
2841*0Sstevel@tonic-gate 	 * Tell kstrgetmsg to not inspect the stream head errors until all
2842*0Sstevel@tonic-gate 	 * queued data has been consumed.
2843*0Sstevel@tonic-gate 	 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set.
2844*0Sstevel@tonic-gate 	 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block.
2845*0Sstevel@tonic-gate 	 *
2846*0Sstevel@tonic-gate 	 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and
2847*0Sstevel@tonic-gate 	 * to T_OPTDATA_IND that do not contain any user-visible control msg.
2848*0Sstevel@tonic-gate 	 * Note that MSG_WAITALL set with MSG_PEEK is a noop.
2849*0Sstevel@tonic-gate 	 */
2850*0Sstevel@tonic-gate 	pflag = MSG_ANY | MSG_DELAYERROR;
2851*0Sstevel@tonic-gate 	if (flags & MSG_PEEK) {
2852*0Sstevel@tonic-gate 		pflag |= MSG_IPEEK;
2853*0Sstevel@tonic-gate 		flags &= ~MSG_WAITALL;
2854*0Sstevel@tonic-gate 	}
2855*0Sstevel@tonic-gate 	if (so->so_mode & SM_ATOMIC)
2856*0Sstevel@tonic-gate 		pflag |= MSG_DISCARDTAIL;
2857*0Sstevel@tonic-gate 
2858*0Sstevel@tonic-gate 	if (flags & MSG_DONTWAIT)
2859*0Sstevel@tonic-gate 		timout = 0;
2860*0Sstevel@tonic-gate 	else
2861*0Sstevel@tonic-gate 		timout = -1;
2862*0Sstevel@tonic-gate 	opflag = pflag;
2863*0Sstevel@tonic-gate 	first = 1;
2864*0Sstevel@tonic-gate 
2865*0Sstevel@tonic-gate 	/*
2866*0Sstevel@tonic-gate 	 * If so saved NL7C rcv mblk_t(s) uiomove them first
2867*0Sstevel@tonic-gate 	 * else get'm from the streamhead.
2868*0Sstevel@tonic-gate 	 */
2869*0Sstevel@tonic-gate retry:
2870*0Sstevel@tonic-gate 	saved_resid = uiop->uio_resid;
2871*0Sstevel@tonic-gate 	pri = 0;
2872*0Sstevel@tonic-gate 	mp = NULL;
2873*0Sstevel@tonic-gate 	if (so->so_nl7c_rcv_mp != NULL) {
2874*0Sstevel@tonic-gate 		error = nl7c_sorecv(so, &mp, uiop, &rval);
2875*0Sstevel@tonic-gate 	} else {
2876*0Sstevel@tonic-gate 		error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag,
2877*0Sstevel@tonic-gate 		    timout, &rval);
2878*0Sstevel@tonic-gate 	}
2879*0Sstevel@tonic-gate 	if (error) {
2880*0Sstevel@tonic-gate 		switch (error) {
2881*0Sstevel@tonic-gate 		case EINTR:
2882*0Sstevel@tonic-gate 		case EWOULDBLOCK:
2883*0Sstevel@tonic-gate 			if (!first)
2884*0Sstevel@tonic-gate 				error = 0;
2885*0Sstevel@tonic-gate 			break;
2886*0Sstevel@tonic-gate 		case ETIME:
2887*0Sstevel@tonic-gate 			/* Returned from kstrgetmsg when timeout expires */
2888*0Sstevel@tonic-gate 			if (!first)
2889*0Sstevel@tonic-gate 				error = 0;
2890*0Sstevel@tonic-gate 			else
2891*0Sstevel@tonic-gate 				error = EWOULDBLOCK;
2892*0Sstevel@tonic-gate 			break;
2893*0Sstevel@tonic-gate 		default:
2894*0Sstevel@tonic-gate 			eprintsoline(so, error);
2895*0Sstevel@tonic-gate 			break;
2896*0Sstevel@tonic-gate 		}
2897*0Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
2898*0Sstevel@tonic-gate 		so_unlock_read(so);	/* Clear SOREADLOCKED */
2899*0Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
2900*0Sstevel@tonic-gate 		return (error);
2901*0Sstevel@tonic-gate 	}
2902*0Sstevel@tonic-gate 	/*
2903*0Sstevel@tonic-gate 	 * For datagrams the MOREDATA flag is used to set MSG_TRUNC.
2904*0Sstevel@tonic-gate 	 * For non-datagrams MOREDATA is used to set MSG_EOR.
2905*0Sstevel@tonic-gate 	 */
2906*0Sstevel@tonic-gate 	ASSERT(!(rval.r_val1 & MORECTL));
2907*0Sstevel@tonic-gate 	if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC))
2908*0Sstevel@tonic-gate 		msg->msg_flags |= MSG_TRUNC;
2909*0Sstevel@tonic-gate 
2910*0Sstevel@tonic-gate 	if (mp == NULL) {
2911*0Sstevel@tonic-gate 		dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n"));
2912*0Sstevel@tonic-gate 		/*
2913*0Sstevel@tonic-gate 		 * 4.3BSD and 4.4BSD clears the mark when peeking across it.
2914*0Sstevel@tonic-gate 		 * The draft Posix socket spec states that the mark should
2915*0Sstevel@tonic-gate 		 * not be cleared when peeking. We follow the latter.
2916*0Sstevel@tonic-gate 		 */
2917*0Sstevel@tonic-gate 		if ((so->so_state &
2918*0Sstevel@tonic-gate 		    (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) &&
2919*0Sstevel@tonic-gate 		    (uiop->uio_resid != saved_resid) &&
2920*0Sstevel@tonic-gate 		    !(flags & MSG_PEEK)) {
2921*0Sstevel@tonic-gate 			sorecv_update_oobstate(so);
2922*0Sstevel@tonic-gate 		}
2923*0Sstevel@tonic-gate 
2924*0Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
2925*0Sstevel@tonic-gate 		/* Set MSG_EOR based on MOREDATA */
2926*0Sstevel@tonic-gate 		if (!(rval.r_val1 & MOREDATA)) {
2927*0Sstevel@tonic-gate 			if (so->so_state & SS_SAVEDEOR) {
2928*0Sstevel@tonic-gate 				msg->msg_flags |= MSG_EOR;
2929*0Sstevel@tonic-gate 				so->so_state &= ~SS_SAVEDEOR;
2930*0Sstevel@tonic-gate 			}
2931*0Sstevel@tonic-gate 		}
2932*0Sstevel@tonic-gate 		/*
2933*0Sstevel@tonic-gate 		 * If some data was received (i.e. not EOF) and the
2934*0Sstevel@tonic-gate 		 * read/recv* has not been satisfied wait for some more.
2935*0Sstevel@tonic-gate 		 */
2936*0Sstevel@tonic-gate 		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
2937*0Sstevel@tonic-gate 		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
2938*0Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
2939*0Sstevel@tonic-gate 			first = 0;
2940*0Sstevel@tonic-gate 			pflag = opflag | MSG_NOMARK;
2941*0Sstevel@tonic-gate 			goto retry;
2942*0Sstevel@tonic-gate 		}
2943*0Sstevel@tonic-gate 		so_unlock_read(so);	/* Clear SOREADLOCKED */
2944*0Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
2945*0Sstevel@tonic-gate 		return (0);
2946*0Sstevel@tonic-gate 	}
2947*0Sstevel@tonic-gate 
2948*0Sstevel@tonic-gate 	/* strsock_proto has already verified length and alignment */
2949*0Sstevel@tonic-gate 	tpr = (union T_primitives *)mp->b_rptr;
2950*0Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type));
2951*0Sstevel@tonic-gate 
2952*0Sstevel@tonic-gate 	switch (tpr->type) {
2953*0Sstevel@tonic-gate 	case T_DATA_IND: {
2954*0Sstevel@tonic-gate 		if ((so->so_state &
2955*0Sstevel@tonic-gate 		    (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) &&
2956*0Sstevel@tonic-gate 		    (uiop->uio_resid != saved_resid) &&
2957*0Sstevel@tonic-gate 		    !(flags & MSG_PEEK)) {
2958*0Sstevel@tonic-gate 			sorecv_update_oobstate(so);
2959*0Sstevel@tonic-gate 		}
2960*0Sstevel@tonic-gate 
2961*0Sstevel@tonic-gate 		/*
2962*0Sstevel@tonic-gate 		 * Set msg_flags to MSG_EOR based on
2963*0Sstevel@tonic-gate 		 * MORE_flag and MOREDATA.
2964*0Sstevel@tonic-gate 		 */
2965*0Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
2966*0Sstevel@tonic-gate 		so->so_state &= ~SS_SAVEDEOR;
2967*0Sstevel@tonic-gate 		if (!(tpr->data_ind.MORE_flag & 1)) {
2968*0Sstevel@tonic-gate 			if (!(rval.r_val1 & MOREDATA))
2969*0Sstevel@tonic-gate 				msg->msg_flags |= MSG_EOR;
2970*0Sstevel@tonic-gate 			else
2971*0Sstevel@tonic-gate 				so->so_state |= SS_SAVEDEOR;
2972*0Sstevel@tonic-gate 		}
2973*0Sstevel@tonic-gate 		freemsg(mp);
2974*0Sstevel@tonic-gate 		/*
2975*0Sstevel@tonic-gate 		 * If some data was received (i.e. not EOF) and the
2976*0Sstevel@tonic-gate 		 * read/recv* has not been satisfied wait for some more.
2977*0Sstevel@tonic-gate 		 */
2978*0Sstevel@tonic-gate 		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
2979*0Sstevel@tonic-gate 		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
2980*0Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
2981*0Sstevel@tonic-gate 			first = 0;
2982*0Sstevel@tonic-gate 			pflag = opflag | MSG_NOMARK;
2983*0Sstevel@tonic-gate 			goto retry;
2984*0Sstevel@tonic-gate 		}
2985*0Sstevel@tonic-gate 		so_unlock_read(so);	/* Clear SOREADLOCKED */
2986*0Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
2987*0Sstevel@tonic-gate 		return (0);
2988*0Sstevel@tonic-gate 	}
2989*0Sstevel@tonic-gate 	case T_UNITDATA_IND: {
2990*0Sstevel@tonic-gate 		void *addr;
2991*0Sstevel@tonic-gate 		t_uscalar_t addrlen;
2992*0Sstevel@tonic-gate 		void *abuf;
2993*0Sstevel@tonic-gate 		t_uscalar_t optlen;
2994*0Sstevel@tonic-gate 		void *opt;
2995*0Sstevel@tonic-gate 
2996*0Sstevel@tonic-gate 		if ((so->so_state &
2997*0Sstevel@tonic-gate 		    (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) &&
2998*0Sstevel@tonic-gate 		    (uiop->uio_resid != saved_resid) &&
2999*0Sstevel@tonic-gate 		    !(flags & MSG_PEEK)) {
3000*0Sstevel@tonic-gate 			sorecv_update_oobstate(so);
3001*0Sstevel@tonic-gate 		}
3002*0Sstevel@tonic-gate 
3003*0Sstevel@tonic-gate 		if (namelen != 0) {
3004*0Sstevel@tonic-gate 			/* Caller wants source address */
3005*0Sstevel@tonic-gate 			addrlen = tpr->unitdata_ind.SRC_length;
3006*0Sstevel@tonic-gate 			addr = sogetoff(mp,
3007*0Sstevel@tonic-gate 				tpr->unitdata_ind.SRC_offset,
3008*0Sstevel@tonic-gate 				addrlen, 1);
3009*0Sstevel@tonic-gate 			if (addr == NULL) {
3010*0Sstevel@tonic-gate 				freemsg(mp);
3011*0Sstevel@tonic-gate 				error = EPROTO;
3012*0Sstevel@tonic-gate 				eprintsoline(so, error);
3013*0Sstevel@tonic-gate 				goto err;
3014*0Sstevel@tonic-gate 			}
3015*0Sstevel@tonic-gate 			if (so->so_family == AF_UNIX) {
3016*0Sstevel@tonic-gate 				/*
3017*0Sstevel@tonic-gate 				 * Can not use the transport level address.
3018*0Sstevel@tonic-gate 				 * If there is a SO_SRCADDR option carrying
3019*0Sstevel@tonic-gate 				 * the socket level address it will be
3020*0Sstevel@tonic-gate 				 * extracted below.
3021*0Sstevel@tonic-gate 				 */
3022*0Sstevel@tonic-gate 				addr = NULL;
3023*0Sstevel@tonic-gate 				addrlen = 0;
3024*0Sstevel@tonic-gate 			}
3025*0Sstevel@tonic-gate 		}
3026*0Sstevel@tonic-gate 		optlen = tpr->unitdata_ind.OPT_length;
3027*0Sstevel@tonic-gate 		if (optlen != 0) {
3028*0Sstevel@tonic-gate 			t_uscalar_t ncontrollen;
3029*0Sstevel@tonic-gate 
3030*0Sstevel@tonic-gate 			/*
3031*0Sstevel@tonic-gate 			 * Extract any source address option.
3032*0Sstevel@tonic-gate 			 * Determine how large cmsg buffer is needed.
3033*0Sstevel@tonic-gate 			 */
3034*0Sstevel@tonic-gate 			opt = sogetoff(mp,
3035*0Sstevel@tonic-gate 				tpr->unitdata_ind.OPT_offset,
3036*0Sstevel@tonic-gate 				optlen, __TPI_ALIGN_SIZE);
3037*0Sstevel@tonic-gate 
3038*0Sstevel@tonic-gate 			if (opt == NULL) {
3039*0Sstevel@tonic-gate 				freemsg(mp);
3040*0Sstevel@tonic-gate 				error = EPROTO;
3041*0Sstevel@tonic-gate 				eprintsoline(so, error);
3042*0Sstevel@tonic-gate 				goto err;
3043*0Sstevel@tonic-gate 			}
3044*0Sstevel@tonic-gate 			if (so->so_family == AF_UNIX)
3045*0Sstevel@tonic-gate 				so_getopt_srcaddr(opt, optlen, &addr, &addrlen);
3046*0Sstevel@tonic-gate 			ncontrollen = so_cmsglen(mp, opt, optlen,
3047*0Sstevel@tonic-gate 						!(flags & MSG_XPG4_2));
3048*0Sstevel@tonic-gate 			if (controllen != 0)
3049*0Sstevel@tonic-gate 				controllen = ncontrollen;
3050*0Sstevel@tonic-gate 			else if (ncontrollen != 0)
3051*0Sstevel@tonic-gate 				msg->msg_flags |= MSG_CTRUNC;
3052*0Sstevel@tonic-gate 		} else {
3053*0Sstevel@tonic-gate 			controllen = 0;
3054*0Sstevel@tonic-gate 		}
3055*0Sstevel@tonic-gate 
3056*0Sstevel@tonic-gate 		if (namelen != 0) {
3057*0Sstevel@tonic-gate 			/*
3058*0Sstevel@tonic-gate 			 * Return address to caller.
3059*0Sstevel@tonic-gate 			 * Caller handles truncation if length
3060*0Sstevel@tonic-gate 			 * exceeds msg_namelen.
3061*0Sstevel@tonic-gate 			 * NOTE: AF_UNIX NUL termination is ensured by
3062*0Sstevel@tonic-gate 			 * the sender's copyin_name().
3063*0Sstevel@tonic-gate 			 */
3064*0Sstevel@tonic-gate 			abuf = kmem_alloc(addrlen, KM_SLEEP);
3065*0Sstevel@tonic-gate 
3066*0Sstevel@tonic-gate 			bcopy(addr, abuf, addrlen);
3067*0Sstevel@tonic-gate 			msg->msg_name = abuf;
3068*0Sstevel@tonic-gate 			msg->msg_namelen = addrlen;
3069*0Sstevel@tonic-gate 		}
3070*0Sstevel@tonic-gate 
3071*0Sstevel@tonic-gate 		if (controllen != 0) {
3072*0Sstevel@tonic-gate 			/*
3073*0Sstevel@tonic-gate 			 * Return control msg to caller.
3074*0Sstevel@tonic-gate 			 * Caller handles truncation if length
3075*0Sstevel@tonic-gate 			 * exceeds msg_controllen.
3076*0Sstevel@tonic-gate 			 */
3077*0Sstevel@tonic-gate 			control = kmem_alloc(controllen, KM_SLEEP);
3078*0Sstevel@tonic-gate 
3079*0Sstevel@tonic-gate 			error = so_opt2cmsg(mp, opt, optlen,
3080*0Sstevel@tonic-gate 					!(flags & MSG_XPG4_2),
3081*0Sstevel@tonic-gate 					control, controllen);
3082*0Sstevel@tonic-gate 			if (error) {
3083*0Sstevel@tonic-gate 				freemsg(mp);
3084*0Sstevel@tonic-gate 				if (msg->msg_namelen != 0)
3085*0Sstevel@tonic-gate 					kmem_free(msg->msg_name,
3086*0Sstevel@tonic-gate 						msg->msg_namelen);
3087*0Sstevel@tonic-gate 				kmem_free(control, controllen);
3088*0Sstevel@tonic-gate 				eprintsoline(so, error);
3089*0Sstevel@tonic-gate 				goto err;
3090*0Sstevel@tonic-gate 			}
3091*0Sstevel@tonic-gate 			msg->msg_control = control;
3092*0Sstevel@tonic-gate 			msg->msg_controllen = controllen;
3093*0Sstevel@tonic-gate 		}
3094*0Sstevel@tonic-gate 
3095*0Sstevel@tonic-gate 		freemsg(mp);
3096*0Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
3097*0Sstevel@tonic-gate 		so_unlock_read(so);	/* Clear SOREADLOCKED */
3098*0Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
3099*0Sstevel@tonic-gate 		return (0);
3100*0Sstevel@tonic-gate 	}
3101*0Sstevel@tonic-gate 	case T_OPTDATA_IND: {
3102*0Sstevel@tonic-gate 		struct T_optdata_req *tdr;
3103*0Sstevel@tonic-gate 		void *opt;
3104*0Sstevel@tonic-gate 		t_uscalar_t optlen;
3105*0Sstevel@tonic-gate 
3106*0Sstevel@tonic-gate 		if ((so->so_state &
3107*0Sstevel@tonic-gate 		    (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) &&
3108*0Sstevel@tonic-gate 		    (uiop->uio_resid != saved_resid) &&
3109*0Sstevel@tonic-gate 		    !(flags & MSG_PEEK)) {
3110*0Sstevel@tonic-gate 			sorecv_update_oobstate(so);
3111*0Sstevel@tonic-gate 		}
3112*0Sstevel@tonic-gate 
3113*0Sstevel@tonic-gate 		tdr = (struct T_optdata_req *)mp->b_rptr;
3114*0Sstevel@tonic-gate 		optlen = tdr->OPT_length;
3115*0Sstevel@tonic-gate 		if (optlen != 0) {
3116*0Sstevel@tonic-gate 			t_uscalar_t ncontrollen;
3117*0Sstevel@tonic-gate 			/*
3118*0Sstevel@tonic-gate 			 * Determine how large cmsg buffer is needed.
3119*0Sstevel@tonic-gate 			 */
3120*0Sstevel@tonic-gate 			opt = sogetoff(mp,
3121*0Sstevel@tonic-gate 					tpr->optdata_ind.OPT_offset,
3122*0Sstevel@tonic-gate 					optlen, __TPI_ALIGN_SIZE);
3123*0Sstevel@tonic-gate 
3124*0Sstevel@tonic-gate 			if (opt == NULL) {
3125*0Sstevel@tonic-gate 				freemsg(mp);
3126*0Sstevel@tonic-gate 				error = EPROTO;
3127*0Sstevel@tonic-gate 				eprintsoline(so, error);
3128*0Sstevel@tonic-gate 				goto err;
3129*0Sstevel@tonic-gate 			}
3130*0Sstevel@tonic-gate 
3131*0Sstevel@tonic-gate 			ncontrollen = so_cmsglen(mp, opt, optlen,
3132*0Sstevel@tonic-gate 						!(flags & MSG_XPG4_2));
3133*0Sstevel@tonic-gate 			if (controllen != 0)
3134*0Sstevel@tonic-gate 				controllen = ncontrollen;
3135*0Sstevel@tonic-gate 			else if (ncontrollen != 0)
3136*0Sstevel@tonic-gate 				msg->msg_flags |= MSG_CTRUNC;
3137*0Sstevel@tonic-gate 		} else {
3138*0Sstevel@tonic-gate 			controllen = 0;
3139*0Sstevel@tonic-gate 		}
3140*0Sstevel@tonic-gate 
3141*0Sstevel@tonic-gate 		if (controllen != 0) {
3142*0Sstevel@tonic-gate 			/*
3143*0Sstevel@tonic-gate 			 * Return control msg to caller.
3144*0Sstevel@tonic-gate 			 * Caller handles truncation if length
3145*0Sstevel@tonic-gate 			 * exceeds msg_controllen.
3146*0Sstevel@tonic-gate 			 */
3147*0Sstevel@tonic-gate 			control = kmem_alloc(controllen, KM_SLEEP);
3148*0Sstevel@tonic-gate 
3149*0Sstevel@tonic-gate 			error = so_opt2cmsg(mp, opt, optlen,
3150*0Sstevel@tonic-gate 					!(flags & MSG_XPG4_2),
3151*0Sstevel@tonic-gate 					control, controllen);
3152*0Sstevel@tonic-gate 			if (error) {
3153*0Sstevel@tonic-gate 				freemsg(mp);
3154*0Sstevel@tonic-gate 				kmem_free(control, controllen);
3155*0Sstevel@tonic-gate 				eprintsoline(so, error);
3156*0Sstevel@tonic-gate 				goto err;
3157*0Sstevel@tonic-gate 			}
3158*0Sstevel@tonic-gate 			msg->msg_control = control;
3159*0Sstevel@tonic-gate 			msg->msg_controllen = controllen;
3160*0Sstevel@tonic-gate 		}
3161*0Sstevel@tonic-gate 
3162*0Sstevel@tonic-gate 		/*
3163*0Sstevel@tonic-gate 		 * Set msg_flags to MSG_EOR based on
3164*0Sstevel@tonic-gate 		 * DATA_flag and MOREDATA.
3165*0Sstevel@tonic-gate 		 */
3166*0Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
3167*0Sstevel@tonic-gate 		so->so_state &= ~SS_SAVEDEOR;
3168*0Sstevel@tonic-gate 		if (!(tpr->data_ind.MORE_flag & 1)) {
3169*0Sstevel@tonic-gate 			if (!(rval.r_val1 & MOREDATA))
3170*0Sstevel@tonic-gate 				msg->msg_flags |= MSG_EOR;
3171*0Sstevel@tonic-gate 			else
3172*0Sstevel@tonic-gate 				so->so_state |= SS_SAVEDEOR;
3173*0Sstevel@tonic-gate 		}
3174*0Sstevel@tonic-gate 		freemsg(mp);
3175*0Sstevel@tonic-gate 		/*
3176*0Sstevel@tonic-gate 		 * If some data was received (i.e. not EOF) and the
3177*0Sstevel@tonic-gate 		 * read/recv* has not been satisfied wait for some more.
3178*0Sstevel@tonic-gate 		 * Not possible to wait if control info was received.
3179*0Sstevel@tonic-gate 		 */
3180*0Sstevel@tonic-gate 		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
3181*0Sstevel@tonic-gate 		    controllen == 0 &&
3182*0Sstevel@tonic-gate 		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
3183*0Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
3184*0Sstevel@tonic-gate 			first = 0;
3185*0Sstevel@tonic-gate 			pflag = opflag | MSG_NOMARK;
3186*0Sstevel@tonic-gate 			goto retry;
3187*0Sstevel@tonic-gate 		}
3188*0Sstevel@tonic-gate 		so_unlock_read(so);	/* Clear SOREADLOCKED */
3189*0Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
3190*0Sstevel@tonic-gate 		return (0);
3191*0Sstevel@tonic-gate 	}
3192*0Sstevel@tonic-gate 	case T_EXDATA_IND: {
3193*0Sstevel@tonic-gate 		dprintso(so, 1,
3194*0Sstevel@tonic-gate 			("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld "
3195*0Sstevel@tonic-gate 			"state %s\n",
3196*0Sstevel@tonic-gate 			so->so_oobsigcnt, so->so_oobcnt,
3197*0Sstevel@tonic-gate 			saved_resid - uiop->uio_resid,
3198*0Sstevel@tonic-gate 			pr_state(so->so_state, so->so_mode)));
3199*0Sstevel@tonic-gate 		/*
3200*0Sstevel@tonic-gate 		 * kstrgetmsg handles MSGMARK so there is nothing to
3201*0Sstevel@tonic-gate 		 * inspect in the T_EXDATA_IND.
3202*0Sstevel@tonic-gate 		 * strsock_proto makes the stream head queue the T_EXDATA_IND
3203*0Sstevel@tonic-gate 		 * as a separate message with no M_DATA component. Furthermore,
3204*0Sstevel@tonic-gate 		 * the stream head does not consolidate M_DATA messages onto
3205*0Sstevel@tonic-gate 		 * an MSGMARK'ed message ensuring that the T_EXDATA_IND
3206*0Sstevel@tonic-gate 		 * remains a message by itself. This is needed since MSGMARK
3207*0Sstevel@tonic-gate 		 * marks both the whole message as well as the last byte
3208*0Sstevel@tonic-gate 		 * of the message.
3209*0Sstevel@tonic-gate 		 */
3210*0Sstevel@tonic-gate 		freemsg(mp);
3211*0Sstevel@tonic-gate 		ASSERT(uiop->uio_resid == saved_resid);	/* No data */
3212*0Sstevel@tonic-gate 		if (flags & MSG_PEEK) {
3213*0Sstevel@tonic-gate 			/*
3214*0Sstevel@tonic-gate 			 * Even though we are peeking we consume the
3215*0Sstevel@tonic-gate 			 * T_EXDATA_IND thereby moving the mark information
3216*0Sstevel@tonic-gate 			 * to SS_RCVATMARK. Then the oob code below will
3217*0Sstevel@tonic-gate 			 * retry the peeking kstrgetmsg.
3218*0Sstevel@tonic-gate 			 * Note that the stream head read queue is
3219*0Sstevel@tonic-gate 			 * never flushed without holding SOREADLOCKED
3220*0Sstevel@tonic-gate 			 * thus the T_EXDATA_IND can not disappear
3221*0Sstevel@tonic-gate 			 * underneath us.
3222*0Sstevel@tonic-gate 			 */
3223*0Sstevel@tonic-gate 			dprintso(so, 1,
3224*0Sstevel@tonic-gate 				("sotpi_recvmsg: consume EXDATA_IND "
3225*0Sstevel@tonic-gate 				"counts %d/%d state %s\n",
3226*0Sstevel@tonic-gate 				so->so_oobsigcnt,
3227*0Sstevel@tonic-gate 				so->so_oobcnt,
3228*0Sstevel@tonic-gate 				pr_state(so->so_state, so->so_mode)));
3229*0Sstevel@tonic-gate 
3230*0Sstevel@tonic-gate 			pflag = MSG_ANY | MSG_DELAYERROR;
3231*0Sstevel@tonic-gate 			if (so->so_mode & SM_ATOMIC)
3232*0Sstevel@tonic-gate 				pflag |= MSG_DISCARDTAIL;
3233*0Sstevel@tonic-gate 
3234*0Sstevel@tonic-gate 			pri = 0;
3235*0Sstevel@tonic-gate 			mp = NULL;
3236*0Sstevel@tonic-gate 
3237*0Sstevel@tonic-gate 			error = kstrgetmsg(SOTOV(so), &mp, uiop,
3238*0Sstevel@tonic-gate 				&pri, &pflag, (clock_t)-1, &rval);
3239*0Sstevel@tonic-gate 			ASSERT(uiop->uio_resid == saved_resid);
3240*0Sstevel@tonic-gate 
3241*0Sstevel@tonic-gate 			if (error) {
3242*0Sstevel@tonic-gate #ifdef SOCK_DEBUG
3243*0Sstevel@tonic-gate 				if (error != EWOULDBLOCK && error != EINTR) {
3244*0Sstevel@tonic-gate 					eprintsoline(so, error);
3245*0Sstevel@tonic-gate 				}
3246*0Sstevel@tonic-gate #endif /* SOCK_DEBUG */
3247*0Sstevel@tonic-gate 				mutex_enter(&so->so_lock);
3248*0Sstevel@tonic-gate 				so_unlock_read(so);	/* Clear SOREADLOCKED */
3249*0Sstevel@tonic-gate 				mutex_exit(&so->so_lock);
3250*0Sstevel@tonic-gate 				return (error);
3251*0Sstevel@tonic-gate 			}
3252*0Sstevel@tonic-gate 			ASSERT(mp);
3253*0Sstevel@tonic-gate 			tpr = (union T_primitives *)mp->b_rptr;
3254*0Sstevel@tonic-gate 			ASSERT(tpr->type == T_EXDATA_IND);
3255*0Sstevel@tonic-gate 			freemsg(mp);
3256*0Sstevel@tonic-gate 		} /* end "if (flags & MSG_PEEK)" */
3257*0Sstevel@tonic-gate 
3258*0Sstevel@tonic-gate 		/*
3259*0Sstevel@tonic-gate 		 * Decrement the number of queued and pending oob.
3260*0Sstevel@tonic-gate 		 *
3261*0Sstevel@tonic-gate 		 * SS_RCVATMARK is cleared when we read past a mark.
3262*0Sstevel@tonic-gate 		 * SS_HAVEOOBDATA is cleared when we've read past the
3263*0Sstevel@tonic-gate 		 * last mark.
3264*0Sstevel@tonic-gate 		 * SS_OOBPEND is cleared if we've read past the last
3265*0Sstevel@tonic-gate 		 * mark and no (new) SIGURG has been posted.
3266*0Sstevel@tonic-gate 		 */
3267*0Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
3268*0Sstevel@tonic-gate 		ASSERT(so_verify_oobstate(so));
3269*0Sstevel@tonic-gate 		ASSERT(so->so_oobsigcnt >= so->so_oobcnt);
3270*0Sstevel@tonic-gate 		ASSERT(so->so_oobsigcnt > 0);
3271*0Sstevel@tonic-gate 		so->so_oobsigcnt--;
3272*0Sstevel@tonic-gate 		ASSERT(so->so_oobcnt > 0);
3273*0Sstevel@tonic-gate 		so->so_oobcnt--;
3274*0Sstevel@tonic-gate 		/*
3275*0Sstevel@tonic-gate 		 * Since the T_EXDATA_IND has been removed from the stream
3276*0Sstevel@tonic-gate 		 * head, but we have not read data past the mark,
3277*0Sstevel@tonic-gate 		 * sockfs needs to track that the socket is still at the mark.
3278*0Sstevel@tonic-gate 		 *
3279*0Sstevel@tonic-gate 		 * Since no data was received call kstrgetmsg again to wait
3280*0Sstevel@tonic-gate 		 * for data.
3281*0Sstevel@tonic-gate 		 */
3282*0Sstevel@tonic-gate 		so->so_state |= SS_RCVATMARK;
3283*0Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
3284*0Sstevel@tonic-gate 		dprintso(so, 1,
3285*0Sstevel@tonic-gate 		    ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n",
3286*0Sstevel@tonic-gate 		    so->so_oobsigcnt, so->so_oobcnt,
3287*0Sstevel@tonic-gate 		    pr_state(so->so_state, so->so_mode)));
3288*0Sstevel@tonic-gate 		pflag = opflag;
3289*0Sstevel@tonic-gate 		goto retry;
3290*0Sstevel@tonic-gate 	}
3291*0Sstevel@tonic-gate 	default:
3292*0Sstevel@tonic-gate 		ASSERT(0);
3293*0Sstevel@tonic-gate 		freemsg(mp);
3294*0Sstevel@tonic-gate 		error = EPROTO;
3295*0Sstevel@tonic-gate 		eprintsoline(so, error);
3296*0Sstevel@tonic-gate 		goto err;
3297*0Sstevel@tonic-gate 	}
3298*0Sstevel@tonic-gate 	/* NOTREACHED */
3299*0Sstevel@tonic-gate err:
3300*0Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
3301*0Sstevel@tonic-gate 	so_unlock_read(so);	/* Clear SOREADLOCKED */
3302*0Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
3303*0Sstevel@tonic-gate 	return (error);
3304*0Sstevel@tonic-gate }
3305*0Sstevel@tonic-gate 
3306*0Sstevel@tonic-gate /*
3307*0Sstevel@tonic-gate  * Sending data with options on a datagram socket.
3308*0Sstevel@tonic-gate  * Assumes caller has verified that SS_ISBOUND etc. are set.
3309*0Sstevel@tonic-gate  */
3310*0Sstevel@tonic-gate static int
3311*0Sstevel@tonic-gate sosend_dgramcmsg(struct sonode *so,
3312*0Sstevel@tonic-gate 		struct sockaddr *name,
3313*0Sstevel@tonic-gate 		t_uscalar_t namelen,
3314*0Sstevel@tonic-gate 		struct uio *uiop,
3315*0Sstevel@tonic-gate 		void *control,
3316*0Sstevel@tonic-gate 		t_uscalar_t controllen,
3317*0Sstevel@tonic-gate 		int flags)
3318*0Sstevel@tonic-gate {
3319*0Sstevel@tonic-gate 	struct T_unitdata_req	tudr;
3320*0Sstevel@tonic-gate 	mblk_t			*mp;
3321*0Sstevel@tonic-gate 	int			error;
3322*0Sstevel@tonic-gate 	void			*addr;
3323*0Sstevel@tonic-gate 	socklen_t		addrlen;
3324*0Sstevel@tonic-gate 	void			*src;
3325*0Sstevel@tonic-gate 	socklen_t		srclen;
3326*0Sstevel@tonic-gate 	ssize_t			len;
3327*0Sstevel@tonic-gate 	int			size;
3328*0Sstevel@tonic-gate 	struct T_opthdr		toh;
3329*0Sstevel@tonic-gate 	struct fdbuf		*fdbuf;
3330*0Sstevel@tonic-gate 	t_uscalar_t		optlen;
3331*0Sstevel@tonic-gate 	void			*fds;
3332*0Sstevel@tonic-gate 	int			fdlen;
3333*0Sstevel@tonic-gate 
3334*0Sstevel@tonic-gate 	ASSERT(name && namelen);
3335*0Sstevel@tonic-gate 	ASSERT(control && controllen);
3336*0Sstevel@tonic-gate 
3337*0Sstevel@tonic-gate 	len = uiop->uio_resid;
3338*0Sstevel@tonic-gate 	if (len > (ssize_t)so->so_tidu_size) {
3339*0Sstevel@tonic-gate 		return (EMSGSIZE);
3340*0Sstevel@tonic-gate 	}
3341*0Sstevel@tonic-gate 
3342*0Sstevel@tonic-gate 	/*
3343*0Sstevel@tonic-gate 	 * For AF_UNIX the destination address is translated to an internal
3344*0Sstevel@tonic-gate 	 * name and the source address is passed as an option.
3345*0Sstevel@tonic-gate 	 * Also, file descriptors are passed as file pointers in an
3346*0Sstevel@tonic-gate 	 * option.
3347*0Sstevel@tonic-gate 	 */
3348*0Sstevel@tonic-gate 
3349*0Sstevel@tonic-gate 	/*
3350*0Sstevel@tonic-gate 	 * Length and family checks.
3351*0Sstevel@tonic-gate 	 */
3352*0Sstevel@tonic-gate 	error = so_addr_verify(so, name, namelen);
3353*0Sstevel@tonic-gate 	if (error) {
3354*0Sstevel@tonic-gate 		eprintsoline(so, error);
3355*0Sstevel@tonic-gate 		return (error);
3356*0Sstevel@tonic-gate 	}
3357*0Sstevel@tonic-gate 	if (so->so_family == AF_UNIX) {
3358*0Sstevel@tonic-gate 		if (so->so_state & SS_FADDR_NOXLATE) {
3359*0Sstevel@tonic-gate 			/*
3360*0Sstevel@tonic-gate 			 * Already have a transport internal address. Do not
3361*0Sstevel@tonic-gate 			 * pass any (transport internal) source address.
3362*0Sstevel@tonic-gate 			 */
3363*0Sstevel@tonic-gate 			addr = name;
3364*0Sstevel@tonic-gate 			addrlen = namelen;
3365*0Sstevel@tonic-gate 			src = NULL;
3366*0Sstevel@tonic-gate 			srclen = 0;
3367*0Sstevel@tonic-gate 		} else {
3368*0Sstevel@tonic-gate 			/*
3369*0Sstevel@tonic-gate 			 * Pass the sockaddr_un source address as an option
3370*0Sstevel@tonic-gate 			 * and translate the remote address.
3371*0Sstevel@tonic-gate 			 *
3372*0Sstevel@tonic-gate 			 * Note that this code does not prevent so_laddr_sa
3373*0Sstevel@tonic-gate 			 * from changing while it is being used. Thus
3374*0Sstevel@tonic-gate 			 * if an unbind+bind occurs concurrently with this
3375*0Sstevel@tonic-gate 			 * send the peer might see a partially new and a
3376*0Sstevel@tonic-gate 			 * partially old "from" address.
3377*0Sstevel@tonic-gate 			 */
3378*0Sstevel@tonic-gate 			src = so->so_laddr_sa;
3379*0Sstevel@tonic-gate 			srclen = (t_uscalar_t)so->so_laddr_len;
3380*0Sstevel@tonic-gate 			dprintso(so, 1,
3381*0Sstevel@tonic-gate 			    ("sosend_dgramcmsg UNIX: srclen %d, src %p\n",
3382*0Sstevel@tonic-gate 			    srclen, src));
3383*0Sstevel@tonic-gate 			error = so_ux_addr_xlate(so, name, namelen,
3384*0Sstevel@tonic-gate 				(flags & MSG_XPG4_2),
3385*0Sstevel@tonic-gate 				&addr, &addrlen);
3386*0Sstevel@tonic-gate 			if (error) {
3387*0Sstevel@tonic-gate 				eprintsoline(so, error);
3388*0Sstevel@tonic-gate 				return (error);
3389*0Sstevel@tonic-gate 			}
3390*0Sstevel@tonic-gate 		}
3391*0Sstevel@tonic-gate 	} else {
3392*0Sstevel@tonic-gate 		addr = name;
3393*0Sstevel@tonic-gate 		addrlen = namelen;
3394*0Sstevel@tonic-gate 		src = NULL;
3395*0Sstevel@tonic-gate 		srclen = 0;
3396*0Sstevel@tonic-gate 	}
3397*0Sstevel@tonic-gate 	optlen = so_optlen(control, controllen,
3398*0Sstevel@tonic-gate 					!(flags & MSG_XPG4_2));
3399*0Sstevel@tonic-gate 	tudr.PRIM_type = T_UNITDATA_REQ;
3400*0Sstevel@tonic-gate 	tudr.DEST_length = addrlen;
3401*0Sstevel@tonic-gate 	tudr.DEST_offset = (t_scalar_t)sizeof (tudr);
3402*0Sstevel@tonic-gate 	if (srclen != 0)
3403*0Sstevel@tonic-gate 		tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) +
3404*0Sstevel@tonic-gate 		    _TPI_ALIGN_TOPT(srclen));
3405*0Sstevel@tonic-gate 	else
3406*0Sstevel@tonic-gate 		tudr.OPT_length = optlen;
3407*0Sstevel@tonic-gate 	tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) +
3408*0Sstevel@tonic-gate 				_TPI_ALIGN_TOPT(addrlen));
3409*0Sstevel@tonic-gate 
3410*0Sstevel@tonic-gate 	size = tudr.OPT_offset + tudr.OPT_length;
3411*0Sstevel@tonic-gate 
3412*0Sstevel@tonic-gate 	/*
3413*0Sstevel@tonic-gate 	 * File descriptors only when SM_FDPASSING set.
3414*0Sstevel@tonic-gate 	 */
3415*0Sstevel@tonic-gate 	error = so_getfdopt(control, controllen,
3416*0Sstevel@tonic-gate 			!(flags & MSG_XPG4_2), &fds, &fdlen);
3417*0Sstevel@tonic-gate 	if (error)
3418*0Sstevel@tonic-gate 		return (error);
3419*0Sstevel@tonic-gate 	if (fdlen != -1) {
3420*0Sstevel@tonic-gate 		if (!(so->so_mode & SM_FDPASSING))
3421*0Sstevel@tonic-gate 			return (EOPNOTSUPP);
3422*0Sstevel@tonic-gate 
3423*0Sstevel@tonic-gate 		error = fdbuf_create(fds, fdlen, &fdbuf);
3424*0Sstevel@tonic-gate 		if (error)
3425*0Sstevel@tonic-gate 			return (error);
3426*0Sstevel@tonic-gate 		mp = fdbuf_allocmsg(size, fdbuf);
3427*0Sstevel@tonic-gate 		if (mp == NULL)
3428*0Sstevel@tonic-gate 			fdbuf_free(fdbuf);
3429*0Sstevel@tonic-gate 	} else {
3430*0Sstevel@tonic-gate 		mp = soallocproto(size, _ALLOC_INTR);
3431*0Sstevel@tonic-gate 	}
3432*0Sstevel@tonic-gate 	if (mp == NULL) {
3433*0Sstevel@tonic-gate 		/*
3434*0Sstevel@tonic-gate 		 * Caught a signal waiting for memory.
3435*0Sstevel@tonic-gate 		 * Let send* return EINTR.
3436*0Sstevel@tonic-gate 		 */
3437*0Sstevel@tonic-gate 		return (EINTR);
3438*0Sstevel@tonic-gate 	}
3439*0Sstevel@tonic-gate 	soappendmsg(mp, &tudr, sizeof (tudr));
3440*0Sstevel@tonic-gate 	soappendmsg(mp, addr, addrlen);
3441*0Sstevel@tonic-gate 	mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen;
3442*0Sstevel@tonic-gate 
3443*0Sstevel@tonic-gate 	if (fdlen != -1) {
3444*0Sstevel@tonic-gate 		ASSERT(fdbuf != NULL);
3445*0Sstevel@tonic-gate 		toh.level = SOL_SOCKET;
3446*0Sstevel@tonic-gate 		toh.name = SO_FILEP;
3447*0Sstevel@tonic-gate 		toh.len = fdbuf->fd_size +
3448*0Sstevel@tonic-gate 				(t_uscalar_t)sizeof (struct T_opthdr);
3449*0Sstevel@tonic-gate 		toh.status = 0;
3450*0Sstevel@tonic-gate 		soappendmsg(mp, &toh, sizeof (toh));
3451*0Sstevel@tonic-gate 		soappendmsg(mp, fdbuf, fdbuf->fd_size);
3452*0Sstevel@tonic-gate 		ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr));
3453*0Sstevel@tonic-gate 	}
3454*0Sstevel@tonic-gate 	if (srclen != 0) {
3455*0Sstevel@tonic-gate 		/*
3456*0Sstevel@tonic-gate 		 * There is a AF_UNIX sockaddr_un to include as a source
3457*0Sstevel@tonic-gate 		 * address option.
3458*0Sstevel@tonic-gate 		 */
3459*0Sstevel@tonic-gate 		toh.level = SOL_SOCKET;
3460*0Sstevel@tonic-gate 		toh.name = SO_SRCADDR;
3461*0Sstevel@tonic-gate 		toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr));
3462*0Sstevel@tonic-gate 		toh.status = 0;
3463*0Sstevel@tonic-gate 		soappendmsg(mp, &toh, sizeof (toh));
3464*0Sstevel@tonic-gate 		soappendmsg(mp, src, srclen);
3465*0Sstevel@tonic-gate 		mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen;
3466*0Sstevel@tonic-gate 		ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr));
3467*0Sstevel@tonic-gate 	}
3468*0Sstevel@tonic-gate 	ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
3469*0Sstevel@tonic-gate 	so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp);
3470*0Sstevel@tonic-gate 	/* At most 3 bytes left in the message */
3471*0Sstevel@tonic-gate 	ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE));
3472*0Sstevel@tonic-gate 	ASSERT(MBLKL(mp) <= (ssize_t)size);
3473*0Sstevel@tonic-gate 
3474*0Sstevel@tonic-gate 	ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
3475*0Sstevel@tonic-gate #ifdef C2_AUDIT
3476*0Sstevel@tonic-gate 	if (audit_active)
3477*0Sstevel@tonic-gate 		audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0);
3478*0Sstevel@tonic-gate #endif /* C2_AUDIT */
3479*0Sstevel@tonic-gate 
3480*0Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0);
3481*0Sstevel@tonic-gate #ifdef SOCK_DEBUG
3482*0Sstevel@tonic-gate 	if (error) {
3483*0Sstevel@tonic-gate 		eprintsoline(so, error);
3484*0Sstevel@tonic-gate 	}
3485*0Sstevel@tonic-gate #endif /* SOCK_DEBUG */
3486*0Sstevel@tonic-gate 	return (error);
3487*0Sstevel@tonic-gate }
3488*0Sstevel@tonic-gate 
3489*0Sstevel@tonic-gate /*
3490*0Sstevel@tonic-gate  * Sending data with options on a connected stream socket.
3491*0Sstevel@tonic-gate  * Assumes caller has verified that SS_ISCONNECTED is set.
3492*0Sstevel@tonic-gate  */
3493*0Sstevel@tonic-gate static int
3494*0Sstevel@tonic-gate sosend_svccmsg(struct sonode *so,
3495*0Sstevel@tonic-gate 		struct uio *uiop,
3496*0Sstevel@tonic-gate 		int more,
3497*0Sstevel@tonic-gate 		void *control,
3498*0Sstevel@tonic-gate 		t_uscalar_t controllen,
3499*0Sstevel@tonic-gate 		int flags)
3500*0Sstevel@tonic-gate {
3501*0Sstevel@tonic-gate 	struct T_optdata_req	tdr;
3502*0Sstevel@tonic-gate 	mblk_t			*mp;
3503*0Sstevel@tonic-gate 	int			error;
3504*0Sstevel@tonic-gate 	ssize_t			iosize;
3505*0Sstevel@tonic-gate 	int			first = 1;
3506*0Sstevel@tonic-gate 	int			size;
3507*0Sstevel@tonic-gate 	struct fdbuf		*fdbuf;
3508*0Sstevel@tonic-gate 	t_uscalar_t		optlen;
3509*0Sstevel@tonic-gate 	void			*fds;
3510*0Sstevel@tonic-gate 	int			fdlen;
3511*0Sstevel@tonic-gate 	struct T_opthdr		toh;
3512*0Sstevel@tonic-gate 
3513*0Sstevel@tonic-gate 	dprintso(so, 1,
3514*0Sstevel@tonic-gate 		("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid));
3515*0Sstevel@tonic-gate 
3516*0Sstevel@tonic-gate 	/*
3517*0Sstevel@tonic-gate 	 * Has to be bound and connected. However, since no locks are
3518*0Sstevel@tonic-gate 	 * held the state could have changed after sotpi_sendmsg checked it
3519*0Sstevel@tonic-gate 	 * thus it is not possible to ASSERT on the state.
3520*0Sstevel@tonic-gate 	 */
3521*0Sstevel@tonic-gate 
3522*0Sstevel@tonic-gate 	/* Options on connection-oriented only when SM_OPTDATA set. */
3523*0Sstevel@tonic-gate 	if (!(so->so_mode & SM_OPTDATA))
3524*0Sstevel@tonic-gate 		return (EOPNOTSUPP);
3525*0Sstevel@tonic-gate 
3526*0Sstevel@tonic-gate 	do {
3527*0Sstevel@tonic-gate 		/*
3528*0Sstevel@tonic-gate 		 * Set the MORE flag if uio_resid does not fit in this
3529*0Sstevel@tonic-gate 		 * message or if the caller passed in "more".
3530*0Sstevel@tonic-gate 		 * Error for transports with zero tidu_size.
3531*0Sstevel@tonic-gate 		 */
3532*0Sstevel@tonic-gate 		tdr.PRIM_type = T_OPTDATA_REQ;
3533*0Sstevel@tonic-gate 		iosize = so->so_tidu_size;
3534*0Sstevel@tonic-gate 		if (iosize <= 0)
3535*0Sstevel@tonic-gate 			return (EMSGSIZE);
3536*0Sstevel@tonic-gate 		if (uiop->uio_resid > iosize) {
3537*0Sstevel@tonic-gate 			tdr.DATA_flag = 1;
3538*0Sstevel@tonic-gate 		} else {
3539*0Sstevel@tonic-gate 			if (more)
3540*0Sstevel@tonic-gate 				tdr.DATA_flag = 1;
3541*0Sstevel@tonic-gate 			else
3542*0Sstevel@tonic-gate 				tdr.DATA_flag = 0;
3543*0Sstevel@tonic-gate 			iosize = uiop->uio_resid;
3544*0Sstevel@tonic-gate 		}
3545*0Sstevel@tonic-gate 		dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n",
3546*0Sstevel@tonic-gate 			tdr.DATA_flag, iosize));
3547*0Sstevel@tonic-gate 
3548*0Sstevel@tonic-gate 		optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2));
3549*0Sstevel@tonic-gate 		tdr.OPT_length = optlen;
3550*0Sstevel@tonic-gate 		tdr.OPT_offset = (t_scalar_t)sizeof (tdr);
3551*0Sstevel@tonic-gate 
3552*0Sstevel@tonic-gate 		size = (int)sizeof (tdr) + optlen;
3553*0Sstevel@tonic-gate 		/*
3554*0Sstevel@tonic-gate 		 * File descriptors only when SM_FDPASSING set.
3555*0Sstevel@tonic-gate 		 */
3556*0Sstevel@tonic-gate 		error = so_getfdopt(control, controllen,
3557*0Sstevel@tonic-gate 				!(flags & MSG_XPG4_2), &fds, &fdlen);
3558*0Sstevel@tonic-gate 		if (error)
3559*0Sstevel@tonic-gate 			return (error);
3560*0Sstevel@tonic-gate 		if (fdlen != -1) {
3561*0Sstevel@tonic-gate 			if (!(so->so_mode & SM_FDPASSING))
3562*0Sstevel@tonic-gate 				return (EOPNOTSUPP);
3563*0Sstevel@tonic-gate 
3564*0Sstevel@tonic-gate 			error = fdbuf_create(fds, fdlen, &fdbuf);
3565*0Sstevel@tonic-gate 			if (error)
3566*0Sstevel@tonic-gate 				return (error);
3567*0Sstevel@tonic-gate 			mp = fdbuf_allocmsg(size, fdbuf);
3568*0Sstevel@tonic-gate 			if (mp == NULL)
3569*0Sstevel@tonic-gate 				fdbuf_free(fdbuf);
3570*0Sstevel@tonic-gate 		} else {
3571*0Sstevel@tonic-gate 			mp = soallocproto(size, _ALLOC_INTR);
3572*0Sstevel@tonic-gate 		}
3573*0Sstevel@tonic-gate 
3574*0Sstevel@tonic-gate 		if (mp == NULL) {
3575*0Sstevel@tonic-gate 			/*
3576*0Sstevel@tonic-gate 			 * Caught a signal waiting for memory.
3577*0Sstevel@tonic-gate 			 * Let send* return EINTR.
3578*0Sstevel@tonic-gate 			 */
3579*0Sstevel@tonic-gate 			if (first)
3580*0Sstevel@tonic-gate 				return (EINTR);
3581*0Sstevel@tonic-gate 			else
3582*0Sstevel@tonic-gate 				return (0);
3583*0Sstevel@tonic-gate 		}
3584*0Sstevel@tonic-gate 		soappendmsg(mp, &tdr, sizeof (tdr));
3585*0Sstevel@tonic-gate 
3586*0Sstevel@tonic-gate 		if (fdlen != -1) {
3587*0Sstevel@tonic-gate 			ASSERT(fdbuf != NULL);
3588*0Sstevel@tonic-gate 			toh.level = SOL_SOCKET;
3589*0Sstevel@tonic-gate 			toh.name = SO_FILEP;
3590*0Sstevel@tonic-gate 			toh.len = fdbuf->fd_size +
3591*0Sstevel@tonic-gate 				(t_uscalar_t)sizeof (struct T_opthdr);
3592*0Sstevel@tonic-gate 			toh.status = 0;
3593*0Sstevel@tonic-gate 			soappendmsg(mp, &toh, sizeof (toh));
3594*0Sstevel@tonic-gate 			soappendmsg(mp, fdbuf, fdbuf->fd_size);
3595*0Sstevel@tonic-gate 			ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr));
3596*0Sstevel@tonic-gate 		}
3597*0Sstevel@tonic-gate 		so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp);
3598*0Sstevel@tonic-gate 		/* At most 3 bytes left in the message */
3599*0Sstevel@tonic-gate 		ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE));
3600*0Sstevel@tonic-gate 		ASSERT(MBLKL(mp) <= (ssize_t)size);
3601*0Sstevel@tonic-gate 
3602*0Sstevel@tonic-gate 		ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
3603*0Sstevel@tonic-gate 
3604*0Sstevel@tonic-gate 		error = kstrputmsg(SOTOV(so), mp, uiop, iosize,
3605*0Sstevel@tonic-gate 					0, MSG_BAND, 0);
3606*0Sstevel@tonic-gate 		if (error) {
3607*0Sstevel@tonic-gate 			if (!first && error == EWOULDBLOCK)
3608*0Sstevel@tonic-gate 				return (0);
3609*0Sstevel@tonic-gate 			eprintsoline(so, error);
3610*0Sstevel@tonic-gate 			return (error);
3611*0Sstevel@tonic-gate 		}
3612*0Sstevel@tonic-gate 		control = NULL;
3613*0Sstevel@tonic-gate 		first = 0;
3614*0Sstevel@tonic-gate 		if (uiop->uio_resid > 0) {
3615*0Sstevel@tonic-gate 			/*
3616*0Sstevel@tonic-gate 			 * Recheck for fatal errors. Fail write even though
3617*0Sstevel@tonic-gate 			 * some data have been written. This is consistent
3618*0Sstevel@tonic-gate 			 * with strwrite semantics and BSD sockets semantics.
3619*0Sstevel@tonic-gate 			 */
3620*0Sstevel@tonic-gate 			if (so->so_state & SS_CANTSENDMORE) {
3621*0Sstevel@tonic-gate 				tsignal(curthread, SIGPIPE);
3622*0Sstevel@tonic-gate 				eprintsoline(so, error);
3623*0Sstevel@tonic-gate 				return (EPIPE);
3624*0Sstevel@tonic-gate 			}
3625*0Sstevel@tonic-gate 			if (so->so_error != 0) {
3626*0Sstevel@tonic-gate 				mutex_enter(&so->so_lock);
3627*0Sstevel@tonic-gate 				error = sogeterr(so);
3628*0Sstevel@tonic-gate 				mutex_exit(&so->so_lock);
3629*0Sstevel@tonic-gate 				if (error != 0) {
3630*0Sstevel@tonic-gate 					eprintsoline(so, error);
3631*0Sstevel@tonic-gate 					return (error);
3632*0Sstevel@tonic-gate 				}
3633*0Sstevel@tonic-gate 			}
3634*0Sstevel@tonic-gate 		}
3635*0Sstevel@tonic-gate 	} while (uiop->uio_resid > 0);
3636*0Sstevel@tonic-gate 	return (0);
3637*0Sstevel@tonic-gate }
3638*0Sstevel@tonic-gate 
3639*0Sstevel@tonic-gate /*
3640*0Sstevel@tonic-gate  * Sending data on a datagram socket.
3641*0Sstevel@tonic-gate  * Assumes caller has verified that SS_ISBOUND etc. are set.
3642*0Sstevel@tonic-gate  *
3643*0Sstevel@tonic-gate  * For AF_UNIX the destination address is translated to an internal
3644*0Sstevel@tonic-gate  * name and the source address is passed as an option.
3645*0Sstevel@tonic-gate  */
3646*0Sstevel@tonic-gate int
3647*0Sstevel@tonic-gate sosend_dgram(struct sonode	*so,
3648*0Sstevel@tonic-gate 		struct sockaddr	*name,
3649*0Sstevel@tonic-gate 		socklen_t	namelen,
3650*0Sstevel@tonic-gate 		struct uio	*uiop,
3651*0Sstevel@tonic-gate 		int		flags)
3652*0Sstevel@tonic-gate {
3653*0Sstevel@tonic-gate 	struct T_unitdata_req	tudr;
3654*0Sstevel@tonic-gate 	mblk_t			*mp;
3655*0Sstevel@tonic-gate 	int			error;
3656*0Sstevel@tonic-gate 	void			*addr;
3657*0Sstevel@tonic-gate 	socklen_t		addrlen;
3658*0Sstevel@tonic-gate 	void			*src;
3659*0Sstevel@tonic-gate 	socklen_t		srclen;
3660*0Sstevel@tonic-gate 	ssize_t			len;
3661*0Sstevel@tonic-gate 
3662*0Sstevel@tonic-gate 	ASSERT(name && namelen);
3663*0Sstevel@tonic-gate 
3664*0Sstevel@tonic-gate 	len = uiop->uio_resid;
3665*0Sstevel@tonic-gate 	if (len > so->so_tidu_size) {
3666*0Sstevel@tonic-gate 		error = EMSGSIZE;
3667*0Sstevel@tonic-gate 		goto done;
3668*0Sstevel@tonic-gate 	}
3669*0Sstevel@tonic-gate 
3670*0Sstevel@tonic-gate 	/*
3671*0Sstevel@tonic-gate 	 * Length and family checks.
3672*0Sstevel@tonic-gate 	 */
3673*0Sstevel@tonic-gate 	error = so_addr_verify(so, name, namelen);
3674*0Sstevel@tonic-gate 	if (error) {
3675*0Sstevel@tonic-gate 		eprintsoline(so, error);
3676*0Sstevel@tonic-gate 		goto done;
3677*0Sstevel@tonic-gate 	}
3678*0Sstevel@tonic-gate 	if (so->so_family == AF_UNIX) {
3679*0Sstevel@tonic-gate 		if (so->so_state & SS_FADDR_NOXLATE) {
3680*0Sstevel@tonic-gate 			/*
3681*0Sstevel@tonic-gate 			 * Already have a transport internal address. Do not
3682*0Sstevel@tonic-gate 			 * pass any (transport internal) source address.
3683*0Sstevel@tonic-gate 			 */
3684*0Sstevel@tonic-gate 			addr = name;
3685*0Sstevel@tonic-gate 			addrlen = namelen;
3686*0Sstevel@tonic-gate 			src = NULL;
3687*0Sstevel@tonic-gate 			srclen = 0;
3688*0Sstevel@tonic-gate 		} else {
3689*0Sstevel@tonic-gate 			/*
3690*0Sstevel@tonic-gate 			 * Pass the sockaddr_un source address as an option
3691*0Sstevel@tonic-gate 			 * and translate the remote address.
3692*0Sstevel@tonic-gate 			 *
3693*0Sstevel@tonic-gate 			 * Note that this code does not prevent so_laddr_sa
3694*0Sstevel@tonic-gate 			 * from changing while it is being used. Thus
3695*0Sstevel@tonic-gate 			 * if an unbind+bind occurs concurrently with this
3696*0Sstevel@tonic-gate 			 * send the peer might see a partially new and a
3697*0Sstevel@tonic-gate 			 * partially old "from" address.
3698*0Sstevel@tonic-gate 			 */
3699*0Sstevel@tonic-gate 			src = so->so_laddr_sa;
3700*0Sstevel@tonic-gate 			srclen = (socklen_t)so->so_laddr_len;
3701*0Sstevel@tonic-gate 			dprintso(so, 1,
3702*0Sstevel@tonic-gate 				("sosend_dgram UNIX: srclen %d, src %p\n",
3703*0Sstevel@tonic-gate 				srclen, src));
3704*0Sstevel@tonic-gate 			error = so_ux_addr_xlate(so, name, namelen,
3705*0Sstevel@tonic-gate 				(flags & MSG_XPG4_2),
3706*0Sstevel@tonic-gate 				&addr, &addrlen);
3707*0Sstevel@tonic-gate 			if (error) {
3708*0Sstevel@tonic-gate 				eprintsoline(so, error);
3709*0Sstevel@tonic-gate 				goto done;
3710*0Sstevel@tonic-gate 			}
3711*0Sstevel@tonic-gate 		}
3712*0Sstevel@tonic-gate 	} else {
3713*0Sstevel@tonic-gate 		addr = name;
3714*0Sstevel@tonic-gate 		addrlen = namelen;
3715*0Sstevel@tonic-gate 		src = NULL;
3716*0Sstevel@tonic-gate 		srclen = 0;
3717*0Sstevel@tonic-gate 	}
3718*0Sstevel@tonic-gate 	tudr.PRIM_type = T_UNITDATA_REQ;
3719*0Sstevel@tonic-gate 	tudr.DEST_length = addrlen;
3720*0Sstevel@tonic-gate 	tudr.DEST_offset = (t_scalar_t)sizeof (tudr);
3721*0Sstevel@tonic-gate 	if (srclen == 0) {
3722*0Sstevel@tonic-gate 		tudr.OPT_length = 0;
3723*0Sstevel@tonic-gate 		tudr.OPT_offset = 0;
3724*0Sstevel@tonic-gate 
3725*0Sstevel@tonic-gate 		mp = soallocproto2(&tudr, sizeof (tudr),
3726*0Sstevel@tonic-gate 		    addr, addrlen, 0, _ALLOC_INTR);
3727*0Sstevel@tonic-gate 		if (mp == NULL) {
3728*0Sstevel@tonic-gate 			/*
3729*0Sstevel@tonic-gate 			 * Caught a signal waiting for memory.
3730*0Sstevel@tonic-gate 			 * Let send* return EINTR.
3731*0Sstevel@tonic-gate 			 */
3732*0Sstevel@tonic-gate 			error = EINTR;
3733*0Sstevel@tonic-gate 			goto done;
3734*0Sstevel@tonic-gate 		}
3735*0Sstevel@tonic-gate 	} else {
3736*0Sstevel@tonic-gate 		/*
3737*0Sstevel@tonic-gate 		 * There is a AF_UNIX sockaddr_un to include as a source
3738*0Sstevel@tonic-gate 		 * address option.
3739*0Sstevel@tonic-gate 		 */
3740*0Sstevel@tonic-gate 		struct T_opthdr toh;
3741*0Sstevel@tonic-gate 		ssize_t size;
3742*0Sstevel@tonic-gate 
3743*0Sstevel@tonic-gate 		tudr.OPT_length = (t_scalar_t)(sizeof (toh) +
3744*0Sstevel@tonic-gate 					_TPI_ALIGN_TOPT(srclen));
3745*0Sstevel@tonic-gate 		tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) +
3746*0Sstevel@tonic-gate 					_TPI_ALIGN_TOPT(addrlen));
3747*0Sstevel@tonic-gate 
3748*0Sstevel@tonic-gate 		toh.level = SOL_SOCKET;
3749*0Sstevel@tonic-gate 		toh.name = SO_SRCADDR;
3750*0Sstevel@tonic-gate 		toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr));
3751*0Sstevel@tonic-gate 		toh.status = 0;
3752*0Sstevel@tonic-gate 
3753*0Sstevel@tonic-gate 		size = tudr.OPT_offset + tudr.OPT_length;
3754*0Sstevel@tonic-gate 		mp = soallocproto2(&tudr, sizeof (tudr),
3755*0Sstevel@tonic-gate 		    addr, addrlen, size, _ALLOC_INTR);
3756*0Sstevel@tonic-gate 		if (mp == NULL) {
3757*0Sstevel@tonic-gate 			/*
3758*0Sstevel@tonic-gate 			 * Caught a signal waiting for memory.
3759*0Sstevel@tonic-gate 			 * Let send* return EINTR.
3760*0Sstevel@tonic-gate 			 */
3761*0Sstevel@tonic-gate 			error = EINTR;
3762*0Sstevel@tonic-gate 			goto done;
3763*0Sstevel@tonic-gate 		}
3764*0Sstevel@tonic-gate 		mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen;
3765*0Sstevel@tonic-gate 		soappendmsg(mp, &toh, sizeof (toh));
3766*0Sstevel@tonic-gate 		soappendmsg(mp, src, srclen);
3767*0Sstevel@tonic-gate 		mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen;
3768*0Sstevel@tonic-gate 		ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
3769*0Sstevel@tonic-gate 	}
3770*0Sstevel@tonic-gate 
3771*0Sstevel@tonic-gate #ifdef C2_AUDIT
3772*0Sstevel@tonic-gate 	if (audit_active)
3773*0Sstevel@tonic-gate 		audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0);
3774*0Sstevel@tonic-gate #endif /* C2_AUDIT */
3775*0Sstevel@tonic-gate 
3776*0Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0);
3777*0Sstevel@tonic-gate done:
3778*0Sstevel@tonic-gate #ifdef SOCK_DEBUG
3779*0Sstevel@tonic-gate 	if (error) {
3780*0Sstevel@tonic-gate 		eprintsoline(so, error);
3781*0Sstevel@tonic-gate 	}
3782*0Sstevel@tonic-gate #endif /* SOCK_DEBUG */
3783*0Sstevel@tonic-gate 	return (error);
3784*0Sstevel@tonic-gate }
3785*0Sstevel@tonic-gate 
3786*0Sstevel@tonic-gate /*
3787*0Sstevel@tonic-gate  * Sending data on a connected stream socket.
3788*0Sstevel@tonic-gate  * Assumes caller has verified that SS_ISCONNECTED is set.
3789*0Sstevel@tonic-gate  */
3790*0Sstevel@tonic-gate int
3791*0Sstevel@tonic-gate sosend_svc(struct sonode *so,
3792*0Sstevel@tonic-gate 	struct uio *uiop,
3793*0Sstevel@tonic-gate 	t_scalar_t prim,
3794*0Sstevel@tonic-gate 	int more,
3795*0Sstevel@tonic-gate 	int sflag)
3796*0Sstevel@tonic-gate {
3797*0Sstevel@tonic-gate 	struct T_data_req	tdr;
3798*0Sstevel@tonic-gate 	mblk_t			*mp;
3799*0Sstevel@tonic-gate 	int			error;
3800*0Sstevel@tonic-gate 	ssize_t			iosize;
3801*0Sstevel@tonic-gate 	int			first = 1;
3802*0Sstevel@tonic-gate 
3803*0Sstevel@tonic-gate 	dprintso(so, 1,
3804*0Sstevel@tonic-gate 		("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n",
3805*0Sstevel@tonic-gate 		so, uiop->uio_resid, prim, sflag));
3806*0Sstevel@tonic-gate 
3807*0Sstevel@tonic-gate 	/*
3808*0Sstevel@tonic-gate 	 * Has to be bound and connected. However, since no locks are
3809*0Sstevel@tonic-gate 	 * held the state could have changed after sotpi_sendmsg checked it
3810*0Sstevel@tonic-gate 	 * thus it is not possible to ASSERT on the state.
3811*0Sstevel@tonic-gate 	 */
3812*0Sstevel@tonic-gate 
3813*0Sstevel@tonic-gate 	do {
3814*0Sstevel@tonic-gate 		/*
3815*0Sstevel@tonic-gate 		 * Set the MORE flag if uio_resid does not fit in this
3816*0Sstevel@tonic-gate 		 * message or if the caller passed in "more".
3817*0Sstevel@tonic-gate 		 * Error for transports with zero tidu_size.
3818*0Sstevel@tonic-gate 		 */
3819*0Sstevel@tonic-gate 		tdr.PRIM_type = prim;
3820*0Sstevel@tonic-gate 		iosize = so->so_tidu_size;
3821*0Sstevel@tonic-gate 		if (iosize <= 0)
3822*0Sstevel@tonic-gate 			return (EMSGSIZE);
3823*0Sstevel@tonic-gate 		if (uiop->uio_resid > iosize) {
3824*0Sstevel@tonic-gate 			tdr.MORE_flag = 1;
3825*0Sstevel@tonic-gate 		} else {
3826*0Sstevel@tonic-gate 			if (more)
3827*0Sstevel@tonic-gate 				tdr.MORE_flag = 1;
3828*0Sstevel@tonic-gate 			else
3829*0Sstevel@tonic-gate 				tdr.MORE_flag = 0;
3830*0Sstevel@tonic-gate 			iosize = uiop->uio_resid;
3831*0Sstevel@tonic-gate 		}
3832*0Sstevel@tonic-gate 		dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n",
3833*0Sstevel@tonic-gate 			prim, tdr.MORE_flag, iosize));
3834*0Sstevel@tonic-gate 		mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR);
3835*0Sstevel@tonic-gate 		if (mp == NULL) {
3836*0Sstevel@tonic-gate 			/*
3837*0Sstevel@tonic-gate 			 * Caught a signal waiting for memory.
3838*0Sstevel@tonic-gate 			 * Let send* return EINTR.
3839*0Sstevel@tonic-gate 			 */
3840*0Sstevel@tonic-gate 			if (first)
3841*0Sstevel@tonic-gate 				return (EINTR);
3842*0Sstevel@tonic-gate 			else
3843*0Sstevel@tonic-gate 				return (0);
3844*0Sstevel@tonic-gate 		}
3845*0Sstevel@tonic-gate 
3846*0Sstevel@tonic-gate 		error = kstrputmsg(SOTOV(so), mp, uiop, iosize,
3847*0Sstevel@tonic-gate 					0, sflag | MSG_BAND, 0);
3848*0Sstevel@tonic-gate 		if (error) {
3849*0Sstevel@tonic-gate 			if (!first && error == EWOULDBLOCK)
3850*0Sstevel@tonic-gate 				return (0);
3851*0Sstevel@tonic-gate 			eprintsoline(so, error);
3852*0Sstevel@tonic-gate 			return (error);
3853*0Sstevel@tonic-gate 		}
3854*0Sstevel@tonic-gate 		first = 0;
3855*0Sstevel@tonic-gate 		if (uiop->uio_resid > 0) {
3856*0Sstevel@tonic-gate 			/*
3857*0Sstevel@tonic-gate 			 * Recheck for fatal errors. Fail write even though
3858*0Sstevel@tonic-gate 			 * some data have been written. This is consistent
3859*0Sstevel@tonic-gate 			 * with strwrite semantics and BSD sockets semantics.
3860*0Sstevel@tonic-gate 			 */
3861*0Sstevel@tonic-gate 			if (so->so_state & SS_CANTSENDMORE) {
3862*0Sstevel@tonic-gate 				tsignal(curthread, SIGPIPE);
3863*0Sstevel@tonic-gate 				eprintsoline(so, error);
3864*0Sstevel@tonic-gate 				return (EPIPE);
3865*0Sstevel@tonic-gate 			}
3866*0Sstevel@tonic-gate 			if (so->so_error != 0) {
3867*0Sstevel@tonic-gate 				mutex_enter(&so->so_lock);
3868*0Sstevel@tonic-gate 				error = sogeterr(so);
3869*0Sstevel@tonic-gate 				mutex_exit(&so->so_lock);
3870*0Sstevel@tonic-gate 				if (error != 0) {
3871*0Sstevel@tonic-gate 					eprintsoline(so, error);
3872*0Sstevel@tonic-gate 					return (error);
3873*0Sstevel@tonic-gate 				}
3874*0Sstevel@tonic-gate 			}
3875*0Sstevel@tonic-gate 		}
3876*0Sstevel@tonic-gate 	} while (uiop->uio_resid > 0);
3877*0Sstevel@tonic-gate 	return (0);
3878*0Sstevel@tonic-gate }
3879*0Sstevel@tonic-gate 
3880*0Sstevel@tonic-gate /*
3881*0Sstevel@tonic-gate  * Check the state for errors and call the appropriate send function.
3882*0Sstevel@tonic-gate  *
3883*0Sstevel@tonic-gate  * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set)
3884*0Sstevel@tonic-gate  * this function issues a setsockopt to toggle SO_DONTROUTE before and
3885*0Sstevel@tonic-gate  * after sending the message.
3886*0Sstevel@tonic-gate  */
3887*0Sstevel@tonic-gate static int
3888*0Sstevel@tonic-gate sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
3889*0Sstevel@tonic-gate {
3890*0Sstevel@tonic-gate 	int		so_state;
3891*0Sstevel@tonic-gate 	int		so_mode;
3892*0Sstevel@tonic-gate 	int		error;
3893*0Sstevel@tonic-gate 	struct sockaddr *name;
3894*0Sstevel@tonic-gate 	t_uscalar_t	namelen;
3895*0Sstevel@tonic-gate 	int		dontroute;
3896*0Sstevel@tonic-gate 	int		flags;
3897*0Sstevel@tonic-gate 
3898*0Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n",
3899*0Sstevel@tonic-gate 		so, msg, msg->msg_flags,
3900*0Sstevel@tonic-gate 		pr_state(so->so_state, so->so_mode), so->so_error));
3901*0Sstevel@tonic-gate 
3902*0Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
3903*0Sstevel@tonic-gate 	so_state = so->so_state;
3904*0Sstevel@tonic-gate 
3905*0Sstevel@tonic-gate 	if (so_state & SS_CANTSENDMORE) {
3906*0Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
3907*0Sstevel@tonic-gate 		tsignal(curthread, SIGPIPE);
3908*0Sstevel@tonic-gate 		return (EPIPE);
3909*0Sstevel@tonic-gate 	}
3910*0Sstevel@tonic-gate 
3911*0Sstevel@tonic-gate 	if (so->so_error != 0) {
3912*0Sstevel@tonic-gate 		error = sogeterr(so);
3913*0Sstevel@tonic-gate 		if (error != 0) {
3914*0Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
3915*0Sstevel@tonic-gate 			return (error);
3916*0Sstevel@tonic-gate 		}
3917*0Sstevel@tonic-gate 	}
3918*0Sstevel@tonic-gate 
3919*0Sstevel@tonic-gate 	name = (struct sockaddr *)msg->msg_name;
3920*0Sstevel@tonic-gate 	namelen = msg->msg_namelen;
3921*0Sstevel@tonic-gate 
3922*0Sstevel@tonic-gate 	so_mode = so->so_mode;
3923*0Sstevel@tonic-gate 
3924*0Sstevel@tonic-gate 	if (name == NULL) {
3925*0Sstevel@tonic-gate 		if (!(so_state & SS_ISCONNECTED)) {
3926*0Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
3927*0Sstevel@tonic-gate 			if (so_mode & SM_CONNREQUIRED)
3928*0Sstevel@tonic-gate 				return (ENOTCONN);
3929*0Sstevel@tonic-gate 			else
3930*0Sstevel@tonic-gate 				return (EDESTADDRREQ);
3931*0Sstevel@tonic-gate 		}
3932*0Sstevel@tonic-gate 		if (so_mode & SM_CONNREQUIRED) {
3933*0Sstevel@tonic-gate 			name = NULL;
3934*0Sstevel@tonic-gate 			namelen = 0;
3935*0Sstevel@tonic-gate 		} else {
3936*0Sstevel@tonic-gate 			/*
3937*0Sstevel@tonic-gate 			 * Note that this code does not prevent so_faddr_sa
3938*0Sstevel@tonic-gate 			 * from changing while it is being used. Thus
3939*0Sstevel@tonic-gate 			 * if an "unconnect"+connect occurs concurrently with
3940*0Sstevel@tonic-gate 			 * this send the datagram might be delivered to a
3941*0Sstevel@tonic-gate 			 * garbaled address.
3942*0Sstevel@tonic-gate 			 */
3943*0Sstevel@tonic-gate 			ASSERT(so->so_faddr_sa);
3944*0Sstevel@tonic-gate 			name = so->so_faddr_sa;
3945*0Sstevel@tonic-gate 			namelen = (t_uscalar_t)so->so_faddr_len;
3946*0Sstevel@tonic-gate 		}
3947*0Sstevel@tonic-gate 	} else {
3948*0Sstevel@tonic-gate 		if (!(so_state & SS_ISCONNECTED) &&
3949*0Sstevel@tonic-gate 		    (so_mode & SM_CONNREQUIRED)) {
3950*0Sstevel@tonic-gate 			/* Required but not connected */
3951*0Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
3952*0Sstevel@tonic-gate 			return (ENOTCONN);
3953*0Sstevel@tonic-gate 		}
3954*0Sstevel@tonic-gate 		/*
3955*0Sstevel@tonic-gate 		 * Ignore the address on connection-oriented sockets.
3956*0Sstevel@tonic-gate 		 * Just like BSD this code does not generate an error for
3957*0Sstevel@tonic-gate 		 * TCP (a CONNREQUIRED socket) when sending to an address
3958*0Sstevel@tonic-gate 		 * passed in with sendto/sendmsg. Instead the data is
3959*0Sstevel@tonic-gate 		 * delivered on the connection as if no address had been
3960*0Sstevel@tonic-gate 		 * supplied.
3961*0Sstevel@tonic-gate 		 */
3962*0Sstevel@tonic-gate 		if ((so_state & SS_ISCONNECTED) &&
3963*0Sstevel@tonic-gate 		    !(so_mode & SM_CONNREQUIRED)) {
3964*0Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
3965*0Sstevel@tonic-gate 			return (EISCONN);
3966*0Sstevel@tonic-gate 		}
3967*0Sstevel@tonic-gate 		if (!(so_state & SS_ISBOUND)) {
3968*0Sstevel@tonic-gate 			so_lock_single(so);	/* Set SOLOCKED */
3969*0Sstevel@tonic-gate 			error = sotpi_bind(so, NULL, 0,
3970*0Sstevel@tonic-gate 			    _SOBIND_UNSPEC|_SOBIND_LOCK_HELD);
3971*0Sstevel@tonic-gate 			so_unlock_single(so, SOLOCKED);
3972*0Sstevel@tonic-gate 			if (error) {
3973*0Sstevel@tonic-gate 				mutex_exit(&so->so_lock);
3974*0Sstevel@tonic-gate 				eprintsoline(so, error);
3975*0Sstevel@tonic-gate 				return (error);
3976*0Sstevel@tonic-gate 			}
3977*0Sstevel@tonic-gate 		}
3978*0Sstevel@tonic-gate 		/*
3979*0Sstevel@tonic-gate 		 * Handle delayed datagram errors. These are only queued
3980*0Sstevel@tonic-gate 		 * when the application sets SO_DGRAM_ERRIND.
3981*0Sstevel@tonic-gate 		 * Return the error if we are sending to the address
3982*0Sstevel@tonic-gate 		 * that was returned in the last T_UDERROR_IND.
3983*0Sstevel@tonic-gate 		 * If sending to some other address discard the delayed
3984*0Sstevel@tonic-gate 		 * error indication.
3985*0Sstevel@tonic-gate 		 */
3986*0Sstevel@tonic-gate 		if (so->so_delayed_error) {
3987*0Sstevel@tonic-gate 			struct T_uderror_ind	*tudi;
3988*0Sstevel@tonic-gate 			void			*addr;
3989*0Sstevel@tonic-gate 			t_uscalar_t		addrlen;
3990*0Sstevel@tonic-gate 			boolean_t		match = B_FALSE;
3991*0Sstevel@tonic-gate 
3992*0Sstevel@tonic-gate 			ASSERT(so->so_eaddr_mp);
3993*0Sstevel@tonic-gate 			error = so->so_delayed_error;
3994*0Sstevel@tonic-gate 			so->so_delayed_error = 0;
3995*0Sstevel@tonic-gate 			tudi = (struct T_uderror_ind *)so->so_eaddr_mp->b_rptr;
3996*0Sstevel@tonic-gate 			addrlen = tudi->DEST_length;
3997*0Sstevel@tonic-gate 			addr = sogetoff(so->so_eaddr_mp,
3998*0Sstevel@tonic-gate 					tudi->DEST_offset,
3999*0Sstevel@tonic-gate 					addrlen, 1);
4000*0Sstevel@tonic-gate 			ASSERT(addr);	/* Checked by strsock_proto */
4001*0Sstevel@tonic-gate 			switch (so->so_family) {
4002*0Sstevel@tonic-gate 			case AF_INET: {
4003*0Sstevel@tonic-gate 				/* Compare just IP address and port */
4004*0Sstevel@tonic-gate 				sin_t *sin1 = (sin_t *)name;
4005*0Sstevel@tonic-gate 				sin_t *sin2 = (sin_t *)addr;
4006*0Sstevel@tonic-gate 
4007*0Sstevel@tonic-gate 				if (addrlen == sizeof (sin_t) &&
4008*0Sstevel@tonic-gate 				    namelen == addrlen &&
4009*0Sstevel@tonic-gate 				    sin1->sin_port == sin2->sin_port &&
4010*0Sstevel@tonic-gate 				    sin1->sin_addr.s_addr ==
4011*0Sstevel@tonic-gate 				    sin2->sin_addr.s_addr)
4012*0Sstevel@tonic-gate 					match = B_TRUE;
4013*0Sstevel@tonic-gate 				break;
4014*0Sstevel@tonic-gate 			}
4015*0Sstevel@tonic-gate 			case AF_INET6: {
4016*0Sstevel@tonic-gate 				/* Compare just IP address and port. Not flow */
4017*0Sstevel@tonic-gate 				sin6_t *sin1 = (sin6_t *)name;
4018*0Sstevel@tonic-gate 				sin6_t *sin2 = (sin6_t *)addr;
4019*0Sstevel@tonic-gate 
4020*0Sstevel@tonic-gate 				if (addrlen == sizeof (sin6_t) &&
4021*0Sstevel@tonic-gate 				    namelen == addrlen &&
4022*0Sstevel@tonic-gate 				    sin1->sin6_port == sin2->sin6_port &&
4023*0Sstevel@tonic-gate 				    IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
4024*0Sstevel@tonic-gate 					&sin2->sin6_addr))
4025*0Sstevel@tonic-gate 					match = B_TRUE;
4026*0Sstevel@tonic-gate 				break;
4027*0Sstevel@tonic-gate 			}
4028*0Sstevel@tonic-gate 			case AF_UNIX:
4029*0Sstevel@tonic-gate 			default:
4030*0Sstevel@tonic-gate 				if (namelen == addrlen &&
4031*0Sstevel@tonic-gate 				    bcmp(name, addr, namelen) == 0)
4032*0Sstevel@tonic-gate 					match = B_TRUE;
4033*0Sstevel@tonic-gate 			}
4034*0Sstevel@tonic-gate 			if (match) {
4035*0Sstevel@tonic-gate 				freemsg(so->so_eaddr_mp);
4036*0Sstevel@tonic-gate 				so->so_eaddr_mp = NULL;
4037*0Sstevel@tonic-gate 				mutex_exit(&so->so_lock);
4038*0Sstevel@tonic-gate #ifdef DEBUG
4039*0Sstevel@tonic-gate 				dprintso(so, 0,
4040*0Sstevel@tonic-gate 					("sockfs delayed error %d for %s\n",
4041*0Sstevel@tonic-gate 					error,
4042*0Sstevel@tonic-gate 					pr_addr(so->so_family, name, namelen)));
4043*0Sstevel@tonic-gate #endif /* DEBUG */
4044*0Sstevel@tonic-gate 				return (error);
4045*0Sstevel@tonic-gate 			}
4046*0Sstevel@tonic-gate 			freemsg(so->so_eaddr_mp);
4047*0Sstevel@tonic-gate 			so->so_eaddr_mp = NULL;
4048*0Sstevel@tonic-gate 		}
4049*0Sstevel@tonic-gate 	}
4050*0Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
4051*0Sstevel@tonic-gate 
4052*0Sstevel@tonic-gate 	flags = msg->msg_flags;
4053*0Sstevel@tonic-gate 	dontroute = 0;
4054*0Sstevel@tonic-gate 	if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) {
4055*0Sstevel@tonic-gate 		uint32_t	val;
4056*0Sstevel@tonic-gate 
4057*0Sstevel@tonic-gate 		val = 1;
4058*0Sstevel@tonic-gate 		error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE,
4059*0Sstevel@tonic-gate 					&val, (t_uscalar_t)sizeof (val));
4060*0Sstevel@tonic-gate 		if (error)
4061*0Sstevel@tonic-gate 			return (error);
4062*0Sstevel@tonic-gate 		dontroute = 1;
4063*0Sstevel@tonic-gate 	}
4064*0Sstevel@tonic-gate 
4065*0Sstevel@tonic-gate 	if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) {
4066*0Sstevel@tonic-gate 		error = EOPNOTSUPP;
4067*0Sstevel@tonic-gate 		goto done;
4068*0Sstevel@tonic-gate 	}
4069*0Sstevel@tonic-gate 	if (msg->msg_controllen != 0) {
4070*0Sstevel@tonic-gate 		if (!(so_mode & SM_CONNREQUIRED)) {
4071*0Sstevel@tonic-gate 			error = sosend_dgramcmsg(so, name, namelen, uiop,
4072*0Sstevel@tonic-gate 				msg->msg_control, msg->msg_controllen,
4073*0Sstevel@tonic-gate 				flags);
4074*0Sstevel@tonic-gate 		} else {
4075*0Sstevel@tonic-gate 			if (flags & MSG_OOB) {
4076*0Sstevel@tonic-gate 				/* Can't generate T_EXDATA_REQ with options */
4077*0Sstevel@tonic-gate 				error = EOPNOTSUPP;
4078*0Sstevel@tonic-gate 				goto done;
4079*0Sstevel@tonic-gate 			}
4080*0Sstevel@tonic-gate 			error = sosend_svccmsg(so, uiop,
4081*0Sstevel@tonic-gate 				!(flags & MSG_EOR),
4082*0Sstevel@tonic-gate 				msg->msg_control, msg->msg_controllen,
4083*0Sstevel@tonic-gate 				flags);
4084*0Sstevel@tonic-gate 		}
4085*0Sstevel@tonic-gate 		goto done;
4086*0Sstevel@tonic-gate 	}
4087*0Sstevel@tonic-gate 
4088*0Sstevel@tonic-gate 	if (!(so_mode & SM_CONNREQUIRED)) {
4089*0Sstevel@tonic-gate 		/*
4090*0Sstevel@tonic-gate 		 * If there is no SO_DONTROUTE to turn off return immediately
4091*0Sstevel@tonic-gate 		 * from sosend_dgram. This can allow tail-call optimizations.
4092*0Sstevel@tonic-gate 		 */
4093*0Sstevel@tonic-gate 		if (!dontroute) {
4094*0Sstevel@tonic-gate 			return (sosend_dgram(so, name, namelen, uiop, flags));
4095*0Sstevel@tonic-gate 		}
4096*0Sstevel@tonic-gate 		error = sosend_dgram(so, name, namelen, uiop, flags);
4097*0Sstevel@tonic-gate 	} else {
4098*0Sstevel@tonic-gate 		t_scalar_t prim;
4099*0Sstevel@tonic-gate 		int sflag;
4100*0Sstevel@tonic-gate 
4101*0Sstevel@tonic-gate 		/* Ignore msg_name in the connected state */
4102*0Sstevel@tonic-gate 		if (flags & MSG_OOB) {
4103*0Sstevel@tonic-gate 			prim = T_EXDATA_REQ;
4104*0Sstevel@tonic-gate 			/*
4105*0Sstevel@tonic-gate 			 * Send down T_EXDATA_REQ even if there is flow
4106*0Sstevel@tonic-gate 			 * control for data.
4107*0Sstevel@tonic-gate 			 */
4108*0Sstevel@tonic-gate 			sflag = MSG_IGNFLOW;
4109*0Sstevel@tonic-gate 		} else {
4110*0Sstevel@tonic-gate 			if (so_mode & SM_BYTESTREAM) {
4111*0Sstevel@tonic-gate 				/* Byte stream transport - use write */
4112*0Sstevel@tonic-gate 
4113*0Sstevel@tonic-gate 				dprintso(so, 1, ("sotpi_sendmsg: write\n"));
4114*0Sstevel@tonic-gate 				/*
4115*0Sstevel@tonic-gate 				 * If there is no SO_DONTROUTE to turn off
4116*0Sstevel@tonic-gate 				 * return immediately from strwrite. This can
4117*0Sstevel@tonic-gate 				 * allow tail-call optimizations.
4118*0Sstevel@tonic-gate 				 */
4119*0Sstevel@tonic-gate 				if (!dontroute)
4120*0Sstevel@tonic-gate 					return (strwrite(SOTOV(so), uiop,
4121*0Sstevel@tonic-gate 							CRED()));
4122*0Sstevel@tonic-gate 				error = strwrite(SOTOV(so), uiop, CRED());
4123*0Sstevel@tonic-gate 				goto done;
4124*0Sstevel@tonic-gate 			}
4125*0Sstevel@tonic-gate 			prim = T_DATA_REQ;
4126*0Sstevel@tonic-gate 			sflag = 0;
4127*0Sstevel@tonic-gate 		}
4128*0Sstevel@tonic-gate 		/*
4129*0Sstevel@tonic-gate 		 * If there is no SO_DONTROUTE to turn off return immediately
4130*0Sstevel@tonic-gate 		 * from sosend_svc. This can allow tail-call optimizations.
4131*0Sstevel@tonic-gate 		 */
4132*0Sstevel@tonic-gate 		if (!dontroute)
4133*0Sstevel@tonic-gate 			return (sosend_svc(so, uiop, prim,
4134*0Sstevel@tonic-gate 				!(flags & MSG_EOR), sflag));
4135*0Sstevel@tonic-gate 		error = sosend_svc(so, uiop, prim,
4136*0Sstevel@tonic-gate 				!(flags & MSG_EOR), sflag);
4137*0Sstevel@tonic-gate 	}
4138*0Sstevel@tonic-gate 	ASSERT(dontroute);
4139*0Sstevel@tonic-gate done:
4140*0Sstevel@tonic-gate 	if (dontroute) {
4141*0Sstevel@tonic-gate 		uint32_t	val;
4142*0Sstevel@tonic-gate 
4143*0Sstevel@tonic-gate 		val = 0;
4144*0Sstevel@tonic-gate 		(void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE,
4145*0Sstevel@tonic-gate 				&val, (t_uscalar_t)sizeof (val));
4146*0Sstevel@tonic-gate 	}
4147*0Sstevel@tonic-gate 	return (error);
4148*0Sstevel@tonic-gate }
4149*0Sstevel@tonic-gate 
4150*0Sstevel@tonic-gate /*
4151*0Sstevel@tonic-gate  * Update so_faddr by asking the transport (unless AF_UNIX).
4152*0Sstevel@tonic-gate  */
4153*0Sstevel@tonic-gate int
4154*0Sstevel@tonic-gate sotpi_getpeername(struct sonode *so)
4155*0Sstevel@tonic-gate {
4156*0Sstevel@tonic-gate 	struct strbuf	strbuf;
4157*0Sstevel@tonic-gate 	int		error = 0, res;
4158*0Sstevel@tonic-gate 	void		*addr;
4159*0Sstevel@tonic-gate 	t_uscalar_t	addrlen;
4160*0Sstevel@tonic-gate 	k_sigset_t	smask;
4161*0Sstevel@tonic-gate 
4162*0Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_getpeername(%p) %s\n",
4163*0Sstevel@tonic-gate 		so, pr_state(so->so_state, so->so_mode)));
4164*0Sstevel@tonic-gate 
4165*0Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
4166*0Sstevel@tonic-gate 	so_lock_single(so);	/* Set SOLOCKED */
4167*0Sstevel@tonic-gate 	if (!(so->so_state & SS_ISCONNECTED)) {
4168*0Sstevel@tonic-gate 		error = ENOTCONN;
4169*0Sstevel@tonic-gate 		goto done;
4170*0Sstevel@tonic-gate 	}
4171*0Sstevel@tonic-gate 	/* Added this check for X/Open */
4172*0Sstevel@tonic-gate 	if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
4173*0Sstevel@tonic-gate 		error = EINVAL;
4174*0Sstevel@tonic-gate 		if (xnet_check_print) {
4175*0Sstevel@tonic-gate 			printf("sockfs: X/Open getpeername check => EINVAL\n");
4176*0Sstevel@tonic-gate 		}
4177*0Sstevel@tonic-gate 		goto done;
4178*0Sstevel@tonic-gate 	}
4179*0Sstevel@tonic-gate #ifdef DEBUG
4180*0Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_getpeername (local): %s\n",
4181*0Sstevel@tonic-gate 		pr_addr(so->so_family, so->so_faddr_sa,
4182*0Sstevel@tonic-gate 			(t_uscalar_t)so->so_faddr_len)));
4183*0Sstevel@tonic-gate #endif /* DEBUG */
4184*0Sstevel@tonic-gate 
4185*0Sstevel@tonic-gate 	if (so->so_family == AF_UNIX || so->so_family == AF_NCA) {
4186*0Sstevel@tonic-gate 		/* Transport has different name space - return local info */
4187*0Sstevel@tonic-gate 		error = 0;
4188*0Sstevel@tonic-gate 		goto done;
4189*0Sstevel@tonic-gate 	}
4190*0Sstevel@tonic-gate 
4191*0Sstevel@tonic-gate 	ASSERT(so->so_faddr_sa);
4192*0Sstevel@tonic-gate 	/* Allocate local buffer to use with ioctl */
4193*0Sstevel@tonic-gate 	addrlen = (t_uscalar_t)so->so_faddr_maxlen;
4194*0Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
4195*0Sstevel@tonic-gate 	addr = kmem_alloc(addrlen, KM_SLEEP);
4196*0Sstevel@tonic-gate 
4197*0Sstevel@tonic-gate 	/*
4198*0Sstevel@tonic-gate 	 * Issue TI_GETPEERNAME with signals masked.
4199*0Sstevel@tonic-gate 	 * Put the result in so_faddr_sa so that getpeername works after
4200*0Sstevel@tonic-gate 	 * a shutdown(output).
4201*0Sstevel@tonic-gate 	 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted
4202*0Sstevel@tonic-gate 	 * back to the socket.
4203*0Sstevel@tonic-gate 	 */
4204*0Sstevel@tonic-gate 	strbuf.buf = addr;
4205*0Sstevel@tonic-gate 	strbuf.maxlen = addrlen;
4206*0Sstevel@tonic-gate 	strbuf.len = 0;
4207*0Sstevel@tonic-gate 
4208*0Sstevel@tonic-gate 	sigintr(&smask, 0);
4209*0Sstevel@tonic-gate 	res = 0;
4210*0Sstevel@tonic-gate 	ASSERT(CRED());
4211*0Sstevel@tonic-gate 	error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf,
4212*0Sstevel@tonic-gate 			0, K_TO_K, CRED(), &res);
4213*0Sstevel@tonic-gate 	sigunintr(&smask);
4214*0Sstevel@tonic-gate 
4215*0Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
4216*0Sstevel@tonic-gate 	/*
4217*0Sstevel@tonic-gate 	 * If there is an error record the error in so_error put don't fail
4218*0Sstevel@tonic-gate 	 * the getpeername. Instead fallback on the recorded
4219*0Sstevel@tonic-gate 	 * so->so_faddr_sa.
4220*0Sstevel@tonic-gate 	 */
4221*0Sstevel@tonic-gate 	if (error) {
4222*0Sstevel@tonic-gate 		/*
4223*0Sstevel@tonic-gate 		 * Various stream head errors can be returned to the ioctl.
4224*0Sstevel@tonic-gate 		 * However, it is impossible to determine which ones of
4225*0Sstevel@tonic-gate 		 * these are really socket level errors that were incorrectly
4226*0Sstevel@tonic-gate 		 * consumed by the ioctl. Thus this code silently ignores the
4227*0Sstevel@tonic-gate 		 * error - to code explicitly does not reinstate the error
4228*0Sstevel@tonic-gate 		 * using soseterror().
4229*0Sstevel@tonic-gate 		 * Experiments have shows that at least this set of
4230*0Sstevel@tonic-gate 		 * errors are reported and should not be reinstated on the
4231*0Sstevel@tonic-gate 		 * socket:
4232*0Sstevel@tonic-gate 		 *	EINVAL	E.g. if an I_LINK was in effect when
4233*0Sstevel@tonic-gate 		 *		getpeername was called.
4234*0Sstevel@tonic-gate 		 *	EPIPE	The ioctl error semantics prefer the write
4235*0Sstevel@tonic-gate 		 *		side error over the read side error.
4236*0Sstevel@tonic-gate 		 *	ENOTCONN The transport just got disconnected but
4237*0Sstevel@tonic-gate 		 *		sockfs had not yet seen the T_DISCON_IND
4238*0Sstevel@tonic-gate 		 *		when issuing the ioctl.
4239*0Sstevel@tonic-gate 		 */
4240*0Sstevel@tonic-gate 		error = 0;
4241*0Sstevel@tonic-gate 	} else if (res == 0 && strbuf.len > 0 &&
4242*0Sstevel@tonic-gate 	    (so->so_state & SS_ISCONNECTED)) {
4243*0Sstevel@tonic-gate 		ASSERT(strbuf.len <= (int)so->so_faddr_maxlen);
4244*0Sstevel@tonic-gate 		so->so_faddr_len = (socklen_t)strbuf.len;
4245*0Sstevel@tonic-gate 		bcopy(addr, so->so_faddr_sa, so->so_faddr_len);
4246*0Sstevel@tonic-gate 		so->so_state |= SS_FADDR_VALID;
4247*0Sstevel@tonic-gate 	}
4248*0Sstevel@tonic-gate 	kmem_free(addr, addrlen);
4249*0Sstevel@tonic-gate #ifdef DEBUG
4250*0Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_getpeername (tp): %s\n",
4251*0Sstevel@tonic-gate 			pr_addr(so->so_family, so->so_faddr_sa,
4252*0Sstevel@tonic-gate 				(t_uscalar_t)so->so_faddr_len)));
4253*0Sstevel@tonic-gate #endif /* DEBUG */
4254*0Sstevel@tonic-gate done:
4255*0Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
4256*0Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
4257*0Sstevel@tonic-gate 	return (error);
4258*0Sstevel@tonic-gate }
4259*0Sstevel@tonic-gate 
4260*0Sstevel@tonic-gate /*
4261*0Sstevel@tonic-gate  * Update so_laddr by asking the transport (unless AF_UNIX).
4262*0Sstevel@tonic-gate  */
4263*0Sstevel@tonic-gate int
4264*0Sstevel@tonic-gate sotpi_getsockname(struct sonode *so)
4265*0Sstevel@tonic-gate {
4266*0Sstevel@tonic-gate 	struct strbuf	strbuf;
4267*0Sstevel@tonic-gate 	int		error = 0, res;
4268*0Sstevel@tonic-gate 	void		*addr;
4269*0Sstevel@tonic-gate 	t_uscalar_t	addrlen;
4270*0Sstevel@tonic-gate 	k_sigset_t	smask;
4271*0Sstevel@tonic-gate 
4272*0Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_getsockname(%p) %s\n",
4273*0Sstevel@tonic-gate 		so, pr_state(so->so_state, so->so_mode)));
4274*0Sstevel@tonic-gate 
4275*0Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
4276*0Sstevel@tonic-gate 	so_lock_single(so);	/* Set SOLOCKED */
4277*0Sstevel@tonic-gate 	if (!(so->so_state & SS_ISBOUND) && so->so_family != AF_UNIX) {
4278*0Sstevel@tonic-gate 		/* Return an all zero address except for the family */
4279*0Sstevel@tonic-gate 		if (so->so_family == AF_INET)
4280*0Sstevel@tonic-gate 			so->so_laddr_len = (socklen_t)sizeof (sin_t);
4281*0Sstevel@tonic-gate 		else if (so->so_family == AF_INET6)
4282*0Sstevel@tonic-gate 			so->so_laddr_len = (socklen_t)sizeof (sin6_t);
4283*0Sstevel@tonic-gate 		ASSERT(so->so_laddr_len <= so->so_laddr_maxlen);
4284*0Sstevel@tonic-gate 		bzero(so->so_laddr_sa, so->so_laddr_len);
4285*0Sstevel@tonic-gate 		/*
4286*0Sstevel@tonic-gate 		 * Can not assume there is a sa_family for all
4287*0Sstevel@tonic-gate 		 * protocol families.
4288*0Sstevel@tonic-gate 		 */
4289*0Sstevel@tonic-gate 		if (so->so_family == AF_INET || so->so_family == AF_INET6)
4290*0Sstevel@tonic-gate 			so->so_laddr_sa->sa_family = so->so_family;
4291*0Sstevel@tonic-gate 	}
4292*0Sstevel@tonic-gate #ifdef DEBUG
4293*0Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_getsockname (local): %s\n",
4294*0Sstevel@tonic-gate 		pr_addr(so->so_family, so->so_laddr_sa,
4295*0Sstevel@tonic-gate 			(t_uscalar_t)so->so_laddr_len)));
4296*0Sstevel@tonic-gate #endif /* DEBUG */
4297*0Sstevel@tonic-gate 	if (so->so_family == AF_UNIX) {
4298*0Sstevel@tonic-gate 		/* Transport has different name space - return local info */
4299*0Sstevel@tonic-gate 		error = 0;
4300*0Sstevel@tonic-gate 		goto done;
4301*0Sstevel@tonic-gate 	}
4302*0Sstevel@tonic-gate 	/* Allocate local buffer to use with ioctl */
4303*0Sstevel@tonic-gate 	addrlen = (t_uscalar_t)so->so_laddr_maxlen;
4304*0Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
4305*0Sstevel@tonic-gate 	addr = kmem_alloc(addrlen, KM_SLEEP);
4306*0Sstevel@tonic-gate 
4307*0Sstevel@tonic-gate 	/*
4308*0Sstevel@tonic-gate 	 * Issue TI_GETMYNAME with signals masked.
4309*0Sstevel@tonic-gate 	 * Put the result in so_laddr_sa so that getsockname works after
4310*0Sstevel@tonic-gate 	 * a shutdown(output).
4311*0Sstevel@tonic-gate 	 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted
4312*0Sstevel@tonic-gate 	 * back to the socket.
4313*0Sstevel@tonic-gate 	 */
4314*0Sstevel@tonic-gate 	strbuf.buf = addr;
4315*0Sstevel@tonic-gate 	strbuf.maxlen = addrlen;
4316*0Sstevel@tonic-gate 	strbuf.len = 0;
4317*0Sstevel@tonic-gate 
4318*0Sstevel@tonic-gate 	sigintr(&smask, 0);
4319*0Sstevel@tonic-gate 	res = 0;
4320*0Sstevel@tonic-gate 	ASSERT(CRED());
4321*0Sstevel@tonic-gate 	error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf,
4322*0Sstevel@tonic-gate 			0, K_TO_K, CRED(), &res);
4323*0Sstevel@tonic-gate 	sigunintr(&smask);
4324*0Sstevel@tonic-gate 
4325*0Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
4326*0Sstevel@tonic-gate 	/*
4327*0Sstevel@tonic-gate 	 * If there is an error record the error in so_error put don't fail
4328*0Sstevel@tonic-gate 	 * the getsockname. Instead fallback on the recorded
4329*0Sstevel@tonic-gate 	 * so->so_laddr_sa.
4330*0Sstevel@tonic-gate 	 */
4331*0Sstevel@tonic-gate 	if (error) {
4332*0Sstevel@tonic-gate 		/*
4333*0Sstevel@tonic-gate 		 * Various stream head errors can be returned to the ioctl.
4334*0Sstevel@tonic-gate 		 * However, it is impossible to determine which ones of
4335*0Sstevel@tonic-gate 		 * these are really socket level errors that were incorrectly
4336*0Sstevel@tonic-gate 		 * consumed by the ioctl. Thus this code silently ignores the
4337*0Sstevel@tonic-gate 		 * error - to code explicitly does not reinstate the error
4338*0Sstevel@tonic-gate 		 * using soseterror().
4339*0Sstevel@tonic-gate 		 * Experiments have shows that at least this set of
4340*0Sstevel@tonic-gate 		 * errors are reported and should not be reinstated on the
4341*0Sstevel@tonic-gate 		 * socket:
4342*0Sstevel@tonic-gate 		 *	EINVAL	E.g. if an I_LINK was in effect when
4343*0Sstevel@tonic-gate 		 *		getsockname was called.
4344*0Sstevel@tonic-gate 		 *	EPIPE	The ioctl error semantics prefer the write
4345*0Sstevel@tonic-gate 		 *		side error over the read side error.
4346*0Sstevel@tonic-gate 		 */
4347*0Sstevel@tonic-gate 		error = 0;
4348*0Sstevel@tonic-gate 	} else if (res == 0 && strbuf.len > 0 &&
4349*0Sstevel@tonic-gate 	    (so->so_state & SS_ISBOUND)) {
4350*0Sstevel@tonic-gate 		ASSERT(strbuf.len <= (int)so->so_laddr_maxlen);
4351*0Sstevel@tonic-gate 		so->so_laddr_len = (socklen_t)strbuf.len;
4352*0Sstevel@tonic-gate 		bcopy(addr, so->so_laddr_sa, so->so_laddr_len);
4353*0Sstevel@tonic-gate 		so->so_state |= SS_LADDR_VALID;
4354*0Sstevel@tonic-gate 	}
4355*0Sstevel@tonic-gate 	kmem_free(addr, addrlen);
4356*0Sstevel@tonic-gate #ifdef DEBUG
4357*0Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_getsockname (tp): %s\n",
4358*0Sstevel@tonic-gate 			pr_addr(so->so_family, so->so_laddr_sa,
4359*0Sstevel@tonic-gate 				(t_uscalar_t)so->so_laddr_len)));
4360*0Sstevel@tonic-gate #endif /* DEBUG */
4361*0Sstevel@tonic-gate done:
4362*0Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
4363*0Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
4364*0Sstevel@tonic-gate 	return (error);
4365*0Sstevel@tonic-gate }
4366*0Sstevel@tonic-gate 
4367*0Sstevel@tonic-gate /*
4368*0Sstevel@tonic-gate  * Get socket options. For SOL_SOCKET options some options are handled
4369*0Sstevel@tonic-gate  * by the sockfs while others use the value recorded in the sonode as a
4370*0Sstevel@tonic-gate  * fallback should the T_SVR4_OPTMGMT_REQ fail.
4371*0Sstevel@tonic-gate  *
4372*0Sstevel@tonic-gate  * On the return most *optlenp bytes are copied to optval.
4373*0Sstevel@tonic-gate  */
4374*0Sstevel@tonic-gate int
4375*0Sstevel@tonic-gate sotpi_getsockopt(struct sonode *so, int level, int option_name,
4376*0Sstevel@tonic-gate 		void *optval, socklen_t *optlenp, int flags)
4377*0Sstevel@tonic-gate {
4378*0Sstevel@tonic-gate 	struct T_optmgmt_req	optmgmt_req;
4379*0Sstevel@tonic-gate 	struct T_optmgmt_ack	*optmgmt_ack;
4380*0Sstevel@tonic-gate 	struct opthdr		oh;
4381*0Sstevel@tonic-gate 	struct opthdr		*opt_res;
4382*0Sstevel@tonic-gate 	mblk_t			*mp = NULL;
4383*0Sstevel@tonic-gate 	int			error = 0;
4384*0Sstevel@tonic-gate 	void			*option = NULL;	/* Set if fallback value */
4385*0Sstevel@tonic-gate 	t_uscalar_t		maxlen = *optlenp;
4386*0Sstevel@tonic-gate 	t_uscalar_t		len;
4387*0Sstevel@tonic-gate 	uint32_t		value;
4388*0Sstevel@tonic-gate 
4389*0Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n",
4390*0Sstevel@tonic-gate 			so, level, option_name, optval, optlenp,
4391*0Sstevel@tonic-gate 			pr_state(so->so_state, so->so_mode)));
4392*0Sstevel@tonic-gate 
4393*0Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
4394*0Sstevel@tonic-gate 	so_lock_single(so);	/* Set SOLOCKED */
4395*0Sstevel@tonic-gate 
4396*0Sstevel@tonic-gate 	/*
4397*0Sstevel@tonic-gate 	 * Check for SOL_SOCKET options.
4398*0Sstevel@tonic-gate 	 * Certain SOL_SOCKET options are returned directly whereas
4399*0Sstevel@tonic-gate 	 * others only provide a default (fallback) value should
4400*0Sstevel@tonic-gate 	 * the T_SVR4_OPTMGMT_REQ fail.
4401*0Sstevel@tonic-gate 	 */
4402*0Sstevel@tonic-gate 	if (level == SOL_SOCKET) {
4403*0Sstevel@tonic-gate 		/* Check parameters */
4404*0Sstevel@tonic-gate 		switch (option_name) {
4405*0Sstevel@tonic-gate 		case SO_TYPE:
4406*0Sstevel@tonic-gate 		case SO_ERROR:
4407*0Sstevel@tonic-gate 		case SO_DEBUG:
4408*0Sstevel@tonic-gate 		case SO_ACCEPTCONN:
4409*0Sstevel@tonic-gate 		case SO_REUSEADDR:
4410*0Sstevel@tonic-gate 		case SO_KEEPALIVE:
4411*0Sstevel@tonic-gate 		case SO_DONTROUTE:
4412*0Sstevel@tonic-gate 		case SO_BROADCAST:
4413*0Sstevel@tonic-gate 		case SO_USELOOPBACK:
4414*0Sstevel@tonic-gate 		case SO_OOBINLINE:
4415*0Sstevel@tonic-gate 		case SO_SNDBUF:
4416*0Sstevel@tonic-gate 		case SO_RCVBUF:
4417*0Sstevel@tonic-gate #ifdef notyet
4418*0Sstevel@tonic-gate 		case SO_SNDLOWAT:
4419*0Sstevel@tonic-gate 		case SO_RCVLOWAT:
4420*0Sstevel@tonic-gate 		case SO_SNDTIMEO:
4421*0Sstevel@tonic-gate 		case SO_RCVTIMEO:
4422*0Sstevel@tonic-gate #endif /* notyet */
4423*0Sstevel@tonic-gate 		case SO_DGRAM_ERRIND:
4424*0Sstevel@tonic-gate 			if (maxlen < (t_uscalar_t)sizeof (int32_t)) {
4425*0Sstevel@tonic-gate 				error = EINVAL;
4426*0Sstevel@tonic-gate 				eprintsoline(so, error);
4427*0Sstevel@tonic-gate 				goto done2;
4428*0Sstevel@tonic-gate 			}
4429*0Sstevel@tonic-gate 			break;
4430*0Sstevel@tonic-gate 		case SO_LINGER:
4431*0Sstevel@tonic-gate 			if (maxlen < (t_uscalar_t)sizeof (struct linger)) {
4432*0Sstevel@tonic-gate 				error = EINVAL;
4433*0Sstevel@tonic-gate 				eprintsoline(so, error);
4434*0Sstevel@tonic-gate 				goto done2;
4435*0Sstevel@tonic-gate 			}
4436*0Sstevel@tonic-gate 			break;
4437*0Sstevel@tonic-gate 		}
4438*0Sstevel@tonic-gate 
4439*0Sstevel@tonic-gate 		len = (t_uscalar_t)sizeof (uint32_t);	/* Default */
4440*0Sstevel@tonic-gate 
4441*0Sstevel@tonic-gate 		switch (option_name) {
4442*0Sstevel@tonic-gate 		case SO_TYPE:
4443*0Sstevel@tonic-gate 			value = so->so_type;
4444*0Sstevel@tonic-gate 			option = &value;
4445*0Sstevel@tonic-gate 			goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */
4446*0Sstevel@tonic-gate 
4447*0Sstevel@tonic-gate 		case SO_ERROR:
4448*0Sstevel@tonic-gate 			value = sogeterr(so);
4449*0Sstevel@tonic-gate 			option = &value;
4450*0Sstevel@tonic-gate 			goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */
4451*0Sstevel@tonic-gate 
4452*0Sstevel@tonic-gate 		case SO_ACCEPTCONN:
4453*0Sstevel@tonic-gate 			if (so->so_state & SS_ACCEPTCONN)
4454*0Sstevel@tonic-gate 				value = SO_ACCEPTCONN;
4455*0Sstevel@tonic-gate 			else
4456*0Sstevel@tonic-gate 				value = 0;
4457*0Sstevel@tonic-gate #ifdef DEBUG
4458*0Sstevel@tonic-gate 			if (value) {
4459*0Sstevel@tonic-gate 				dprintso(so, 1,
4460*0Sstevel@tonic-gate 				    ("sotpi_getsockopt: 0x%x is set\n",
4461*0Sstevel@tonic-gate 				    option_name));
4462*0Sstevel@tonic-gate 			} else {
4463*0Sstevel@tonic-gate 				dprintso(so, 1,
4464*0Sstevel@tonic-gate 				    ("sotpi_getsockopt: 0x%x not set\n",
4465*0Sstevel@tonic-gate 				    option_name));
4466*0Sstevel@tonic-gate 			}
4467*0Sstevel@tonic-gate #endif /* DEBUG */
4468*0Sstevel@tonic-gate 			option = &value;
4469*0Sstevel@tonic-gate 			goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */
4470*0Sstevel@tonic-gate 
4471*0Sstevel@tonic-gate 		case SO_DEBUG:
4472*0Sstevel@tonic-gate 		case SO_REUSEADDR:
4473*0Sstevel@tonic-gate 		case SO_KEEPALIVE:
4474*0Sstevel@tonic-gate 		case SO_DONTROUTE:
4475*0Sstevel@tonic-gate 		case SO_BROADCAST:
4476*0Sstevel@tonic-gate 		case SO_USELOOPBACK:
4477*0Sstevel@tonic-gate 		case SO_OOBINLINE:
4478*0Sstevel@tonic-gate 		case SO_DGRAM_ERRIND:
4479*0Sstevel@tonic-gate 			value = (so->so_options & option_name);
4480*0Sstevel@tonic-gate #ifdef DEBUG
4481*0Sstevel@tonic-gate 			if (value) {
4482*0Sstevel@tonic-gate 				dprintso(so, 1,
4483*0Sstevel@tonic-gate 				    ("sotpi_getsockopt: 0x%x is set\n",
4484*0Sstevel@tonic-gate 				    option_name));
4485*0Sstevel@tonic-gate 			} else {
4486*0Sstevel@tonic-gate 				dprintso(so, 1,
4487*0Sstevel@tonic-gate 				    ("sotpi_getsockopt: 0x%x not set\n",
4488*0Sstevel@tonic-gate 				    option_name));
4489*0Sstevel@tonic-gate 			}
4490*0Sstevel@tonic-gate #endif /* DEBUG */
4491*0Sstevel@tonic-gate 			option = &value;
4492*0Sstevel@tonic-gate 			goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */
4493*0Sstevel@tonic-gate 
4494*0Sstevel@tonic-gate 		/*
4495*0Sstevel@tonic-gate 		 * The following options are only returned by sockfs when the
4496*0Sstevel@tonic-gate 		 * T_SVR4_OPTMGMT_REQ fails.
4497*0Sstevel@tonic-gate 		 */
4498*0Sstevel@tonic-gate 		case SO_LINGER:
4499*0Sstevel@tonic-gate 			option = &so->so_linger;
4500*0Sstevel@tonic-gate 			len = (t_uscalar_t)sizeof (struct linger);
4501*0Sstevel@tonic-gate 			break;
4502*0Sstevel@tonic-gate 		case SO_SNDBUF: {
4503*0Sstevel@tonic-gate 			ssize_t lvalue;
4504*0Sstevel@tonic-gate 
4505*0Sstevel@tonic-gate 			/*
4506*0Sstevel@tonic-gate 			 * If the option has not been set then get a default
4507*0Sstevel@tonic-gate 			 * value from the read queue. This value is
4508*0Sstevel@tonic-gate 			 * returned if the transport fails
4509*0Sstevel@tonic-gate 			 * the T_SVR4_OPTMGMT_REQ.
4510*0Sstevel@tonic-gate 			 */
4511*0Sstevel@tonic-gate 			lvalue = so->so_sndbuf;
4512*0Sstevel@tonic-gate 			if (lvalue == 0) {
4513*0Sstevel@tonic-gate 				mutex_exit(&so->so_lock);
4514*0Sstevel@tonic-gate 				(void) strqget(strvp2wq(SOTOV(so))->q_next,
4515*0Sstevel@tonic-gate 						QHIWAT, 0, &lvalue);
4516*0Sstevel@tonic-gate 				mutex_enter(&so->so_lock);
4517*0Sstevel@tonic-gate 				dprintso(so, 1,
4518*0Sstevel@tonic-gate 				    ("got SO_SNDBUF %ld from q\n", lvalue));
4519*0Sstevel@tonic-gate 			}
4520*0Sstevel@tonic-gate 			value = (int)lvalue;
4521*0Sstevel@tonic-gate 			option = &value;
4522*0Sstevel@tonic-gate 			len = (t_uscalar_t)sizeof (so->so_sndbuf);
4523*0Sstevel@tonic-gate 			break;
4524*0Sstevel@tonic-gate 		}
4525*0Sstevel@tonic-gate 		case SO_RCVBUF: {
4526*0Sstevel@tonic-gate 			ssize_t lvalue;
4527*0Sstevel@tonic-gate 
4528*0Sstevel@tonic-gate 			/*
4529*0Sstevel@tonic-gate 			 * If the option has not been set then get a default
4530*0Sstevel@tonic-gate 			 * value from the read queue. This value is
4531*0Sstevel@tonic-gate 			 * returned if the transport fails
4532*0Sstevel@tonic-gate 			 * the T_SVR4_OPTMGMT_REQ.
4533*0Sstevel@tonic-gate 			 *
4534*0Sstevel@tonic-gate 			 * XXX If SO_RCVBUF has been set and this is an
4535*0Sstevel@tonic-gate 			 * XPG 4.2 application then do not ask the transport
4536*0Sstevel@tonic-gate 			 * since the transport might adjust the value and not
4537*0Sstevel@tonic-gate 			 * return exactly what was set by the application.
4538*0Sstevel@tonic-gate 			 * For non-XPG 4.2 application we return the value
4539*0Sstevel@tonic-gate 			 * that the transport is actually using.
4540*0Sstevel@tonic-gate 			 */
4541*0Sstevel@tonic-gate 			lvalue = so->so_rcvbuf;
4542*0Sstevel@tonic-gate 			if (lvalue == 0) {
4543*0Sstevel@tonic-gate 				mutex_exit(&so->so_lock);
4544*0Sstevel@tonic-gate 				(void) strqget(RD(strvp2wq(SOTOV(so))),
4545*0Sstevel@tonic-gate 						QHIWAT, 0, &lvalue);
4546*0Sstevel@tonic-gate 				mutex_enter(&so->so_lock);
4547*0Sstevel@tonic-gate 				dprintso(so, 1,
4548*0Sstevel@tonic-gate 				    ("got SO_RCVBUF %ld from q\n", lvalue));
4549*0Sstevel@tonic-gate 			} else if (flags & _SOGETSOCKOPT_XPG4_2) {
4550*0Sstevel@tonic-gate 				value = (int)lvalue;
4551*0Sstevel@tonic-gate 				option = &value;
4552*0Sstevel@tonic-gate 				goto copyout;	/* skip asking transport */
4553*0Sstevel@tonic-gate 			}
4554*0Sstevel@tonic-gate 			value = (int)lvalue;
4555*0Sstevel@tonic-gate 			option = &value;
4556*0Sstevel@tonic-gate 			len = (t_uscalar_t)sizeof (so->so_rcvbuf);
4557*0Sstevel@tonic-gate 			break;
4558*0Sstevel@tonic-gate 		}
4559*0Sstevel@tonic-gate #ifdef notyet
4560*0Sstevel@tonic-gate 		/*
4561*0Sstevel@tonic-gate 		 * We do not implement the semantics of these options
4562*0Sstevel@tonic-gate 		 * thus we shouldn't implement the options either.
4563*0Sstevel@tonic-gate 		 */
4564*0Sstevel@tonic-gate 		case SO_SNDLOWAT:
4565*0Sstevel@tonic-gate 			value = so->so_sndlowat;
4566*0Sstevel@tonic-gate 			option = &value;
4567*0Sstevel@tonic-gate 			break;
4568*0Sstevel@tonic-gate 		case SO_RCVLOWAT:
4569*0Sstevel@tonic-gate 			value = so->so_rcvlowat;
4570*0Sstevel@tonic-gate 			option = &value;
4571*0Sstevel@tonic-gate 			break;
4572*0Sstevel@tonic-gate 		case SO_SNDTIMEO:
4573*0Sstevel@tonic-gate 			value = so->so_sndtimeo;
4574*0Sstevel@tonic-gate 			option = &value;
4575*0Sstevel@tonic-gate 			break;
4576*0Sstevel@tonic-gate 		case SO_RCVTIMEO:
4577*0Sstevel@tonic-gate 			value = so->so_rcvtimeo;
4578*0Sstevel@tonic-gate 			option = &value;
4579*0Sstevel@tonic-gate 			break;
4580*0Sstevel@tonic-gate #endif /* notyet */
4581*0Sstevel@tonic-gate 		}
4582*0Sstevel@tonic-gate 	}
4583*0Sstevel@tonic-gate 
4584*0Sstevel@tonic-gate 	if (so->so_family == AF_NCA) {
4585*0Sstevel@tonic-gate 		goto done2;
4586*0Sstevel@tonic-gate 	}
4587*0Sstevel@tonic-gate 
4588*0Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
4589*0Sstevel@tonic-gate 
4590*0Sstevel@tonic-gate 	/* Send request */
4591*0Sstevel@tonic-gate 	optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ;
4592*0Sstevel@tonic-gate 	optmgmt_req.MGMT_flags = T_CHECK;
4593*0Sstevel@tonic-gate 	optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen);
4594*0Sstevel@tonic-gate 	optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req);
4595*0Sstevel@tonic-gate 
4596*0Sstevel@tonic-gate 	oh.level = level;
4597*0Sstevel@tonic-gate 	oh.name = option_name;
4598*0Sstevel@tonic-gate 	oh.len = maxlen;
4599*0Sstevel@tonic-gate 
4600*0Sstevel@tonic-gate 	mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req),
4601*0Sstevel@tonic-gate 	    &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP);
4602*0Sstevel@tonic-gate 	/* Let option management work in the presence of data flow control */
4603*0Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
4604*0Sstevel@tonic-gate 			MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
4605*0Sstevel@tonic-gate 	mp = NULL;
4606*0Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
4607*0Sstevel@tonic-gate 	if (error) {
4608*0Sstevel@tonic-gate 		eprintsoline(so, error);
4609*0Sstevel@tonic-gate 		goto done2;
4610*0Sstevel@tonic-gate 	}
4611*0Sstevel@tonic-gate 	error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK,
4612*0Sstevel@tonic-gate 	    (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0);
4613*0Sstevel@tonic-gate 	if (error) {
4614*0Sstevel@tonic-gate 		if (option != NULL) {
4615*0Sstevel@tonic-gate 			/* We have a fallback value */
4616*0Sstevel@tonic-gate 			error = 0;
4617*0Sstevel@tonic-gate 			goto copyout;
4618*0Sstevel@tonic-gate 		}
4619*0Sstevel@tonic-gate 		eprintsoline(so, error);
4620*0Sstevel@tonic-gate 		goto done2;
4621*0Sstevel@tonic-gate 	}
4622*0Sstevel@tonic-gate 	ASSERT(mp);
4623*0Sstevel@tonic-gate 	optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr;
4624*0Sstevel@tonic-gate 	opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset,
4625*0Sstevel@tonic-gate 			optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE);
4626*0Sstevel@tonic-gate 	if (opt_res == NULL) {
4627*0Sstevel@tonic-gate 		if (option != NULL) {
4628*0Sstevel@tonic-gate 			/* We have a fallback value */
4629*0Sstevel@tonic-gate 			error = 0;
4630*0Sstevel@tonic-gate 			goto copyout;
4631*0Sstevel@tonic-gate 		}
4632*0Sstevel@tonic-gate 		error = EPROTO;
4633*0Sstevel@tonic-gate 		eprintsoline(so, error);
4634*0Sstevel@tonic-gate 		goto done;
4635*0Sstevel@tonic-gate 	}
4636*0Sstevel@tonic-gate 	option = &opt_res[1];
4637*0Sstevel@tonic-gate 
4638*0Sstevel@tonic-gate 	/* check to ensure that the option is within bounds */
4639*0Sstevel@tonic-gate 	if (((uintptr_t)option + opt_res->len < (uintptr_t)option) ||
4640*0Sstevel@tonic-gate 		(uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) {
4641*0Sstevel@tonic-gate 		if (option != NULL) {
4642*0Sstevel@tonic-gate 			/* We have a fallback value */
4643*0Sstevel@tonic-gate 			error = 0;
4644*0Sstevel@tonic-gate 			goto copyout;
4645*0Sstevel@tonic-gate 		}
4646*0Sstevel@tonic-gate 		error = EPROTO;
4647*0Sstevel@tonic-gate 		eprintsoline(so, error);
4648*0Sstevel@tonic-gate 		goto done;
4649*0Sstevel@tonic-gate 	}
4650*0Sstevel@tonic-gate 
4651*0Sstevel@tonic-gate 	len = opt_res->len;
4652*0Sstevel@tonic-gate 
4653*0Sstevel@tonic-gate copyout: {
4654*0Sstevel@tonic-gate 		t_uscalar_t size = MIN(len, maxlen);
4655*0Sstevel@tonic-gate 		bcopy(option, optval, size);
4656*0Sstevel@tonic-gate 		bcopy(&size, optlenp, sizeof (size));
4657*0Sstevel@tonic-gate 	}
4658*0Sstevel@tonic-gate done:
4659*0Sstevel@tonic-gate 	freemsg(mp);
4660*0Sstevel@tonic-gate done2:
4661*0Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
4662*0Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
4663*0Sstevel@tonic-gate 	return (error);
4664*0Sstevel@tonic-gate }
4665*0Sstevel@tonic-gate 
4666*0Sstevel@tonic-gate /*
4667*0Sstevel@tonic-gate  * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ.
4668*0Sstevel@tonic-gate  * SOL_SOCKET options are also recorded in the sonode. A setsockopt for
4669*0Sstevel@tonic-gate  * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails -
4670*0Sstevel@tonic-gate  * setsockopt has to work even if the transport does not support the option.
4671*0Sstevel@tonic-gate  */
4672*0Sstevel@tonic-gate int
4673*0Sstevel@tonic-gate sotpi_setsockopt(struct sonode *so, int level, int option_name,
4674*0Sstevel@tonic-gate 	const void *optval, t_uscalar_t optlen)
4675*0Sstevel@tonic-gate {
4676*0Sstevel@tonic-gate 	struct T_optmgmt_req	optmgmt_req;
4677*0Sstevel@tonic-gate 	struct opthdr		oh;
4678*0Sstevel@tonic-gate 	mblk_t			*mp;
4679*0Sstevel@tonic-gate 	int			error = 0;
4680*0Sstevel@tonic-gate 	boolean_t		handled = B_FALSE;
4681*0Sstevel@tonic-gate 
4682*0Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n",
4683*0Sstevel@tonic-gate 			so, level, option_name, optval, optlen,
4684*0Sstevel@tonic-gate 			pr_state(so->so_state, so->so_mode)));
4685*0Sstevel@tonic-gate 
4686*0Sstevel@tonic-gate 
4687*0Sstevel@tonic-gate 	/* X/Open requires this check */
4688*0Sstevel@tonic-gate 	if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
4689*0Sstevel@tonic-gate 		if (xnet_check_print)
4690*0Sstevel@tonic-gate 			printf("sockfs: X/Open setsockopt check => EINVAL\n");
4691*0Sstevel@tonic-gate 		return (EINVAL);
4692*0Sstevel@tonic-gate 	}
4693*0Sstevel@tonic-gate 
4694*0Sstevel@tonic-gate 	/* Caller allocates aligned optval, or passes null */
4695*0Sstevel@tonic-gate 	ASSERT(((uintptr_t)optval & (sizeof (t_scalar_t) - 1)) == 0);
4696*0Sstevel@tonic-gate 	/* If optval is null optlen is 0, and vice-versa */
4697*0Sstevel@tonic-gate 	ASSERT(optval != NULL || optlen == 0);
4698*0Sstevel@tonic-gate 	ASSERT(optlen != 0 || optval == NULL);
4699*0Sstevel@tonic-gate 
4700*0Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
4701*0Sstevel@tonic-gate 	so_lock_single(so);	/* Set SOLOCKED */
4702*0Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
4703*0Sstevel@tonic-gate 
4704*0Sstevel@tonic-gate 	if (so->so_family == AF_NCA) {
4705*0Sstevel@tonic-gate 		/* Ignore any flow control problems with the transport. */
4706*0Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
4707*0Sstevel@tonic-gate 		goto done;
4708*0Sstevel@tonic-gate 	}
4709*0Sstevel@tonic-gate 
4710*0Sstevel@tonic-gate 	/*
4711*0Sstevel@tonic-gate 	 * For SOCKET or TCP level options, try to set it here itself
4712*0Sstevel@tonic-gate 	 * provided socket has not been popped and we know the tcp
4713*0Sstevel@tonic-gate 	 * structure (stored in so_priv).
4714*0Sstevel@tonic-gate 	 */
4715*0Sstevel@tonic-gate 	if ((level == SOL_SOCKET || level == IPPROTO_TCP) &&
4716*0Sstevel@tonic-gate 	    (so->so_family == AF_INET || so->so_family == AF_INET6) &&
4717*0Sstevel@tonic-gate 	    (so->so_version == SOV_SOCKSTREAM) && (so->so_priv != NULL)) {
4718*0Sstevel@tonic-gate 		tcp_t		*tcp = so->so_priv;
4719*0Sstevel@tonic-gate 		boolean_t	onoff;
4720*0Sstevel@tonic-gate 
4721*0Sstevel@tonic-gate #define	intvalue	(*(int32_t *)optval)
4722*0Sstevel@tonic-gate 
4723*0Sstevel@tonic-gate 		switch (level) {
4724*0Sstevel@tonic-gate 		case SOL_SOCKET:
4725*0Sstevel@tonic-gate 			switch (option_name) {		/* Check length param */
4726*0Sstevel@tonic-gate 			case SO_DEBUG:
4727*0Sstevel@tonic-gate 			case SO_REUSEADDR:
4728*0Sstevel@tonic-gate 			case SO_DONTROUTE:
4729*0Sstevel@tonic-gate 			case SO_BROADCAST:
4730*0Sstevel@tonic-gate 			case SO_USELOOPBACK:
4731*0Sstevel@tonic-gate 			case SO_OOBINLINE:
4732*0Sstevel@tonic-gate 			case SO_DGRAM_ERRIND:
4733*0Sstevel@tonic-gate 				if (optlen != (t_uscalar_t)sizeof (int32_t)) {
4734*0Sstevel@tonic-gate 					error = EINVAL;
4735*0Sstevel@tonic-gate 					eprintsoline(so, error);
4736*0Sstevel@tonic-gate 					mutex_enter(&so->so_lock);
4737*0Sstevel@tonic-gate 					goto done2;
4738*0Sstevel@tonic-gate 				}
4739*0Sstevel@tonic-gate 				ASSERT(optval);
4740*0Sstevel@tonic-gate 				onoff = intvalue != 0;
4741*0Sstevel@tonic-gate 				handled = B_TRUE;
4742*0Sstevel@tonic-gate 				break;
4743*0Sstevel@tonic-gate 			case SO_LINGER:
4744*0Sstevel@tonic-gate 				if (optlen !=
4745*0Sstevel@tonic-gate 				    (t_uscalar_t)sizeof (struct linger)) {
4746*0Sstevel@tonic-gate 					error = EINVAL;
4747*0Sstevel@tonic-gate 					eprintsoline(so, error);
4748*0Sstevel@tonic-gate 					mutex_enter(&so->so_lock);
4749*0Sstevel@tonic-gate 					goto done2;
4750*0Sstevel@tonic-gate 				}
4751*0Sstevel@tonic-gate 				ASSERT(optval);
4752*0Sstevel@tonic-gate 				handled = B_TRUE;
4753*0Sstevel@tonic-gate 				break;
4754*0Sstevel@tonic-gate 			}
4755*0Sstevel@tonic-gate 
4756*0Sstevel@tonic-gate 			switch (option_name) {			/* Do actions */
4757*0Sstevel@tonic-gate 			case SO_LINGER: {
4758*0Sstevel@tonic-gate 				struct linger *lgr = (struct linger *)optval;
4759*0Sstevel@tonic-gate 
4760*0Sstevel@tonic-gate 				if (lgr->l_onoff) {
4761*0Sstevel@tonic-gate 					tcp->tcp_linger = 1;
4762*0Sstevel@tonic-gate 					tcp->tcp_lingertime = lgr->l_linger;
4763*0Sstevel@tonic-gate 					so->so_linger.l_onoff = SO_LINGER;
4764*0Sstevel@tonic-gate 					so->so_options |= SO_LINGER;
4765*0Sstevel@tonic-gate 				} else {
4766*0Sstevel@tonic-gate 					tcp->tcp_linger = 0;
4767*0Sstevel@tonic-gate 					tcp->tcp_lingertime = 0;
4768*0Sstevel@tonic-gate 					so->so_linger.l_onoff = 0;
4769*0Sstevel@tonic-gate 					so->so_options &= ~SO_LINGER;
4770*0Sstevel@tonic-gate 				}
4771*0Sstevel@tonic-gate 				so->so_linger.l_linger = lgr->l_linger;
4772*0Sstevel@tonic-gate 				handled = B_TRUE;
4773*0Sstevel@tonic-gate 				break;
4774*0Sstevel@tonic-gate 			}
4775*0Sstevel@tonic-gate 			case SO_DEBUG:
4776*0Sstevel@tonic-gate 				tcp->tcp_debug = onoff;
4777*0Sstevel@tonic-gate #ifdef SOCK_TEST
4778*0Sstevel@tonic-gate 				if (intvalue & 2)
4779*0Sstevel@tonic-gate 					sock_test_timelimit = 10 * hz;
4780*0Sstevel@tonic-gate 				else
4781*0Sstevel@tonic-gate 					sock_test_timelimit = 0;
4782*0Sstevel@tonic-gate 
4783*0Sstevel@tonic-gate 				if (intvalue & 4)
4784*0Sstevel@tonic-gate 					do_useracc = 0;
4785*0Sstevel@tonic-gate 				else
4786*0Sstevel@tonic-gate 					do_useracc = 1;
4787*0Sstevel@tonic-gate #endif /* SOCK_TEST */
4788*0Sstevel@tonic-gate 				break;
4789*0Sstevel@tonic-gate 			case SO_DONTROUTE:
4790*0Sstevel@tonic-gate 				/*
4791*0Sstevel@tonic-gate 				 * SO_DONTROUTE, SO_USELOOPBACK and
4792*0Sstevel@tonic-gate 				 * SO_BROADCAST are only of interest to IP.
4793*0Sstevel@tonic-gate 				 * We track them here only so
4794*0Sstevel@tonic-gate 				 * that we can report their current value.
4795*0Sstevel@tonic-gate 				 */
4796*0Sstevel@tonic-gate 				tcp->tcp_dontroute = onoff;
4797*0Sstevel@tonic-gate 				if (onoff)
4798*0Sstevel@tonic-gate 					so->so_options |= option_name;
4799*0Sstevel@tonic-gate 				else
4800*0Sstevel@tonic-gate 					so->so_options &= ~option_name;
4801*0Sstevel@tonic-gate 				break;
4802*0Sstevel@tonic-gate 			case SO_USELOOPBACK:
4803*0Sstevel@tonic-gate 				tcp->tcp_useloopback = onoff;
4804*0Sstevel@tonic-gate 				if (onoff)
4805*0Sstevel@tonic-gate 					so->so_options |= option_name;
4806*0Sstevel@tonic-gate 				else
4807*0Sstevel@tonic-gate 					so->so_options &= ~option_name;
4808*0Sstevel@tonic-gate 				break;
4809*0Sstevel@tonic-gate 			case SO_BROADCAST:
4810*0Sstevel@tonic-gate 				tcp->tcp_broadcast = onoff;
4811*0Sstevel@tonic-gate 				if (onoff)
4812*0Sstevel@tonic-gate 					so->so_options |= option_name;
4813*0Sstevel@tonic-gate 				else
4814*0Sstevel@tonic-gate 					so->so_options &= ~option_name;
4815*0Sstevel@tonic-gate 				break;
4816*0Sstevel@tonic-gate 			case SO_REUSEADDR:
4817*0Sstevel@tonic-gate 				tcp->tcp_reuseaddr = onoff;
4818*0Sstevel@tonic-gate 				if (onoff)
4819*0Sstevel@tonic-gate 					so->so_options |= option_name;
4820*0Sstevel@tonic-gate 				else
4821*0Sstevel@tonic-gate 					so->so_options &= ~option_name;
4822*0Sstevel@tonic-gate 				break;
4823*0Sstevel@tonic-gate 			case SO_OOBINLINE:
4824*0Sstevel@tonic-gate 				tcp->tcp_oobinline = onoff;
4825*0Sstevel@tonic-gate 				if (onoff)
4826*0Sstevel@tonic-gate 					so->so_options |= option_name;
4827*0Sstevel@tonic-gate 				else
4828*0Sstevel@tonic-gate 					so->so_options &= ~option_name;
4829*0Sstevel@tonic-gate 				break;
4830*0Sstevel@tonic-gate 			case SO_DGRAM_ERRIND:
4831*0Sstevel@tonic-gate 				tcp->tcp_dgram_errind = onoff;
4832*0Sstevel@tonic-gate 				if (onoff)
4833*0Sstevel@tonic-gate 					so->so_options |= option_name;
4834*0Sstevel@tonic-gate 				else
4835*0Sstevel@tonic-gate 					so->so_options &= ~option_name;
4836*0Sstevel@tonic-gate 				break;
4837*0Sstevel@tonic-gate 			}
4838*0Sstevel@tonic-gate 			break;
4839*0Sstevel@tonic-gate 		case IPPROTO_TCP:
4840*0Sstevel@tonic-gate 			switch (option_name) {
4841*0Sstevel@tonic-gate 			case TCP_NODELAY:
4842*0Sstevel@tonic-gate 				if (optlen != (t_uscalar_t)sizeof (int32_t)) {
4843*0Sstevel@tonic-gate 					error = EINVAL;
4844*0Sstevel@tonic-gate 					eprintsoline(so, error);
4845*0Sstevel@tonic-gate 					mutex_enter(&so->so_lock);
4846*0Sstevel@tonic-gate 					goto done2;
4847*0Sstevel@tonic-gate 				}
4848*0Sstevel@tonic-gate 				ASSERT(optval);
4849*0Sstevel@tonic-gate 				tcp->tcp_naglim = intvalue ? 1 : tcp->tcp_mss;
4850*0Sstevel@tonic-gate 				handled = B_TRUE;
4851*0Sstevel@tonic-gate 				break;
4852*0Sstevel@tonic-gate 			}
4853*0Sstevel@tonic-gate 			break;
4854*0Sstevel@tonic-gate 		default:
4855*0Sstevel@tonic-gate 			handled = B_FALSE;
4856*0Sstevel@tonic-gate 			break;
4857*0Sstevel@tonic-gate 		}
4858*0Sstevel@tonic-gate 	}
4859*0Sstevel@tonic-gate 
4860*0Sstevel@tonic-gate 	if (handled) {
4861*0Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
4862*0Sstevel@tonic-gate 		goto done2;
4863*0Sstevel@tonic-gate 	}
4864*0Sstevel@tonic-gate 
4865*0Sstevel@tonic-gate 	optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ;
4866*0Sstevel@tonic-gate 	optmgmt_req.MGMT_flags = T_NEGOTIATE;
4867*0Sstevel@tonic-gate 	optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen;
4868*0Sstevel@tonic-gate 	optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req);
4869*0Sstevel@tonic-gate 
4870*0Sstevel@tonic-gate 	oh.level = level;
4871*0Sstevel@tonic-gate 	oh.name = option_name;
4872*0Sstevel@tonic-gate 	oh.len = optlen;
4873*0Sstevel@tonic-gate 
4874*0Sstevel@tonic-gate 	mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req),
4875*0Sstevel@tonic-gate 	    &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP);
4876*0Sstevel@tonic-gate 	/* Let option management work in the presence of data flow control */
4877*0Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
4878*0Sstevel@tonic-gate 			MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
4879*0Sstevel@tonic-gate 	mp = NULL;
4880*0Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
4881*0Sstevel@tonic-gate 	if (error) {
4882*0Sstevel@tonic-gate 		eprintsoline(so, error);
4883*0Sstevel@tonic-gate 		goto done;
4884*0Sstevel@tonic-gate 	}
4885*0Sstevel@tonic-gate 	error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK,
4886*0Sstevel@tonic-gate 	    (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0);
4887*0Sstevel@tonic-gate 	if (error) {
4888*0Sstevel@tonic-gate 		eprintsoline(so, error);
4889*0Sstevel@tonic-gate 		goto done;
4890*0Sstevel@tonic-gate 	}
4891*0Sstevel@tonic-gate 	ASSERT(mp);
4892*0Sstevel@tonic-gate 	/* No need to verify T_optmgmt_ack */
4893*0Sstevel@tonic-gate 	freemsg(mp);
4894*0Sstevel@tonic-gate done:
4895*0Sstevel@tonic-gate 	/*
4896*0Sstevel@tonic-gate 	 * Check for SOL_SOCKET options and record their values.
4897*0Sstevel@tonic-gate 	 * If we know about a SOL_SOCKET parameter and the transport
4898*0Sstevel@tonic-gate 	 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or
4899*0Sstevel@tonic-gate 	 * EPROTO) we let the setsockopt succeed.
4900*0Sstevel@tonic-gate 	 */
4901*0Sstevel@tonic-gate 	if (level == SOL_SOCKET) {
4902*0Sstevel@tonic-gate 		/* Check parameters */
4903*0Sstevel@tonic-gate 		switch (option_name) {
4904*0Sstevel@tonic-gate 		case SO_DEBUG:
4905*0Sstevel@tonic-gate 		case SO_REUSEADDR:
4906*0Sstevel@tonic-gate 		case SO_KEEPALIVE:
4907*0Sstevel@tonic-gate 		case SO_DONTROUTE:
4908*0Sstevel@tonic-gate 		case SO_BROADCAST:
4909*0Sstevel@tonic-gate 		case SO_USELOOPBACK:
4910*0Sstevel@tonic-gate 		case SO_OOBINLINE:
4911*0Sstevel@tonic-gate 		case SO_SNDBUF:
4912*0Sstevel@tonic-gate 		case SO_RCVBUF:
4913*0Sstevel@tonic-gate #ifdef notyet
4914*0Sstevel@tonic-gate 		case SO_SNDLOWAT:
4915*0Sstevel@tonic-gate 		case SO_RCVLOWAT:
4916*0Sstevel@tonic-gate 		case SO_SNDTIMEO:
4917*0Sstevel@tonic-gate 		case SO_RCVTIMEO:
4918*0Sstevel@tonic-gate #endif /* notyet */
4919*0Sstevel@tonic-gate 		case SO_DGRAM_ERRIND:
4920*0Sstevel@tonic-gate 			if (optlen != (t_uscalar_t)sizeof (int32_t)) {
4921*0Sstevel@tonic-gate 				error = EINVAL;
4922*0Sstevel@tonic-gate 				eprintsoline(so, error);
4923*0Sstevel@tonic-gate 				goto done2;
4924*0Sstevel@tonic-gate 			}
4925*0Sstevel@tonic-gate 			ASSERT(optval);
4926*0Sstevel@tonic-gate 			handled = B_TRUE;
4927*0Sstevel@tonic-gate 			break;
4928*0Sstevel@tonic-gate 		case SO_LINGER:
4929*0Sstevel@tonic-gate 			if (optlen != (t_uscalar_t)sizeof (struct linger)) {
4930*0Sstevel@tonic-gate 				error = EINVAL;
4931*0Sstevel@tonic-gate 				eprintsoline(so, error);
4932*0Sstevel@tonic-gate 				goto done2;
4933*0Sstevel@tonic-gate 			}
4934*0Sstevel@tonic-gate 			ASSERT(optval);
4935*0Sstevel@tonic-gate 			handled = B_TRUE;
4936*0Sstevel@tonic-gate 			break;
4937*0Sstevel@tonic-gate 		}
4938*0Sstevel@tonic-gate 
4939*0Sstevel@tonic-gate #define	intvalue	(*(int32_t *)optval)
4940*0Sstevel@tonic-gate 
4941*0Sstevel@tonic-gate 		switch (option_name) {
4942*0Sstevel@tonic-gate 		case SO_TYPE:
4943*0Sstevel@tonic-gate 		case SO_ERROR:
4944*0Sstevel@tonic-gate 		case SO_ACCEPTCONN:
4945*0Sstevel@tonic-gate 			/* Can't be set */
4946*0Sstevel@tonic-gate 			error = ENOPROTOOPT;
4947*0Sstevel@tonic-gate 			goto done2;
4948*0Sstevel@tonic-gate 		case SO_LINGER: {
4949*0Sstevel@tonic-gate 			struct linger *l = (struct linger *)optval;
4950*0Sstevel@tonic-gate 
4951*0Sstevel@tonic-gate 			so->so_linger.l_linger = l->l_linger;
4952*0Sstevel@tonic-gate 			if (l->l_onoff) {
4953*0Sstevel@tonic-gate 				so->so_linger.l_onoff = SO_LINGER;
4954*0Sstevel@tonic-gate 				so->so_options |= SO_LINGER;
4955*0Sstevel@tonic-gate 			} else {
4956*0Sstevel@tonic-gate 				so->so_linger.l_onoff = 0;
4957*0Sstevel@tonic-gate 				so->so_options &= ~SO_LINGER;
4958*0Sstevel@tonic-gate 			}
4959*0Sstevel@tonic-gate 			break;
4960*0Sstevel@tonic-gate 		}
4961*0Sstevel@tonic-gate 
4962*0Sstevel@tonic-gate 		case SO_DEBUG:
4963*0Sstevel@tonic-gate #ifdef SOCK_TEST
4964*0Sstevel@tonic-gate 			if (intvalue & 2)
4965*0Sstevel@tonic-gate 				sock_test_timelimit = 10 * hz;
4966*0Sstevel@tonic-gate 			else
4967*0Sstevel@tonic-gate 				sock_test_timelimit = 0;
4968*0Sstevel@tonic-gate 
4969*0Sstevel@tonic-gate 			if (intvalue & 4)
4970*0Sstevel@tonic-gate 				do_useracc = 0;
4971*0Sstevel@tonic-gate 			else
4972*0Sstevel@tonic-gate 				do_useracc = 1;
4973*0Sstevel@tonic-gate #endif /* SOCK_TEST */
4974*0Sstevel@tonic-gate 			/* FALLTHRU */
4975*0Sstevel@tonic-gate 		case SO_REUSEADDR:
4976*0Sstevel@tonic-gate 		case SO_KEEPALIVE:
4977*0Sstevel@tonic-gate 		case SO_DONTROUTE:
4978*0Sstevel@tonic-gate 		case SO_BROADCAST:
4979*0Sstevel@tonic-gate 		case SO_USELOOPBACK:
4980*0Sstevel@tonic-gate 		case SO_OOBINLINE:
4981*0Sstevel@tonic-gate 		case SO_DGRAM_ERRIND:
4982*0Sstevel@tonic-gate 			if (intvalue != 0) {
4983*0Sstevel@tonic-gate 				dprintso(so, 1,
4984*0Sstevel@tonic-gate 					("sotpi_setsockopt: setting 0x%x\n",
4985*0Sstevel@tonic-gate 					option_name));
4986*0Sstevel@tonic-gate 				so->so_options |= option_name;
4987*0Sstevel@tonic-gate 			} else {
4988*0Sstevel@tonic-gate 				dprintso(so, 1,
4989*0Sstevel@tonic-gate 					("sotpi_setsockopt: clearing 0x%x\n",
4990*0Sstevel@tonic-gate 					option_name));
4991*0Sstevel@tonic-gate 				so->so_options &= ~option_name;
4992*0Sstevel@tonic-gate 			}
4993*0Sstevel@tonic-gate 			break;
4994*0Sstevel@tonic-gate 		/*
4995*0Sstevel@tonic-gate 		 * The following options are only returned by us when the
4996*0Sstevel@tonic-gate 		 * T_SVR4_OPTMGMT_REQ fails.
4997*0Sstevel@tonic-gate 		 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs
4998*0Sstevel@tonic-gate 		 * since the transport might adjust the value and not
4999*0Sstevel@tonic-gate 		 * return exactly what was set by the application.
5000*0Sstevel@tonic-gate 		 */
5001*0Sstevel@tonic-gate 		case SO_SNDBUF:
5002*0Sstevel@tonic-gate 			so->so_sndbuf = intvalue;
5003*0Sstevel@tonic-gate 			break;
5004*0Sstevel@tonic-gate 		case SO_RCVBUF:
5005*0Sstevel@tonic-gate 			so->so_rcvbuf = intvalue;
5006*0Sstevel@tonic-gate 			break;
5007*0Sstevel@tonic-gate #ifdef notyet
5008*0Sstevel@tonic-gate 		/*
5009*0Sstevel@tonic-gate 		 * We do not implement the semantics of these options
5010*0Sstevel@tonic-gate 		 * thus we shouldn't implement the options either.
5011*0Sstevel@tonic-gate 		 */
5012*0Sstevel@tonic-gate 		case SO_SNDLOWAT:
5013*0Sstevel@tonic-gate 			so->so_sndlowat = intvalue;
5014*0Sstevel@tonic-gate 			break;
5015*0Sstevel@tonic-gate 		case SO_RCVLOWAT:
5016*0Sstevel@tonic-gate 			so->so_rcvlowat = intvalue;
5017*0Sstevel@tonic-gate 			break;
5018*0Sstevel@tonic-gate 		case SO_SNDTIMEO:
5019*0Sstevel@tonic-gate 			so->so_sndtimeo = intvalue;
5020*0Sstevel@tonic-gate 			break;
5021*0Sstevel@tonic-gate 		case SO_RCVTIMEO:
5022*0Sstevel@tonic-gate 			so->so_rcvtimeo = intvalue;
5023*0Sstevel@tonic-gate 			break;
5024*0Sstevel@tonic-gate #endif /* notyet */
5025*0Sstevel@tonic-gate 		}
5026*0Sstevel@tonic-gate #undef	intvalue
5027*0Sstevel@tonic-gate 
5028*0Sstevel@tonic-gate 		if (error) {
5029*0Sstevel@tonic-gate 			if ((error == ENOPROTOOPT || error == EPROTO ||
5030*0Sstevel@tonic-gate 			    error == EINVAL) && handled) {
5031*0Sstevel@tonic-gate 				dprintso(so, 1,
5032*0Sstevel@tonic-gate 				    ("setsockopt: ignoring error %d for 0x%x\n",
5033*0Sstevel@tonic-gate 				    error, option_name));
5034*0Sstevel@tonic-gate 				error = 0;
5035*0Sstevel@tonic-gate 			}
5036*0Sstevel@tonic-gate 		}
5037*0Sstevel@tonic-gate 	}
5038*0Sstevel@tonic-gate done2:
5039*0Sstevel@tonic-gate ret:
5040*0Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
5041*0Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
5042*0Sstevel@tonic-gate 	return (error);
5043*0Sstevel@tonic-gate }
5044