10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51548Srshoaib  * Common Development and Distribution License (the "License").
61548Srshoaib  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
211548Srshoaib 
220Sstevel@tonic-gate /*
236707Sbrutus  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
280Sstevel@tonic-gate /*	  All Rights Reserved  	*/
290Sstevel@tonic-gate 
300Sstevel@tonic-gate /*
310Sstevel@tonic-gate  * University Copyright- Copyright (c) 1982, 1986, 1988
320Sstevel@tonic-gate  * The Regents of the University of California
330Sstevel@tonic-gate  * All Rights Reserved
340Sstevel@tonic-gate  *
350Sstevel@tonic-gate  * University Acknowledgment- Portions of this document are derived from
360Sstevel@tonic-gate  * software developed by the University of California, Berkeley, and its
370Sstevel@tonic-gate  * contributors.
380Sstevel@tonic-gate  */
390Sstevel@tonic-gate 
400Sstevel@tonic-gate #ifndef _SYS_SOCKETVAR_H
410Sstevel@tonic-gate #define	_SYS_SOCKETVAR_H
420Sstevel@tonic-gate 
430Sstevel@tonic-gate #include <sys/types.h>
440Sstevel@tonic-gate #include <sys/stream.h>
450Sstevel@tonic-gate #include <sys/t_lock.h>
460Sstevel@tonic-gate #include <sys/cred.h>
470Sstevel@tonic-gate #include <sys/vnode.h>
480Sstevel@tonic-gate #include <sys/file.h>
490Sstevel@tonic-gate #include <sys/param.h>
500Sstevel@tonic-gate #include <sys/zone.h>
518348SEric.Yu@Sun.COM #include <sys/sdt.h>
528348SEric.Yu@Sun.COM #include <sys/modctl.h>
538348SEric.Yu@Sun.COM #include <sys/atomic.h>
548348SEric.Yu@Sun.COM #include <sys/socket.h>
558348SEric.Yu@Sun.COM #include <sys/ksocket.h>
566707Sbrutus #include <sys/sodirect.h>
570Sstevel@tonic-gate 
580Sstevel@tonic-gate #ifdef	__cplusplus
590Sstevel@tonic-gate extern "C" {
600Sstevel@tonic-gate #endif
610Sstevel@tonic-gate 
620Sstevel@tonic-gate /*
630Sstevel@tonic-gate  * Internal representation of the address used to represent addresses
640Sstevel@tonic-gate  * in the loopback transport for AF_UNIX. While the sockaddr_un is used
650Sstevel@tonic-gate  * as the sockfs layer address for AF_UNIX the pathnames contained in
660Sstevel@tonic-gate  * these addresses are not unique (due to relative pathnames) thus can not
670Sstevel@tonic-gate  * be used in the transport.
680Sstevel@tonic-gate  *
690Sstevel@tonic-gate  * The transport level address consists of a magic number (used to separate the
700Sstevel@tonic-gate  * name space for specific and implicit binds). For a specific bind
710Sstevel@tonic-gate  * this is followed by a "vnode *" which ensures that all specific binds
720Sstevel@tonic-gate  * have a unique transport level address. For implicit binds the latter
730Sstevel@tonic-gate  * part of the address is a byte string (of the same length as a pointer)
740Sstevel@tonic-gate  * that is assigned by the loopback transport.
750Sstevel@tonic-gate  *
760Sstevel@tonic-gate  * The uniqueness assumes that the loopback transport has a separate namespace
770Sstevel@tonic-gate  * for sockets in order to avoid name conflicts with e.g. TLI use of the
780Sstevel@tonic-gate  * same transport.
790Sstevel@tonic-gate  */
800Sstevel@tonic-gate struct so_ux_addr {
810Sstevel@tonic-gate 	void	*soua_vp;	/* vnode pointer or assigned by tl */
820Sstevel@tonic-gate 	uint_t	soua_magic;	/* See below */
830Sstevel@tonic-gate };
840Sstevel@tonic-gate 
/* Values for soua_magic; each is the ASCII encoding of the quoted tag. */
#define	SOU_MAGIC_EXPLICIT	0x75787670	/* "uxvp" */
#define	SOU_MAGIC_IMPLICIT	0x616e6f6e	/* "anon" */
870Sstevel@tonic-gate 
880Sstevel@tonic-gate struct sockaddr_ux {
890Sstevel@tonic-gate 	sa_family_t		sou_family;	/* AF_UNIX */
900Sstevel@tonic-gate 	struct so_ux_addr	sou_addr;
910Sstevel@tonic-gate };
920Sstevel@tonic-gate 
938348SEric.Yu@Sun.COM #if defined(_KERNEL) || defined(_KMEMUSER)
948348SEric.Yu@Sun.COM 
958348SEric.Yu@Sun.COM #include <sys/socket_proto.h>
968348SEric.Yu@Sun.COM 
/* Forward typedefs; both structures are defined later in this header. */
typedef struct sonodeops sonodeops_t;
typedef struct sonode sonode_t;
990Sstevel@tonic-gate 
/*
 * The sonode represents a socket. A sonode never exists in the file system
 * name space and can not be opened using open() - only the socket, socketpair
 * and accept calls create sonodes.
 *
 * The locking of sockfs uses the so_lock mutex plus the SOLOCKED and
 * SOREADLOCKED flags in so_flag. The mutex protects all the state in the
 * sonode. It is expected that the underlying transport protocol serializes
 * socket operations, so sockfs will normally not single-thread
 * operations. However, certain sockets, including TPI based ones, can only
 * handle one control operation at a time. The SOLOCKED flag is used to
 * single-thread operations from sockfs users to prevent e.g. multiple bind()
 * calls to operate on the same sonode concurrently. The SOREADLOCKED flag is
 * used to ensure that only one thread sleeps in kstrgetmsg for a given
 * sonode. This is needed to ensure atomic operation for things like
 * MSG_WAITALL.
 *
 * The so_fallback_rwlock is used to ensure that for sockets that can
 * fall back to TPI, the fallback is not initiated until all pending
 * operations have completed.
 *
 * Note that so_lock is sometimes held across calls that might go to sleep
 * (kmem_alloc and soallocproto*). This implies that no other lock in
 * the system should be held when calling into sockfs; from the system call
 * side or from strrput (in case of TPI based sockets). If locks are held
 * while calling into sockfs the system might hang when running low on memory.
 */
1270Sstevel@tonic-gate struct sonode {
1280Sstevel@tonic-gate 	struct	vnode	*so_vnode;	/* vnode associated with this sonode */
1290Sstevel@tonic-gate 
1308348SEric.Yu@Sun.COM 	sonodeops_t 	*so_ops;	/* operations vector for this sonode */
1318348SEric.Yu@Sun.COM 	void		*so_priv;	/* sonode private data */
1320Sstevel@tonic-gate 
1338348SEric.Yu@Sun.COM 	krwlock_t	so_fallback_rwlock;
1340Sstevel@tonic-gate 	kmutex_t	so_lock;	/* protects sonode fields */
1358348SEric.Yu@Sun.COM 
1360Sstevel@tonic-gate 	kcondvar_t	so_state_cv;	/* synchronize state changes */
1370Sstevel@tonic-gate 	kcondvar_t	so_want_cv;	/* wait due to SOLOCKED */
1380Sstevel@tonic-gate 
1390Sstevel@tonic-gate 	/* These fields are protected by so_lock */
1400Sstevel@tonic-gate 
1418348SEric.Yu@Sun.COM 	uint_t		so_state;	/* internal state flags SS_*, below */
1428348SEric.Yu@Sun.COM 	uint_t		so_mode;	/* characteristics on socket. SM_* */
1438348SEric.Yu@Sun.COM 	ushort_t 	so_flag;	/* flags, see below */
1448348SEric.Yu@Sun.COM 	int		so_count;	/* count of opened references */
1450Sstevel@tonic-gate 
1468348SEric.Yu@Sun.COM 	sock_connid_t	so_proto_connid; /* protocol generation number */
1470Sstevel@tonic-gate 
1488348SEric.Yu@Sun.COM 	ushort_t 	so_error;	/* error affecting connection */
1498348SEric.Yu@Sun.COM 
1508348SEric.Yu@Sun.COM 	struct sockparams *so_sockparams;	/* vnode or socket module */
1510Sstevel@tonic-gate 	/* Needed to recreate the same socket for accept */
1520Sstevel@tonic-gate 	short	so_family;
1530Sstevel@tonic-gate 	short	so_type;
1540Sstevel@tonic-gate 	short	so_protocol;
1550Sstevel@tonic-gate 	short	so_version;		/* From so_socket call */
1568348SEric.Yu@Sun.COM 
1578348SEric.Yu@Sun.COM 	/* Accept queue */
1588348SEric.Yu@Sun.COM 	kmutex_t	so_acceptq_lock;	/* protects accept queue */
1598348SEric.Yu@Sun.COM 	struct sonode	*so_acceptq_next;	/* acceptq list node */
1608348SEric.Yu@Sun.COM 	struct sonode 	*so_acceptq_head;
1618348SEric.Yu@Sun.COM 	struct sonode	**so_acceptq_tail;
1628348SEric.Yu@Sun.COM 	unsigned int	so_acceptq_len;
1638348SEric.Yu@Sun.COM 	unsigned int	so_backlog;		/* Listen backlog */
1648348SEric.Yu@Sun.COM 	kcondvar_t	so_acceptq_cv;		/* wait for new conn. */
1650Sstevel@tonic-gate 
1660Sstevel@tonic-gate 	/* Options */
1670Sstevel@tonic-gate 	short	so_options;		/* From socket call, see socket.h */
1680Sstevel@tonic-gate 	struct linger	so_linger;	/* SO_LINGER value */
1698348SEric.Yu@Sun.COM #define	so_sndbuf	so_proto_props.sopp_txhiwat	/* SO_SNDBUF value */
1708348SEric.Yu@Sun.COM #define	so_sndlowat	so_proto_props.sopp_txlowat	/* tx low water mark */
1718348SEric.Yu@Sun.COM #define	so_rcvbuf	so_proto_props.sopp_rxhiwat	/* SO_RCVBUF value */
1728348SEric.Yu@Sun.COM #define	so_rcvlowat	so_proto_props.sopp_rxlowat	/* rx low water mark */
1738348SEric.Yu@Sun.COM #define	so_max_addr_len	so_proto_props.sopp_maxaddrlen
1748348SEric.Yu@Sun.COM #define	so_minpsz	so_proto_props.sopp_minpsz
1758348SEric.Yu@Sun.COM #define	so_maxpsz	so_proto_props.sopp_maxpsz
1760Sstevel@tonic-gate 
1778348SEric.Yu@Sun.COM 	clock_t	so_sndtimeo;		/* send timeout */
1788348SEric.Yu@Sun.COM 	clock_t	so_rcvtimeo;		/* recv timeout */
1798348SEric.Yu@Sun.COM 
1800Sstevel@tonic-gate 	mblk_t	*so_oobmsg;		/* outofline oob data */
1818348SEric.Yu@Sun.COM 	ssize_t	so_oobmark;		/* offset of the oob data */
1828348SEric.Yu@Sun.COM 
1830Sstevel@tonic-gate 	pid_t	so_pgrp;		/* pgrp for signals */
1840Sstevel@tonic-gate 
1850Sstevel@tonic-gate 	cred_t		*so_peercred;	/* connected socket peer cred */
1860Sstevel@tonic-gate 	pid_t		so_cpid;	/* connected socket peer cached pid */
1870Sstevel@tonic-gate 	zoneid_t	so_zoneid;	/* opener's zoneid */
1880Sstevel@tonic-gate 
1898348SEric.Yu@Sun.COM 	struct pollhead	so_poll_list;	/* common pollhead */
1908348SEric.Yu@Sun.COM 	short		so_pollev;	/* events that should be generated */
1918348SEric.Yu@Sun.COM 
1928348SEric.Yu@Sun.COM 	/* Receive */
1938348SEric.Yu@Sun.COM 	unsigned int	so_rcv_queued;
1948348SEric.Yu@Sun.COM 	mblk_t		*so_rcv_q_head;
1958348SEric.Yu@Sun.COM 	mblk_t		*so_rcv_q_last_head;
1968348SEric.Yu@Sun.COM 	mblk_t		*so_rcv_head;		/* 1st mblk in the list */
1978348SEric.Yu@Sun.COM 	mblk_t		*so_rcv_last_head;	/* last mblk in b_next chain */
1988348SEric.Yu@Sun.COM 	kcondvar_t	so_rcv_cv;
1998348SEric.Yu@Sun.COM 	uint_t		so_rcv_wanted;	/* # of bytes wanted by app */
2008348SEric.Yu@Sun.COM 	timeout_id_t	so_rcv_timer_tid;
2018348SEric.Yu@Sun.COM 
2028348SEric.Yu@Sun.COM #define	so_rcv_thresh	so_proto_props.sopp_rcvthresh
2038348SEric.Yu@Sun.COM #define	so_rcv_timer_interval so_proto_props.sopp_rcvtimer
2040Sstevel@tonic-gate 
2058348SEric.Yu@Sun.COM 	kcondvar_t	so_snd_cv;
2068399SRao.Shoaib@Sun.COM 	uint32_t
2078399SRao.Shoaib@Sun.COM 		so_snd_qfull: 1,	/* Transmit full */
2088399SRao.Shoaib@Sun.COM 		so_rcv_wakeup: 1,
2098399SRao.Shoaib@Sun.COM 		so_snd_wakeup: 1,
2108399SRao.Shoaib@Sun.COM 		so_not_str: 1,	/* B_TRUE if not streams based socket */
2118399SRao.Shoaib@Sun.COM 		so_pad_to_bit_31: 28;
212898Skais 
2138348SEric.Yu@Sun.COM 	/* Communication channel with protocol */
2148348SEric.Yu@Sun.COM 	sock_lower_handle_t	so_proto_handle;
2158348SEric.Yu@Sun.COM 	sock_downcalls_t 	*so_downcalls;
2168348SEric.Yu@Sun.COM 
2178348SEric.Yu@Sun.COM 	struct sock_proto_props	so_proto_props; /* protocol settings */
2188348SEric.Yu@Sun.COM 	boolean_t		so_flowctrld;	/* Flow controlled */
2198348SEric.Yu@Sun.COM 	uint_t			so_copyflag;	/* Copy related flag */
2208348SEric.Yu@Sun.COM 	kcondvar_t		so_copy_cv;	/* Copy cond variable */
2218348SEric.Yu@Sun.COM 
2228348SEric.Yu@Sun.COM 	/* kernel sockets */
2238348SEric.Yu@Sun.COM 	ksocket_callbacks_t 	so_ksock_callbacks;
2248348SEric.Yu@Sun.COM 	void			*so_ksock_cb_arg;	/* callback argument */
2258348SEric.Yu@Sun.COM 	kcondvar_t		so_closing_cv;
2266707Sbrutus 
2276707Sbrutus 	/* != NULL for sodirect_t enabled socket */
2288348SEric.Yu@Sun.COM 	sodirect_t		*so_direct;
2290Sstevel@tonic-gate };
2300Sstevel@tonic-gate 
/*
 * We do an initial check for events without holding locks. However,
 * if there are no events available, then we redo the check for POLLIN
 * events under the lock.
 */
/*
 * Non-zero when the sonode pointed to by `so' has receive data to report:
 *  - no receive timer is pending and something is queued, or
 *  - more than so_rcv_thresh is queued, or
 *  - SS_CANTRCVMORE is set in so_state.
 *
 * Every use of the argument and the expansion as a whole are parenthesized
 * so the macro can be negated or combined with other operators, and can be
 * passed an arbitrary pointer expression. The argument is evaluated more
 * than once; do not pass an expression with side effects.
 */
#define	SO_HAVE_DATA(so)						\
	(((so)->so_rcv_timer_tid == 0 && (so)->so_rcv_queued > 0) ||	\
	((so)->so_rcv_queued > (so)->so_rcv_thresh) ||			\
	((so)->so_state & SS_CANTRCVMORE))
2408348SEric.Yu@Sun.COM 
2418348SEric.Yu@Sun.COM /*
2428348SEric.Yu@Sun.COM  * Events handled by the protocol (in case sd_poll is set)
2438348SEric.Yu@Sun.COM  */
#define	SO_PROTO_POLLEV		(POLLIN|POLLRDNORM|POLLRDBAND)
2458348SEric.Yu@Sun.COM 
2468348SEric.Yu@Sun.COM 
2478348SEric.Yu@Sun.COM #endif /* _KERNEL || _KMEMUSER */
2488348SEric.Yu@Sun.COM 
/* flags (bits of so_flag) */
#define	SOMOD		0x0001		/* update socket modification time */
#define	SOACC		0x0002		/* update socket access time */

#define	SOLOCKED	0x0010		/* use to serialize open/closes */
#define	SOREADLOCKED	0x0020		/* serialize kstrgetmsg calls */
#define	SOWANT		0x0040		/* some process waiting on lock */
#define	SOCLONE		0x0080		/* child of clone driver */
#define	SOASYNC_UNBIND	0x0100		/* wait for ACK of async unbind */

/* Non-zero when the so_not_str bit of the sonode is set */
#define	SOCK_IS_NONSTR(so)	((so)->so_not_str)
2608348SEric.Yu@Sun.COM 
2610Sstevel@tonic-gate /*
2620Sstevel@tonic-gate  * Socket state bits.
2630Sstevel@tonic-gate  */
#define	SS_ISCONNECTED		0x00000001 /* socket connected to a peer */
#define	SS_ISCONNECTING		0x00000002 /* in process, connecting to peer */
#define	SS_ISDISCONNECTING	0x00000004 /* in process of disconnecting */
#define	SS_CANTSENDMORE		0x00000008 /* can't send more data to peer */

#define	SS_CANTRCVMORE		0x00000010 /* can't receive more data */
#define	SS_ISBOUND		0x00000020 /* socket is bound */
#define	SS_NDELAY		0x00000040 /* FNDELAY non-blocking */
#define	SS_NONBLOCK		0x00000080 /* O_NONBLOCK non-blocking */

#define	SS_ASYNC		0x00000100 /* async i/o notify */
#define	SS_ACCEPTCONN		0x00000200 /* listen done */
/*	unused			0x00000400 */	/* was SS_HASCONNIND */
#define	SS_SAVEDEOR		0x00000800 /* Saved MSG_EOR rcv side state */

#define	SS_RCVATMARK		0x00001000 /* at mark on input */
#define	SS_OOBPEND		0x00002000 /* OOB pending or present - poll */
#define	SS_HAVEOOBDATA		0x00004000 /* OOB data present */
#define	SS_HADOOBDATA		0x00008000 /* OOB data consumed */
#define	SS_CLOSING		0x00010000 /* in process of closing */

/*	unused			0x00020000 */	/* was SS_FADDR_NOXLATE */
/*	unused			0x00040000 */	/* was SS_HASDATA */
/*	unused			0x00080000 */	/* was SS_DONEREAD */
/*	unused			0x00100000 */	/* was SS_MOREDATA */
/*	unused			0x00200000 */	/* was SS_DIRECT */

#define	SS_SODIRECT		0x00400000 /* transport supports sodirect */

/*	unused			0x01000000 */	/* was SS_LADDR_VALID */
/*	unused			0x02000000 */	/* was SS_FADDR_VALID */

#define	SS_SENTLASTREADSIG	0x10000000 /* last rx signal has been sent */
#define	SS_SENTLASTWRITESIG	0x20000000 /* last tx signal has been sent */

#define	SS_FALLBACK_PENDING	0x40000000
#define	SS_FALLBACK_COMP	0x80000000


/* Set of states when the socket can't be rebound */
#define	SS_CANTREBIND	(SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING|\
			    SS_CANTSENDMORE|SS_CANTRCVMORE|SS_ACCEPTCONN)
3060Sstevel@tonic-gate 
3070Sstevel@tonic-gate /*
3088348SEric.Yu@Sun.COM  * Sockets that can fall back to TPI must ensure that fall back is not
3098348SEric.Yu@Sun.COM  * initiated while a thread is using a socket.
3108348SEric.Yu@Sun.COM  */
/*
 * Enter so_fallback_rwlock as a reader. If SS_FALLBACK_COMP is already set
 * in so_state, the lock is dropped and the macro RETURNS `fn' from the
 * enclosing function; `fn' is evaluated only on that path. Otherwise the
 * reader lock is still held when the macro completes, and the caller
 * releases it with SO_UNBLOCK_FALLBACK(). so_lock must not be held on entry.
 */
#define	SO_BLOCK_FALLBACK(so, fn) {			\
	ASSERT(MUTEX_NOT_HELD(&(so)->so_lock));		\
	rw_enter(&(so)->so_fallback_rwlock, RW_READER);	\
	if ((so)->so_state & SS_FALLBACK_COMP) {	\
		rw_exit(&(so)->so_fallback_rwlock);	\
		return (fn);				\
	}						\
}
3198348SEric.Yu@Sun.COM 
/* Release so_fallback_rwlock of the given sonode. */
#define	SO_UNBLOCK_FALLBACK(so)	{			\
	rw_exit(&(so)->so_fallback_rwlock);		\
}
3238348SEric.Yu@Sun.COM 
3248348SEric.Yu@Sun.COM /* Poll events */
#define	SO_POLLEV_IN		0x1	/* POLLIN wakeup needed */
#define	SO_POLLEV_ALWAYS	0x2	/* wakeups */
3278348SEric.Yu@Sun.COM 
3288348SEric.Yu@Sun.COM /*
3290Sstevel@tonic-gate  * Characteristics of sockets. Not changed after the socket is created.
3300Sstevel@tonic-gate  */
#define	SM_PRIV			0x001	/* privileged for broadcast, raw... */
#define	SM_ATOMIC		0x002	/* atomic data transmission */
#define	SM_ADDR			0x004	/* addresses given with messages */
#define	SM_CONNREQUIRED		0x008	/* connection required by protocol */

#define	SM_FDPASSING		0x010	/* passes file descriptors */
#define	SM_EXDATA		0x020	/* Can handle T_EXDATA_REQ */
#define	SM_OPTDATA		0x040	/* Can handle T_OPTDATA_REQ */
#define	SM_BYTESTREAM		0x080	/* Byte stream - can use M_DATA */

#define	SM_ACCEPTOR_ID		0x100	/* so_acceptor_id is valid */

#define	SM_KERNEL		0x200	/* kernel socket */

/* The modes below are only for non-streams sockets */
#define	SM_ACCEPTSUPP		0x400	/* can handle accept() */
#define	SM_SENDFILESUPP		0x800	/* Private: proto supp sendfile  */
3488348SEric.Yu@Sun.COM 
3490Sstevel@tonic-gate /*
3500Sstevel@tonic-gate  * Socket versions. Used by the socket library when calling _so_socket().
3510Sstevel@tonic-gate  */
#define	SOV_STREAM	0	/* Not a socket - just a stream */
#define	SOV_DEFAULT	1	/* Select based on so_default_version */
#define	SOV_SOCKSTREAM	2	/* Socket plus streams operations */
#define	SOV_SOCKBSD	3	/* Socket with no streams operations */
#define	SOV_XPG4_2	4	/* Xnet socket */
3570Sstevel@tonic-gate 
3580Sstevel@tonic-gate #if defined(_KERNEL) || defined(_KMEMUSER)
3598348SEric.Yu@Sun.COM 
3600Sstevel@tonic-gate /*
3618348SEric.Yu@Sun.COM  * sonode create and destroy functions.
3628348SEric.Yu@Sun.COM  */
3638348SEric.Yu@Sun.COM typedef struct sonode *(*so_create_func_t)(struct sockparams *,
3648348SEric.Yu@Sun.COM     int, int, int, int, int, int *, cred_t *);
3658348SEric.Yu@Sun.COM typedef void (*so_destroy_func_t)(struct sonode *);
3668348SEric.Yu@Sun.COM 
3678348SEric.Yu@Sun.COM /* STREAM device information */
3688348SEric.Yu@Sun.COM typedef struct sdev_info {
3698348SEric.Yu@Sun.COM 	char	*sd_devpath;
3708348SEric.Yu@Sun.COM 	int	sd_devpathlen; /* Is 0 if sp_devpath is a static string */
3718348SEric.Yu@Sun.COM 	vnode_t	*sd_vnode;
3728348SEric.Yu@Sun.COM } sdev_info_t;
3738348SEric.Yu@Sun.COM 
#define	SOCKMOD_VERSION		1
/* name of the TPI pseudo socket module */
#define	SOTPI_SMOD_NAME		"socktpi"
3778348SEric.Yu@Sun.COM 
3788348SEric.Yu@Sun.COM typedef struct __smod_priv_s {
3798348SEric.Yu@Sun.COM 	so_create_func_t	smodp_sock_create_func;
3808348SEric.Yu@Sun.COM 	so_destroy_func_t	smodp_sock_destroy_func;
3818348SEric.Yu@Sun.COM 	so_proto_fallback_func_t smodp_proto_fallback_func;
3828348SEric.Yu@Sun.COM } __smod_priv_t;
3838348SEric.Yu@Sun.COM 
3848348SEric.Yu@Sun.COM /*
3858348SEric.Yu@Sun.COM  * Socket module register information
3868348SEric.Yu@Sun.COM  */
3878348SEric.Yu@Sun.COM typedef struct smod_reg_s {
3888348SEric.Yu@Sun.COM 	int		smod_version;
3898348SEric.Yu@Sun.COM 	char		*smod_name;
3908348SEric.Yu@Sun.COM 	size_t		smod_uc_version;
3918348SEric.Yu@Sun.COM 	size_t		smod_dc_version;
3928348SEric.Yu@Sun.COM 	so_proto_create_func_t	smod_proto_create_func;
3938348SEric.Yu@Sun.COM 
3948348SEric.Yu@Sun.COM 	/* __smod_priv_data must be NULL */
3958348SEric.Yu@Sun.COM 	__smod_priv_t	*__smod_priv;
3968348SEric.Yu@Sun.COM } smod_reg_t;
3978348SEric.Yu@Sun.COM 
3988348SEric.Yu@Sun.COM /*
3998348SEric.Yu@Sun.COM  * Socket module information
4008348SEric.Yu@Sun.COM  */
4018348SEric.Yu@Sun.COM typedef struct smod_info {
4028348SEric.Yu@Sun.COM 	int		smod_version;
4038348SEric.Yu@Sun.COM 	char		*smod_name;
4048348SEric.Yu@Sun.COM 	uint_t		smod_refcnt;		/* # of entries */
4058348SEric.Yu@Sun.COM 	size_t		smod_uc_version; 	/* upcall version */
4068348SEric.Yu@Sun.COM 	size_t		smod_dc_version;	/* down call version */
4078348SEric.Yu@Sun.COM 	so_proto_create_func_t	smod_proto_create_func;
4088348SEric.Yu@Sun.COM 	so_proto_fallback_func_t smod_proto_fallback_func;
4098348SEric.Yu@Sun.COM 	so_create_func_t	smod_sock_create_func;
4108348SEric.Yu@Sun.COM 	so_destroy_func_t	smod_sock_destroy_func;
4118348SEric.Yu@Sun.COM 	list_node_t	smod_node;
4128348SEric.Yu@Sun.COM } smod_info_t;
4138348SEric.Yu@Sun.COM 
4148348SEric.Yu@Sun.COM /*
4158348SEric.Yu@Sun.COM  * sockparams
4168348SEric.Yu@Sun.COM  *
4178348SEric.Yu@Sun.COM  * Used for mapping family/type/protocol to module
4180Sstevel@tonic-gate  */
4190Sstevel@tonic-gate struct sockparams {
4208348SEric.Yu@Sun.COM 	/*
4218348SEric.Yu@Sun.COM 	 * The family, type, protocol, sdev_info and smod_info are
4228348SEric.Yu@Sun.COM 	 * set when the entry is created, and they will never change
4238348SEric.Yu@Sun.COM 	 * thereafter.
4248348SEric.Yu@Sun.COM 	 */
4258348SEric.Yu@Sun.COM 	int		sp_family;
4268348SEric.Yu@Sun.COM 	int		sp_type;
4278348SEric.Yu@Sun.COM 	int		sp_protocol;
4288348SEric.Yu@Sun.COM 
4298348SEric.Yu@Sun.COM 	sdev_info_t	sp_sdev_info;	/* STREAM device */
4308348SEric.Yu@Sun.COM 	char		*sp_smod_name;	/* socket module name */
4318348SEric.Yu@Sun.COM 	smod_info_t	*sp_smod_info;	/* socket module */
4328348SEric.Yu@Sun.COM 
4338348SEric.Yu@Sun.COM 	kmutex_t	sp_lock;	/* lock for refcnt */
4348348SEric.Yu@Sun.COM 	uint64_t	sp_refcnt;	/* entry reference count */
4358348SEric.Yu@Sun.COM 
4368348SEric.Yu@Sun.COM 	/*
4378348SEric.Yu@Sun.COM 	 * The entries below are only modified while holding
4388348SEric.Yu@Sun.COM 	 * splist_lock as a writer.
4398348SEric.Yu@Sun.COM 	 */
4408348SEric.Yu@Sun.COM 	int		sp_flags;	/* see below */
4418348SEric.Yu@Sun.COM 	list_node_t	sp_node;
4420Sstevel@tonic-gate };
4430Sstevel@tonic-gate 
4448348SEric.Yu@Sun.COM 
4458348SEric.Yu@Sun.COM /*
4468348SEric.Yu@Sun.COM  * sockparams flags
4478348SEric.Yu@Sun.COM  */
#define	SOCKPARAMS_EPHEMERAL	0x1	/* temp. entry, not on global list */
4498348SEric.Yu@Sun.COM 
4508348SEric.Yu@Sun.COM extern void sockparams_init(void);
4518348SEric.Yu@Sun.COM extern struct sockparams *sockparams_hold_ephemeral_bydev(int, int, int,
4528348SEric.Yu@Sun.COM     const char *, int, int *);
4538348SEric.Yu@Sun.COM extern struct sockparams *sockparams_hold_ephemeral_bymod(int, int, int,
4548348SEric.Yu@Sun.COM     const char *, int, int *);
4558348SEric.Yu@Sun.COM extern void sockparams_ephemeral_drop_last_ref(struct sockparams *);
4568348SEric.Yu@Sun.COM 
4578348SEric.Yu@Sun.COM extern void smod_init(void);
4588348SEric.Yu@Sun.COM extern void smod_add(smod_info_t *);
4598348SEric.Yu@Sun.COM extern int smod_register(const smod_reg_t *);
4608348SEric.Yu@Sun.COM extern int smod_unregister(const char *);
4618348SEric.Yu@Sun.COM extern smod_info_t *smod_lookup_byname(const char *);
4628348SEric.Yu@Sun.COM 
/* True when the sockparams entry has a non-NULL STREAM device path */
#define	SOCKPARAMS_HAS_DEVICE(sp)					\
	((sp)->sp_sdev_info.sd_devpath != NULL)
4658348SEric.Yu@Sun.COM 
4668348SEric.Yu@Sun.COM /* Increase the smod_info_t reference count */
/* Fires a DTrace probe, then atomically increments smod_refcnt. */
#define	SMOD_INC_REF(smodp) {						\
	ASSERT((smodp) != NULL);					\
	DTRACE_PROBE1(smodinfo__inc__ref, struct smod_info *, (smodp));	\
	atomic_inc_uint(&(smodp)->smod_refcnt);				\
}
4728348SEric.Yu@Sun.COM 
/*
 * Decrease the socket module entry reference count.
 * When nothing maps to the entry any longer, we try to unload the module
 * from the kernel. If the module can't be unloaded, just leave the module
 * entry with a zero refcnt.
 */
#define	SMOD_DEC_REF(sp, smodp) {					\
	ASSERT((smodp) != NULL);					\
	ASSERT((smodp)->smod_refcnt != 0);				\
	atomic_dec_uint(&(smodp)->smod_refcnt);				\
	/*								\
	 * No need to atomically check the return value because the	\
	 * socket module framework will verify that no one is using	\
	 * the module before unloading. Worst thing that can happen	\
	 * here is multiple calls to mod_remove_by_name(), which is OK.	\
	 */								\
	if ((smodp)->smod_refcnt == 0)					\
		(void) mod_remove_by_name((sp)->sp_smod_name);		\
}
4928348SEric.Yu@Sun.COM 
4938348SEric.Yu@Sun.COM /* Increase the reference count */
/* Increments sp_refcnt under sp_lock; asserts the count did not wrap to 0. */
#define	SOCKPARAMS_INC_REF(sp) {					\
	ASSERT((sp) != NULL);						\
	DTRACE_PROBE1(sockparams__inc__ref, struct sockparams *, (sp));	\
	mutex_enter(&(sp)->sp_lock);					\
	(sp)->sp_refcnt++;						\
	ASSERT((sp)->sp_refcnt != 0);					\
	mutex_exit(&(sp)->sp_lock);					\
}
5028348SEric.Yu@Sun.COM 
5038348SEric.Yu@Sun.COM /*
5048348SEric.Yu@Sun.COM  * Decrease the reference count.
5058348SEric.Yu@Sun.COM  *
5068348SEric.Yu@Sun.COM  * If the sockparams is ephemeral, then the thread dropping the last ref
5078348SEric.Yu@Sun.COM  * count will destroy the entry.
5088348SEric.Yu@Sun.COM  */
/*
 * Last reference, ephemeral entry: sp_lock is dropped WITHOUT decrementing
 * and sockparams_ephemeral_drop_last_ref() takes over.
 * Last reference, non-ephemeral entry: the count goes to zero, the socket
 * module reference (if any) is released and sp_smod_info is cleared, all
 * under sp_lock.
 * Otherwise: the count is simply decremented under sp_lock.
 */
#define	SOCKPARAMS_DEC_REF(sp) {					\
	ASSERT((sp) != NULL);						\
	DTRACE_PROBE1(sockparams__dec__ref, struct sockparams *, (sp));	\
	mutex_enter(&(sp)->sp_lock);					\
	ASSERT((sp)->sp_refcnt > 0);					\
	if ((sp)->sp_refcnt == 1) {					\
		if ((sp)->sp_flags & SOCKPARAMS_EPHEMERAL) {		\
			mutex_exit(&(sp)->sp_lock);			\
			sockparams_ephemeral_drop_last_ref((sp));	\
		} else {						\
			(sp)->sp_refcnt--;				\
			if ((sp)->sp_smod_info != NULL)			\
				SMOD_DEC_REF(sp, (sp)->sp_smod_info);	\
			(sp)->sp_smod_info = NULL;			\
			mutex_exit(&(sp)->sp_lock);			\
		}							\
	} else {							\
		(sp)->sp_refcnt--;					\
		mutex_exit(&(sp)->sp_lock);				\
	}								\
}
5300Sstevel@tonic-gate 
5310Sstevel@tonic-gate /*
5320Sstevel@tonic-gate  * Used to traverse the list of AF_UNIX sockets to construct the kstat
5330Sstevel@tonic-gate  * for netstat(1m).
5340Sstevel@tonic-gate  */
5350Sstevel@tonic-gate struct socklist {
5360Sstevel@tonic-gate 	kmutex_t	sl_lock;
5370Sstevel@tonic-gate 	struct sonode	*sl_list;
5380Sstevel@tonic-gate };
5390Sstevel@tonic-gate 
5400Sstevel@tonic-gate extern struct socklist socklist;
5410Sstevel@tonic-gate /*
5420Sstevel@tonic-gate  * ss_full_waits is the number of times the reader thread
5430Sstevel@tonic-gate  * waits when the queue is full and ss_empty_waits is the number
5440Sstevel@tonic-gate  * of times the consumer thread waits when the queue is empty.
5450Sstevel@tonic-gate  * No locks for these as they are just indicators of whether
5460Sstevel@tonic-gate  * disk or network or both is slow or fast.
5470Sstevel@tonic-gate  */
struct sendfile_stats {
	uint32_t ss_file_cached;
	uint32_t ss_file_not_cached;
	uint32_t ss_full_waits;
	uint32_t ss_empty_waits;
	uint32_t ss_file_segmap;
};
5550Sstevel@tonic-gate 
5560Sstevel@tonic-gate /*
5570Sstevel@tonic-gate  * A single sendfile request is represented by snf_req.  NOTE(review): the member notes below are inferred from names and types only - confirm against the sendfile code.
5580Sstevel@tonic-gate  */
5590Sstevel@tonic-gate typedef struct snf_req {
5600Sstevel@tonic-gate 	struct snf_req	*sr_next;	/* next request in a list */
5610Sstevel@tonic-gate 	mblk_t		*sr_mp_head;	/* head of the mblk chain */
5620Sstevel@tonic-gate 	mblk_t		*sr_mp_tail;	/* tail of the mblk chain */
5630Sstevel@tonic-gate 	kmutex_t	sr_lock;	/* mutex for this request */
5640Sstevel@tonic-gate 	kcondvar_t	sr_cv;		/* condition variable for this request */
5650Sstevel@tonic-gate 	uint_t		sr_qlen;	/* current queue length */
5660Sstevel@tonic-gate 	int		sr_hiwat;	/* high-water mark */
5670Sstevel@tonic-gate 	int		sr_lowat;	/* low-water mark */
5680Sstevel@tonic-gate 	int		sr_operation;	/* operation code, e.g. READ_OP */
5690Sstevel@tonic-gate 	struct vnode	*sr_vp;		/* vnode used by the request */
5700Sstevel@tonic-gate 	file_t 		*sr_fp;		/* file used by the request */
5710Sstevel@tonic-gate 	ssize_t		sr_maxpsz;	/* maximum size per transfer */
5720Sstevel@tonic-gate 	u_offset_t	sr_file_off;	/* offset into the file */
5730Sstevel@tonic-gate 	u_offset_t	sr_file_size;	/* size of the file */
5740Sstevel@tonic-gate #define	SR_READ_DONE	0x80000000	/* top bit of a 32-bit word; likely set in sr_read_error */
5750Sstevel@tonic-gate 	int		sr_read_error;	/* status of the read side */
5760Sstevel@tonic-gate 	int		sr_write_error;	/* status of the write side */
5770Sstevel@tonic-gate } snf_req_t;
5780Sstevel@tonic-gate 
5790Sstevel@tonic-gate /* A queue of sendfile requests (snf_req_t), tracked by head and tail pointers */
5800Sstevel@tonic-gate struct sendfile_queue {
5810Sstevel@tonic-gate 	snf_req_t	*snfq_req_head;		/* first queued request */
5820Sstevel@tonic-gate 	snf_req_t	*snfq_req_tail;		/* last queued request */
5830Sstevel@tonic-gate 	kmutex_t	snfq_lock;		/* presumably guards this queue - confirm */
5840Sstevel@tonic-gate 	kcondvar_t	snfq_cv;		/* presumably waited on by service threads - confirm */
5850Sstevel@tonic-gate 	int		snfq_svc_threads;	/* # of service threads */
5860Sstevel@tonic-gate 	int		snfq_idle_cnt;		/* # of idling threads */
5870Sstevel@tonic-gate 	int		snfq_max_threads;	/* presumably caps snfq_svc_threads - confirm */
5880Sstevel@tonic-gate 	int		snfq_req_cnt;		/* Number of requests */
5890Sstevel@tonic-gate };
5900Sstevel@tonic-gate 
5910Sstevel@tonic-gate #define	READ_OP			1	/* presumably a value for snf_req.sr_operation - confirm */
5920Sstevel@tonic-gate #define	SNFQ_TIMEOUT		(60 * 5 * hz)	/* 5 minutes: 300 * hz, evaluated at each use */
5930Sstevel@tonic-gate 
5940Sstevel@tonic-gate /* Socket network operations switch: one function pointer per socket operation; every entry takes the sonode first and returns int */
5950Sstevel@tonic-gate struct sonodeops {
5968348SEric.Yu@Sun.COM 	int 	(*sop_init)(struct sonode *, struct sonode *, cred_t *,
5970Sstevel@tonic-gate 		    int);
5988348SEric.Yu@Sun.COM 	int	(*sop_accept)(struct sonode *, int, cred_t *, struct sonode **);
5998348SEric.Yu@Sun.COM 	int	(*sop_bind)(struct sonode *, struct sockaddr *, socklen_t,
6008348SEric.Yu@Sun.COM 		    int, cred_t *);
6018348SEric.Yu@Sun.COM 	int	(*sop_listen)(struct sonode *, int, cred_t *);
6020Sstevel@tonic-gate 	int	(*sop_connect)(struct sonode *, const struct sockaddr *,
6038348SEric.Yu@Sun.COM 		    socklen_t, int, int, cred_t *);
6040Sstevel@tonic-gate 	int	(*sop_recvmsg)(struct sonode *, struct msghdr *,
6058348SEric.Yu@Sun.COM 		    struct uio *, cred_t *);
6060Sstevel@tonic-gate 	int	(*sop_sendmsg)(struct sonode *, struct msghdr *,
6078348SEric.Yu@Sun.COM 		    struct uio *, cred_t *);
6088348SEric.Yu@Sun.COM 	int	(*sop_sendmblk)(struct sonode *, struct msghdr *, int,
6098348SEric.Yu@Sun.COM 		    cred_t *, mblk_t **);
6108348SEric.Yu@Sun.COM 	int	(*sop_getpeername)(struct sonode *, struct sockaddr *,
6118348SEric.Yu@Sun.COM 		    socklen_t *, boolean_t, cred_t *);
6128348SEric.Yu@Sun.COM 	int	(*sop_getsockname)(struct sonode *, struct sockaddr *,
6138348SEric.Yu@Sun.COM 		    socklen_t *, cred_t *);
6148348SEric.Yu@Sun.COM 	int	(*sop_shutdown)(struct sonode *, int, cred_t *);
6150Sstevel@tonic-gate 	int	(*sop_getsockopt)(struct sonode *, int, int, void *,
6168348SEric.Yu@Sun.COM 		    socklen_t *, int, cred_t *);
6170Sstevel@tonic-gate 	int 	(*sop_setsockopt)(struct sonode *, int, int, const void *,
6188348SEric.Yu@Sun.COM 		    socklen_t, cred_t *);
6198348SEric.Yu@Sun.COM 	int 	(*sop_ioctl)(struct sonode *, int, intptr_t, int,
6208348SEric.Yu@Sun.COM 		    cred_t *, int32_t *);
6218348SEric.Yu@Sun.COM 	int 	(*sop_poll)(struct sonode *, short, int, short *,
6228348SEric.Yu@Sun.COM 		    struct pollhead **);
6238348SEric.Yu@Sun.COM 	int 	(*sop_close)(struct sonode *, int, cred_t *);
6240Sstevel@tonic-gate };
6250Sstevel@tonic-gate /* SOP_*(): call the matching sonodeops entry through (so)->so_ops, passing so as the first argument; so is evaluated twice */
6268348SEric.Yu@Sun.COM #define	SOP_INIT(so, flag, cr, flags)	\
6278348SEric.Yu@Sun.COM 	((so)->so_ops->sop_init((so), (flag), (cr), (flags)))
6288348SEric.Yu@Sun.COM #define	SOP_ACCEPT(so, fflag, cr, nsop)	\
6298348SEric.Yu@Sun.COM 	((so)->so_ops->sop_accept((so), (fflag), (cr), (nsop)))
6308348SEric.Yu@Sun.COM #define	SOP_BIND(so, name, namelen, flags, cr)	\
6318348SEric.Yu@Sun.COM 	((so)->so_ops->sop_bind((so), (name), (namelen), (flags), (cr)))
6328348SEric.Yu@Sun.COM #define	SOP_LISTEN(so, backlog, cr)	\
6338348SEric.Yu@Sun.COM 	((so)->so_ops->sop_listen((so), (backlog), (cr)))
6348348SEric.Yu@Sun.COM #define	SOP_CONNECT(so, name, namelen, fflag, flags, cr)	\
6358348SEric.Yu@Sun.COM 	((so)->so_ops->sop_connect((so), (name), (namelen), (fflag), (flags), \
6368348SEric.Yu@Sun.COM 	(cr)))
6378348SEric.Yu@Sun.COM #define	SOP_RECVMSG(so, msg, uiop, cr)	\
6388348SEric.Yu@Sun.COM 	((so)->so_ops->sop_recvmsg((so), (msg), (uiop), (cr)))
6398348SEric.Yu@Sun.COM #define	SOP_SENDMSG(so, msg, uiop, cr)	\
6408348SEric.Yu@Sun.COM 	((so)->so_ops->sop_sendmsg((so), (msg), (uiop), (cr)))
6418348SEric.Yu@Sun.COM #define	SOP_SENDMBLK(so, msg, size, cr, mpp)	\
6428348SEric.Yu@Sun.COM 	((so)->so_ops->sop_sendmblk((so), (msg), (size), (cr), (mpp)))
6438348SEric.Yu@Sun.COM #define	SOP_GETPEERNAME(so, addr, addrlen, accept, cr)	\
6448348SEric.Yu@Sun.COM 	((so)->so_ops->sop_getpeername((so), (addr), (addrlen), (accept), (cr)))
6458348SEric.Yu@Sun.COM #define	SOP_GETSOCKNAME(so, addr, addrlen, cr)	\
6468348SEric.Yu@Sun.COM 	((so)->so_ops->sop_getsockname((so), (addr), (addrlen), (cr)))
6478348SEric.Yu@Sun.COM #define	SOP_SHUTDOWN(so, how, cr)	\
6488348SEric.Yu@Sun.COM 	((so)->so_ops->sop_shutdown((so), (how), (cr)))
6498348SEric.Yu@Sun.COM #define	SOP_GETSOCKOPT(so, level, optionname, optval, optlenp, flags, cr) \
6500Sstevel@tonic-gate 	((so)->so_ops->sop_getsockopt((so), (level), (optionname),	\
6518348SEric.Yu@Sun.COM 	    (optval), (optlenp), (flags), (cr)))
6528348SEric.Yu@Sun.COM #define	SOP_SETSOCKOPT(so, level, optionname, optval, optlen, cr)	\
6530Sstevel@tonic-gate 	((so)->so_ops->sop_setsockopt((so), (level), (optionname),	\
6548348SEric.Yu@Sun.COM 	    (optval), (optlen), (cr)))
6558348SEric.Yu@Sun.COM #define	SOP_IOCTL(so, cmd, arg, mode, cr, rvalp)	\
6568348SEric.Yu@Sun.COM 	((so)->so_ops->sop_ioctl((so), (cmd), (arg), (mode), (cr), (rvalp)))
6578348SEric.Yu@Sun.COM #define	SOP_POLL(so, events, anyyet, reventsp, phpp) \
6588348SEric.Yu@Sun.COM 	((so)->so_ops->sop_poll((so), (events), (anyyet), (reventsp), (phpp)))
6598348SEric.Yu@Sun.COM #define	SOP_CLOSE(so, flag, cr)	\
6608348SEric.Yu@Sun.COM 	((so)->so_ops->sop_close((so), (flag), (cr)))
6610Sstevel@tonic-gate 
6620Sstevel@tonic-gate #endif /* defined(_KERNEL) || defined(_KMEMUSER) */
6630Sstevel@tonic-gate 
6640Sstevel@tonic-gate #ifdef _KERNEL
6650Sstevel@tonic-gate /* True when addr is a multiple of _CMSG_HDR_ALIGNMENT (mask test; assumes the alignment is a power of two) */
6660Sstevel@tonic-gate #define	ISALIGNED_cmsghdr(addr) \
6670Sstevel@tonic-gate 		(((uintptr_t)(addr) & (_CMSG_HDR_ALIGNMENT - 1)) == 0)
6680Sstevel@tonic-gate /* Round len up to the next multiple of _CMSG_HDR_ALIGNMENT (same power-of-two assumption) */
6690Sstevel@tonic-gate #define	ROUNDUP_cmsglen(len) \
6700Sstevel@tonic-gate 	(((len) + _CMSG_HDR_ALIGNMENT - 1) & ~(_CMSG_HDR_ALIGNMENT - 1))
6710Sstevel@tonic-gate /* True for a vnode of type VSOCK whose v_stream is NULL; vp is evaluated twice */
6728348SEric.Yu@Sun.COM #define	IS_NON_STREAM_SOCK(vp) \
6738348SEric.Yu@Sun.COM 	((vp)->v_type == VSOCK && (vp)->v_stream == NULL)
6740Sstevel@tonic-gate /*
6752712Snn35248  * Macros that operate on struct cmsghdr; used in parsing msg_control.  Each evaluates cmsg more than once.
6762712Snn35248  * CMSG_NEXT yields the header found cmsg_len bytes (rounded up by ROUNDUP_cmsglen) past cmsg; CMSG_CONTENT is the address just past the header; CMSG_CONTENTLEN is cmsg_len minus the header size.
6772712Snn35248  * CMSG_VALID requires an aligned cmsg inside [start, end) whose cmsg_len covers a header and ends at or before end; it does not assume that the last option buffer is padded.
6780Sstevel@tonic-gate  */
6790Sstevel@tonic-gate #define	CMSG_NEXT(cmsg)						\
6800Sstevel@tonic-gate 	((struct cmsghdr *)((uintptr_t)(cmsg) +			\
6810Sstevel@tonic-gate 	    ROUNDUP_cmsglen((cmsg)->cmsg_len)))
6822712Snn35248 #define	CMSG_CONTENT(cmsg)	(&((cmsg)[1]))
6832712Snn35248 #define	CMSG_CONTENTLEN(cmsg)	((cmsg)->cmsg_len - sizeof (struct cmsghdr))
6842712Snn35248 #define	CMSG_VALID(cmsg, start, end)					\
6852712Snn35248 	(ISALIGNED_cmsghdr(cmsg) &&					\
6862712Snn35248 	((uintptr_t)(cmsg) >= (uintptr_t)(start)) &&			\
6872712Snn35248 	((uintptr_t)(cmsg) < (uintptr_t)(end)) &&			\
6882712Snn35248 	((ssize_t)(cmsg)->cmsg_len >= sizeof (struct cmsghdr)) &&	\
6892712Snn35248 	((uintptr_t)(cmsg) + (cmsg)->cmsg_len <= (uintptr_t)(end)))
6900Sstevel@tonic-gate 
6910Sstevel@tonic-gate /*
6920Sstevel@tonic-gate  * Maximum size of any argument that is copied in (addresses, options,
6930Sstevel@tonic-gate  * access rights). MUST be at least MAXPATHLEN + 3.
6940Sstevel@tonic-gate  * BSD and SunOS 4.X limited this to MLEN or MCLBYTES.
6950Sstevel@tonic-gate  */
6960Sstevel@tonic-gate #define	SO_MAXARGSIZE	8192	/* meets the MAXPATHLEN + 3 floor only while MAXPATHLEN <= 8189 */
6970Sstevel@tonic-gate 
6980Sstevel@tonic-gate /*
6990Sstevel@tonic-gate  * Convert between vnode and sonode: VTOSO casts the vnode's v_data to
7000Sstevel@tonic-gate  * a struct sonode pointer; SOTOV reads the sonode's so_vnode member.
7010Sstevel@tonic-gate  */
7020Sstevel@tonic-gate #define	VTOSO(vp)	((struct sonode *)((vp)->v_data))
7030Sstevel@tonic-gate #define	SOTOV(sp)	((sp)->so_vnode)
7030Sstevel@tonic-gate 
7040Sstevel@tonic-gate /*
7050Sstevel@tonic-gate  * Internal flags for sobind().  Each value is a distinct single bit (0x01 through 0x80).
7060Sstevel@tonic-gate  */
7070Sstevel@tonic-gate #define	_SOBIND_REBIND		0x01	/* Bind to existing local address */
7080Sstevel@tonic-gate #define	_SOBIND_UNSPEC		0x02	/* Bind to unspecified address */
7090Sstevel@tonic-gate #define	_SOBIND_LOCK_HELD	0x04	/* so_excl_lock held by caller */
7100Sstevel@tonic-gate #define	_SOBIND_NOXLATE		0x08	/* No addr translation for AF_UNIX */
7110Sstevel@tonic-gate #define	_SOBIND_XPG4_2		0x10	/* xpg4.2 semantics */
7120Sstevel@tonic-gate #define	_SOBIND_SOCKBSD		0x20	/* BSD semantics */
7130Sstevel@tonic-gate #define	_SOBIND_LISTEN		0x40	/* Make into SS_ACCEPTCONN */
7140Sstevel@tonic-gate #define	_SOBIND_SOCKETPAIR	0x80	/* Internal flag for so_socketpair() */
7150Sstevel@tonic-gate 					/* to enable listen with backlog = 1 */
7160Sstevel@tonic-gate 
7170Sstevel@tonic-gate /*
7180Sstevel@tonic-gate  * Internal flags for sounbind() (a single flag is defined)
7190Sstevel@tonic-gate  */
7200Sstevel@tonic-gate #define	_SOUNBIND_REBIND	0x01	/* Don't clear fields - will rebind */
7210Sstevel@tonic-gate 
7220Sstevel@tonic-gate /*
7230Sstevel@tonic-gate  * Internal flags for soconnect().  Each value is a distinct single bit.
7240Sstevel@tonic-gate  */
7250Sstevel@tonic-gate #define	_SOCONNECT_NOXLATE	0x01	/* No addr translation for AF_UNIX */
7260Sstevel@tonic-gate #define	_SOCONNECT_DID_BIND	0x02	/* Unbind when connect fails */
7270Sstevel@tonic-gate #define	_SOCONNECT_XPG4_2	0x04	/* xpg4.2 semantics */
7280Sstevel@tonic-gate 
7290Sstevel@tonic-gate /*
7300Sstevel@tonic-gate  * Internal flags for sodisconnect() (a single flag is defined)
7310Sstevel@tonic-gate  */
7320Sstevel@tonic-gate #define	_SODISCONNECT_LOCK_HELD	0x01	/* so_excl_lock held by caller */
7330Sstevel@tonic-gate 
7340Sstevel@tonic-gate /*
7350Sstevel@tonic-gate  * Internal flags for sotpi_getsockopt() (a single flag is defined)
7360Sstevel@tonic-gate  */
7370Sstevel@tonic-gate #define	_SOGETSOCKOPT_XPG4_2	0x01	/* xpg4.2 semantics */
7380Sstevel@tonic-gate 
7390Sstevel@tonic-gate /*
7400Sstevel@tonic-gate  * Internal flags for soallocproto*().  Values are 0, 1 and 2; _ALLOC_NOSLEEP is zero, so compare for equality rather than testing bits.
7410Sstevel@tonic-gate  */
7420Sstevel@tonic-gate #define	_ALLOC_NOSLEEP		0	/* Don't sleep for memory */
7430Sstevel@tonic-gate #define	_ALLOC_INTR		1	/* Sleep until interrupt */
7440Sstevel@tonic-gate #define	_ALLOC_SLEEP		2	/* Sleep forever */
7450Sstevel@tonic-gate 
7460Sstevel@tonic-gate /*
7470Sstevel@tonic-gate  * Internal structure for handling AF_UNIX file descriptor passing
7480Sstevel@tonic-gate  */
7490Sstevel@tonic-gate struct fdbuf {
7500Sstevel@tonic-gate 	int		fd_size;	/* In bytes, for kmem_free */
7510Sstevel@tonic-gate 	int		fd_numfd;	/* Number of elements below */
7520Sstevel@tonic-gate 	char		*fd_ebuf;	/* Extra buffer to free  */
7530Sstevel@tonic-gate 	int		fd_ebuflen;	/* presumably the length of fd_ebuf - confirm */
7540Sstevel@tonic-gate 	frtn_t		fd_frtn;	/* NOTE(review): free-routine descriptor by type; use not visible here */
7550Sstevel@tonic-gate 	struct file	*fd_fds[1];	/* One or more; declared with one slot, fd_numfd gives the count */
7560Sstevel@tonic-gate };
7570Sstevel@tonic-gate #define	FDBUF_HDRSIZE	(sizeof (struct fdbuf) - sizeof (struct file *))	/* struct size minus the one built-in fd_fds slot */
7580Sstevel@tonic-gate 
7590Sstevel@tonic-gate /*
7600Sstevel@tonic-gate  * Variable that can be patched to set what version of socket socket()
7610Sstevel@tonic-gate  * will create.
7620Sstevel@tonic-gate  */
7630Sstevel@tonic-gate extern int so_default_version;
7640Sstevel@tonic-gate 
7650Sstevel@tonic-gate #ifdef DEBUG
7660Sstevel@tonic-gate /* Turn on extra testing capabilities */
7670Sstevel@tonic-gate #define	SOCK_TEST
7680Sstevel@tonic-gate #endif /* DEBUG */
7690Sstevel@tonic-gate 
7700Sstevel@tonic-gate #ifdef DEBUG	/* debug-only helpers: these prototypes exist only in DEBUG builds */
7710Sstevel@tonic-gate char	*pr_state(uint_t, uint_t);
7720Sstevel@tonic-gate char	*pr_addr(int, struct sockaddr *, t_uscalar_t);
7730Sstevel@tonic-gate int	so_verify_oobstate(struct sonode *);
7740Sstevel@tonic-gate #endif /* DEBUG */
7750Sstevel@tonic-gate 
7760Sstevel@tonic-gate /*
7770Sstevel@tonic-gate  * DEBUG macros
7780Sstevel@tonic-gate  */
7797632SNick.Todd@Sun.COM #if defined(DEBUG)
7800Sstevel@tonic-gate #define	SOCK_DEBUG
7810Sstevel@tonic-gate 
7820Sstevel@tonic-gate extern int sockdebug;
7830Sstevel@tonic-gate extern int sockprinterr;
7840Sstevel@tonic-gate 
7850Sstevel@tonic-gate #define	eprint(args)	printf args
7860Sstevel@tonic-gate #define	eprintso(so, args) \
7870Sstevel@tonic-gate { if (sockprinterr && ((so)->so_options & SO_DEBUG)) printf args; }
7880Sstevel@tonic-gate #define	eprintline(error)					\
7890Sstevel@tonic-gate {								\
7900Sstevel@tonic-gate 	if (error != EINTR && (sockprinterr || sockdebug > 0))	\
7910Sstevel@tonic-gate 		printf("socket error %d: line %d file %s\n",	\
7920Sstevel@tonic-gate 			(error), __LINE__, __FILE__);		\
7930Sstevel@tonic-gate }
7940Sstevel@tonic-gate 
7950Sstevel@tonic-gate #define	eprintsoline(so, error)					\
7960Sstevel@tonic-gate { if (sockprinterr && ((so)->so_options & SO_DEBUG))		\
7970Sstevel@tonic-gate 	printf("socket(%p) error %d: line %d file %s\n",	\
7987632SNick.Todd@Sun.COM 		(void *)(so), (error), __LINE__, __FILE__);	\
7990Sstevel@tonic-gate }
8000Sstevel@tonic-gate #define	dprint(level, args)	{ if (sockdebug > (level)) printf args; }
8010Sstevel@tonic-gate #define	dprintso(so, level, args) \
8020Sstevel@tonic-gate { if (sockdebug > (level) && ((so)->so_options & SO_DEBUG)) printf args; }
8030Sstevel@tonic-gate 
8047632SNick.Todd@Sun.COM #else /* define(DEBUG) */
8050Sstevel@tonic-gate 
8060Sstevel@tonic-gate #define	eprint(args)		{}
8070Sstevel@tonic-gate #define	eprintso(so, args)	{}
8080Sstevel@tonic-gate #define	eprintline(error)	{}
8090Sstevel@tonic-gate #define	eprintsoline(so, error)	{}
8100Sstevel@tonic-gate #define	dprint(level, args)	{}
8110Sstevel@tonic-gate #define	dprintso(so, level, args) {}
8120Sstevel@tonic-gate 
8137632SNick.Todd@Sun.COM #endif /* defined(DEBUG) */
8140Sstevel@tonic-gate /* Global data declared for users of this header; these are declarations only, the definitions live elsewhere */
8150Sstevel@tonic-gate extern struct vfsops			sock_vfsops;
8168348SEric.Yu@Sun.COM extern struct vnodeops			*socket_vnodeops;
8178348SEric.Yu@Sun.COM extern const struct fs_operation_def	socket_vnodeops_template[];
8180Sstevel@tonic-gate 
8190Sstevel@tonic-gate extern dev_t				sockdev;
8200Sstevel@tonic-gate 
8210Sstevel@tonic-gate /*
8220Sstevel@tonic-gate  * sockfs functions (prototypes only; parameter names are omitted, so see the definitions for argument meaning)
8230Sstevel@tonic-gate  */
8240Sstevel@tonic-gate extern int	sock_getmsg(vnode_t *, struct strbuf *, struct strbuf *,
8250Sstevel@tonic-gate 			uchar_t *, int *, int, rval_t *);
8260Sstevel@tonic-gate extern int	sock_putmsg(vnode_t *, struct strbuf *, struct strbuf *,
8270Sstevel@tonic-gate 			uchar_t, int, int);
8288348SEric.Yu@Sun.COM extern int	sogetvp(char *, vnode_t **, int);
8290Sstevel@tonic-gate extern int	sockinit(int, char *);
8308348SEric.Yu@Sun.COM extern int	soconfig(int, int, int,	char *, int, char *);
8318348SEric.Yu@Sun.COM extern int	solookup(int, int, int, struct sockparams **);
8320Sstevel@tonic-gate extern void	so_lock_single(struct sonode *);
8330Sstevel@tonic-gate extern void	so_unlock_single(struct sonode *, int);
8340Sstevel@tonic-gate extern int	so_lock_read(struct sonode *, int);
8350Sstevel@tonic-gate extern int	so_lock_read_intr(struct sonode *, int);
8360Sstevel@tonic-gate extern void	so_unlock_read(struct sonode *);
8370Sstevel@tonic-gate extern void	*sogetoff(mblk_t *, t_uscalar_t, t_uscalar_t, uint_t);
8380Sstevel@tonic-gate extern void	so_getopt_srcaddr(void *, t_uscalar_t,
8390Sstevel@tonic-gate 			void **, t_uscalar_t *);
8400Sstevel@tonic-gate extern int	so_getopt_unix_close(void *, t_uscalar_t);
8410Sstevel@tonic-gate extern void	fdbuf_free(struct fdbuf *);
8420Sstevel@tonic-gate extern mblk_t	*fdbuf_allocmsg(int, struct fdbuf *);
8430Sstevel@tonic-gate extern int	fdbuf_create(void *, int, struct fdbuf **);
8440Sstevel@tonic-gate extern void	so_closefds(void *, t_uscalar_t, int, int);
8450Sstevel@tonic-gate extern int	so_getfdopt(void *, t_uscalar_t, int, void **, int *);
8460Sstevel@tonic-gate t_uscalar_t	so_optlen(void *, t_uscalar_t, int);	/* NOTE(review): no `extern` unlike its neighbours; linkage is the same for a function declaration */
8470Sstevel@tonic-gate extern void	so_cmsg2opt(void *, t_uscalar_t, int, mblk_t *);
8480Sstevel@tonic-gate extern t_uscalar_t
8490Sstevel@tonic-gate 		so_cmsglen(mblk_t *, void *, t_uscalar_t, int);
8500Sstevel@tonic-gate extern int	so_opt2cmsg(mblk_t *, void *, t_uscalar_t, int,
8510Sstevel@tonic-gate 			void *, t_uscalar_t);
8520Sstevel@tonic-gate extern void	soisconnecting(struct sonode *);
8530Sstevel@tonic-gate extern void	soisconnected(struct sonode *);
8540Sstevel@tonic-gate extern void	soisdisconnected(struct sonode *, int);
8550Sstevel@tonic-gate extern void	socantsendmore(struct sonode *);
8560Sstevel@tonic-gate extern void	socantrcvmore(struct sonode *);
8570Sstevel@tonic-gate extern void	soseterror(struct sonode *, int);
8588348SEric.Yu@Sun.COM extern int	sogeterr(struct sonode *, boolean_t);
8590Sstevel@tonic-gate extern int	sowaitconnected(struct sonode *, int, int);
8600Sstevel@tonic-gate 
8610Sstevel@tonic-gate extern ssize_t	soreadfile(file_t *, uchar_t *, u_offset_t, int *, size_t);
8620Sstevel@tonic-gate extern void	*sock_kstat_init(zoneid_t);
8630Sstevel@tonic-gate extern void	sock_kstat_fini(zoneid_t, void *);
8645227Stz204579 extern struct sonode *getsonode(int, int *, file_t **);
8650Sstevel@tonic-gate /*
8665331Samw  * Function wrappers (mostly around the sonode switch) for
8670Sstevel@tonic-gate  * backward compatibility.  Unlike the sonodeops entries, these prototypes take no cred_t argument.
8680Sstevel@tonic-gate  */
8690Sstevel@tonic-gate extern int	soaccept(struct sonode *, int, struct sonode **);
8700Sstevel@tonic-gate extern int	sobind(struct sonode *, struct sockaddr *, socklen_t,
8710Sstevel@tonic-gate 		    int, int);
8720Sstevel@tonic-gate extern int	solisten(struct sonode *, int);
8730Sstevel@tonic-gate extern int	soconnect(struct sonode *, const struct sockaddr *, socklen_t,
8740Sstevel@tonic-gate 		    int, int);
8750Sstevel@tonic-gate extern int	sorecvmsg(struct sonode *, struct nmsghdr *, struct uio *);
8760Sstevel@tonic-gate extern int	sosendmsg(struct sonode *, struct nmsghdr *, struct uio *);
8770Sstevel@tonic-gate extern int	soshutdown(struct sonode *, int);
8780Sstevel@tonic-gate extern int	sogetsockopt(struct sonode *, int, int, void *, socklen_t *,
8790Sstevel@tonic-gate 		    int);
8800Sstevel@tonic-gate extern int	sosetsockopt(struct sonode *, int, int, const void *,
8810Sstevel@tonic-gate 		    t_uscalar_t);
8820Sstevel@tonic-gate 
8838348SEric.Yu@Sun.COM extern struct sonode	*socreate(struct sockparams *, int, int, int, int,
8848348SEric.Yu@Sun.COM 			    int *);
8850Sstevel@tonic-gate 
8860Sstevel@tonic-gate extern int	so_copyin(const void *, void *, size_t, int);
8870Sstevel@tonic-gate extern int	so_copyout(const void *, void *, size_t, int);
8880Sstevel@tonic-gate 
8890Sstevel@tonic-gate #endif /* _KERNEL */
8900Sstevel@tonic-gate 
8910Sstevel@tonic-gate /*
8920Sstevel@tonic-gate  * Internal structure for obtaining sonode information from the socklist.
8930Sstevel@tonic-gate  * These types match those corresponding in the sonode structure.
8940Sstevel@tonic-gate  * This is not a published interface, and may change at any time.
8950Sstevel@tonic-gate  */
8960Sstevel@tonic-gate struct sockinfo {
8970Sstevel@tonic-gate 	uint_t		si_size;		/* real length of this struct */
8980Sstevel@tonic-gate 	short		si_family;
8990Sstevel@tonic-gate 	short		si_type;
9000Sstevel@tonic-gate 	ushort_t	si_flag;
9010Sstevel@tonic-gate 	uint_t		si_state;
9020Sstevel@tonic-gate 	uint_t		si_ux_laddr_sou_magic;
9030Sstevel@tonic-gate 	uint_t		si_ux_faddr_sou_magic;
9040Sstevel@tonic-gate 	t_scalar_t	si_serv_type;
9050Sstevel@tonic-gate 	t_uscalar_t	si_laddr_soa_len;
9060Sstevel@tonic-gate 	t_uscalar_t	si_faddr_soa_len;
9070Sstevel@tonic-gate 	uint16_t	si_laddr_family;
9080Sstevel@tonic-gate 	uint16_t	si_faddr_family;
9090Sstevel@tonic-gate 	char		si_laddr_sun_path[MAXPATHLEN + 1]; /* NUL-terminated */
9100Sstevel@tonic-gate 	char		si_faddr_sun_path[MAXPATHLEN + 1]; /* same size as the local path; presumably also NUL-terminated */
9118348SEric.Yu@Sun.COM 	boolean_t	si_faddr_noxlate;
9120Sstevel@tonic-gate 	zoneid_t	si_szoneid;
9130Sstevel@tonic-gate };
9140Sstevel@tonic-gate 
9158348SEric.Yu@Sun.COM #define	SOCKMOD_PATH	"socketmod"	/* dir where sockmods are stored */
9160Sstevel@tonic-gate 
9170Sstevel@tonic-gate #ifdef	__cplusplus
9180Sstevel@tonic-gate }
9190Sstevel@tonic-gate #endif
9200Sstevel@tonic-gate 
9210Sstevel@tonic-gate #endif	/* _SYS_SOCKETVAR_H */
922