10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 51548Srshoaib * Common Development and Distribution License (the "License"). 61548Srshoaib * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 211548Srshoaib 220Sstevel@tonic-gate /* 236707Sbrutus * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 240Sstevel@tonic-gate * Use is subject to license terms. 250Sstevel@tonic-gate */ 260Sstevel@tonic-gate 270Sstevel@tonic-gate /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 280Sstevel@tonic-gate /* All Rights Reserved */ 290Sstevel@tonic-gate 300Sstevel@tonic-gate /* 310Sstevel@tonic-gate * University Copyright- Copyright (c) 1982, 1986, 1988 320Sstevel@tonic-gate * The Regents of the University of California 330Sstevel@tonic-gate * All Rights Reserved 340Sstevel@tonic-gate * 350Sstevel@tonic-gate * University Acknowledgment- Portions of this document are derived from 360Sstevel@tonic-gate * software developed by the University of California, Berkeley, and its 370Sstevel@tonic-gate * contributors. 380Sstevel@tonic-gate */ 390Sstevel@tonic-gate 400Sstevel@tonic-gate #ifndef _SYS_SOCKETVAR_H 410Sstevel@tonic-gate #define _SYS_SOCKETVAR_H 420Sstevel@tonic-gate 430Sstevel@tonic-gate #include <sys/types.h> 440Sstevel@tonic-gate #include <sys/stream.h> 450Sstevel@tonic-gate #include <sys/t_lock.h> 460Sstevel@tonic-gate #include <sys/cred.h> 470Sstevel@tonic-gate #include <sys/vnode.h> 480Sstevel@tonic-gate #include <sys/file.h> 490Sstevel@tonic-gate #include <sys/param.h> 500Sstevel@tonic-gate #include <sys/zone.h> 51*8348SEric.Yu@Sun.COM #include <sys/sdt.h> 52*8348SEric.Yu@Sun.COM #include <sys/modctl.h> 53*8348SEric.Yu@Sun.COM #include <sys/atomic.h> 54*8348SEric.Yu@Sun.COM #include <sys/socket.h> 55*8348SEric.Yu@Sun.COM #include <sys/ksocket.h> 566707Sbrutus #include <sys/sodirect.h> 570Sstevel@tonic-gate 580Sstevel@tonic-gate #ifdef __cplusplus 590Sstevel@tonic-gate extern "C" { 600Sstevel@tonic-gate #endif 610Sstevel@tonic-gate 620Sstevel@tonic-gate /* 630Sstevel@tonic-gate * Internal representation of the address used to represent addresses 640Sstevel@tonic-gate * in the loopback transport for AF_UNIX. While the sockaddr_un is used 650Sstevel@tonic-gate * as the sockfs layer address for AF_UNIX the pathnames contained in 660Sstevel@tonic-gate * these addresses are not unique (due to relative pathnames) thus can not 670Sstevel@tonic-gate * be used in the transport. 680Sstevel@tonic-gate * 690Sstevel@tonic-gate * The transport level address consists of a magic number (used to separate the 700Sstevel@tonic-gate * name space for specific and implicit binds). For a specific bind 710Sstevel@tonic-gate * this is followed by a "vnode *" which ensures that all specific binds 720Sstevel@tonic-gate * have a unique transport level address. For implicit binds the latter 730Sstevel@tonic-gate * part of the address is a byte string (of the same length as a pointer) 740Sstevel@tonic-gate * that is assigned by the loopback transport. 750Sstevel@tonic-gate * 760Sstevel@tonic-gate * The uniqueness assumes that the loopback transport has a separate namespace 770Sstevel@tonic-gate * for sockets in order to avoid name conflicts with e.g. TLI use of the 780Sstevel@tonic-gate * same transport. 790Sstevel@tonic-gate */ 800Sstevel@tonic-gate struct so_ux_addr { 810Sstevel@tonic-gate void *soua_vp; /* vnode pointer or assigned by tl */ 820Sstevel@tonic-gate uint_t soua_magic; /* See below */ 830Sstevel@tonic-gate }; 840Sstevel@tonic-gate 850Sstevel@tonic-gate #define SOU_MAGIC_EXPLICIT 0x75787670 /* "uxvp" */ 860Sstevel@tonic-gate #define SOU_MAGIC_IMPLICIT 0x616e6f6e /* "anon" */ 870Sstevel@tonic-gate 880Sstevel@tonic-gate struct sockaddr_ux { 890Sstevel@tonic-gate sa_family_t sou_family; /* AF_UNIX */ 900Sstevel@tonic-gate struct so_ux_addr sou_addr; 910Sstevel@tonic-gate }; 920Sstevel@tonic-gate 93*8348SEric.Yu@Sun.COM #if defined(_KERNEL) || defined(_KMEMUSER) 94*8348SEric.Yu@Sun.COM 95*8348SEric.Yu@Sun.COM #include <sys/socket_proto.h> 96*8348SEric.Yu@Sun.COM 970Sstevel@tonic-gate typedef struct sonodeops sonodeops_t; 98741Smasputra typedef struct sonode sonode_t; 990Sstevel@tonic-gate 1000Sstevel@tonic-gate /* 1010Sstevel@tonic-gate * The sonode represents a socket. A sonode never exist in the file system 1020Sstevel@tonic-gate * name space and can not be opened using open() - only the socket, socketpair 1030Sstevel@tonic-gate * and accept calls create sonodes. 1040Sstevel@tonic-gate * 105*8348SEric.Yu@Sun.COM * The locking of sockfs uses the so_lock mutex plus the SOLOCKED and 106*8348SEric.Yu@Sun.COM * SOREADLOCKED flags in so_flag. The mutex protects all the state in the 107*8348SEric.Yu@Sun.COM * sonode. It is expected that the underlying transport protocol serializes 108*8348SEric.Yu@Sun.COM * socket operations, so sockfs will not normally not single-thread 109*8348SEric.Yu@Sun.COM * operations. However, certain sockets, including TPI based ones, can only 110*8348SEric.Yu@Sun.COM * handle one control operation at a time. The SOLOCKED flag is used to 111*8348SEric.Yu@Sun.COM * single-thread operations from sockfs users to prevent e.g. multiple bind() 112*8348SEric.Yu@Sun.COM * calls to operate on the same sonode concurrently. The SOREADLOCKED flag is 113*8348SEric.Yu@Sun.COM * used to ensure that only one thread sleeps in kstrgetmsg for a given 114*8348SEric.Yu@Sun.COM * sonode. This is needed to ensure atomic operation for things like 115*8348SEric.Yu@Sun.COM * MSG_WAITALL. 1160Sstevel@tonic-gate * 117*8348SEric.Yu@Sun.COM * The so_fallback_rwlock is used to ensure that for sockets that can 118*8348SEric.Yu@Sun.COM * fall back to TPI, the fallback is not initiated until all pending 119*8348SEric.Yu@Sun.COM * operations have completed. 1200Sstevel@tonic-gate * 1210Sstevel@tonic-gate * Note that so_lock is sometimes held across calls that might go to sleep 1220Sstevel@tonic-gate * (kmem_alloc and soallocproto*). This implies that no other lock in 1230Sstevel@tonic-gate * the system should be held when calling into sockfs; from the system call 124*8348SEric.Yu@Sun.COM * side or from strrput (in case of TPI based sockets). If locks are held 125*8348SEric.Yu@Sun.COM * while calling into sockfs the system might hang when running low on memory. 1260Sstevel@tonic-gate */ 1270Sstevel@tonic-gate struct sonode { 1280Sstevel@tonic-gate struct vnode *so_vnode; /* vnode associated with this sonode */ 1290Sstevel@tonic-gate 130*8348SEric.Yu@Sun.COM sonodeops_t *so_ops; /* operations vector for this sonode */ 131*8348SEric.Yu@Sun.COM void *so_priv; /* sonode private data */ 1320Sstevel@tonic-gate 133*8348SEric.Yu@Sun.COM krwlock_t so_fallback_rwlock; 1340Sstevel@tonic-gate kmutex_t so_lock; /* protects sonode fields */ 135*8348SEric.Yu@Sun.COM 1360Sstevel@tonic-gate kcondvar_t so_state_cv; /* synchronize state changes */ 1370Sstevel@tonic-gate kcondvar_t so_want_cv; /* wait due to SOLOCKED */ 1380Sstevel@tonic-gate 1390Sstevel@tonic-gate /* These fields are protected by so_lock */ 1400Sstevel@tonic-gate 141*8348SEric.Yu@Sun.COM uint_t so_state; /* internal state flags SS_*, below */ 142*8348SEric.Yu@Sun.COM uint_t so_mode; /* characteristics on socket. SM_* */ 143*8348SEric.Yu@Sun.COM ushort_t so_flag; /* flags, see below */ 144*8348SEric.Yu@Sun.COM int so_count; /* count of opened references */ 1450Sstevel@tonic-gate 146*8348SEric.Yu@Sun.COM sock_connid_t so_proto_connid; /* protocol generation number */ 1470Sstevel@tonic-gate 148*8348SEric.Yu@Sun.COM ushort_t so_error; /* error affecting connection */ 149*8348SEric.Yu@Sun.COM 150*8348SEric.Yu@Sun.COM struct sockparams *so_sockparams; /* vnode or socket module */ 1510Sstevel@tonic-gate /* Needed to recreate the same socket for accept */ 1520Sstevel@tonic-gate short so_family; 1530Sstevel@tonic-gate short so_type; 1540Sstevel@tonic-gate short so_protocol; 1550Sstevel@tonic-gate short so_version; /* From so_socket call */ 156*8348SEric.Yu@Sun.COM 157*8348SEric.Yu@Sun.COM /* Accept queue */ 158*8348SEric.Yu@Sun.COM kmutex_t so_acceptq_lock; /* protects accept queue */ 159*8348SEric.Yu@Sun.COM struct sonode *so_acceptq_next; /* acceptq list node */ 160*8348SEric.Yu@Sun.COM struct sonode *so_acceptq_head; 161*8348SEric.Yu@Sun.COM struct sonode **so_acceptq_tail; 162*8348SEric.Yu@Sun.COM unsigned int so_acceptq_len; 163*8348SEric.Yu@Sun.COM unsigned int so_backlog; /* Listen backlog */ 164*8348SEric.Yu@Sun.COM kcondvar_t so_acceptq_cv; /* wait for new conn. */ 1650Sstevel@tonic-gate 1660Sstevel@tonic-gate /* Options */ 1670Sstevel@tonic-gate short so_options; /* From socket call, see socket.h */ 1680Sstevel@tonic-gate struct linger so_linger; /* SO_LINGER value */ 169*8348SEric.Yu@Sun.COM #define so_sndbuf so_proto_props.sopp_txhiwat /* SO_SNDBUF value */ 170*8348SEric.Yu@Sun.COM #define so_sndlowat so_proto_props.sopp_txlowat /* tx low water mark */ 171*8348SEric.Yu@Sun.COM #define so_rcvbuf so_proto_props.sopp_rxhiwat /* SO_RCVBUF value */ 172*8348SEric.Yu@Sun.COM #define so_rcvlowat so_proto_props.sopp_rxlowat /* rx low water mark */ 173*8348SEric.Yu@Sun.COM #define so_max_addr_len so_proto_props.sopp_maxaddrlen 174*8348SEric.Yu@Sun.COM #define so_minpsz so_proto_props.sopp_minpsz 175*8348SEric.Yu@Sun.COM #define so_maxpsz so_proto_props.sopp_maxpsz 1760Sstevel@tonic-gate 177*8348SEric.Yu@Sun.COM clock_t so_sndtimeo; /* send timeout */ 178*8348SEric.Yu@Sun.COM clock_t so_rcvtimeo; /* recv timeout */ 179*8348SEric.Yu@Sun.COM 1800Sstevel@tonic-gate mblk_t *so_oobmsg; /* outofline oob data */ 181*8348SEric.Yu@Sun.COM ssize_t so_oobmark; /* offset of the oob data */ 182*8348SEric.Yu@Sun.COM 1830Sstevel@tonic-gate pid_t so_pgrp; /* pgrp for signals */ 1840Sstevel@tonic-gate 1850Sstevel@tonic-gate cred_t *so_peercred; /* connected socket peer cred */ 1860Sstevel@tonic-gate pid_t so_cpid; /* connected socket peer cached pid */ 1870Sstevel@tonic-gate zoneid_t so_zoneid; /* opener's zoneid */ 1880Sstevel@tonic-gate 189*8348SEric.Yu@Sun.COM struct pollhead so_poll_list; /* common pollhead */ 190*8348SEric.Yu@Sun.COM short so_pollev; /* events that should be generated */ 191*8348SEric.Yu@Sun.COM 192*8348SEric.Yu@Sun.COM /* Receive */ 193*8348SEric.Yu@Sun.COM unsigned int so_rcv_queued; 194*8348SEric.Yu@Sun.COM mblk_t *so_rcv_q_head; 195*8348SEric.Yu@Sun.COM mblk_t *so_rcv_q_last_head; 196*8348SEric.Yu@Sun.COM mblk_t *so_rcv_head; /* 1st mblk in the list */ 197*8348SEric.Yu@Sun.COM mblk_t *so_rcv_last_head; /* last mblk in b_next chain */ 198*8348SEric.Yu@Sun.COM kcondvar_t so_rcv_cv; 199*8348SEric.Yu@Sun.COM uint_t so_rcv_wanted; /* # of bytes wanted by app */ 200*8348SEric.Yu@Sun.COM timeout_id_t so_rcv_timer_tid; 201*8348SEric.Yu@Sun.COM 202*8348SEric.Yu@Sun.COM #define so_rcv_thresh so_proto_props.sopp_rcvthresh 203*8348SEric.Yu@Sun.COM #define so_rcv_timer_interval so_proto_props.sopp_rcvtimer 2040Sstevel@tonic-gate 205*8348SEric.Yu@Sun.COM /* Send */ 206*8348SEric.Yu@Sun.COM boolean_t so_snd_qfull; /* Transmit full */ 207*8348SEric.Yu@Sun.COM kcondvar_t so_snd_cv; 208*8348SEric.Yu@Sun.COM 209*8348SEric.Yu@Sun.COM boolean_t so_rcv_wakeup; 210*8348SEric.Yu@Sun.COM boolean_t so_snd_wakeup; 211898Skais 212*8348SEric.Yu@Sun.COM /* Communication channel with protocol */ 213*8348SEric.Yu@Sun.COM sock_lower_handle_t so_proto_handle; 214*8348SEric.Yu@Sun.COM sock_downcalls_t *so_downcalls; 215*8348SEric.Yu@Sun.COM 216*8348SEric.Yu@Sun.COM struct sock_proto_props so_proto_props; /* protocol settings */ 217*8348SEric.Yu@Sun.COM boolean_t so_flowctrld; /* Flow controlled */ 218*8348SEric.Yu@Sun.COM uint_t so_copyflag; /* Copy related flag */ 219*8348SEric.Yu@Sun.COM kcondvar_t so_copy_cv; /* Copy cond variable */ 220*8348SEric.Yu@Sun.COM 221*8348SEric.Yu@Sun.COM /* kernel sockets */ 222*8348SEric.Yu@Sun.COM ksocket_callbacks_t so_ksock_callbacks; 223*8348SEric.Yu@Sun.COM void *so_ksock_cb_arg; /* callback argument */ 224*8348SEric.Yu@Sun.COM kcondvar_t so_closing_cv; 2256707Sbrutus 2266707Sbrutus /* != NULL for sodirect_t enabled socket */ 227*8348SEric.Yu@Sun.COM sodirect_t *so_direct; 2280Sstevel@tonic-gate }; 2290Sstevel@tonic-gate 230*8348SEric.Yu@Sun.COM /* 231*8348SEric.Yu@Sun.COM * We do an initial check for events without holding locks. However, 232*8348SEric.Yu@Sun.COM * if there are no event available, then we redo the check for POLLIN 233*8348SEric.Yu@Sun.COM * events under the lock. 234*8348SEric.Yu@Sun.COM */ 235*8348SEric.Yu@Sun.COM #define SO_HAVE_DATA(so) \ 236*8348SEric.Yu@Sun.COM ((so)->so_rcv_timer_tid == 0 && (so->so_rcv_queued > 0)) || \ 237*8348SEric.Yu@Sun.COM ((so)->so_rcv_queued > (so)->so_rcv_thresh) || \ 238*8348SEric.Yu@Sun.COM ((so)->so_state & SS_CANTRCVMORE) 239*8348SEric.Yu@Sun.COM 240*8348SEric.Yu@Sun.COM /* 241*8348SEric.Yu@Sun.COM * Events handled by the protocol (in case sd_poll is set) 242*8348SEric.Yu@Sun.COM */ 243*8348SEric.Yu@Sun.COM #define SO_PROTO_POLLEV (POLLIN|POLLRDNORM|POLLRDBAND) 244*8348SEric.Yu@Sun.COM 245*8348SEric.Yu@Sun.COM 246*8348SEric.Yu@Sun.COM #endif /* _KERNEL || _KMEMUSER */ 247*8348SEric.Yu@Sun.COM 2480Sstevel@tonic-gate /* flags */ 2490Sstevel@tonic-gate #define SOMOD 0x0001 /* update socket modification time */ 2500Sstevel@tonic-gate #define SOACC 0x0002 /* update socket access time */ 2510Sstevel@tonic-gate 2520Sstevel@tonic-gate #define SOLOCKED 0x0010 /* use to serialize open/closes */ 2530Sstevel@tonic-gate #define SOREADLOCKED 0x0020 /* serialize kstrgetmsg calls */ 2540Sstevel@tonic-gate #define SOWANT 0x0040 /* some process waiting on lock */ 2550Sstevel@tonic-gate #define SOCLONE 0x0080 /* child of clone driver */ 2560Sstevel@tonic-gate #define SOASYNC_UNBIND 0x0100 /* wait for ACK of async unbind */ 2570Sstevel@tonic-gate 258*8348SEric.Yu@Sun.COM #define SOCK_IS_NONSTR(so) ((so)->so_vnode->v_stream == NULL) 259*8348SEric.Yu@Sun.COM 2600Sstevel@tonic-gate /* 2610Sstevel@tonic-gate * Socket state bits. 2620Sstevel@tonic-gate */ 2630Sstevel@tonic-gate #define SS_ISCONNECTED 0x00000001 /* socket connected to a peer */ 2640Sstevel@tonic-gate #define SS_ISCONNECTING 0x00000002 /* in process, connecting to peer */ 2650Sstevel@tonic-gate #define SS_ISDISCONNECTING 0x00000004 /* in process of disconnecting */ 2660Sstevel@tonic-gate #define SS_CANTSENDMORE 0x00000008 /* can't send more data to peer */ 2670Sstevel@tonic-gate 2680Sstevel@tonic-gate #define SS_CANTRCVMORE 0x00000010 /* can't receive more data */ 2690Sstevel@tonic-gate #define SS_ISBOUND 0x00000020 /* socket is bound */ 2700Sstevel@tonic-gate #define SS_NDELAY 0x00000040 /* FNDELAY non-blocking */ 2710Sstevel@tonic-gate #define SS_NONBLOCK 0x00000080 /* O_NONBLOCK non-blocking */ 2720Sstevel@tonic-gate 2730Sstevel@tonic-gate #define SS_ASYNC 0x00000100 /* async i/o notify */ 2740Sstevel@tonic-gate #define SS_ACCEPTCONN 0x00000200 /* listen done */ 275*8348SEric.Yu@Sun.COM /* unused 0x00000400 */ /* was SS_HASCONNIND */ 2760Sstevel@tonic-gate #define SS_SAVEDEOR 0x00000800 /* Saved MSG_EOR rcv side state */ 2770Sstevel@tonic-gate 2780Sstevel@tonic-gate #define SS_RCVATMARK 0x00001000 /* at mark on input */ 2790Sstevel@tonic-gate #define SS_OOBPEND 0x00002000 /* OOB pending or present - poll */ 2800Sstevel@tonic-gate #define SS_HAVEOOBDATA 0x00004000 /* OOB data present */ 2810Sstevel@tonic-gate #define SS_HADOOBDATA 0x00008000 /* OOB data consumed */ 282*8348SEric.Yu@Sun.COM #define SS_CLOSING 0x00010000 /* in process of closing */ 2830Sstevel@tonic-gate 284*8348SEric.Yu@Sun.COM /* unused 0x00020000 */ /* was SS_FADDR_NOXLATE */ 285*8348SEric.Yu@Sun.COM /* unused 0x00040000 */ /* was SS_HASDATA */ 286*8348SEric.Yu@Sun.COM /* unused 0x00080000 */ /* was SS_DONEREAD */ 287*8348SEric.Yu@Sun.COM /* unused 0x00100000 */ /* was SS_MOREDATA */ 288*8348SEric.Yu@Sun.COM /* unused 0x00200000 */ /* was SS_DIRECT */ 2890Sstevel@tonic-gate 2906707Sbrutus #define SS_SODIRECT 0x00400000 /* transport supports sodirect */ 2910Sstevel@tonic-gate 292*8348SEric.Yu@Sun.COM /* unused 0x01000000 */ /* was SS_LADDR_VALID */ 293*8348SEric.Yu@Sun.COM /* unused 0x02000000 */ /* was SS_FADDR_VALID */ 294*8348SEric.Yu@Sun.COM 295*8348SEric.Yu@Sun.COM #define SS_SENTLASTREADSIG 0x10000000 /* last rx signal has been sent */ 296*8348SEric.Yu@Sun.COM #define SS_SENTLASTWRITESIG 0x20000000 /* last tx signal has been sent */ 297*8348SEric.Yu@Sun.COM 298*8348SEric.Yu@Sun.COM #define SS_FALLBACK_PENDING 0x40000000 299*8348SEric.Yu@Sun.COM #define SS_FALLBACK_COMP 0x80000000 300*8348SEric.Yu@Sun.COM 3010Sstevel@tonic-gate 3020Sstevel@tonic-gate /* Set of states when the socket can't be rebound */ 3030Sstevel@tonic-gate #define SS_CANTREBIND (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING|\ 3040Sstevel@tonic-gate SS_CANTSENDMORE|SS_CANTRCVMORE|SS_ACCEPTCONN) 3050Sstevel@tonic-gate 3060Sstevel@tonic-gate /* 307*8348SEric.Yu@Sun.COM * Sockets that can fall back to TPI must ensure that fall back is not 308*8348SEric.Yu@Sun.COM * initiated while a thread is using a socket. 309*8348SEric.Yu@Sun.COM */ 310*8348SEric.Yu@Sun.COM #define SO_BLOCK_FALLBACK(so, fn) { \ 311*8348SEric.Yu@Sun.COM ASSERT(MUTEX_NOT_HELD(&(so)->so_lock)); \ 312*8348SEric.Yu@Sun.COM rw_enter(&(so)->so_fallback_rwlock, RW_READER); \ 313*8348SEric.Yu@Sun.COM if ((so)->so_state & SS_FALLBACK_COMP) { \ 314*8348SEric.Yu@Sun.COM rw_exit(&(so)->so_fallback_rwlock); \ 315*8348SEric.Yu@Sun.COM return (fn); \ 316*8348SEric.Yu@Sun.COM } \ 317*8348SEric.Yu@Sun.COM } 318*8348SEric.Yu@Sun.COM 319*8348SEric.Yu@Sun.COM #define SO_UNBLOCK_FALLBACK(so) { \ 320*8348SEric.Yu@Sun.COM rw_exit(&(so)->so_fallback_rwlock); \ 321*8348SEric.Yu@Sun.COM } 322*8348SEric.Yu@Sun.COM 323*8348SEric.Yu@Sun.COM /* Poll events */ 324*8348SEric.Yu@Sun.COM #define SO_POLLEV_IN 0x1 /* POLLIN wakeup needed */ 325*8348SEric.Yu@Sun.COM #define SO_POLLEV_ALWAYS 0x2 /* wakeups */ 326*8348SEric.Yu@Sun.COM 327*8348SEric.Yu@Sun.COM /* 3280Sstevel@tonic-gate * Characteristics of sockets. Not changed after the socket is created. 3290Sstevel@tonic-gate */ 3300Sstevel@tonic-gate #define SM_PRIV 0x001 /* privileged for broadcast, raw... */ 3310Sstevel@tonic-gate #define SM_ATOMIC 0x002 /* atomic data transmission */ 3320Sstevel@tonic-gate #define SM_ADDR 0x004 /* addresses given with messages */ 3330Sstevel@tonic-gate #define SM_CONNREQUIRED 0x008 /* connection required by protocol */ 3340Sstevel@tonic-gate 3350Sstevel@tonic-gate #define SM_FDPASSING 0x010 /* passes file descriptors */ 3360Sstevel@tonic-gate #define SM_EXDATA 0x020 /* Can handle T_EXDATA_REQ */ 3370Sstevel@tonic-gate #define SM_OPTDATA 0x040 /* Can handle T_OPTDATA_REQ */ 3380Sstevel@tonic-gate #define SM_BYTESTREAM 0x080 /* Byte stream - can use M_DATA */ 3390Sstevel@tonic-gate 3400Sstevel@tonic-gate #define SM_ACCEPTOR_ID 0x100 /* so_acceptor_id is valid */ 3410Sstevel@tonic-gate 342*8348SEric.Yu@Sun.COM #define SM_KERNEL 0x200 /* kernel socket */ 343*8348SEric.Yu@Sun.COM 344*8348SEric.Yu@Sun.COM #define SM_ACCEPTSUPP 0x400 /* can handle accept() */ 345*8348SEric.Yu@Sun.COM 3460Sstevel@tonic-gate /* 3470Sstevel@tonic-gate * Socket versions. Used by the socket library when calling _so_socket(). 3480Sstevel@tonic-gate */ 3490Sstevel@tonic-gate #define SOV_STREAM 0 /* Not a socket - just a stream */ 3500Sstevel@tonic-gate #define SOV_DEFAULT 1 /* Select based on so_default_version */ 3510Sstevel@tonic-gate #define SOV_SOCKSTREAM 2 /* Socket plus streams operations */ 3520Sstevel@tonic-gate #define SOV_SOCKBSD 3 /* Socket with no streams operations */ 3530Sstevel@tonic-gate #define SOV_XPG4_2 4 /* Xnet socket */ 3540Sstevel@tonic-gate 3550Sstevel@tonic-gate #if defined(_KERNEL) || defined(_KMEMUSER) 356*8348SEric.Yu@Sun.COM 3570Sstevel@tonic-gate /* 358*8348SEric.Yu@Sun.COM * sonode create and destroy functions. 359*8348SEric.Yu@Sun.COM */ 360*8348SEric.Yu@Sun.COM typedef struct sonode *(*so_create_func_t)(struct sockparams *, 361*8348SEric.Yu@Sun.COM int, int, int, int, int, int *, cred_t *); 362*8348SEric.Yu@Sun.COM typedef void (*so_destroy_func_t)(struct sonode *); 363*8348SEric.Yu@Sun.COM 364*8348SEric.Yu@Sun.COM /* STREAM device information */ 365*8348SEric.Yu@Sun.COM typedef struct sdev_info { 366*8348SEric.Yu@Sun.COM char *sd_devpath; 367*8348SEric.Yu@Sun.COM int sd_devpathlen; /* Is 0 if sp_devpath is a static string */ 368*8348SEric.Yu@Sun.COM vnode_t *sd_vnode; 369*8348SEric.Yu@Sun.COM } sdev_info_t; 370*8348SEric.Yu@Sun.COM 371*8348SEric.Yu@Sun.COM #define SOCKMOD_VERSION 1 372*8348SEric.Yu@Sun.COM /* name of the TPI pseudo socket module */ 373*8348SEric.Yu@Sun.COM #define SOTPI_SMOD_NAME "socktpi" 374*8348SEric.Yu@Sun.COM 375*8348SEric.Yu@Sun.COM typedef struct __smod_priv_s { 376*8348SEric.Yu@Sun.COM so_create_func_t smodp_sock_create_func; 377*8348SEric.Yu@Sun.COM so_destroy_func_t smodp_sock_destroy_func; 378*8348SEric.Yu@Sun.COM so_proto_fallback_func_t smodp_proto_fallback_func; 379*8348SEric.Yu@Sun.COM } __smod_priv_t; 380*8348SEric.Yu@Sun.COM 381*8348SEric.Yu@Sun.COM /* 382*8348SEric.Yu@Sun.COM * Socket module register information 383*8348SEric.Yu@Sun.COM */ 384*8348SEric.Yu@Sun.COM typedef struct smod_reg_s { 385*8348SEric.Yu@Sun.COM int smod_version; 386*8348SEric.Yu@Sun.COM char *smod_name; 387*8348SEric.Yu@Sun.COM size_t smod_uc_version; 388*8348SEric.Yu@Sun.COM size_t smod_dc_version; 389*8348SEric.Yu@Sun.COM so_proto_create_func_t smod_proto_create_func; 390*8348SEric.Yu@Sun.COM 391*8348SEric.Yu@Sun.COM /* __smod_priv_data must be NULL */ 392*8348SEric.Yu@Sun.COM __smod_priv_t *__smod_priv; 393*8348SEric.Yu@Sun.COM } smod_reg_t; 394*8348SEric.Yu@Sun.COM 395*8348SEric.Yu@Sun.COM /* 396*8348SEric.Yu@Sun.COM * Socket module information 397*8348SEric.Yu@Sun.COM */ 398*8348SEric.Yu@Sun.COM typedef struct smod_info { 399*8348SEric.Yu@Sun.COM int smod_version; 400*8348SEric.Yu@Sun.COM char *smod_name; 401*8348SEric.Yu@Sun.COM uint_t smod_refcnt; /* # of entries */ 402*8348SEric.Yu@Sun.COM size_t smod_uc_version; /* upcall version */ 403*8348SEric.Yu@Sun.COM size_t smod_dc_version; /* down call version */ 404*8348SEric.Yu@Sun.COM so_proto_create_func_t smod_proto_create_func; 405*8348SEric.Yu@Sun.COM so_proto_fallback_func_t smod_proto_fallback_func; 406*8348SEric.Yu@Sun.COM so_create_func_t smod_sock_create_func; 407*8348SEric.Yu@Sun.COM so_destroy_func_t smod_sock_destroy_func; 408*8348SEric.Yu@Sun.COM list_node_t smod_node; 409*8348SEric.Yu@Sun.COM } smod_info_t; 410*8348SEric.Yu@Sun.COM 411*8348SEric.Yu@Sun.COM /* 412*8348SEric.Yu@Sun.COM * sockparams 413*8348SEric.Yu@Sun.COM * 414*8348SEric.Yu@Sun.COM * Used for mapping family/type/protocol to module 4150Sstevel@tonic-gate */ 4160Sstevel@tonic-gate struct sockparams { 417*8348SEric.Yu@Sun.COM /* 418*8348SEric.Yu@Sun.COM * The family, type, protocol, sdev_info and smod_info are 419*8348SEric.Yu@Sun.COM * set when the entry is created, and they will never change 420*8348SEric.Yu@Sun.COM * thereafter. 421*8348SEric.Yu@Sun.COM */ 422*8348SEric.Yu@Sun.COM int sp_family; 423*8348SEric.Yu@Sun.COM int sp_type; 424*8348SEric.Yu@Sun.COM int sp_protocol; 425*8348SEric.Yu@Sun.COM 426*8348SEric.Yu@Sun.COM sdev_info_t sp_sdev_info; /* STREAM device */ 427*8348SEric.Yu@Sun.COM char *sp_smod_name; /* socket module name */ 428*8348SEric.Yu@Sun.COM smod_info_t *sp_smod_info; /* socket module */ 429*8348SEric.Yu@Sun.COM 430*8348SEric.Yu@Sun.COM kmutex_t sp_lock; /* lock for refcnt */ 431*8348SEric.Yu@Sun.COM uint64_t sp_refcnt; /* entry reference count */ 432*8348SEric.Yu@Sun.COM 433*8348SEric.Yu@Sun.COM /* 434*8348SEric.Yu@Sun.COM * The entries below are only modified while holding 435*8348SEric.Yu@Sun.COM * splist_lock as a writer. 436*8348SEric.Yu@Sun.COM */ 437*8348SEric.Yu@Sun.COM int sp_flags; /* see below */ 438*8348SEric.Yu@Sun.COM list_node_t sp_node; 4390Sstevel@tonic-gate }; 4400Sstevel@tonic-gate 441*8348SEric.Yu@Sun.COM 442*8348SEric.Yu@Sun.COM /* 443*8348SEric.Yu@Sun.COM * sockparams flags 444*8348SEric.Yu@Sun.COM */ 445*8348SEric.Yu@Sun.COM #define SOCKPARAMS_EPHEMERAL 0x1 /* temp. entry, not on global list */ 446*8348SEric.Yu@Sun.COM 447*8348SEric.Yu@Sun.COM extern void sockparams_init(void); 448*8348SEric.Yu@Sun.COM extern struct sockparams *sockparams_hold_ephemeral_bydev(int, int, int, 449*8348SEric.Yu@Sun.COM const char *, int, int *); 450*8348SEric.Yu@Sun.COM extern struct sockparams *sockparams_hold_ephemeral_bymod(int, int, int, 451*8348SEric.Yu@Sun.COM const char *, int, int *); 452*8348SEric.Yu@Sun.COM extern void sockparams_ephemeral_drop_last_ref(struct sockparams *); 453*8348SEric.Yu@Sun.COM 454*8348SEric.Yu@Sun.COM extern void smod_init(void); 455*8348SEric.Yu@Sun.COM extern void smod_add(smod_info_t *); 456*8348SEric.Yu@Sun.COM extern int smod_register(const smod_reg_t *); 457*8348SEric.Yu@Sun.COM extern int smod_unregister(const char *); 458*8348SEric.Yu@Sun.COM extern smod_info_t *smod_lookup_byname(const char *); 459*8348SEric.Yu@Sun.COM 460*8348SEric.Yu@Sun.COM #define SOCKPARAMS_HAS_DEVICE(sp) \ 461*8348SEric.Yu@Sun.COM ((sp)->sp_sdev_info.sd_devpath != NULL) 462*8348SEric.Yu@Sun.COM 463*8348SEric.Yu@Sun.COM /* Increase the smod_info_t reference count */ 464*8348SEric.Yu@Sun.COM #define SMOD_INC_REF(smodp) { \ 465*8348SEric.Yu@Sun.COM ASSERT((smodp) != NULL); \ 466*8348SEric.Yu@Sun.COM DTRACE_PROBE1(smodinfo__inc__ref, struct smod_info *, (smodp)); \ 467*8348SEric.Yu@Sun.COM atomic_inc_uint(&(smodp)->smod_refcnt); \ 468*8348SEric.Yu@Sun.COM } 469*8348SEric.Yu@Sun.COM 470*8348SEric.Yu@Sun.COM /* 471*8348SEric.Yu@Sun.COM * Decreace the socket module entry reference count. 472*8348SEric.Yu@Sun.COM * When no one mapping to the entry, we try to unload the module from the 473*8348SEric.Yu@Sun.COM * kernel. If the module can't unload, just leave the module entry with 474*8348SEric.Yu@Sun.COM * a zero refcnt. 475*8348SEric.Yu@Sun.COM */ 476*8348SEric.Yu@Sun.COM #define SMOD_DEC_REF(sp, smodp) { \ 477*8348SEric.Yu@Sun.COM ASSERT((smodp) != NULL); \ 478*8348SEric.Yu@Sun.COM ASSERT((smodp)->smod_refcnt != 0); \ 479*8348SEric.Yu@Sun.COM atomic_dec_uint(&(smodp)->smod_refcnt); \ 480*8348SEric.Yu@Sun.COM /* \ 481*8348SEric.Yu@Sun.COM * No need to atomically check the return value because the \ 482*8348SEric.Yu@Sun.COM * socket module framework will verify that no one is using \ 483*8348SEric.Yu@Sun.COM * the module before unloading. Worst thing that can happen \ 484*8348SEric.Yu@Sun.COM * here is multiple calls to mod_remove_by_name(), which is OK. \ 485*8348SEric.Yu@Sun.COM */ \ 486*8348SEric.Yu@Sun.COM if ((smodp)->smod_refcnt == 0) \ 487*8348SEric.Yu@Sun.COM (void) mod_remove_by_name((sp)->sp_smod_name); \ 488*8348SEric.Yu@Sun.COM } 489*8348SEric.Yu@Sun.COM 490*8348SEric.Yu@Sun.COM /* Increase the reference count */ 491*8348SEric.Yu@Sun.COM #define SOCKPARAMS_INC_REF(sp) { \ 492*8348SEric.Yu@Sun.COM ASSERT((sp) != NULL); \ 493*8348SEric.Yu@Sun.COM DTRACE_PROBE1(sockparams__inc__ref, struct sockparams *, (sp)); \ 494*8348SEric.Yu@Sun.COM mutex_enter(&(sp)->sp_lock); \ 495*8348SEric.Yu@Sun.COM (sp)->sp_refcnt++; \ 496*8348SEric.Yu@Sun.COM ASSERT((sp)->sp_refcnt != 0); \ 497*8348SEric.Yu@Sun.COM mutex_exit(&(sp)->sp_lock); \ 498*8348SEric.Yu@Sun.COM } 499*8348SEric.Yu@Sun.COM 500*8348SEric.Yu@Sun.COM /* 501*8348SEric.Yu@Sun.COM * Decrease the reference count. 502*8348SEric.Yu@Sun.COM * 503*8348SEric.Yu@Sun.COM * If the sockparams is ephemeral, then the thread dropping the last ref 504*8348SEric.Yu@Sun.COM * count will destroy the entry. 505*8348SEric.Yu@Sun.COM */ 506*8348SEric.Yu@Sun.COM #define SOCKPARAMS_DEC_REF(sp) { \ 507*8348SEric.Yu@Sun.COM ASSERT((sp) != NULL); \ 508*8348SEric.Yu@Sun.COM DTRACE_PROBE1(sockparams__dec__ref, struct sockparams *, (sp)); \ 509*8348SEric.Yu@Sun.COM mutex_enter(&(sp)->sp_lock); \ 510*8348SEric.Yu@Sun.COM ASSERT((sp)->sp_refcnt > 0); \ 511*8348SEric.Yu@Sun.COM if ((sp)->sp_refcnt == 1) { \ 512*8348SEric.Yu@Sun.COM if ((sp)->sp_flags & SOCKPARAMS_EPHEMERAL) { \ 513*8348SEric.Yu@Sun.COM mutex_exit(&(sp)->sp_lock); \ 514*8348SEric.Yu@Sun.COM sockparams_ephemeral_drop_last_ref((sp)); \ 515*8348SEric.Yu@Sun.COM } else { \ 516*8348SEric.Yu@Sun.COM (sp)->sp_refcnt--; \ 517*8348SEric.Yu@Sun.COM if ((sp)->sp_smod_info != NULL) \ 518*8348SEric.Yu@Sun.COM SMOD_DEC_REF(sp, (sp)->sp_smod_info); \ 519*8348SEric.Yu@Sun.COM (sp)->sp_smod_info = NULL; \ 520*8348SEric.Yu@Sun.COM mutex_exit(&(sp)->sp_lock); \ 521*8348SEric.Yu@Sun.COM } \ 522*8348SEric.Yu@Sun.COM } else { \ 523*8348SEric.Yu@Sun.COM (sp)->sp_refcnt--; \ 524*8348SEric.Yu@Sun.COM mutex_exit(&(sp)->sp_lock); \ 525*8348SEric.Yu@Sun.COM } \ 526*8348SEric.Yu@Sun.COM } 5270Sstevel@tonic-gate 5280Sstevel@tonic-gate /* 5290Sstevel@tonic-gate * Used to traverse the list of AF_UNIX sockets to construct the kstat 5300Sstevel@tonic-gate * for netstat(1m). 5310Sstevel@tonic-gate */ 5320Sstevel@tonic-gate struct socklist { 5330Sstevel@tonic-gate kmutex_t sl_lock; 5340Sstevel@tonic-gate struct sonode *sl_list; 5350Sstevel@tonic-gate }; 5360Sstevel@tonic-gate 5370Sstevel@tonic-gate extern struct socklist socklist; 5380Sstevel@tonic-gate /* 5390Sstevel@tonic-gate * ss_full_waits is the number of times the reader thread 5400Sstevel@tonic-gate * waits when the queue is full and ss_empty_waits is the number 5410Sstevel@tonic-gate * of times the consumer thread waits when the queue is empty. 5420Sstevel@tonic-gate * No locks for these as they are just indicators of whether 5430Sstevel@tonic-gate * disk or network or both is slow or fast. 5440Sstevel@tonic-gate */ 5450Sstevel@tonic-gate struct sendfile_stats { 5460Sstevel@tonic-gate uint32_t ss_file_cached; 5470Sstevel@tonic-gate uint32_t ss_file_not_cached; 5480Sstevel@tonic-gate uint32_t ss_full_waits; 5490Sstevel@tonic-gate uint32_t ss_empty_waits; 5500Sstevel@tonic-gate uint32_t ss_file_segmap; 5510Sstevel@tonic-gate }; 5520Sstevel@tonic-gate 5530Sstevel@tonic-gate /* 5540Sstevel@tonic-gate * A single sendfile request is represented by snf_req. 5550Sstevel@tonic-gate */ 5560Sstevel@tonic-gate typedef struct snf_req { 5570Sstevel@tonic-gate struct snf_req *sr_next; 5580Sstevel@tonic-gate mblk_t *sr_mp_head; 5590Sstevel@tonic-gate mblk_t *sr_mp_tail; 5600Sstevel@tonic-gate kmutex_t sr_lock; 5610Sstevel@tonic-gate kcondvar_t sr_cv; 5620Sstevel@tonic-gate uint_t sr_qlen; 5630Sstevel@tonic-gate int sr_hiwat; 5640Sstevel@tonic-gate int sr_lowat; 5650Sstevel@tonic-gate int sr_operation; 5660Sstevel@tonic-gate struct vnode *sr_vp; 5670Sstevel@tonic-gate file_t *sr_fp; 5680Sstevel@tonic-gate ssize_t sr_maxpsz; 5690Sstevel@tonic-gate u_offset_t sr_file_off; 5700Sstevel@tonic-gate u_offset_t sr_file_size; 5710Sstevel@tonic-gate #define SR_READ_DONE 0x80000000 5720Sstevel@tonic-gate int sr_read_error; 5730Sstevel@tonic-gate int sr_write_error; 5740Sstevel@tonic-gate } snf_req_t; 5750Sstevel@tonic-gate 5760Sstevel@tonic-gate /* A queue of sendfile requests */ 5770Sstevel@tonic-gate struct sendfile_queue { 5780Sstevel@tonic-gate snf_req_t *snfq_req_head; 5790Sstevel@tonic-gate snf_req_t *snfq_req_tail; 5800Sstevel@tonic-gate kmutex_t snfq_lock; 5810Sstevel@tonic-gate kcondvar_t snfq_cv; 5820Sstevel@tonic-gate int snfq_svc_threads; /* # of service threads */ 5830Sstevel@tonic-gate int snfq_idle_cnt; /* # of idling threads */ 5840Sstevel@tonic-gate int snfq_max_threads; 5850Sstevel@tonic-gate int snfq_req_cnt; /* Number of requests */ 5860Sstevel@tonic-gate }; 5870Sstevel@tonic-gate 5880Sstevel@tonic-gate #define READ_OP 1 5890Sstevel@tonic-gate #define SNFQ_TIMEOUT (60 * 5 * hz) /* 5 minutes */ 5900Sstevel@tonic-gate 5910Sstevel@tonic-gate /* Socket network operations switch */ 5920Sstevel@tonic-gate struct sonodeops { 593*8348SEric.Yu@Sun.COM int (*sop_init)(struct sonode *, struct sonode *, cred_t *, 5940Sstevel@tonic-gate int); 595*8348SEric.Yu@Sun.COM int (*sop_accept)(struct sonode *, int, cred_t *, struct sonode **); 596*8348SEric.Yu@Sun.COM int (*sop_bind)(struct sonode *, struct sockaddr *, socklen_t, 597*8348SEric.Yu@Sun.COM int, cred_t *); 598*8348SEric.Yu@Sun.COM int (*sop_listen)(struct sonode *, int, cred_t *); 5990Sstevel@tonic-gate int (*sop_connect)(struct sonode *, const struct sockaddr *, 600*8348SEric.Yu@Sun.COM socklen_t, int, int, cred_t *); 6010Sstevel@tonic-gate int (*sop_recvmsg)(struct sonode *, struct msghdr *, 602*8348SEric.Yu@Sun.COM struct uio *, cred_t *); 6030Sstevel@tonic-gate int (*sop_sendmsg)(struct sonode *, struct msghdr *, 604*8348SEric.Yu@Sun.COM struct uio *, cred_t *); 605*8348SEric.Yu@Sun.COM int (*sop_sendmblk)(struct sonode *, struct msghdr *, int, 606*8348SEric.Yu@Sun.COM cred_t *, mblk_t **); 607*8348SEric.Yu@Sun.COM int (*sop_getpeername)(struct sonode *, struct sockaddr *, 608*8348SEric.Yu@Sun.COM socklen_t *, boolean_t, cred_t *); 609*8348SEric.Yu@Sun.COM int (*sop_getsockname)(struct sonode *, struct sockaddr *, 610*8348SEric.Yu@Sun.COM socklen_t *, cred_t *); 611*8348SEric.Yu@Sun.COM int (*sop_shutdown)(struct sonode *, int, cred_t *); 6120Sstevel@tonic-gate int (*sop_getsockopt)(struct sonode *, int, int, void *, 613*8348SEric.Yu@Sun.COM socklen_t *, int, cred_t *); 6140Sstevel@tonic-gate int (*sop_setsockopt)(struct sonode *, int, int, const void *, 615*8348SEric.Yu@Sun.COM socklen_t, cred_t *); 616*8348SEric.Yu@Sun.COM int (*sop_ioctl)(struct sonode *, int, intptr_t, int, 617*8348SEric.Yu@Sun.COM cred_t *, int32_t *); 618*8348SEric.Yu@Sun.COM int (*sop_poll)(struct sonode *, short, int, short *, 619*8348SEric.Yu@Sun.COM struct pollhead **); 620*8348SEric.Yu@Sun.COM int (*sop_close)(struct sonode *, int, cred_t *); 6210Sstevel@tonic-gate }; 6220Sstevel@tonic-gate 623*8348SEric.Yu@Sun.COM #define SOP_INIT(so, flag, cr, flags) \ 624*8348SEric.Yu@Sun.COM ((so)->so_ops->sop_init((so), (flag), (cr), (flags))) 625*8348SEric.Yu@Sun.COM #define SOP_ACCEPT(so, fflag, cr, nsop) \ 626*8348SEric.Yu@Sun.COM ((so)->so_ops->sop_accept((so), (fflag), (cr), (nsop))) 627*8348SEric.Yu@Sun.COM #define SOP_BIND(so, name, namelen, flags, cr) \ 628*8348SEric.Yu@Sun.COM ((so)->so_ops->sop_bind((so), (name), (namelen), (flags), (cr))) 629*8348SEric.Yu@Sun.COM #define SOP_LISTEN(so, backlog, cr) \ 630*8348SEric.Yu@Sun.COM ((so)->so_ops->sop_listen((so), (backlog), (cr))) 631*8348SEric.Yu@Sun.COM #define SOP_CONNECT(so, name, namelen, fflag, flags, cr) \ 632*8348SEric.Yu@Sun.COM ((so)->so_ops->sop_connect((so), (name), (namelen), (fflag), (flags), \ 633*8348SEric.Yu@Sun.COM (cr))) 634*8348SEric.Yu@Sun.COM #define SOP_RECVMSG(so, msg, uiop, cr) \ 635*8348SEric.Yu@Sun.COM ((so)->so_ops->sop_recvmsg((so), (msg), (uiop), (cr))) 636*8348SEric.Yu@Sun.COM #define SOP_SENDMSG(so, msg, uiop, cr) \ 637*8348SEric.Yu@Sun.COM ((so)->so_ops->sop_sendmsg((so), (msg), (uiop), (cr))) 638*8348SEric.Yu@Sun.COM #define SOP_SENDMBLK(so, msg, size, cr, mpp) \ 639*8348SEric.Yu@Sun.COM ((so)->so_ops->sop_sendmblk((so), (msg), (size), (cr), (mpp))) 640*8348SEric.Yu@Sun.COM #define SOP_GETPEERNAME(so, addr, addrlen, accept, cr) \ 641*8348SEric.Yu@Sun.COM ((so)->so_ops->sop_getpeername((so), (addr), (addrlen), (accept), (cr))) 642*8348SEric.Yu@Sun.COM #define SOP_GETSOCKNAME(so, addr, addrlen, cr) \ 643*8348SEric.Yu@Sun.COM ((so)->so_ops->sop_getsockname((so), (addr), (addrlen), (cr))) 644*8348SEric.Yu@Sun.COM #define SOP_SHUTDOWN(so, how, cr) \ 645*8348SEric.Yu@Sun.COM ((so)->so_ops->sop_shutdown((so), (how), (cr))) 646*8348SEric.Yu@Sun.COM #define SOP_GETSOCKOPT(so, level, optionname, optval, optlenp, flags, cr) \ 6470Sstevel@tonic-gate ((so)->so_ops->sop_getsockopt((so), (level), (optionname), \ 648*8348SEric.Yu@Sun.COM (optval), (optlenp), (flags), (cr))) 649*8348SEric.Yu@Sun.COM #define SOP_SETSOCKOPT(so, level, optionname, optval, optlen, cr) \ 6500Sstevel@tonic-gate ((so)->so_ops->sop_setsockopt((so), (level), (optionname), \ 651*8348SEric.Yu@Sun.COM (optval), (optlen), (cr))) 652*8348SEric.Yu@Sun.COM #define SOP_IOCTL(so, cmd, arg, mode, cr, rvalp) \ 653*8348SEric.Yu@Sun.COM ((so)->so_ops->sop_ioctl((so), (cmd), (arg), (mode), (cr), (rvalp))) 654*8348SEric.Yu@Sun.COM #define SOP_POLL(so, events, anyyet, reventsp, phpp) \ 655*8348SEric.Yu@Sun.COM ((so)->so_ops->sop_poll((so), (events), (anyyet), (reventsp), (phpp))) 656*8348SEric.Yu@Sun.COM #define SOP_CLOSE(so, flag, cr) \ 657*8348SEric.Yu@Sun.COM ((so)->so_ops->sop_close((so), (flag), (cr))) 6580Sstevel@tonic-gate 6590Sstevel@tonic-gate #endif /* defined(_KERNEL) || defined(_KMEMUSER) */ 6600Sstevel@tonic-gate 6610Sstevel@tonic-gate #ifdef _KERNEL 6620Sstevel@tonic-gate 6630Sstevel@tonic-gate #define ISALIGNED_cmsghdr(addr) \ 6640Sstevel@tonic-gate (((uintptr_t)(addr) & (_CMSG_HDR_ALIGNMENT - 1)) == 0) 6650Sstevel@tonic-gate 6660Sstevel@tonic-gate #define ROUNDUP_cmsglen(len) \ 6670Sstevel@tonic-gate (((len) + _CMSG_HDR_ALIGNMENT - 1) & ~(_CMSG_HDR_ALIGNMENT - 1)) 6680Sstevel@tonic-gate 669*8348SEric.Yu@Sun.COM #define IS_NON_STREAM_SOCK(vp) \ 670*8348SEric.Yu@Sun.COM ((vp)->v_type == VSOCK && (vp)->v_stream == NULL) 6710Sstevel@tonic-gate /* 6722712Snn35248 * Macros that operate on struct cmsghdr. 6732712Snn35248 * Used in parsing msg_control. 6742712Snn35248 * The CMSG_VALID macro does not assume that the last option buffer is padded. 6750Sstevel@tonic-gate */ 6760Sstevel@tonic-gate #define CMSG_NEXT(cmsg) \ 6770Sstevel@tonic-gate (struct cmsghdr *)((uintptr_t)(cmsg) + \ 6780Sstevel@tonic-gate ROUNDUP_cmsglen((cmsg)->cmsg_len)) 6792712Snn35248 #define CMSG_CONTENT(cmsg) (&((cmsg)[1])) 6802712Snn35248 #define CMSG_CONTENTLEN(cmsg) ((cmsg)->cmsg_len - sizeof (struct cmsghdr)) 6812712Snn35248 #define CMSG_VALID(cmsg, start, end) \ 6822712Snn35248 (ISALIGNED_cmsghdr(cmsg) && \ 6832712Snn35248 ((uintptr_t)(cmsg) >= (uintptr_t)(start)) && \ 6842712Snn35248 ((uintptr_t)(cmsg) < (uintptr_t)(end)) && \ 6852712Snn35248 ((ssize_t)(cmsg)->cmsg_len >= sizeof (struct cmsghdr)) && \ 6862712Snn35248 ((uintptr_t)(cmsg) + (cmsg)->cmsg_len <= (uintptr_t)(end))) 6870Sstevel@tonic-gate 6880Sstevel@tonic-gate /* 6890Sstevel@tonic-gate * Maximum size of any argument that is copied in (addresses, options, 6900Sstevel@tonic-gate * access rights). MUST be at least MAXPATHLEN + 3. 6910Sstevel@tonic-gate * BSD and SunOS 4.X limited this to MLEN or MCLBYTES. 6920Sstevel@tonic-gate */ 6930Sstevel@tonic-gate #define SO_MAXARGSIZE 8192 6940Sstevel@tonic-gate 6950Sstevel@tonic-gate /* 6960Sstevel@tonic-gate * Convert between vnode and sonode 6970Sstevel@tonic-gate */ 6980Sstevel@tonic-gate #define VTOSO(vp) ((struct sonode *)((vp)->v_data)) 6990Sstevel@tonic-gate #define SOTOV(sp) ((sp)->so_vnode) 7000Sstevel@tonic-gate 7010Sstevel@tonic-gate /* 7020Sstevel@tonic-gate * Internal flags for sobind() 7030Sstevel@tonic-gate */ 7040Sstevel@tonic-gate #define _SOBIND_REBIND 0x01 /* Bind to existing local address */ 7050Sstevel@tonic-gate #define _SOBIND_UNSPEC 0x02 /* Bind to unspecified address */ 7060Sstevel@tonic-gate #define _SOBIND_LOCK_HELD 0x04 /* so_excl_lock held by caller */ 7070Sstevel@tonic-gate #define _SOBIND_NOXLATE 0x08 /* No addr translation for AF_UNIX */ 7080Sstevel@tonic-gate #define _SOBIND_XPG4_2 0x10 /* xpg4.2 semantics */ 7090Sstevel@tonic-gate #define _SOBIND_SOCKBSD 0x20 /* BSD semantics */ 7100Sstevel@tonic-gate #define _SOBIND_LISTEN 0x40 /* Make into SS_ACCEPTCONN */ 7110Sstevel@tonic-gate #define _SOBIND_SOCKETPAIR 0x80 /* Internal flag for so_socketpair() */ 7120Sstevel@tonic-gate /* to enable listen with backlog = 1 */ 7130Sstevel@tonic-gate 7140Sstevel@tonic-gate /* 7150Sstevel@tonic-gate * Internal flags for sounbind() 7160Sstevel@tonic-gate */ 7170Sstevel@tonic-gate #define _SOUNBIND_REBIND 0x01 /* Don't clear fields - will rebind */ 7180Sstevel@tonic-gate 7190Sstevel@tonic-gate /* 7200Sstevel@tonic-gate * Internal flags for soconnect() 7210Sstevel@tonic-gate */ 7220Sstevel@tonic-gate #define _SOCONNECT_NOXLATE 0x01 /* No addr translation for AF_UNIX */ 7230Sstevel@tonic-gate #define _SOCONNECT_DID_BIND 0x02 /* Unbind when connect fails */ 7240Sstevel@tonic-gate #define _SOCONNECT_XPG4_2 0x04 /* xpg4.2 semantics */ 7250Sstevel@tonic-gate 7260Sstevel@tonic-gate /* 7270Sstevel@tonic-gate * Internal flags for sodisconnect() 7280Sstevel@tonic-gate */ 7290Sstevel@tonic-gate #define _SODISCONNECT_LOCK_HELD 0x01 /* so_excl_lock held by caller */ 7300Sstevel@tonic-gate 7310Sstevel@tonic-gate /* 7320Sstevel@tonic-gate * Internal flags for sotpi_getsockopt(). 7330Sstevel@tonic-gate */ 7340Sstevel@tonic-gate #define _SOGETSOCKOPT_XPG4_2 0x01 /* xpg4.2 semantics */ 7350Sstevel@tonic-gate 7360Sstevel@tonic-gate /* 7370Sstevel@tonic-gate * Internal flags for soallocproto*() 7380Sstevel@tonic-gate */ 7390Sstevel@tonic-gate #define _ALLOC_NOSLEEP 0 /* Don't sleep for memory */ 7400Sstevel@tonic-gate #define _ALLOC_INTR 1 /* Sleep until interrupt */ 7410Sstevel@tonic-gate #define _ALLOC_SLEEP 2 /* Sleep forever */ 7420Sstevel@tonic-gate 7430Sstevel@tonic-gate /* 7440Sstevel@tonic-gate * Internal structure for handling AF_UNIX file descriptor passing 7450Sstevel@tonic-gate */ 7460Sstevel@tonic-gate struct fdbuf { 7470Sstevel@tonic-gate int fd_size; /* In bytes, for kmem_free */ 7480Sstevel@tonic-gate int fd_numfd; /* Number of elements below */ 7490Sstevel@tonic-gate char *fd_ebuf; /* Extra buffer to free */ 7500Sstevel@tonic-gate int fd_ebuflen; 7510Sstevel@tonic-gate frtn_t fd_frtn; 7520Sstevel@tonic-gate struct file *fd_fds[1]; /* One or more */ 7530Sstevel@tonic-gate }; 7540Sstevel@tonic-gate #define FDBUF_HDRSIZE (sizeof (struct fdbuf) - sizeof (struct file *)) 7550Sstevel@tonic-gate 7560Sstevel@tonic-gate /* 7570Sstevel@tonic-gate * Variable that can be patched to set what version of socket socket() 7580Sstevel@tonic-gate * will create. 7590Sstevel@tonic-gate */ 7600Sstevel@tonic-gate extern int so_default_version; 7610Sstevel@tonic-gate 7620Sstevel@tonic-gate #ifdef DEBUG 7630Sstevel@tonic-gate /* Turn on extra testing capabilities */ 7640Sstevel@tonic-gate #define SOCK_TEST 7650Sstevel@tonic-gate #endif /* DEBUG */ 7660Sstevel@tonic-gate 7670Sstevel@tonic-gate #ifdef DEBUG 7680Sstevel@tonic-gate char *pr_state(uint_t, uint_t); 7690Sstevel@tonic-gate char *pr_addr(int, struct sockaddr *, t_uscalar_t); 7700Sstevel@tonic-gate int so_verify_oobstate(struct sonode *); 7710Sstevel@tonic-gate #endif /* DEBUG */ 7720Sstevel@tonic-gate 7730Sstevel@tonic-gate /* 7740Sstevel@tonic-gate * DEBUG macros 7750Sstevel@tonic-gate */ 7767632SNick.Todd@Sun.COM #if defined(DEBUG) 7770Sstevel@tonic-gate #define SOCK_DEBUG 7780Sstevel@tonic-gate 7790Sstevel@tonic-gate extern int sockdebug; 7800Sstevel@tonic-gate extern int sockprinterr; 7810Sstevel@tonic-gate 7820Sstevel@tonic-gate #define eprint(args) printf args 7830Sstevel@tonic-gate #define eprintso(so, args) \ 7840Sstevel@tonic-gate { if (sockprinterr && ((so)->so_options & SO_DEBUG)) printf args; } 7850Sstevel@tonic-gate #define eprintline(error) \ 7860Sstevel@tonic-gate { \ 7870Sstevel@tonic-gate if (error != EINTR && (sockprinterr || sockdebug > 0)) \ 7880Sstevel@tonic-gate printf("socket error %d: line %d file %s\n", \ 7890Sstevel@tonic-gate (error), __LINE__, __FILE__); \ 7900Sstevel@tonic-gate } 7910Sstevel@tonic-gate 7920Sstevel@tonic-gate #define eprintsoline(so, error) \ 7930Sstevel@tonic-gate { if (sockprinterr && ((so)->so_options & SO_DEBUG)) \ 7940Sstevel@tonic-gate printf("socket(%p) error %d: line %d file %s\n", \ 7957632SNick.Todd@Sun.COM (void *)(so), (error), __LINE__, __FILE__); \ 7960Sstevel@tonic-gate } 7970Sstevel@tonic-gate #define dprint(level, args) { if (sockdebug > (level)) printf args; } 7980Sstevel@tonic-gate #define dprintso(so, level, args) \ 7990Sstevel@tonic-gate { if (sockdebug > (level) && ((so)->so_options & SO_DEBUG)) printf args; } 8000Sstevel@tonic-gate 8017632SNick.Todd@Sun.COM #else /* define(DEBUG) */ 8020Sstevel@tonic-gate 8030Sstevel@tonic-gate #define eprint(args) {} 8040Sstevel@tonic-gate #define eprintso(so, args) {} 8050Sstevel@tonic-gate #define eprintline(error) {} 8060Sstevel@tonic-gate #define eprintsoline(so, error) {} 8070Sstevel@tonic-gate #define dprint(level, args) {} 8080Sstevel@tonic-gate #define dprintso(so, level, args) {} 8090Sstevel@tonic-gate 8107632SNick.Todd@Sun.COM #endif /* defined(DEBUG) */ 8110Sstevel@tonic-gate 8120Sstevel@tonic-gate extern struct vfsops sock_vfsops; 813*8348SEric.Yu@Sun.COM extern struct vnodeops *socket_vnodeops; 814*8348SEric.Yu@Sun.COM extern const struct fs_operation_def socket_vnodeops_template[]; 8150Sstevel@tonic-gate 8160Sstevel@tonic-gate extern dev_t sockdev; 8170Sstevel@tonic-gate 8180Sstevel@tonic-gate /* 8190Sstevel@tonic-gate * sockfs functions 8200Sstevel@tonic-gate */ 8210Sstevel@tonic-gate extern int sock_getmsg(vnode_t *, struct strbuf *, struct strbuf *, 8220Sstevel@tonic-gate uchar_t *, int *, int, rval_t *); 8230Sstevel@tonic-gate extern int sock_putmsg(vnode_t *, struct strbuf *, struct strbuf *, 8240Sstevel@tonic-gate uchar_t, int, int); 825*8348SEric.Yu@Sun.COM extern int sogetvp(char *, vnode_t **, int); 8260Sstevel@tonic-gate extern int sockinit(int, char *); 827*8348SEric.Yu@Sun.COM extern int soconfig(int, int, int, char *, int, char *); 828*8348SEric.Yu@Sun.COM extern int solookup(int, int, int, struct sockparams **); 8290Sstevel@tonic-gate extern void so_lock_single(struct sonode *); 8300Sstevel@tonic-gate extern void so_unlock_single(struct sonode *, int); 8310Sstevel@tonic-gate extern int so_lock_read(struct sonode *, int); 8320Sstevel@tonic-gate extern int so_lock_read_intr(struct sonode *, int); 8330Sstevel@tonic-gate extern void so_unlock_read(struct sonode *); 8340Sstevel@tonic-gate extern void *sogetoff(mblk_t *, t_uscalar_t, t_uscalar_t, uint_t); 8350Sstevel@tonic-gate extern void so_getopt_srcaddr(void *, t_uscalar_t, 8360Sstevel@tonic-gate void **, t_uscalar_t *); 8370Sstevel@tonic-gate extern int so_getopt_unix_close(void *, t_uscalar_t); 8380Sstevel@tonic-gate extern void fdbuf_free(struct fdbuf *); 8390Sstevel@tonic-gate extern mblk_t *fdbuf_allocmsg(int, struct fdbuf *); 8400Sstevel@tonic-gate extern int fdbuf_create(void *, int, struct fdbuf **); 8410Sstevel@tonic-gate extern void so_closefds(void *, t_uscalar_t, int, int); 8420Sstevel@tonic-gate extern int so_getfdopt(void *, t_uscalar_t, int, void **, int *); 8430Sstevel@tonic-gate t_uscalar_t so_optlen(void *, t_uscalar_t, int); 8440Sstevel@tonic-gate extern void so_cmsg2opt(void *, t_uscalar_t, int, mblk_t *); 8450Sstevel@tonic-gate extern t_uscalar_t 8460Sstevel@tonic-gate so_cmsglen(mblk_t *, void *, t_uscalar_t, int); 8470Sstevel@tonic-gate extern int so_opt2cmsg(mblk_t *, void *, t_uscalar_t, int, 8480Sstevel@tonic-gate void *, t_uscalar_t); 8490Sstevel@tonic-gate extern void soisconnecting(struct sonode *); 8500Sstevel@tonic-gate extern void soisconnected(struct sonode *); 8510Sstevel@tonic-gate extern void soisdisconnected(struct sonode *, int); 8520Sstevel@tonic-gate extern void socantsendmore(struct sonode *); 8530Sstevel@tonic-gate extern void socantrcvmore(struct sonode *); 8540Sstevel@tonic-gate extern void soseterror(struct sonode *, int); 855*8348SEric.Yu@Sun.COM extern int sogeterr(struct sonode *, boolean_t); 8560Sstevel@tonic-gate extern int sowaitconnected(struct sonode *, int, int); 8570Sstevel@tonic-gate 8580Sstevel@tonic-gate extern ssize_t soreadfile(file_t *, uchar_t *, u_offset_t, int *, size_t); 8590Sstevel@tonic-gate extern void *sock_kstat_init(zoneid_t); 8600Sstevel@tonic-gate extern void sock_kstat_fini(zoneid_t, void *); 8615227Stz204579 extern struct sonode *getsonode(int, int *, file_t **); 8620Sstevel@tonic-gate /* 8635331Samw * Function wrappers (mostly around the sonode switch) for 8640Sstevel@tonic-gate * backward compatibility. 8650Sstevel@tonic-gate */ 8660Sstevel@tonic-gate extern int soaccept(struct sonode *, int, struct sonode **); 8670Sstevel@tonic-gate extern int sobind(struct sonode *, struct sockaddr *, socklen_t, 8680Sstevel@tonic-gate int, int); 8690Sstevel@tonic-gate extern int solisten(struct sonode *, int); 8700Sstevel@tonic-gate extern int soconnect(struct sonode *, const struct sockaddr *, socklen_t, 8710Sstevel@tonic-gate int, int); 8720Sstevel@tonic-gate extern int sorecvmsg(struct sonode *, struct nmsghdr *, struct uio *); 8730Sstevel@tonic-gate extern int sosendmsg(struct sonode *, struct nmsghdr *, struct uio *); 8740Sstevel@tonic-gate extern int soshutdown(struct sonode *, int); 8750Sstevel@tonic-gate extern int sogetsockopt(struct sonode *, int, int, void *, socklen_t *, 8760Sstevel@tonic-gate int); 8770Sstevel@tonic-gate extern int sosetsockopt(struct sonode *, int, int, const void *, 8780Sstevel@tonic-gate t_uscalar_t); 8790Sstevel@tonic-gate 880*8348SEric.Yu@Sun.COM extern struct sonode *socreate(struct sockparams *, int, int, int, int, 881*8348SEric.Yu@Sun.COM int *); 8820Sstevel@tonic-gate 8830Sstevel@tonic-gate extern int so_copyin(const void *, void *, size_t, int); 8840Sstevel@tonic-gate extern int so_copyout(const void *, void *, size_t, int); 8850Sstevel@tonic-gate 8860Sstevel@tonic-gate #endif 8870Sstevel@tonic-gate 8880Sstevel@tonic-gate /* 8890Sstevel@tonic-gate * Internal structure for obtaining sonode information from the socklist. 8900Sstevel@tonic-gate * These types match those corresponding in the sonode structure. 8910Sstevel@tonic-gate * This is not a published interface, and may change at any time. 8920Sstevel@tonic-gate */ 8930Sstevel@tonic-gate struct sockinfo { 8940Sstevel@tonic-gate uint_t si_size; /* real length of this struct */ 8950Sstevel@tonic-gate short si_family; 8960Sstevel@tonic-gate short si_type; 8970Sstevel@tonic-gate ushort_t si_flag; 8980Sstevel@tonic-gate uint_t si_state; 8990Sstevel@tonic-gate uint_t si_ux_laddr_sou_magic; 9000Sstevel@tonic-gate uint_t si_ux_faddr_sou_magic; 9010Sstevel@tonic-gate t_scalar_t si_serv_type; 9020Sstevel@tonic-gate t_uscalar_t si_laddr_soa_len; 9030Sstevel@tonic-gate t_uscalar_t si_faddr_soa_len; 9040Sstevel@tonic-gate uint16_t si_laddr_family; 9050Sstevel@tonic-gate uint16_t si_faddr_family; 9060Sstevel@tonic-gate char si_laddr_sun_path[MAXPATHLEN + 1]; /* NULL terminated */ 9070Sstevel@tonic-gate char si_faddr_sun_path[MAXPATHLEN + 1]; 908*8348SEric.Yu@Sun.COM boolean_t si_faddr_noxlate; 9090Sstevel@tonic-gate zoneid_t si_szoneid; 9100Sstevel@tonic-gate }; 9110Sstevel@tonic-gate 912*8348SEric.Yu@Sun.COM #define SOCKMOD_PATH "socketmod" /* dir where sockmods are stored */ 9130Sstevel@tonic-gate 9140Sstevel@tonic-gate #ifdef __cplusplus 9150Sstevel@tonic-gate } 9160Sstevel@tonic-gate #endif 9170Sstevel@tonic-gate 9180Sstevel@tonic-gate #endif /* _SYS_SOCKETVAR_H */ 919