/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
258348SEric.Yu@Sun.COM */ 268348SEric.Yu@Sun.COM 278348SEric.Yu@Sun.COM #include <sys/types.h> 288348SEric.Yu@Sun.COM #include <sys/param.h> 298348SEric.Yu@Sun.COM #include <sys/systm.h> 308348SEric.Yu@Sun.COM #include <sys/sysmacros.h> 318348SEric.Yu@Sun.COM #include <sys/debug.h> 328348SEric.Yu@Sun.COM #include <sys/cmn_err.h> 338348SEric.Yu@Sun.COM #include <sys/vfs.h> 348348SEric.Yu@Sun.COM #include <sys/policy.h> 358348SEric.Yu@Sun.COM #include <sys/modctl.h> 368348SEric.Yu@Sun.COM 378348SEric.Yu@Sun.COM #include <sys/sunddi.h> 388348SEric.Yu@Sun.COM 398348SEric.Yu@Sun.COM #include <sys/strsun.h> 408348SEric.Yu@Sun.COM #include <sys/stropts.h> 418348SEric.Yu@Sun.COM #include <sys/strsubr.h> 428348SEric.Yu@Sun.COM #include <sys/socket.h> 438348SEric.Yu@Sun.COM #include <sys/socketvar.h> 448348SEric.Yu@Sun.COM #include <sys/uio.h> 458348SEric.Yu@Sun.COM 468348SEric.Yu@Sun.COM #include <inet/ipclassifier.h> 478348SEric.Yu@Sun.COM #include <fs/sockfs/sockcommon.h> 488348SEric.Yu@Sun.COM #include <fs/sockfs/nl7c.h> 498399SRao.Shoaib@Sun.COM #include <fs/sockfs/socktpi.h> 50*9491SAnders.Persson@Sun.COM #include <fs/sockfs/sodirect.h> 518348SEric.Yu@Sun.COM #include <inet/ip.h> 528348SEric.Yu@Sun.COM 538348SEric.Yu@Sun.COM extern int xnet_skip_checks, xnet_check_print, xnet_truncate_print; 548348SEric.Yu@Sun.COM 558348SEric.Yu@Sun.COM /* 568348SEric.Yu@Sun.COM * Common socket access functions. 578348SEric.Yu@Sun.COM * 588348SEric.Yu@Sun.COM * Instead of accessing the sonode switch directly (i.e., SOP_xxx()), 598348SEric.Yu@Sun.COM * the socket_xxx() function should be used. 608348SEric.Yu@Sun.COM */ 618348SEric.Yu@Sun.COM 628348SEric.Yu@Sun.COM /* 638348SEric.Yu@Sun.COM * Try to create a new sonode of the requested <family, type, protocol>. 
648348SEric.Yu@Sun.COM */ 658348SEric.Yu@Sun.COM /* ARGSUSED */ 668348SEric.Yu@Sun.COM struct sonode * 678348SEric.Yu@Sun.COM socket_create(int family, int type, int protocol, char *devpath, char *mod, 688348SEric.Yu@Sun.COM int flags, int version, struct cred *cr, int *errorp) 698348SEric.Yu@Sun.COM { 708348SEric.Yu@Sun.COM struct sonode *so; 718348SEric.Yu@Sun.COM struct sockparams *sp = NULL; 728489Sshenjian int saved_error; 738348SEric.Yu@Sun.COM 748348SEric.Yu@Sun.COM /* 758348SEric.Yu@Sun.COM * Look for a sockparams entry that match the given criteria. 768348SEric.Yu@Sun.COM * solookup() returns with the entry held. 778348SEric.Yu@Sun.COM */ 788348SEric.Yu@Sun.COM *errorp = solookup(family, type, protocol, &sp); 798489Sshenjian saved_error = *errorp; 808348SEric.Yu@Sun.COM if (sp == NULL) { 818348SEric.Yu@Sun.COM int kmflags = (flags == SOCKET_SLEEP) ? KM_SLEEP : KM_NOSLEEP; 828348SEric.Yu@Sun.COM /* 838348SEric.Yu@Sun.COM * There is no matching sockparams entry. An ephemeral entry is 848348SEric.Yu@Sun.COM * created if the caller specifies a device or a socket module. 
858348SEric.Yu@Sun.COM */ 868348SEric.Yu@Sun.COM if (devpath != NULL) { 878489Sshenjian saved_error = 0; 888348SEric.Yu@Sun.COM sp = sockparams_hold_ephemeral_bydev(family, type, 898348SEric.Yu@Sun.COM protocol, devpath, kmflags, errorp); 908348SEric.Yu@Sun.COM } else if (mod != NULL) { 918489Sshenjian saved_error = 0; 928348SEric.Yu@Sun.COM sp = sockparams_hold_ephemeral_bymod(family, type, 938348SEric.Yu@Sun.COM protocol, mod, kmflags, errorp); 948348SEric.Yu@Sun.COM } else { 958489Sshenjian *errorp = solookup(family, type, 0, &sp); 968348SEric.Yu@Sun.COM } 978348SEric.Yu@Sun.COM 988489Sshenjian if (sp == NULL) { 998489Sshenjian if (saved_error && (*errorp == EPROTONOSUPPORT || 1008489Sshenjian *errorp == EPROTOTYPE || *errorp == ENOPROTOOPT)) 1018489Sshenjian *errorp = saved_error; 1028348SEric.Yu@Sun.COM return (NULL); 1038489Sshenjian } 1048348SEric.Yu@Sun.COM } 1058348SEric.Yu@Sun.COM 1068348SEric.Yu@Sun.COM ASSERT(sp->sp_smod_info != NULL); 1078348SEric.Yu@Sun.COM ASSERT(flags == SOCKET_SLEEP || flags == SOCKET_NOSLEEP); 1088964SAnders.Persson@Sun.COM sp->sp_stats.sps_ncreate.value.ui64++; 1098348SEric.Yu@Sun.COM so = sp->sp_smod_info->smod_sock_create_func(sp, family, type, 1108348SEric.Yu@Sun.COM protocol, version, flags, errorp, cr); 1118348SEric.Yu@Sun.COM if (so == NULL) { 1128348SEric.Yu@Sun.COM SOCKPARAMS_DEC_REF(sp); 1138348SEric.Yu@Sun.COM } else { 1148348SEric.Yu@Sun.COM if ((*errorp = SOP_INIT(so, NULL, cr, flags)) == 0) { 1158348SEric.Yu@Sun.COM /* Cannot fail, only bumps so_count */ 1168348SEric.Yu@Sun.COM (void) VOP_OPEN(&SOTOV(so), FREAD|FWRITE, cr, NULL); 1178348SEric.Yu@Sun.COM } else { 1188489Sshenjian if (saved_error && (*errorp == EPROTONOSUPPORT || 1198489Sshenjian *errorp == EPROTOTYPE || *errorp == ENOPROTOOPT)) 1208489Sshenjian *errorp = saved_error; 1218348SEric.Yu@Sun.COM socket_destroy(so); 1228348SEric.Yu@Sun.COM so = NULL; 1238348SEric.Yu@Sun.COM } 1248348SEric.Yu@Sun.COM } 1258348SEric.Yu@Sun.COM return (so); 
1268348SEric.Yu@Sun.COM } 1278348SEric.Yu@Sun.COM 1288348SEric.Yu@Sun.COM struct sonode * 1298348SEric.Yu@Sun.COM socket_newconn(struct sonode *parent, sock_lower_handle_t lh, 1308348SEric.Yu@Sun.COM sock_downcalls_t *dc, int flags, int *errorp) 1318348SEric.Yu@Sun.COM { 1328348SEric.Yu@Sun.COM struct sonode *so; 1338348SEric.Yu@Sun.COM struct sockparams *sp; 1348348SEric.Yu@Sun.COM struct cred *cr; 1358348SEric.Yu@Sun.COM 1368348SEric.Yu@Sun.COM if ((cr = CRED()) == NULL) 1378348SEric.Yu@Sun.COM cr = kcred; 1388348SEric.Yu@Sun.COM 1398348SEric.Yu@Sun.COM sp = parent->so_sockparams; 1408348SEric.Yu@Sun.COM ASSERT(sp != NULL); 1418348SEric.Yu@Sun.COM 1428964SAnders.Persson@Sun.COM sp->sp_stats.sps_ncreate.value.ui64++; 1438348SEric.Yu@Sun.COM so = sp->sp_smod_info->smod_sock_create_func(sp, parent->so_family, 1448348SEric.Yu@Sun.COM parent->so_type, parent->so_protocol, parent->so_version, flags, 1458348SEric.Yu@Sun.COM errorp, cr); 1468348SEric.Yu@Sun.COM if (so != NULL) { 1478348SEric.Yu@Sun.COM SOCKPARAMS_INC_REF(sp); 1488348SEric.Yu@Sun.COM 1498348SEric.Yu@Sun.COM so->so_proto_handle = lh; 1508348SEric.Yu@Sun.COM so->so_downcalls = dc; 1518348SEric.Yu@Sun.COM /* 1528348SEric.Yu@Sun.COM * This function may be called in interrupt context, and CRED() 1538348SEric.Yu@Sun.COM * will be NULL. In this case, pass in kcred. 1548348SEric.Yu@Sun.COM */ 1558348SEric.Yu@Sun.COM if ((*errorp = SOP_INIT(so, parent, cr, flags)) == 0) { 1568348SEric.Yu@Sun.COM /* Cannot fail, only bumps so_count */ 1578348SEric.Yu@Sun.COM (void) VOP_OPEN(&SOTOV(so), FREAD|FWRITE, cr, NULL); 1588348SEric.Yu@Sun.COM } else { 1598348SEric.Yu@Sun.COM socket_destroy(so); 1608348SEric.Yu@Sun.COM so = NULL; 1618348SEric.Yu@Sun.COM } 1628348SEric.Yu@Sun.COM } 1638348SEric.Yu@Sun.COM 1648348SEric.Yu@Sun.COM return (so); 1658348SEric.Yu@Sun.COM } 1668348SEric.Yu@Sun.COM 1678348SEric.Yu@Sun.COM /* 1688348SEric.Yu@Sun.COM * Bind local endpoint. 
1698348SEric.Yu@Sun.COM */ 1708348SEric.Yu@Sun.COM int 1718348SEric.Yu@Sun.COM socket_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 1728348SEric.Yu@Sun.COM int flags, cred_t *cr) 1738348SEric.Yu@Sun.COM { 1748348SEric.Yu@Sun.COM return (SOP_BIND(so, name, namelen, flags, cr)); 1758348SEric.Yu@Sun.COM } 1768348SEric.Yu@Sun.COM 1778348SEric.Yu@Sun.COM /* 1788348SEric.Yu@Sun.COM * Turn socket into a listen socket. 1798348SEric.Yu@Sun.COM */ 1808348SEric.Yu@Sun.COM int 1818348SEric.Yu@Sun.COM socket_listen(struct sonode *so, int backlog, cred_t *cr) 1828348SEric.Yu@Sun.COM { 1838348SEric.Yu@Sun.COM if (backlog < 0) { 1848348SEric.Yu@Sun.COM backlog = 0; 1858348SEric.Yu@Sun.COM } 1868348SEric.Yu@Sun.COM 1878348SEric.Yu@Sun.COM /* 1888348SEric.Yu@Sun.COM * Use the same qlimit as in BSD. BSD checks the qlimit 1898348SEric.Yu@Sun.COM * before queuing the next connection implying that a 1908348SEric.Yu@Sun.COM * listen(sock, 0) allows one connection to be queued. 1918348SEric.Yu@Sun.COM * BSD also uses 1.5 times the requested backlog. 1928348SEric.Yu@Sun.COM * 1938348SEric.Yu@Sun.COM * XNS Issue 4 required a strict interpretation of the backlog. 1948348SEric.Yu@Sun.COM * This has been waived subsequently for Issue 4 and the change 1958348SEric.Yu@Sun.COM * incorporated in XNS Issue 5. So we aren't required to do 1968348SEric.Yu@Sun.COM * anything special for XPG apps. 1978348SEric.Yu@Sun.COM */ 1988348SEric.Yu@Sun.COM if (backlog >= (INT_MAX - 1) / 3) 1998348SEric.Yu@Sun.COM backlog = INT_MAX; 2008348SEric.Yu@Sun.COM else 2018348SEric.Yu@Sun.COM backlog = backlog * 3 / 2 + 1; 2028348SEric.Yu@Sun.COM 2038348SEric.Yu@Sun.COM return (SOP_LISTEN(so, backlog, cr)); 2048348SEric.Yu@Sun.COM } 2058348SEric.Yu@Sun.COM 2068348SEric.Yu@Sun.COM /* 2078348SEric.Yu@Sun.COM * Accept incoming connection. 
2088348SEric.Yu@Sun.COM */ 2098348SEric.Yu@Sun.COM int 2108348SEric.Yu@Sun.COM socket_accept(struct sonode *lso, int fflag, cred_t *cr, struct sonode **nsop) 2118348SEric.Yu@Sun.COM { 2128348SEric.Yu@Sun.COM return (SOP_ACCEPT(lso, fflag, cr, nsop)); 2138348SEric.Yu@Sun.COM } 2148348SEric.Yu@Sun.COM 2158348SEric.Yu@Sun.COM /* 2168348SEric.Yu@Sun.COM * Active open. 2178348SEric.Yu@Sun.COM */ 2188348SEric.Yu@Sun.COM int 2198348SEric.Yu@Sun.COM socket_connect(struct sonode *so, const struct sockaddr *name, 2208348SEric.Yu@Sun.COM socklen_t namelen, int fflag, int flags, cred_t *cr) 2218348SEric.Yu@Sun.COM { 2228348SEric.Yu@Sun.COM int error; 2238348SEric.Yu@Sun.COM 2248348SEric.Yu@Sun.COM /* 2258348SEric.Yu@Sun.COM * Handle a connect to a name parameter of type AF_UNSPEC like a 2268348SEric.Yu@Sun.COM * connect to a null address. This is the portable method to 2278348SEric.Yu@Sun.COM * unconnect a socket. 2288348SEric.Yu@Sun.COM */ 2298348SEric.Yu@Sun.COM if ((namelen >= sizeof (sa_family_t)) && 2308348SEric.Yu@Sun.COM (name->sa_family == AF_UNSPEC)) { 2318348SEric.Yu@Sun.COM name = NULL; 2328348SEric.Yu@Sun.COM namelen = 0; 2338348SEric.Yu@Sun.COM } 2348348SEric.Yu@Sun.COM 2358348SEric.Yu@Sun.COM error = SOP_CONNECT(so, name, namelen, fflag, flags, cr); 2368348SEric.Yu@Sun.COM 2378348SEric.Yu@Sun.COM if (error == EHOSTUNREACH && flags & _SOCONNECT_XPG4_2) { 2388348SEric.Yu@Sun.COM /* 2398348SEric.Yu@Sun.COM * X/Open specification contains a requirement that 2408348SEric.Yu@Sun.COM * ENETUNREACH be returned but does not require 2418348SEric.Yu@Sun.COM * EHOSTUNREACH. In order to keep the test suite 2428348SEric.Yu@Sun.COM * happy we mess with the errno here. 2438348SEric.Yu@Sun.COM */ 2448348SEric.Yu@Sun.COM error = ENETUNREACH; 2458348SEric.Yu@Sun.COM } 2468348SEric.Yu@Sun.COM 2478348SEric.Yu@Sun.COM return (error); 2488348SEric.Yu@Sun.COM } 2498348SEric.Yu@Sun.COM 2508348SEric.Yu@Sun.COM /* 2518348SEric.Yu@Sun.COM * Get address of remote node. 
2528348SEric.Yu@Sun.COM */ 2538348SEric.Yu@Sun.COM int 2548348SEric.Yu@Sun.COM socket_getpeername(struct sonode *so, struct sockaddr *addr, 2558348SEric.Yu@Sun.COM socklen_t *addrlen, boolean_t accept, cred_t *cr) 2568348SEric.Yu@Sun.COM { 2578348SEric.Yu@Sun.COM ASSERT(*addrlen > 0); 2588348SEric.Yu@Sun.COM return (SOP_GETPEERNAME(so, addr, addrlen, accept, cr)); 2598348SEric.Yu@Sun.COM 2608348SEric.Yu@Sun.COM } 2618348SEric.Yu@Sun.COM 2628348SEric.Yu@Sun.COM /* 2638348SEric.Yu@Sun.COM * Get local address. 2648348SEric.Yu@Sun.COM */ 2658348SEric.Yu@Sun.COM int 2668348SEric.Yu@Sun.COM socket_getsockname(struct sonode *so, struct sockaddr *addr, 2678348SEric.Yu@Sun.COM socklen_t *addrlen, cred_t *cr) 2688348SEric.Yu@Sun.COM { 2698348SEric.Yu@Sun.COM return (SOP_GETSOCKNAME(so, addr, addrlen, cr)); 2708348SEric.Yu@Sun.COM 2718348SEric.Yu@Sun.COM } 2728348SEric.Yu@Sun.COM 2738348SEric.Yu@Sun.COM /* 2748348SEric.Yu@Sun.COM * Called from shutdown(). 2758348SEric.Yu@Sun.COM */ 2768348SEric.Yu@Sun.COM int 2778348SEric.Yu@Sun.COM socket_shutdown(struct sonode *so, int how, cred_t *cr) 2788348SEric.Yu@Sun.COM { 2798348SEric.Yu@Sun.COM return (SOP_SHUTDOWN(so, how, cr)); 2808348SEric.Yu@Sun.COM } 2818348SEric.Yu@Sun.COM 2828348SEric.Yu@Sun.COM /* 2838348SEric.Yu@Sun.COM * Get socket options. 
2848348SEric.Yu@Sun.COM */ 2858348SEric.Yu@Sun.COM /*ARGSUSED*/ 2868348SEric.Yu@Sun.COM int 2878348SEric.Yu@Sun.COM socket_getsockopt(struct sonode *so, int level, int option_name, 2888348SEric.Yu@Sun.COM void *optval, socklen_t *optlenp, int flags, cred_t *cr) 2898348SEric.Yu@Sun.COM { 2908348SEric.Yu@Sun.COM return (SOP_GETSOCKOPT(so, level, option_name, optval, 2918348SEric.Yu@Sun.COM optlenp, flags, cr)); 2928348SEric.Yu@Sun.COM } 2938348SEric.Yu@Sun.COM 2948348SEric.Yu@Sun.COM /* 2958348SEric.Yu@Sun.COM * Set socket options 2968348SEric.Yu@Sun.COM */ 2978348SEric.Yu@Sun.COM int 2988348SEric.Yu@Sun.COM socket_setsockopt(struct sonode *so, int level, int option_name, 2998348SEric.Yu@Sun.COM const void *optval, t_uscalar_t optlen, cred_t *cr) 3008348SEric.Yu@Sun.COM { 3018489Sshenjian int val = 1; 3028348SEric.Yu@Sun.COM /* Caller allocates aligned optval, or passes null */ 3038348SEric.Yu@Sun.COM ASSERT(((uintptr_t)optval & (sizeof (t_scalar_t) - 1)) == 0); 3048348SEric.Yu@Sun.COM /* If optval is null optlen is 0, and vice-versa */ 3058348SEric.Yu@Sun.COM ASSERT(optval != NULL || optlen == 0); 3068348SEric.Yu@Sun.COM ASSERT(optlen != 0 || optval == NULL); 3078348SEric.Yu@Sun.COM 3088489Sshenjian if (optval == NULL && optlen == 0) 3098489Sshenjian optval = &val; 3108348SEric.Yu@Sun.COM 3118348SEric.Yu@Sun.COM return (SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr)); 3128348SEric.Yu@Sun.COM } 3138348SEric.Yu@Sun.COM 3148348SEric.Yu@Sun.COM int 3158348SEric.Yu@Sun.COM socket_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 3168348SEric.Yu@Sun.COM cred_t *cr) 3178348SEric.Yu@Sun.COM { 3188348SEric.Yu@Sun.COM int error = 0; 3198348SEric.Yu@Sun.COM ssize_t orig_resid = uiop->uio_resid; 3208348SEric.Yu@Sun.COM 3218348SEric.Yu@Sun.COM /* 3228348SEric.Yu@Sun.COM * Do not bypass the cache if we are doing a local (AF_UNIX) write. 
3238348SEric.Yu@Sun.COM */ 3248348SEric.Yu@Sun.COM if (so->so_family == AF_UNIX) 3258348SEric.Yu@Sun.COM uiop->uio_extflg |= UIO_COPY_CACHED; 3268348SEric.Yu@Sun.COM else 3278348SEric.Yu@Sun.COM uiop->uio_extflg &= ~UIO_COPY_CACHED; 3288348SEric.Yu@Sun.COM 3298348SEric.Yu@Sun.COM error = SOP_SENDMSG(so, msg, uiop, cr); 3308348SEric.Yu@Sun.COM switch (error) { 3318348SEric.Yu@Sun.COM default: 3328348SEric.Yu@Sun.COM break; 3338348SEric.Yu@Sun.COM case EINTR: 3348586Sshenjian /* EAGAIN is EWOULDBLOCK */ 3358348SEric.Yu@Sun.COM case EWOULDBLOCK: 3368348SEric.Yu@Sun.COM /* We did a partial send */ 3378348SEric.Yu@Sun.COM if (uiop->uio_resid != orig_resid) 3388348SEric.Yu@Sun.COM error = 0; 3398348SEric.Yu@Sun.COM break; 3408348SEric.Yu@Sun.COM case EPIPE: 3418348SEric.Yu@Sun.COM if ((so->so_mode & SM_KERNEL) == 0) 3428348SEric.Yu@Sun.COM tsignal(curthread, SIGPIPE); 3438348SEric.Yu@Sun.COM break; 3448348SEric.Yu@Sun.COM } 3458348SEric.Yu@Sun.COM 3468348SEric.Yu@Sun.COM return (error); 3478348SEric.Yu@Sun.COM } 3488348SEric.Yu@Sun.COM 3498348SEric.Yu@Sun.COM int 3508348SEric.Yu@Sun.COM socket_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag, 3518348SEric.Yu@Sun.COM struct cred *cr, mblk_t **mpp) 3528348SEric.Yu@Sun.COM { 3538348SEric.Yu@Sun.COM int error = 0; 3548348SEric.Yu@Sun.COM 3558348SEric.Yu@Sun.COM error = SOP_SENDMBLK(so, msg, fflag, cr, mpp); 3568348SEric.Yu@Sun.COM if (error == EPIPE) { 3578348SEric.Yu@Sun.COM tsignal(curthread, SIGPIPE); 3588348SEric.Yu@Sun.COM } 3598348SEric.Yu@Sun.COM return (error); 3608348SEric.Yu@Sun.COM } 3618348SEric.Yu@Sun.COM 3628348SEric.Yu@Sun.COM int 3638348SEric.Yu@Sun.COM socket_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 3648348SEric.Yu@Sun.COM cred_t *cr) 3658348SEric.Yu@Sun.COM { 3668348SEric.Yu@Sun.COM int error; 3678348SEric.Yu@Sun.COM ssize_t orig_resid = uiop->uio_resid; 3688348SEric.Yu@Sun.COM 3698348SEric.Yu@Sun.COM /* 3708348SEric.Yu@Sun.COM * Do not bypass the cache when reading data, 
as the application 3718348SEric.Yu@Sun.COM * is likely to access the data shortly. 3728348SEric.Yu@Sun.COM */ 3738348SEric.Yu@Sun.COM uiop->uio_extflg |= UIO_COPY_CACHED; 3748348SEric.Yu@Sun.COM 3758348SEric.Yu@Sun.COM error = SOP_RECVMSG(so, msg, uiop, cr); 3768348SEric.Yu@Sun.COM 3778348SEric.Yu@Sun.COM switch (error) { 3788348SEric.Yu@Sun.COM case EINTR: 3798586Sshenjian /* EAGAIN is EWOULDBLOCK */ 3808348SEric.Yu@Sun.COM case EWOULDBLOCK: 3818348SEric.Yu@Sun.COM /* We did a partial read */ 3828348SEric.Yu@Sun.COM if (uiop->uio_resid != orig_resid) 3838348SEric.Yu@Sun.COM error = 0; 3848348SEric.Yu@Sun.COM break; 3858348SEric.Yu@Sun.COM default: 3868348SEric.Yu@Sun.COM break; 3878348SEric.Yu@Sun.COM } 3888348SEric.Yu@Sun.COM return (error); 3898348SEric.Yu@Sun.COM } 3908348SEric.Yu@Sun.COM 3918348SEric.Yu@Sun.COM int 3928348SEric.Yu@Sun.COM socket_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode, 3938348SEric.Yu@Sun.COM struct cred *cr, int32_t *rvalp) 3948348SEric.Yu@Sun.COM { 3958348SEric.Yu@Sun.COM return (SOP_IOCTL(so, cmd, arg, mode, cr, rvalp)); 3968348SEric.Yu@Sun.COM } 3978348SEric.Yu@Sun.COM 3988348SEric.Yu@Sun.COM int 3998348SEric.Yu@Sun.COM socket_poll(struct sonode *so, short events, int anyyet, short *reventsp, 4008348SEric.Yu@Sun.COM struct pollhead **phpp) 4018348SEric.Yu@Sun.COM { 4028348SEric.Yu@Sun.COM return (SOP_POLL(so, events, anyyet, reventsp, phpp)); 4038348SEric.Yu@Sun.COM } 4048348SEric.Yu@Sun.COM 4058348SEric.Yu@Sun.COM int 4068348SEric.Yu@Sun.COM socket_close(struct sonode *so, int flag, struct cred *cr) 4078348SEric.Yu@Sun.COM { 4088348SEric.Yu@Sun.COM return (VOP_CLOSE(SOTOV(so), flag, 1, 0, cr, NULL)); 4098348SEric.Yu@Sun.COM } 4108348SEric.Yu@Sun.COM 4118348SEric.Yu@Sun.COM int 4128348SEric.Yu@Sun.COM socket_close_internal(struct sonode *so, int flag, cred_t *cr) 4138348SEric.Yu@Sun.COM { 4148348SEric.Yu@Sun.COM ASSERT(so->so_count == 0); 4158348SEric.Yu@Sun.COM 4168348SEric.Yu@Sun.COM return (SOP_CLOSE(so, flag, cr)); 
4178348SEric.Yu@Sun.COM } 4188348SEric.Yu@Sun.COM 4198348SEric.Yu@Sun.COM void 4208348SEric.Yu@Sun.COM socket_destroy(struct sonode *so) 4218348SEric.Yu@Sun.COM { 4228348SEric.Yu@Sun.COM vn_invalid(SOTOV(so)); 4238348SEric.Yu@Sun.COM VN_RELE(SOTOV(so)); 4248348SEric.Yu@Sun.COM } 4258348SEric.Yu@Sun.COM 4268348SEric.Yu@Sun.COM /* ARGSUSED */ 4278348SEric.Yu@Sun.COM void 4288348SEric.Yu@Sun.COM socket_destroy_internal(struct sonode *so, cred_t *cr) 4298348SEric.Yu@Sun.COM { 4308348SEric.Yu@Sun.COM struct sockparams *sp = so->so_sockparams; 4318348SEric.Yu@Sun.COM ASSERT(so->so_count == 0 && sp != NULL); 4328348SEric.Yu@Sun.COM 4338348SEric.Yu@Sun.COM sp->sp_smod_info->smod_sock_destroy_func(so); 4348348SEric.Yu@Sun.COM 4358348SEric.Yu@Sun.COM SOCKPARAMS_DEC_REF(sp); 4368348SEric.Yu@Sun.COM } 4378348SEric.Yu@Sun.COM 4388348SEric.Yu@Sun.COM /* 4398348SEric.Yu@Sun.COM * TODO Once the common vnode ops is available, then the vnops argument 4408348SEric.Yu@Sun.COM * should be removed. 4418348SEric.Yu@Sun.COM */ 4428348SEric.Yu@Sun.COM /*ARGSUSED*/ 4438348SEric.Yu@Sun.COM int 4448348SEric.Yu@Sun.COM sonode_constructor(void *buf, void *cdrarg, int kmflags) 4458348SEric.Yu@Sun.COM { 4468348SEric.Yu@Sun.COM struct sonode *so = buf; 4478348SEric.Yu@Sun.COM struct vnode *vp; 4488348SEric.Yu@Sun.COM 4498348SEric.Yu@Sun.COM vp = so->so_vnode = vn_alloc(kmflags); 4508348SEric.Yu@Sun.COM if (vp == NULL) { 4518348SEric.Yu@Sun.COM return (-1); 4528348SEric.Yu@Sun.COM } 4538348SEric.Yu@Sun.COM vp->v_data = so; 4548348SEric.Yu@Sun.COM vn_setops(vp, socket_vnodeops); 4558348SEric.Yu@Sun.COM 4568348SEric.Yu@Sun.COM so->so_priv = NULL; 4578348SEric.Yu@Sun.COM so->so_oobmsg = NULL; 4588348SEric.Yu@Sun.COM 4598348SEric.Yu@Sun.COM so->so_proto_handle = NULL; 4608348SEric.Yu@Sun.COM 4618348SEric.Yu@Sun.COM so->so_peercred = NULL; 4628348SEric.Yu@Sun.COM 4638348SEric.Yu@Sun.COM so->so_rcv_queued = 0; 4648348SEric.Yu@Sun.COM so->so_rcv_q_head = NULL; 4658348SEric.Yu@Sun.COM 
so->so_rcv_q_last_head = NULL; 4668348SEric.Yu@Sun.COM so->so_rcv_head = NULL; 4678348SEric.Yu@Sun.COM so->so_rcv_last_head = NULL; 4688348SEric.Yu@Sun.COM so->so_rcv_wanted = 0; 4698348SEric.Yu@Sun.COM so->so_rcv_timer_interval = SOCKET_NO_RCVTIMER; 4708348SEric.Yu@Sun.COM so->so_rcv_timer_tid = 0; 4718348SEric.Yu@Sun.COM so->so_rcv_thresh = 0; 4728348SEric.Yu@Sun.COM 4738348SEric.Yu@Sun.COM so->so_acceptq_head = NULL; 4748348SEric.Yu@Sun.COM so->so_acceptq_tail = &so->so_acceptq_head; 4758348SEric.Yu@Sun.COM so->so_acceptq_next = NULL; 4768348SEric.Yu@Sun.COM so->so_acceptq_len = 0; 4778348SEric.Yu@Sun.COM so->so_backlog = 0; 4788348SEric.Yu@Sun.COM 4798348SEric.Yu@Sun.COM so->so_snd_qfull = B_FALSE; 4808348SEric.Yu@Sun.COM 4818348SEric.Yu@Sun.COM mutex_init(&so->so_lock, NULL, MUTEX_DEFAULT, NULL); 4828348SEric.Yu@Sun.COM mutex_init(&so->so_acceptq_lock, NULL, MUTEX_DEFAULT, NULL); 4838348SEric.Yu@Sun.COM rw_init(&so->so_fallback_rwlock, NULL, RW_DEFAULT, NULL); 4848348SEric.Yu@Sun.COM cv_init(&so->so_state_cv, NULL, CV_DEFAULT, NULL); 4858348SEric.Yu@Sun.COM cv_init(&so->so_want_cv, NULL, CV_DEFAULT, NULL); 4868348SEric.Yu@Sun.COM 4878348SEric.Yu@Sun.COM cv_init(&so->so_acceptq_cv, NULL, CV_DEFAULT, NULL); 4888348SEric.Yu@Sun.COM cv_init(&so->so_snd_cv, NULL, CV_DEFAULT, NULL); 4898348SEric.Yu@Sun.COM cv_init(&so->so_rcv_cv, NULL, CV_DEFAULT, NULL); 4908348SEric.Yu@Sun.COM cv_init(&so->so_copy_cv, NULL, CV_DEFAULT, NULL); 4918348SEric.Yu@Sun.COM cv_init(&so->so_closing_cv, NULL, CV_DEFAULT, NULL); 4928348SEric.Yu@Sun.COM 4938348SEric.Yu@Sun.COM return (0); 4948348SEric.Yu@Sun.COM } 4958348SEric.Yu@Sun.COM 4968348SEric.Yu@Sun.COM /*ARGSUSED*/ 4978348SEric.Yu@Sun.COM void 4988348SEric.Yu@Sun.COM sonode_destructor(void *buf, void *cdrarg) 4998348SEric.Yu@Sun.COM { 5008348SEric.Yu@Sun.COM struct sonode *so = buf; 5018348SEric.Yu@Sun.COM struct vnode *vp = SOTOV(so); 5028348SEric.Yu@Sun.COM 5038348SEric.Yu@Sun.COM ASSERT(so->so_priv == NULL); 5048348SEric.Yu@Sun.COM 
ASSERT(so->so_peercred == NULL); 5058348SEric.Yu@Sun.COM 5068348SEric.Yu@Sun.COM ASSERT(so->so_oobmsg == NULL); 5078348SEric.Yu@Sun.COM 5088348SEric.Yu@Sun.COM ASSERT(so->so_rcv_q_head == NULL); 5098348SEric.Yu@Sun.COM 5108348SEric.Yu@Sun.COM ASSERT(so->so_acceptq_head == NULL); 5118348SEric.Yu@Sun.COM ASSERT(so->so_acceptq_tail == &so->so_acceptq_head); 5128348SEric.Yu@Sun.COM ASSERT(so->so_acceptq_next == NULL); 5138348SEric.Yu@Sun.COM 5148348SEric.Yu@Sun.COM ASSERT(vp->v_data == so); 5158348SEric.Yu@Sun.COM ASSERT(vn_matchops(vp, socket_vnodeops)); 5168348SEric.Yu@Sun.COM 5178348SEric.Yu@Sun.COM vn_free(vp); 5188348SEric.Yu@Sun.COM 5198348SEric.Yu@Sun.COM mutex_destroy(&so->so_lock); 5208348SEric.Yu@Sun.COM mutex_destroy(&so->so_acceptq_lock); 5218348SEric.Yu@Sun.COM rw_destroy(&so->so_fallback_rwlock); 5228348SEric.Yu@Sun.COM 5238348SEric.Yu@Sun.COM cv_destroy(&so->so_state_cv); 5248348SEric.Yu@Sun.COM cv_destroy(&so->so_want_cv); 5258348SEric.Yu@Sun.COM cv_destroy(&so->so_acceptq_cv); 5268348SEric.Yu@Sun.COM cv_destroy(&so->so_snd_cv); 5278348SEric.Yu@Sun.COM cv_destroy(&so->so_rcv_cv); 5288348SEric.Yu@Sun.COM cv_destroy(&so->so_closing_cv); 5298348SEric.Yu@Sun.COM } 5308348SEric.Yu@Sun.COM 5318348SEric.Yu@Sun.COM void 5328348SEric.Yu@Sun.COM sonode_init(struct sonode *so, struct sockparams *sp, int family, 5338348SEric.Yu@Sun.COM int type, int protocol, sonodeops_t *sops) 5348348SEric.Yu@Sun.COM { 5358348SEric.Yu@Sun.COM vnode_t *vp; 5368348SEric.Yu@Sun.COM 5378348SEric.Yu@Sun.COM vp = SOTOV(so); 5388348SEric.Yu@Sun.COM 5398348SEric.Yu@Sun.COM so->so_flag = 0; 5408348SEric.Yu@Sun.COM 5418348SEric.Yu@Sun.COM so->so_state = 0; 5428348SEric.Yu@Sun.COM so->so_mode = 0; 5438348SEric.Yu@Sun.COM 5448348SEric.Yu@Sun.COM so->so_count = 0; 5458348SEric.Yu@Sun.COM 5468348SEric.Yu@Sun.COM so->so_family = family; 5478348SEric.Yu@Sun.COM so->so_type = type; 5488348SEric.Yu@Sun.COM so->so_protocol = protocol; 5498348SEric.Yu@Sun.COM 5508348SEric.Yu@Sun.COM 
SOCK_CONNID_INIT(so->so_proto_connid); 5518348SEric.Yu@Sun.COM 5528348SEric.Yu@Sun.COM so->so_options = 0; 5538348SEric.Yu@Sun.COM so->so_linger.l_onoff = 0; 5548348SEric.Yu@Sun.COM so->so_linger.l_linger = 0; 5558348SEric.Yu@Sun.COM so->so_sndbuf = 0; 5568348SEric.Yu@Sun.COM so->so_error = 0; 5578348SEric.Yu@Sun.COM so->so_rcvtimeo = 0; 5588348SEric.Yu@Sun.COM so->so_sndtimeo = 0; 5598465SEric.Yu@Sun.COM so->so_xpg_rcvbuf = 0; 5608348SEric.Yu@Sun.COM 5618348SEric.Yu@Sun.COM ASSERT(so->so_oobmsg == NULL); 5628348SEric.Yu@Sun.COM so->so_oobmark = 0; 5638348SEric.Yu@Sun.COM so->so_pgrp = 0; 5648348SEric.Yu@Sun.COM 5658348SEric.Yu@Sun.COM ASSERT(so->so_peercred == NULL); 5668348SEric.Yu@Sun.COM 5678348SEric.Yu@Sun.COM so->so_zoneid = getzoneid(); 5688348SEric.Yu@Sun.COM 5698348SEric.Yu@Sun.COM so->so_sockparams = sp; 5708348SEric.Yu@Sun.COM 5718348SEric.Yu@Sun.COM so->so_ops = sops; 5728348SEric.Yu@Sun.COM 5738399SRao.Shoaib@Sun.COM so->so_not_str = (sops != &sotpi_sonodeops); 5748399SRao.Shoaib@Sun.COM 5758348SEric.Yu@Sun.COM so->so_proto_handle = NULL; 5768348SEric.Yu@Sun.COM 5778348SEric.Yu@Sun.COM so->so_downcalls = NULL; 5788348SEric.Yu@Sun.COM 5798348SEric.Yu@Sun.COM so->so_copyflag = 0; 5808348SEric.Yu@Sun.COM 5818348SEric.Yu@Sun.COM ASSERT(so->so_acceptq_head == NULL); 5828348SEric.Yu@Sun.COM ASSERT(so->so_acceptq_tail == &so->so_acceptq_head); 5838348SEric.Yu@Sun.COM ASSERT(so->so_acceptq_next == NULL); 5848348SEric.Yu@Sun.COM 5858348SEric.Yu@Sun.COM vn_reinit(vp); 5868348SEric.Yu@Sun.COM vp->v_vfsp = rootvfs; 5878348SEric.Yu@Sun.COM vp->v_type = VSOCK; 5888348SEric.Yu@Sun.COM vp->v_rdev = sockdev; 5898348SEric.Yu@Sun.COM 5908348SEric.Yu@Sun.COM so->so_rcv_queued = 0; 5918348SEric.Yu@Sun.COM so->so_rcv_q_head = NULL; 5928348SEric.Yu@Sun.COM so->so_rcv_q_last_head = NULL; 5938348SEric.Yu@Sun.COM so->so_rcv_head = NULL; 5948348SEric.Yu@Sun.COM so->so_rcv_last_head = NULL; 5958348SEric.Yu@Sun.COM 5968348SEric.Yu@Sun.COM so->so_snd_qfull = B_FALSE; 
5978348SEric.Yu@Sun.COM so->so_minpsz = 0; 5988348SEric.Yu@Sun.COM 5998348SEric.Yu@Sun.COM so->so_rcv_wakeup = B_FALSE; 6008348SEric.Yu@Sun.COM so->so_snd_wakeup = B_FALSE; 6018348SEric.Yu@Sun.COM so->so_flowctrld = B_FALSE; 6028348SEric.Yu@Sun.COM 6038348SEric.Yu@Sun.COM so->so_pollev = 0; 6048348SEric.Yu@Sun.COM bzero(&so->so_poll_list, sizeof (so->so_poll_list)); 6058348SEric.Yu@Sun.COM bzero(&so->so_proto_props, sizeof (struct sock_proto_props)); 6068348SEric.Yu@Sun.COM 6078348SEric.Yu@Sun.COM bzero(&(so->so_ksock_callbacks), sizeof (ksocket_callbacks_t)); 6088348SEric.Yu@Sun.COM so->so_ksock_cb_arg = NULL; 6098348SEric.Yu@Sun.COM 6108348SEric.Yu@Sun.COM so->so_max_addr_len = sizeof (struct sockaddr_storage); 6118348SEric.Yu@Sun.COM 6128348SEric.Yu@Sun.COM so->so_direct = NULL; 6138348SEric.Yu@Sun.COM 6148348SEric.Yu@Sun.COM vn_exists(vp); 6158348SEric.Yu@Sun.COM } 6168348SEric.Yu@Sun.COM 6178348SEric.Yu@Sun.COM void 6188348SEric.Yu@Sun.COM sonode_fini(struct sonode *so) 6198348SEric.Yu@Sun.COM { 6208348SEric.Yu@Sun.COM mblk_t *mp; 6218348SEric.Yu@Sun.COM vnode_t *vp; 6228348SEric.Yu@Sun.COM 6238348SEric.Yu@Sun.COM ASSERT(so->so_count == 0); 6248348SEric.Yu@Sun.COM 6258348SEric.Yu@Sun.COM if (so->so_rcv_timer_tid) { 6268348SEric.Yu@Sun.COM ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 6278348SEric.Yu@Sun.COM (void) untimeout(so->so_rcv_timer_tid); 6288348SEric.Yu@Sun.COM so->so_rcv_timer_tid = 0; 6298348SEric.Yu@Sun.COM } 6308348SEric.Yu@Sun.COM 6318348SEric.Yu@Sun.COM so_acceptq_flush(so); 6328348SEric.Yu@Sun.COM 6338348SEric.Yu@Sun.COM if ((mp = so->so_oobmsg) != NULL) { 6348348SEric.Yu@Sun.COM freemsg(mp); 6358348SEric.Yu@Sun.COM so->so_oobmsg = NULL; 6368348SEric.Yu@Sun.COM so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA| 6378348SEric.Yu@Sun.COM SS_RCVATMARK); 6388348SEric.Yu@Sun.COM } 6398348SEric.Yu@Sun.COM 6408348SEric.Yu@Sun.COM if (so->so_poll_list.ph_list != NULL) { 6418348SEric.Yu@Sun.COM pollwakeup(&so->so_poll_list, POLLERR); 
6428348SEric.Yu@Sun.COM pollhead_clean(&so->so_poll_list); 6438348SEric.Yu@Sun.COM } 6448348SEric.Yu@Sun.COM 645*9491SAnders.Persson@Sun.COM if (so->so_direct != NULL) 646*9491SAnders.Persson@Sun.COM sod_sock_fini(so); 6478348SEric.Yu@Sun.COM 6488348SEric.Yu@Sun.COM vp = SOTOV(so); 6498348SEric.Yu@Sun.COM vn_invalid(vp); 6508348SEric.Yu@Sun.COM 6518348SEric.Yu@Sun.COM if (so->so_peercred != NULL) { 6528348SEric.Yu@Sun.COM crfree(so->so_peercred); 6538348SEric.Yu@Sun.COM so->so_peercred = NULL; 6548348SEric.Yu@Sun.COM } 6558348SEric.Yu@Sun.COM } 656