10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 51548Srshoaib * Common Development and Distribution License (the "License"). 61548Srshoaib * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 211548Srshoaib 220Sstevel@tonic-gate /* 23*12643SAnders.Persson@Sun.COM * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved. 240Sstevel@tonic-gate */ 250Sstevel@tonic-gate 260Sstevel@tonic-gate #include <sys/types.h> 270Sstevel@tonic-gate #include <sys/t_lock.h> 280Sstevel@tonic-gate #include <sys/param.h> 290Sstevel@tonic-gate #include <sys/systm.h> 300Sstevel@tonic-gate #include <sys/buf.h> 310Sstevel@tonic-gate #include <sys/conf.h> 320Sstevel@tonic-gate #include <sys/cred.h> 330Sstevel@tonic-gate #include <sys/kmem.h> 348348SEric.Yu@Sun.COM #include <sys/kmem_impl.h> 350Sstevel@tonic-gate #include <sys/sysmacros.h> 360Sstevel@tonic-gate #include <sys/vfs.h> 370Sstevel@tonic-gate #include <sys/vnode.h> 380Sstevel@tonic-gate #include <sys/debug.h> 390Sstevel@tonic-gate #include <sys/errno.h> 400Sstevel@tonic-gate #include <sys/time.h> 410Sstevel@tonic-gate #include <sys/file.h> 420Sstevel@tonic-gate #include <sys/open.h> 430Sstevel@tonic-gate #include <sys/user.h> 440Sstevel@tonic-gate #include <sys/termios.h> 450Sstevel@tonic-gate #include <sys/stream.h> 460Sstevel@tonic-gate #include <sys/strsubr.h> 470Sstevel@tonic-gate #include <sys/strsun.h> 488348SEric.Yu@Sun.COM #include <sys/suntpi.h> 490Sstevel@tonic-gate #include <sys/ddi.h> 500Sstevel@tonic-gate #include <sys/esunddi.h> 510Sstevel@tonic-gate #include <sys/flock.h> 520Sstevel@tonic-gate #include <sys/modctl.h> 530Sstevel@tonic-gate #include <sys/vtrace.h> 540Sstevel@tonic-gate #include <sys/cmn_err.h> 550Sstevel@tonic-gate #include <sys/pathname.h> 560Sstevel@tonic-gate 570Sstevel@tonic-gate #include <sys/socket.h> 580Sstevel@tonic-gate #include <sys/socketvar.h> 59741Smasputra #include <sys/sockio.h> 600Sstevel@tonic-gate #include <netinet/in.h> 610Sstevel@tonic-gate #include <sys/un.h> 620Sstevel@tonic-gate #include <sys/strsun.h> 630Sstevel@tonic-gate 640Sstevel@tonic-gate #include <sys/tiuser.h> 650Sstevel@tonic-gate #define _SUN_TPI_VERSION 2 660Sstevel@tonic-gate #include <sys/tihdr.h> 670Sstevel@tonic-gate #include <sys/timod.h> /* TI_GETMYNAME, TI_GETPEERNAME */ 680Sstevel@tonic-gate 690Sstevel@tonic-gate #include <c2/audit.h> 700Sstevel@tonic-gate 710Sstevel@tonic-gate #include <inet/common.h> 720Sstevel@tonic-gate #include <inet/ip.h> 730Sstevel@tonic-gate #include <inet/ip6.h> 740Sstevel@tonic-gate #include <inet/tcp.h> 75741Smasputra #include <inet/udp_impl.h> 760Sstevel@tonic-gate 771974Sbrutus #include <sys/zone.h> 781974Sbrutus 790Sstevel@tonic-gate #include <fs/sockfs/nl7c.h> 801974Sbrutus #include <fs/sockfs/nl7curi.h> 810Sstevel@tonic-gate 82898Skais #include <inet/kssl/ksslapi.h> 83898Skais 848348SEric.Yu@Sun.COM #include <fs/sockfs/sockcommon.h> 858348SEric.Yu@Sun.COM #include <fs/sockfs/socktpi.h> 868348SEric.Yu@Sun.COM #include <fs/sockfs/socktpi_impl.h> 878348SEric.Yu@Sun.COM 880Sstevel@tonic-gate /* 890Sstevel@tonic-gate * Possible failures when memory can't be allocated. The documented behavior: 900Sstevel@tonic-gate * 910Sstevel@tonic-gate * 5.5: 4.X: XNET: 920Sstevel@tonic-gate * accept: ENOMEM/ENOSR/EINTR - (EINTR) ENOMEM/ENOBUFS/ENOSR/ 930Sstevel@tonic-gate * EINTR 940Sstevel@tonic-gate * (4.X does not document EINTR but returns it) 950Sstevel@tonic-gate * bind: ENOSR - ENOBUFS/ENOSR 960Sstevel@tonic-gate * connect: EINTR EINTR ENOBUFS/ENOSR/EINTR 970Sstevel@tonic-gate * getpeername: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR 980Sstevel@tonic-gate * getsockname: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR 990Sstevel@tonic-gate * (4.X getpeername and getsockname do not fail in practice) 1000Sstevel@tonic-gate * getsockopt: ENOMEM/ENOSR - ENOBUFS/ENOSR 1010Sstevel@tonic-gate * listen: - - ENOBUFS 1020Sstevel@tonic-gate * recv: ENOMEM/ENOSR/EINTR EINTR ENOBUFS/ENOMEM/ENOSR/ 1030Sstevel@tonic-gate * EINTR 1040Sstevel@tonic-gate * send: ENOMEM/ENOSR/EINTR ENOBUFS/EINTR ENOBUFS/ENOMEM/ENOSR/ 1050Sstevel@tonic-gate * EINTR 1060Sstevel@tonic-gate * setsockopt: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR 1070Sstevel@tonic-gate * shutdown: ENOMEM/ENOSR - ENOBUFS/ENOSR 1080Sstevel@tonic-gate * socket: ENOMEM/ENOSR ENOBUFS ENOBUFS/ENOMEM/ENOSR 1090Sstevel@tonic-gate * socketpair: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR 1100Sstevel@tonic-gate * 1110Sstevel@tonic-gate * Resolution. When allocation fails: 1120Sstevel@tonic-gate * recv: return EINTR 1130Sstevel@tonic-gate * send: return EINTR 1140Sstevel@tonic-gate * connect, accept: EINTR 1150Sstevel@tonic-gate * bind, listen, shutdown (unbind, unix_close, disconnect): sleep 1160Sstevel@tonic-gate * socket, socketpair: ENOBUFS 1170Sstevel@tonic-gate * getpeername, getsockname: sleep 1180Sstevel@tonic-gate * getsockopt, setsockopt: sleep 1190Sstevel@tonic-gate */ 1200Sstevel@tonic-gate 1210Sstevel@tonic-gate #ifdef SOCK_TEST 1220Sstevel@tonic-gate /* 1230Sstevel@tonic-gate * Variables that make sockfs do something other than the standard TPI 1240Sstevel@tonic-gate * for the AF_INET transports. 1250Sstevel@tonic-gate * 1260Sstevel@tonic-gate * solisten_tpi_tcp: 1270Sstevel@tonic-gate * TCP can handle a O_T_BIND_REQ with an increased backlog even though 1280Sstevel@tonic-gate * the transport is already bound. This is needed to avoid loosing the 1290Sstevel@tonic-gate * port number should listen() do a T_UNBIND_REQ followed by a 1300Sstevel@tonic-gate * O_T_BIND_REQ. 1310Sstevel@tonic-gate * 1320Sstevel@tonic-gate * soconnect_tpi_udp: 1330Sstevel@tonic-gate * UDP and ICMP can handle a T_CONN_REQ. 1340Sstevel@tonic-gate * This is needed to make the sequence of connect(), getsockname() 1350Sstevel@tonic-gate * return the local IP address used to send packets to the connected to 1360Sstevel@tonic-gate * destination. 1370Sstevel@tonic-gate * 1380Sstevel@tonic-gate * soconnect_tpi_tcp: 1390Sstevel@tonic-gate * TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ. 1400Sstevel@tonic-gate * Set this to non-zero to send TPI conformant messages to TCP in this 1410Sstevel@tonic-gate * respect. This is a performance optimization. 1420Sstevel@tonic-gate * 1430Sstevel@tonic-gate * soaccept_tpi_tcp: 1440Sstevel@tonic-gate * TCP can handle a T_CONN_REQ without the acceptor being bound. 1450Sstevel@tonic-gate * This is a performance optimization that has been picked up in XTI. 1460Sstevel@tonic-gate * 1470Sstevel@tonic-gate * soaccept_tpi_multioptions: 1480Sstevel@tonic-gate * When inheriting SOL_SOCKET options from the listener to the accepting 1490Sstevel@tonic-gate * socket send them as a single message for AF_INET{,6}. 1500Sstevel@tonic-gate */ 1510Sstevel@tonic-gate int solisten_tpi_tcp = 0; 1520Sstevel@tonic-gate int soconnect_tpi_udp = 0; 1530Sstevel@tonic-gate int soconnect_tpi_tcp = 0; 1540Sstevel@tonic-gate int soaccept_tpi_tcp = 0; 1550Sstevel@tonic-gate int soaccept_tpi_multioptions = 1; 1560Sstevel@tonic-gate #else /* SOCK_TEST */ 1570Sstevel@tonic-gate #define soconnect_tpi_tcp 0 1580Sstevel@tonic-gate #define soconnect_tpi_udp 0 1590Sstevel@tonic-gate #define solisten_tpi_tcp 0 1600Sstevel@tonic-gate #define soaccept_tpi_tcp 0 1610Sstevel@tonic-gate #define soaccept_tpi_multioptions 1 1620Sstevel@tonic-gate #endif /* SOCK_TEST */ 1630Sstevel@tonic-gate 1640Sstevel@tonic-gate #ifdef SOCK_TEST 1650Sstevel@tonic-gate extern int do_useracc; 1660Sstevel@tonic-gate extern clock_t sock_test_timelimit; 1670Sstevel@tonic-gate #endif /* SOCK_TEST */ 1680Sstevel@tonic-gate 1690Sstevel@tonic-gate /* 1700Sstevel@tonic-gate * Some X/Open added checks might have to be backed out to keep SunOS 4.X 1710Sstevel@tonic-gate * applications working. Turn on this flag to disable these checks. 1720Sstevel@tonic-gate */ 1730Sstevel@tonic-gate int xnet_skip_checks = 0; 1740Sstevel@tonic-gate int xnet_check_print = 0; 1750Sstevel@tonic-gate int xnet_truncate_print = 0; 1760Sstevel@tonic-gate 1778348SEric.Yu@Sun.COM static void sotpi_destroy(struct sonode *); 1788348SEric.Yu@Sun.COM static struct sonode *sotpi_create(struct sockparams *, int, int, int, int, 1798348SEric.Yu@Sun.COM int, int *, cred_t *cr); 1808348SEric.Yu@Sun.COM 1818348SEric.Yu@Sun.COM static boolean_t sotpi_info_create(struct sonode *, int); 1828348SEric.Yu@Sun.COM static void sotpi_info_init(struct sonode *); 1838348SEric.Yu@Sun.COM static void sotpi_info_fini(struct sonode *); 1848348SEric.Yu@Sun.COM static void sotpi_info_destroy(struct sonode *); 1858348SEric.Yu@Sun.COM 1868348SEric.Yu@Sun.COM /* 1878348SEric.Yu@Sun.COM * Do direct function call to the transport layer below; this would 1888348SEric.Yu@Sun.COM * also allow the transport to utilize read-side synchronous stream 1898348SEric.Yu@Sun.COM * interface if necessary. This is a /etc/system tunable that must 1908348SEric.Yu@Sun.COM * not be modified on a running system. By default this is enabled 1918348SEric.Yu@Sun.COM * for performance reasons and may be disabled for debugging purposes. 1928348SEric.Yu@Sun.COM */ 1938348SEric.Yu@Sun.COM boolean_t socktpi_direct = B_TRUE; 1948348SEric.Yu@Sun.COM 1958348SEric.Yu@Sun.COM static struct kmem_cache *socktpi_cache, *socktpi_unix_cache; 1968348SEric.Yu@Sun.COM 1970Sstevel@tonic-gate extern void sigintr(k_sigset_t *, int); 1980Sstevel@tonic-gate extern void sigunintr(k_sigset_t *); 1990Sstevel@tonic-gate 200898Skais /* Sockets acting as an in-kernel SSL proxy */ 201898Skais extern mblk_t *strsock_kssl_input(vnode_t *, mblk_t *, strwakeup_t *, 202898Skais strsigset_t *, strsigset_t *, strpollset_t *); 203898Skais extern mblk_t *strsock_kssl_output(vnode_t *, mblk_t *, strwakeup_t *, 204898Skais strsigset_t *, strsigset_t *, strpollset_t *); 205898Skais 2060Sstevel@tonic-gate static int sotpi_unbind(struct sonode *, int); 2070Sstevel@tonic-gate 2080Sstevel@tonic-gate /* TPI sockfs sonode operations */ 2098348SEric.Yu@Sun.COM int sotpi_init(struct sonode *, struct sonode *, struct cred *, 2108348SEric.Yu@Sun.COM int); 2118348SEric.Yu@Sun.COM static int sotpi_accept(struct sonode *, int, struct cred *, 2128348SEric.Yu@Sun.COM struct sonode **); 2130Sstevel@tonic-gate static int sotpi_bind(struct sonode *, struct sockaddr *, socklen_t, 2148348SEric.Yu@Sun.COM int, struct cred *); 2158348SEric.Yu@Sun.COM static int sotpi_listen(struct sonode *, int, struct cred *); 216*12643SAnders.Persson@Sun.COM static int sotpi_connect(struct sonode *, struct sockaddr *, 2178348SEric.Yu@Sun.COM socklen_t, int, int, struct cred *); 2188348SEric.Yu@Sun.COM extern int sotpi_recvmsg(struct sonode *, struct nmsghdr *, 2198348SEric.Yu@Sun.COM struct uio *, struct cred *); 2200Sstevel@tonic-gate static int sotpi_sendmsg(struct sonode *, struct nmsghdr *, 2218348SEric.Yu@Sun.COM struct uio *, struct cred *); 2228348SEric.Yu@Sun.COM static int sotpi_sendmblk(struct sonode *, struct nmsghdr *, int, 2238348SEric.Yu@Sun.COM struct cred *, mblk_t **); 224741Smasputra static int sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t, 225741Smasputra struct uio *, void *, t_uscalar_t, int); 226741Smasputra static int sodgram_direct(struct sonode *, struct sockaddr *, 227741Smasputra socklen_t, struct uio *, int); 2288348SEric.Yu@Sun.COM extern int sotpi_getpeername(struct sonode *, struct sockaddr *, 2298348SEric.Yu@Sun.COM socklen_t *, boolean_t, struct cred *); 2308348SEric.Yu@Sun.COM static int sotpi_getsockname(struct sonode *, struct sockaddr *, 2318348SEric.Yu@Sun.COM socklen_t *, struct cred *); 2328348SEric.Yu@Sun.COM static int sotpi_shutdown(struct sonode *, int, struct cred *); 2338348SEric.Yu@Sun.COM extern int sotpi_getsockopt(struct sonode *, int, int, void *, 2348348SEric.Yu@Sun.COM socklen_t *, int, struct cred *); 2358348SEric.Yu@Sun.COM extern int sotpi_setsockopt(struct sonode *, int, int, const void *, 2368348SEric.Yu@Sun.COM socklen_t, struct cred *); 2378348SEric.Yu@Sun.COM static int sotpi_ioctl(struct sonode *, int, intptr_t, int, struct cred *, 2388348SEric.Yu@Sun.COM int32_t *); 2398348SEric.Yu@Sun.COM static int socktpi_plumbioctl(struct vnode *, int, intptr_t, int, 2408348SEric.Yu@Sun.COM struct cred *, int32_t *); 2418348SEric.Yu@Sun.COM static int sotpi_poll(struct sonode *, short, int, short *, 2428348SEric.Yu@Sun.COM struct pollhead **); 2438348SEric.Yu@Sun.COM static int sotpi_close(struct sonode *, int, struct cred *); 2448348SEric.Yu@Sun.COM 2458348SEric.Yu@Sun.COM static int i_sotpi_info_constructor(sotpi_info_t *); 2468348SEric.Yu@Sun.COM static void i_sotpi_info_destructor(sotpi_info_t *); 2470Sstevel@tonic-gate 2480Sstevel@tonic-gate sonodeops_t sotpi_sonodeops = { 2498348SEric.Yu@Sun.COM sotpi_init, /* sop_init */ 2500Sstevel@tonic-gate sotpi_accept, /* sop_accept */ 2510Sstevel@tonic-gate sotpi_bind, /* sop_bind */ 2520Sstevel@tonic-gate sotpi_listen, /* sop_listen */ 2530Sstevel@tonic-gate sotpi_connect, /* sop_connect */ 2540Sstevel@tonic-gate sotpi_recvmsg, /* sop_recvmsg */ 2550Sstevel@tonic-gate sotpi_sendmsg, /* sop_sendmsg */ 2568348SEric.Yu@Sun.COM sotpi_sendmblk, /* sop_sendmblk */ 2570Sstevel@tonic-gate sotpi_getpeername, /* sop_getpeername */ 2580Sstevel@tonic-gate sotpi_getsockname, /* sop_getsockname */ 2590Sstevel@tonic-gate sotpi_shutdown, /* sop_shutdown */ 2600Sstevel@tonic-gate sotpi_getsockopt, /* sop_getsockopt */ 2618348SEric.Yu@Sun.COM sotpi_setsockopt, /* sop_setsockopt */ 2628348SEric.Yu@Sun.COM sotpi_ioctl, /* sop_ioctl */ 2638348SEric.Yu@Sun.COM sotpi_poll, /* sop_poll */ 2648348SEric.Yu@Sun.COM sotpi_close, /* sop_close */ 2650Sstevel@tonic-gate }; 2660Sstevel@tonic-gate 2670Sstevel@tonic-gate /* 2688348SEric.Yu@Sun.COM * Return a TPI socket vnode. 2698348SEric.Yu@Sun.COM * 2708348SEric.Yu@Sun.COM * Note that sockets assume that the driver will clone (either itself 2718348SEric.Yu@Sun.COM * or by using the clone driver) i.e. a socket() call will always 2728348SEric.Yu@Sun.COM * result in a new vnode being created. 2738348SEric.Yu@Sun.COM */ 2748348SEric.Yu@Sun.COM 2758348SEric.Yu@Sun.COM /* 2760Sstevel@tonic-gate * Common create code for socket and accept. If tso is set the values 2770Sstevel@tonic-gate * from that node is used instead of issuing a T_INFO_REQ. 2780Sstevel@tonic-gate */ 2798348SEric.Yu@Sun.COM 2808348SEric.Yu@Sun.COM /* ARGSUSED */ 2818348SEric.Yu@Sun.COM static struct sonode * 2828348SEric.Yu@Sun.COM sotpi_create(struct sockparams *sp, int family, int type, int protocol, 2838348SEric.Yu@Sun.COM int version, int sflags, int *errorp, cred_t *cr) 2840Sstevel@tonic-gate { 2850Sstevel@tonic-gate struct sonode *so; 2868348SEric.Yu@Sun.COM kmem_cache_t *cp; 2878348SEric.Yu@Sun.COM int sfamily = family; 2888348SEric.Yu@Sun.COM 2898348SEric.Yu@Sun.COM ASSERT(sp->sp_sdev_info.sd_vnode != NULL); 2908348SEric.Yu@Sun.COM 2918348SEric.Yu@Sun.COM if (family == AF_NCA) { 2928348SEric.Yu@Sun.COM /* 2938348SEric.Yu@Sun.COM * The request is for an NCA socket so for NL7C use the 2948348SEric.Yu@Sun.COM * INET domain instead and mark NL7C_AF_NCA below. 2958348SEric.Yu@Sun.COM */ 2968348SEric.Yu@Sun.COM family = AF_INET; 2978348SEric.Yu@Sun.COM /* 2988348SEric.Yu@Sun.COM * NL7C is not supported in the non-global zone, 2998348SEric.Yu@Sun.COM * we enforce this restriction here. 3008348SEric.Yu@Sun.COM */ 3018348SEric.Yu@Sun.COM if (getzoneid() != GLOBAL_ZONEID) { 3028348SEric.Yu@Sun.COM *errorp = ENOTSUP; 3038348SEric.Yu@Sun.COM return (NULL); 3048348SEric.Yu@Sun.COM } 3058348SEric.Yu@Sun.COM } 3068348SEric.Yu@Sun.COM 3078348SEric.Yu@Sun.COM /* 3088348SEric.Yu@Sun.COM * to be compatible with old tpi socket implementation ignore 3098348SEric.Yu@Sun.COM * sleep flag (sflags) passed in 3108348SEric.Yu@Sun.COM */ 3118348SEric.Yu@Sun.COM cp = (family == AF_UNIX) ? socktpi_unix_cache : socktpi_cache; 3128348SEric.Yu@Sun.COM so = kmem_cache_alloc(cp, KM_SLEEP); 3138348SEric.Yu@Sun.COM if (so == NULL) { 3148348SEric.Yu@Sun.COM *errorp = ENOMEM; 3158348SEric.Yu@Sun.COM return (NULL); 3168348SEric.Yu@Sun.COM } 3178348SEric.Yu@Sun.COM 3188348SEric.Yu@Sun.COM sonode_init(so, sp, family, type, protocol, &sotpi_sonodeops); 3198348SEric.Yu@Sun.COM sotpi_info_init(so); 3208348SEric.Yu@Sun.COM 3218348SEric.Yu@Sun.COM if (sfamily == AF_NCA) { 3228348SEric.Yu@Sun.COM SOTOTPI(so)->sti_nl7c_flags = NL7C_AF_NCA; 3238348SEric.Yu@Sun.COM } 3248348SEric.Yu@Sun.COM 3258348SEric.Yu@Sun.COM if (version == SOV_DEFAULT) 3268348SEric.Yu@Sun.COM version = so_default_version; 3278348SEric.Yu@Sun.COM 3288348SEric.Yu@Sun.COM so->so_version = (short)version; 3298348SEric.Yu@Sun.COM *errorp = 0; 3308348SEric.Yu@Sun.COM 3318348SEric.Yu@Sun.COM return (so); 3328348SEric.Yu@Sun.COM } 3338348SEric.Yu@Sun.COM 3348348SEric.Yu@Sun.COM static void 3358348SEric.Yu@Sun.COM sotpi_destroy(struct sonode *so) 3368348SEric.Yu@Sun.COM { 3378348SEric.Yu@Sun.COM kmem_cache_t *cp; 3388348SEric.Yu@Sun.COM struct sockparams *origsp; 3398348SEric.Yu@Sun.COM 3408348SEric.Yu@Sun.COM /* 3418348SEric.Yu@Sun.COM * If there is a new dealloc function (ie. smod_destroy_func), 3428348SEric.Yu@Sun.COM * then it should check the correctness of the ops. 3438348SEric.Yu@Sun.COM */ 3448348SEric.Yu@Sun.COM 3458348SEric.Yu@Sun.COM ASSERT(so->so_ops == &sotpi_sonodeops); 3468348SEric.Yu@Sun.COM 3478348SEric.Yu@Sun.COM origsp = SOTOTPI(so)->sti_orig_sp; 3488348SEric.Yu@Sun.COM 3498348SEric.Yu@Sun.COM sotpi_info_fini(so); 3508348SEric.Yu@Sun.COM 3518348SEric.Yu@Sun.COM if (so->so_state & SS_FALLBACK_COMP) { 3528348SEric.Yu@Sun.COM /* 3538348SEric.Yu@Sun.COM * A fallback happend, which means that a sotpi_info_t struct 3548348SEric.Yu@Sun.COM * was allocated (as opposed to being allocated from the TPI 3558348SEric.Yu@Sun.COM * sonode cache. Therefore we explicitly free the struct 3568348SEric.Yu@Sun.COM * here. 3578348SEric.Yu@Sun.COM */ 3588348SEric.Yu@Sun.COM sotpi_info_destroy(so); 3598348SEric.Yu@Sun.COM ASSERT(origsp != NULL); 3608348SEric.Yu@Sun.COM 3618348SEric.Yu@Sun.COM origsp->sp_smod_info->smod_sock_destroy_func(so); 3628348SEric.Yu@Sun.COM SOCKPARAMS_DEC_REF(origsp); 3638348SEric.Yu@Sun.COM } else { 3648348SEric.Yu@Sun.COM sonode_fini(so); 3658348SEric.Yu@Sun.COM cp = (so->so_family == AF_UNIX) ? socktpi_unix_cache : 3668348SEric.Yu@Sun.COM socktpi_cache; 3678348SEric.Yu@Sun.COM kmem_cache_free(cp, so); 3688348SEric.Yu@Sun.COM } 3698348SEric.Yu@Sun.COM } 3708348SEric.Yu@Sun.COM 3718348SEric.Yu@Sun.COM /* ARGSUSED1 */ 3728348SEric.Yu@Sun.COM int 3738348SEric.Yu@Sun.COM sotpi_init(struct sonode *so, struct sonode *tso, struct cred *cr, int flags) 3748348SEric.Yu@Sun.COM { 3758348SEric.Yu@Sun.COM major_t maj; 3768348SEric.Yu@Sun.COM dev_t newdev; 3778348SEric.Yu@Sun.COM struct vnode *vp; 3788348SEric.Yu@Sun.COM int error = 0; 3798348SEric.Yu@Sun.COM struct stdata *stp; 3808348SEric.Yu@Sun.COM 3818348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 3828348SEric.Yu@Sun.COM 3838348SEric.Yu@Sun.COM dprint(1, ("sotpi_init()\n")); 3848348SEric.Yu@Sun.COM 3858348SEric.Yu@Sun.COM /* 3868348SEric.Yu@Sun.COM * over write the sleep flag passed in but that is ok 3878348SEric.Yu@Sun.COM * as tpi socket does not honor sleep flag. 3888348SEric.Yu@Sun.COM */ 3898348SEric.Yu@Sun.COM flags |= FREAD|FWRITE; 3908348SEric.Yu@Sun.COM 3918348SEric.Yu@Sun.COM /* 3928348SEric.Yu@Sun.COM * Record in so_flag that it is a clone. 3938348SEric.Yu@Sun.COM */ 3948348SEric.Yu@Sun.COM if (getmajor(sti->sti_dev) == clone_major) 3958348SEric.Yu@Sun.COM so->so_flag |= SOCLONE; 3968348SEric.Yu@Sun.COM 3978348SEric.Yu@Sun.COM if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM) && 3988348SEric.Yu@Sun.COM (so->so_family == AF_INET || so->so_family == AF_INET6) && 3998348SEric.Yu@Sun.COM (so->so_protocol == IPPROTO_TCP || so->so_protocol == IPPROTO_UDP || 4008348SEric.Yu@Sun.COM so->so_protocol == IPPROTO_IP)) { 401741Smasputra /* Tell tcp or udp that it's talking to sockets */ 402741Smasputra flags |= SO_SOCKSTR; 403741Smasputra 404741Smasputra /* 405741Smasputra * Here we indicate to socktpi_open() our attempt to 406741Smasputra * make direct calls between sockfs and transport. 407741Smasputra * The final decision is left to socktpi_open(). 408741Smasputra */ 4098348SEric.Yu@Sun.COM sti->sti_direct = 1; 410741Smasputra 411741Smasputra ASSERT(so->so_type != SOCK_DGRAM || tso == NULL); 412741Smasputra if (so->so_type == SOCK_STREAM && tso != NULL) { 4138348SEric.Yu@Sun.COM if (SOTOTPI(tso)->sti_direct) { 414741Smasputra /* 4158348SEric.Yu@Sun.COM * Inherit sti_direct from listener and pass 416741Smasputra * SO_ACCEPTOR open flag to tcp, indicating 417741Smasputra * that this is an accept fast-path instance. 418741Smasputra */ 419741Smasputra flags |= SO_ACCEPTOR; 420741Smasputra } else { 421741Smasputra /* 4228348SEric.Yu@Sun.COM * sti_direct is not set on listener, meaning 423741Smasputra * that the listener has been converted from 424741Smasputra * a socket to a stream. Ensure that the 425741Smasputra * acceptor inherits these settings. 426741Smasputra */ 4278348SEric.Yu@Sun.COM sti->sti_direct = 0; 428741Smasputra flags &= ~SO_SOCKSTR; 429741Smasputra } 4300Sstevel@tonic-gate } 4310Sstevel@tonic-gate } 4320Sstevel@tonic-gate 4330Sstevel@tonic-gate /* 4340Sstevel@tonic-gate * Tell local transport that it is talking to sockets. 4350Sstevel@tonic-gate */ 4360Sstevel@tonic-gate if (so->so_family == AF_UNIX) { 4370Sstevel@tonic-gate flags |= SO_SOCKSTR; 4380Sstevel@tonic-gate } 4390Sstevel@tonic-gate 4408348SEric.Yu@Sun.COM vp = SOTOV(so); 4418348SEric.Yu@Sun.COM newdev = vp->v_rdev; 4428348SEric.Yu@Sun.COM maj = getmajor(newdev); 4438348SEric.Yu@Sun.COM ASSERT(STREAMSTAB(maj)); 4448348SEric.Yu@Sun.COM 4458348SEric.Yu@Sun.COM error = stropen(vp, &newdev, flags, cr); 4468348SEric.Yu@Sun.COM 4478348SEric.Yu@Sun.COM stp = vp->v_stream; 4488348SEric.Yu@Sun.COM if (error == 0) { 4498348SEric.Yu@Sun.COM if (so->so_flag & SOCLONE) 4508348SEric.Yu@Sun.COM ASSERT(newdev != vp->v_rdev); 4518348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 4528348SEric.Yu@Sun.COM sti->sti_dev = newdev; 4538348SEric.Yu@Sun.COM vp->v_rdev = newdev; 4548348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 4558348SEric.Yu@Sun.COM 4568348SEric.Yu@Sun.COM if (stp->sd_flag & STRISTTY) { 4578348SEric.Yu@Sun.COM /* 4588348SEric.Yu@Sun.COM * this is a post SVR4 tty driver - a socket can not 4598348SEric.Yu@Sun.COM * be a controlling terminal. Fail the open. 4608348SEric.Yu@Sun.COM */ 4618348SEric.Yu@Sun.COM (void) sotpi_close(so, flags, cr); 4628348SEric.Yu@Sun.COM return (ENOTTY); /* XXX */ 4638348SEric.Yu@Sun.COM } 4648348SEric.Yu@Sun.COM 4658348SEric.Yu@Sun.COM ASSERT(stp->sd_wrq != NULL); 4668348SEric.Yu@Sun.COM sti->sti_provinfo = tpi_findprov(stp->sd_wrq); 4678348SEric.Yu@Sun.COM 4688348SEric.Yu@Sun.COM /* 4698348SEric.Yu@Sun.COM * If caller is interested in doing direct function call 4708348SEric.Yu@Sun.COM * interface to/from transport module, probe the module 4718348SEric.Yu@Sun.COM * directly beneath the streamhead to see if it qualifies. 4728348SEric.Yu@Sun.COM * 4738348SEric.Yu@Sun.COM * We turn off the direct interface when qualifications fail. 4748348SEric.Yu@Sun.COM * In the acceptor case, we simply turn off the sti_direct 4758348SEric.Yu@Sun.COM * flag on the socket. We do the fallback after the accept 4768348SEric.Yu@Sun.COM * has completed, before the new socket is returned to the 4778348SEric.Yu@Sun.COM * application. 4788348SEric.Yu@Sun.COM */ 4798348SEric.Yu@Sun.COM if (sti->sti_direct) { 4808348SEric.Yu@Sun.COM queue_t *tq = stp->sd_wrq->q_next; 4818348SEric.Yu@Sun.COM 4828348SEric.Yu@Sun.COM /* 4838348SEric.Yu@Sun.COM * sti_direct is currently supported and tested 4848348SEric.Yu@Sun.COM * only for tcp/udp; this is the main reason to 4858348SEric.Yu@Sun.COM * have the following assertions. 4868348SEric.Yu@Sun.COM */ 4878348SEric.Yu@Sun.COM ASSERT(so->so_family == AF_INET || 4888348SEric.Yu@Sun.COM so->so_family == AF_INET6); 4898348SEric.Yu@Sun.COM ASSERT(so->so_protocol == IPPROTO_UDP || 4908348SEric.Yu@Sun.COM so->so_protocol == IPPROTO_TCP || 4918348SEric.Yu@Sun.COM so->so_protocol == IPPROTO_IP); 4928348SEric.Yu@Sun.COM ASSERT(so->so_type == SOCK_DGRAM || 4938348SEric.Yu@Sun.COM so->so_type == SOCK_STREAM); 4948348SEric.Yu@Sun.COM 4958348SEric.Yu@Sun.COM /* 4968348SEric.Yu@Sun.COM * Abort direct call interface if the module directly 4978348SEric.Yu@Sun.COM * underneath the stream head is not defined with the 4988348SEric.Yu@Sun.COM * _D_DIRECT flag. This could happen in the tcp or 4998348SEric.Yu@Sun.COM * udp case, when some other module is autopushed 5008348SEric.Yu@Sun.COM * above it, or for some reasons the expected module 5018348SEric.Yu@Sun.COM * isn't purely D_MP (which is the main requirement). 5028348SEric.Yu@Sun.COM */ 5038348SEric.Yu@Sun.COM if (!socktpi_direct || !(tq->q_flag & _QDIRECT) || 5048348SEric.Yu@Sun.COM !(_OTHERQ(tq)->q_flag & _QDIRECT)) { 5058348SEric.Yu@Sun.COM int rval; 5068348SEric.Yu@Sun.COM 5078348SEric.Yu@Sun.COM /* Continue on without direct calls */ 5088348SEric.Yu@Sun.COM sti->sti_direct = 0; 5098348SEric.Yu@Sun.COM 5108348SEric.Yu@Sun.COM /* 5118348SEric.Yu@Sun.COM * Cannot issue ioctl on fallback socket since 5128348SEric.Yu@Sun.COM * there is no conn associated with the queue. 5138348SEric.Yu@Sun.COM * The fallback downcall will notify the proto 5148348SEric.Yu@Sun.COM * of the change. 5158348SEric.Yu@Sun.COM */ 5168348SEric.Yu@Sun.COM if (!(flags & SO_ACCEPTOR) && 5178348SEric.Yu@Sun.COM !(flags & SO_FALLBACK)) { 5188348SEric.Yu@Sun.COM if ((error = strioctl(vp, 5198348SEric.Yu@Sun.COM _SIOCSOCKFALLBACK, 0, 0, K_TO_K, 5208348SEric.Yu@Sun.COM cr, &rval)) != 0) { 5218348SEric.Yu@Sun.COM (void) sotpi_close(so, flags, 5228348SEric.Yu@Sun.COM cr); 5238348SEric.Yu@Sun.COM return (error); 5248348SEric.Yu@Sun.COM } 5258348SEric.Yu@Sun.COM } 5268348SEric.Yu@Sun.COM } 5278348SEric.Yu@Sun.COM } 5288348SEric.Yu@Sun.COM 5298348SEric.Yu@Sun.COM if (flags & SO_FALLBACK) { 5308348SEric.Yu@Sun.COM /* 5318348SEric.Yu@Sun.COM * The stream created does not have a conn. 5328348SEric.Yu@Sun.COM * do stream set up after conn has been assigned 5338348SEric.Yu@Sun.COM */ 5348348SEric.Yu@Sun.COM return (error); 5358348SEric.Yu@Sun.COM } 5368348SEric.Yu@Sun.COM if (error = so_strinit(so, tso)) { 5378348SEric.Yu@Sun.COM (void) sotpi_close(so, flags, cr); 5388348SEric.Yu@Sun.COM return (error); 5398348SEric.Yu@Sun.COM } 5408348SEric.Yu@Sun.COM 5418348SEric.Yu@Sun.COM /* Wildcard */ 5428348SEric.Yu@Sun.COM if (so->so_protocol != so->so_sockparams->sp_protocol) { 5438348SEric.Yu@Sun.COM int protocol = so->so_protocol; 5448348SEric.Yu@Sun.COM /* 5458348SEric.Yu@Sun.COM * Issue SO_PROTOTYPE setsockopt. 5468348SEric.Yu@Sun.COM */ 5478348SEric.Yu@Sun.COM error = sotpi_setsockopt(so, SOL_SOCKET, SO_PROTOTYPE, 5488348SEric.Yu@Sun.COM &protocol, (t_uscalar_t)sizeof (protocol), cr); 5498348SEric.Yu@Sun.COM if (error != 0) { 5508348SEric.Yu@Sun.COM (void) sotpi_close(so, flags, cr); 5518348SEric.Yu@Sun.COM /* 5528348SEric.Yu@Sun.COM * Setsockopt often fails with ENOPROTOOPT but 5538348SEric.Yu@Sun.COM * socket() should fail with 5548348SEric.Yu@Sun.COM * EPROTONOSUPPORT/EPROTOTYPE. 5558348SEric.Yu@Sun.COM */ 5568348SEric.Yu@Sun.COM return (EPROTONOSUPPORT); 5578348SEric.Yu@Sun.COM } 5588348SEric.Yu@Sun.COM } 5598348SEric.Yu@Sun.COM 5608348SEric.Yu@Sun.COM } else { 5618348SEric.Yu@Sun.COM /* 5628348SEric.Yu@Sun.COM * While the same socket can not be reopened (unlike specfs) 5638348SEric.Yu@Sun.COM * the stream head sets STREOPENFAIL when the autopush fails. 5648348SEric.Yu@Sun.COM */ 5658348SEric.Yu@Sun.COM if ((stp != NULL) && 5668348SEric.Yu@Sun.COM (stp->sd_flag & STREOPENFAIL)) { 5678348SEric.Yu@Sun.COM /* 5688348SEric.Yu@Sun.COM * Open failed part way through. 5698348SEric.Yu@Sun.COM */ 5708348SEric.Yu@Sun.COM mutex_enter(&stp->sd_lock); 5718348SEric.Yu@Sun.COM stp->sd_flag &= ~STREOPENFAIL; 5728348SEric.Yu@Sun.COM mutex_exit(&stp->sd_lock); 5738348SEric.Yu@Sun.COM (void) sotpi_close(so, flags, cr); 5748348SEric.Yu@Sun.COM return (error); 5758348SEric.Yu@Sun.COM /*NOTREACHED*/ 5768348SEric.Yu@Sun.COM } 5778348SEric.Yu@Sun.COM ASSERT(stp == NULL); 5788348SEric.Yu@Sun.COM } 5798348SEric.Yu@Sun.COM TRACE_4(TR_FAC_SOCKFS, TR_SOCKFS_OPEN, 5808348SEric.Yu@Sun.COM "sockfs open:maj %d vp %p so %p error %d", 5818348SEric.Yu@Sun.COM maj, vp, so, error); 5828348SEric.Yu@Sun.COM return (error); 5830Sstevel@tonic-gate } 5840Sstevel@tonic-gate 5850Sstevel@tonic-gate /* 5860Sstevel@tonic-gate * Bind the socket to an unspecified address in sockfs only. 5870Sstevel@tonic-gate * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't 5880Sstevel@tonic-gate * required in all cases. 5890Sstevel@tonic-gate */ 5900Sstevel@tonic-gate static void 5910Sstevel@tonic-gate so_automatic_bind(struct sonode *so) 5920Sstevel@tonic-gate { 5938348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 5940Sstevel@tonic-gate ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 5950Sstevel@tonic-gate 5960Sstevel@tonic-gate ASSERT(MUTEX_HELD(&so->so_lock)); 5970Sstevel@tonic-gate ASSERT(!(so->so_state & SS_ISBOUND)); 5988348SEric.Yu@Sun.COM ASSERT(sti->sti_unbind_mp); 5998348SEric.Yu@Sun.COM 6008348SEric.Yu@Sun.COM ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 6018348SEric.Yu@Sun.COM bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 6028348SEric.Yu@Sun.COM sti->sti_laddr_sa->sa_family = so->so_family; 6030Sstevel@tonic-gate so->so_state |= SS_ISBOUND; 6040Sstevel@tonic-gate } 6050Sstevel@tonic-gate 6060Sstevel@tonic-gate 6070Sstevel@tonic-gate /* 6080Sstevel@tonic-gate * bind the socket. 6090Sstevel@tonic-gate * 6100Sstevel@tonic-gate * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2 6110Sstevel@tonic-gate * are passed in we allow rebinding. Note that for backwards compatibility 6120Sstevel@tonic-gate * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind. 6130Sstevel@tonic-gate * Thus the rebinding code is currently not executed. 6140Sstevel@tonic-gate * 6150Sstevel@tonic-gate * The constraints for rebinding are: 6160Sstevel@tonic-gate * - it is a SOCK_DGRAM, or 6170Sstevel@tonic-gate * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 6180Sstevel@tonic-gate * and no listen() has been done. 6190Sstevel@tonic-gate * This rebinding code was added based on some language in the XNET book 6200Sstevel@tonic-gate * about not returning EINVAL it the protocol allows rebinding. However, 6210Sstevel@tonic-gate * this language is not present in the Posix socket draft. Thus maybe the 6220Sstevel@tonic-gate * rebinding logic should be deleted from the source. 6230Sstevel@tonic-gate * 6240Sstevel@tonic-gate * A null "name" can be used to unbind the socket if: 6250Sstevel@tonic-gate * - it is a SOCK_DGRAM, or 6260Sstevel@tonic-gate * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 6270Sstevel@tonic-gate * and no listen() has been done. 6280Sstevel@tonic-gate */ 6298348SEric.Yu@Sun.COM /* ARGSUSED */ 6300Sstevel@tonic-gate static int 6310Sstevel@tonic-gate sotpi_bindlisten(struct sonode *so, struct sockaddr *name, 6328348SEric.Yu@Sun.COM socklen_t namelen, int backlog, int flags, struct cred *cr) 6330Sstevel@tonic-gate { 6340Sstevel@tonic-gate struct T_bind_req bind_req; 6350Sstevel@tonic-gate struct T_bind_ack *bind_ack; 6360Sstevel@tonic-gate int error = 0; 6370Sstevel@tonic-gate mblk_t *mp; 6380Sstevel@tonic-gate void *addr; 6390Sstevel@tonic-gate t_uscalar_t addrlen; 6400Sstevel@tonic-gate int unbind_on_err = 1; 6410Sstevel@tonic-gate boolean_t clear_acceptconn_on_err = B_FALSE; 6420Sstevel@tonic-gate boolean_t restore_backlog_on_err = B_FALSE; 6430Sstevel@tonic-gate int save_so_backlog; 6440Sstevel@tonic-gate t_scalar_t PRIM_type = O_T_BIND_REQ; 6450Sstevel@tonic-gate boolean_t tcp_udp_xport; 6460Sstevel@tonic-gate void *nl7c = NULL; 6478348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 6480Sstevel@tonic-gate 6490Sstevel@tonic-gate dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n", 6507240Srh87107 (void *)so, (void *)name, namelen, backlog, flags, 6515240Snordmark pr_state(so->so_state, so->so_mode))); 6520Sstevel@tonic-gate 6530Sstevel@tonic-gate tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM; 6540Sstevel@tonic-gate 6550Sstevel@tonic-gate if (!(flags & _SOBIND_LOCK_HELD)) { 6560Sstevel@tonic-gate mutex_enter(&so->so_lock); 6570Sstevel@tonic-gate so_lock_single(so); /* Set SOLOCKED */ 6580Sstevel@tonic-gate } else { 6590Sstevel@tonic-gate ASSERT(MUTEX_HELD(&so->so_lock)); 6600Sstevel@tonic-gate ASSERT(so->so_flag & SOLOCKED); 6610Sstevel@tonic-gate } 6620Sstevel@tonic-gate 6630Sstevel@tonic-gate /* 6640Sstevel@tonic-gate * Make sure that there is a preallocated unbind_req message 6650Sstevel@tonic-gate * before binding. This message allocated when the socket is 6660Sstevel@tonic-gate * created but it might be have been consumed. 6670Sstevel@tonic-gate */ 6688348SEric.Yu@Sun.COM if (sti->sti_unbind_mp == NULL) { 6690Sstevel@tonic-gate dprintso(so, 1, ("sobind: allocating unbind_req\n")); 6700Sstevel@tonic-gate /* NOTE: holding so_lock while sleeping */ 6718348SEric.Yu@Sun.COM sti->sti_unbind_mp = 6728778SErik.Nordmark@Sun.COM soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP, 6738778SErik.Nordmark@Sun.COM cr); 6740Sstevel@tonic-gate } 6750Sstevel@tonic-gate 6760Sstevel@tonic-gate if (flags & _SOBIND_REBIND) { 6770Sstevel@tonic-gate /* 6780Sstevel@tonic-gate * Called from solisten after doing an sotpi_unbind() or 6790Sstevel@tonic-gate * potentially without the unbind (latter for AF_INET{,6}). 6800Sstevel@tonic-gate */ 6810Sstevel@tonic-gate ASSERT(name == NULL && namelen == 0); 6820Sstevel@tonic-gate 6830Sstevel@tonic-gate if (so->so_family == AF_UNIX) { 6848348SEric.Yu@Sun.COM ASSERT(sti->sti_ux_bound_vp); 6858348SEric.Yu@Sun.COM addr = &sti->sti_ux_laddr; 6868348SEric.Yu@Sun.COM addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 6875240Snordmark dprintso(so, 1, ("sobind rebind UNIX: addrlen %d, " 6885240Snordmark "addr 0x%p, vp %p\n", 6890Sstevel@tonic-gate addrlen, 6907240Srh87107 (void *)((struct so_ux_addr *)addr)->soua_vp, 6918348SEric.Yu@Sun.COM (void *)sti->sti_ux_bound_vp)); 6920Sstevel@tonic-gate } else { 6938348SEric.Yu@Sun.COM addr = sti->sti_laddr_sa; 6948348SEric.Yu@Sun.COM addrlen = (t_uscalar_t)sti->sti_laddr_len; 6950Sstevel@tonic-gate } 6960Sstevel@tonic-gate } else if (flags & _SOBIND_UNSPEC) { 6970Sstevel@tonic-gate ASSERT(name == NULL && namelen == 0); 6980Sstevel@tonic-gate 6990Sstevel@tonic-gate /* 7000Sstevel@tonic-gate * The caller checked SS_ISBOUND but not necessarily 7010Sstevel@tonic-gate * under so_lock 7020Sstevel@tonic-gate */ 7030Sstevel@tonic-gate if (so->so_state & SS_ISBOUND) { 7040Sstevel@tonic-gate /* No error */ 7050Sstevel@tonic-gate goto done; 7060Sstevel@tonic-gate } 7070Sstevel@tonic-gate 7080Sstevel@tonic-gate /* Set an initial local address */ 7090Sstevel@tonic-gate switch (so->so_family) { 7100Sstevel@tonic-gate case AF_UNIX: 7110Sstevel@tonic-gate /* 7120Sstevel@tonic-gate * Use an address with same size as struct sockaddr 7130Sstevel@tonic-gate * just like BSD. 7140Sstevel@tonic-gate */ 7158348SEric.Yu@Sun.COM sti->sti_laddr_len = 7165240Snordmark (socklen_t)sizeof (struct sockaddr); 7178348SEric.Yu@Sun.COM ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 7188348SEric.Yu@Sun.COM bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 7198348SEric.Yu@Sun.COM sti->sti_laddr_sa->sa_family = so->so_family; 7200Sstevel@tonic-gate 7210Sstevel@tonic-gate /* 7220Sstevel@tonic-gate * Pass down an address with the implicit bind 7230Sstevel@tonic-gate * magic number and the rest all zeros. 7240Sstevel@tonic-gate * The transport will return a unique address. 7250Sstevel@tonic-gate */ 7268348SEric.Yu@Sun.COM sti->sti_ux_laddr.soua_vp = NULL; 7278348SEric.Yu@Sun.COM sti->sti_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT; 7288348SEric.Yu@Sun.COM addr = &sti->sti_ux_laddr; 7298348SEric.Yu@Sun.COM addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 7300Sstevel@tonic-gate break; 7310Sstevel@tonic-gate 7320Sstevel@tonic-gate case AF_INET: 7330Sstevel@tonic-gate case AF_INET6: 7340Sstevel@tonic-gate /* 7350Sstevel@tonic-gate * An unspecified bind in TPI has a NULL address. 7360Sstevel@tonic-gate * Set the address in sockfs to have the sa_family. 7370Sstevel@tonic-gate */ 7388348SEric.Yu@Sun.COM sti->sti_laddr_len = (so->so_family == AF_INET) ? 7390Sstevel@tonic-gate (socklen_t)sizeof (sin_t) : 7400Sstevel@tonic-gate (socklen_t)sizeof (sin6_t); 7418348SEric.Yu@Sun.COM ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 7428348SEric.Yu@Sun.COM bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 7438348SEric.Yu@Sun.COM sti->sti_laddr_sa->sa_family = so->so_family; 7440Sstevel@tonic-gate addr = NULL; 7450Sstevel@tonic-gate addrlen = 0; 7460Sstevel@tonic-gate break; 7470Sstevel@tonic-gate 7480Sstevel@tonic-gate default: 7490Sstevel@tonic-gate /* 7500Sstevel@tonic-gate * An unspecified bind in TPI has a NULL address. 7510Sstevel@tonic-gate * Set the address in sockfs to be zero length. 7520Sstevel@tonic-gate * 7530Sstevel@tonic-gate * Can not assume there is a sa_family for all 7540Sstevel@tonic-gate * protocol families. For example, AF_X25 does not 7550Sstevel@tonic-gate * have a family field. 7560Sstevel@tonic-gate */ 7578348SEric.Yu@Sun.COM bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 7588348SEric.Yu@Sun.COM sti->sti_laddr_len = 0; /* XXX correct? */ 7590Sstevel@tonic-gate addr = NULL; 7600Sstevel@tonic-gate addrlen = 0; 7610Sstevel@tonic-gate break; 7620Sstevel@tonic-gate } 7630Sstevel@tonic-gate 7640Sstevel@tonic-gate } else { 7650Sstevel@tonic-gate if (so->so_state & SS_ISBOUND) { 7660Sstevel@tonic-gate /* 7670Sstevel@tonic-gate * If it is ok to rebind the socket, first unbind 7680Sstevel@tonic-gate * with the transport. A rebind to the NULL address 7690Sstevel@tonic-gate * is interpreted as an unbind. 7700Sstevel@tonic-gate * Note that a bind to NULL in BSD does unbind the 7710Sstevel@tonic-gate * socket but it fails with EINVAL. 7720Sstevel@tonic-gate * Note that regular sockets set SOV_SOCKBSD i.e. 7730Sstevel@tonic-gate * _SOBIND_SOCKBSD gets set here hence no type of 7740Sstevel@tonic-gate * socket does currently allow rebinding. 7750Sstevel@tonic-gate * 7760Sstevel@tonic-gate * If the name is NULL just do an unbind. 7770Sstevel@tonic-gate */ 7780Sstevel@tonic-gate if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) && 7790Sstevel@tonic-gate name != NULL) { 7800Sstevel@tonic-gate error = EINVAL; 7810Sstevel@tonic-gate unbind_on_err = 0; 7820Sstevel@tonic-gate eprintsoline(so, error); 7830Sstevel@tonic-gate goto done; 7840Sstevel@tonic-gate } 7850Sstevel@tonic-gate if ((so->so_mode & SM_CONNREQUIRED) && 7860Sstevel@tonic-gate (so->so_state & SS_CANTREBIND)) { 7870Sstevel@tonic-gate error = EINVAL; 7880Sstevel@tonic-gate unbind_on_err = 0; 7890Sstevel@tonic-gate eprintsoline(so, error); 7900Sstevel@tonic-gate goto done; 7910Sstevel@tonic-gate } 7920Sstevel@tonic-gate error = sotpi_unbind(so, 0); 7930Sstevel@tonic-gate if (error) { 7940Sstevel@tonic-gate eprintsoline(so, error); 7950Sstevel@tonic-gate goto done; 7960Sstevel@tonic-gate } 7970Sstevel@tonic-gate ASSERT(!(so->so_state & SS_ISBOUND)); 7980Sstevel@tonic-gate if (name == NULL) { 7990Sstevel@tonic-gate so->so_state &= 8005240Snordmark ~(SS_ISCONNECTED|SS_ISCONNECTING); 8010Sstevel@tonic-gate goto done; 8020Sstevel@tonic-gate } 8030Sstevel@tonic-gate } 8048348SEric.Yu@Sun.COM 8050Sstevel@tonic-gate /* X/Open requires this check */ 8060Sstevel@tonic-gate if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 8070Sstevel@tonic-gate if (xnet_check_print) { 8080Sstevel@tonic-gate printf("sockfs: X/Open bind state check " 8090Sstevel@tonic-gate "caused EINVAL\n"); 8100Sstevel@tonic-gate } 8110Sstevel@tonic-gate error = EINVAL; 8120Sstevel@tonic-gate goto done; 8130Sstevel@tonic-gate } 8140Sstevel@tonic-gate 8150Sstevel@tonic-gate switch (so->so_family) { 8160Sstevel@tonic-gate case AF_UNIX: 8170Sstevel@tonic-gate /* 8180Sstevel@tonic-gate * All AF_UNIX addresses are nul terminated 8190Sstevel@tonic-gate * when copied (copyin_name) in so the minimum 8200Sstevel@tonic-gate * length is 3 bytes. 8210Sstevel@tonic-gate */ 8220Sstevel@tonic-gate if (name == NULL || 8230Sstevel@tonic-gate (ssize_t)namelen <= sizeof (short) + 1) { 8240Sstevel@tonic-gate error = EISDIR; 8250Sstevel@tonic-gate eprintsoline(so, error); 8260Sstevel@tonic-gate goto done; 8270Sstevel@tonic-gate } 8280Sstevel@tonic-gate /* 8290Sstevel@tonic-gate * Verify so_family matches the bound family. 8300Sstevel@tonic-gate * BSD does not check this for AF_UNIX resulting 8310Sstevel@tonic-gate * in funny mknods. 8320Sstevel@tonic-gate */ 8330Sstevel@tonic-gate if (name->sa_family != so->so_family) { 8340Sstevel@tonic-gate error = EAFNOSUPPORT; 8350Sstevel@tonic-gate goto done; 8360Sstevel@tonic-gate } 8370Sstevel@tonic-gate break; 8380Sstevel@tonic-gate case AF_INET: 8390Sstevel@tonic-gate if (name == NULL) { 8400Sstevel@tonic-gate error = EINVAL; 8410Sstevel@tonic-gate eprintsoline(so, error); 8420Sstevel@tonic-gate goto done; 8430Sstevel@tonic-gate } 8440Sstevel@tonic-gate if ((size_t)namelen != sizeof (sin_t)) { 8450Sstevel@tonic-gate error = name->sa_family != so->so_family ? 8460Sstevel@tonic-gate EAFNOSUPPORT : EINVAL; 8470Sstevel@tonic-gate eprintsoline(so, error); 8480Sstevel@tonic-gate goto done; 8490Sstevel@tonic-gate } 8500Sstevel@tonic-gate if ((flags & _SOBIND_XPG4_2) && 8510Sstevel@tonic-gate (name->sa_family != so->so_family)) { 8520Sstevel@tonic-gate /* 8530Sstevel@tonic-gate * This check has to be made for X/Open 8540Sstevel@tonic-gate * sockets however application failures have 8550Sstevel@tonic-gate * been observed when it is applied to 8560Sstevel@tonic-gate * all sockets. 8570Sstevel@tonic-gate */ 8580Sstevel@tonic-gate error = EAFNOSUPPORT; 8590Sstevel@tonic-gate eprintsoline(so, error); 8600Sstevel@tonic-gate goto done; 8610Sstevel@tonic-gate } 8620Sstevel@tonic-gate /* 8630Sstevel@tonic-gate * Force a zero sa_family to match so_family. 8640Sstevel@tonic-gate * 8650Sstevel@tonic-gate * Some programs like inetd(1M) don't set the 8660Sstevel@tonic-gate * family field. Other programs leave 8670Sstevel@tonic-gate * sin_family set to garbage - SunOS 4.X does 8680Sstevel@tonic-gate * not check the family field on a bind. 8690Sstevel@tonic-gate * We use the family field that 8700Sstevel@tonic-gate * was passed in to the socket() call. 8710Sstevel@tonic-gate */ 8720Sstevel@tonic-gate name->sa_family = so->so_family; 8730Sstevel@tonic-gate break; 8740Sstevel@tonic-gate 8750Sstevel@tonic-gate case AF_INET6: { 8760Sstevel@tonic-gate #ifdef DEBUG 8770Sstevel@tonic-gate sin6_t *sin6 = (sin6_t *)name; 8780Sstevel@tonic-gate #endif /* DEBUG */ 8790Sstevel@tonic-gate 8800Sstevel@tonic-gate if (name == NULL) { 8810Sstevel@tonic-gate error = EINVAL; 8820Sstevel@tonic-gate eprintsoline(so, error); 8830Sstevel@tonic-gate goto done; 8840Sstevel@tonic-gate } 8850Sstevel@tonic-gate if ((size_t)namelen != sizeof (sin6_t)) { 8860Sstevel@tonic-gate error = name->sa_family != so->so_family ? 8870Sstevel@tonic-gate EAFNOSUPPORT : EINVAL; 8880Sstevel@tonic-gate eprintsoline(so, error); 8890Sstevel@tonic-gate goto done; 8900Sstevel@tonic-gate } 8910Sstevel@tonic-gate if (name->sa_family != so->so_family) { 8920Sstevel@tonic-gate /* 8930Sstevel@tonic-gate * With IPv6 we require the family to match 8940Sstevel@tonic-gate * unlike in IPv4. 8950Sstevel@tonic-gate */ 8960Sstevel@tonic-gate error = EAFNOSUPPORT; 8970Sstevel@tonic-gate eprintsoline(so, error); 8980Sstevel@tonic-gate goto done; 8990Sstevel@tonic-gate } 9000Sstevel@tonic-gate #ifdef DEBUG 9010Sstevel@tonic-gate /* 9020Sstevel@tonic-gate * Verify that apps don't forget to clear 9030Sstevel@tonic-gate * sin6_scope_id etc 9040Sstevel@tonic-gate */ 9050Sstevel@tonic-gate if (sin6->sin6_scope_id != 0 && 9060Sstevel@tonic-gate !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 9071548Srshoaib zcmn_err(getzoneid(), CE_WARN, 9080Sstevel@tonic-gate "bind with uninitialized sin6_scope_id " 9090Sstevel@tonic-gate "(%d) on socket. Pid = %d\n", 9100Sstevel@tonic-gate (int)sin6->sin6_scope_id, 9110Sstevel@tonic-gate (int)curproc->p_pid); 9120Sstevel@tonic-gate } 9130Sstevel@tonic-gate if (sin6->__sin6_src_id != 0) { 9141548Srshoaib zcmn_err(getzoneid(), CE_WARN, 9150Sstevel@tonic-gate "bind with uninitialized __sin6_src_id " 9160Sstevel@tonic-gate "(%d) on socket. Pid = %d\n", 9170Sstevel@tonic-gate (int)sin6->__sin6_src_id, 9180Sstevel@tonic-gate (int)curproc->p_pid); 9190Sstevel@tonic-gate } 9200Sstevel@tonic-gate #endif /* DEBUG */ 9210Sstevel@tonic-gate break; 9220Sstevel@tonic-gate } 9230Sstevel@tonic-gate default: 9240Sstevel@tonic-gate /* 9250Sstevel@tonic-gate * Don't do any length or sa_family check to allow 9260Sstevel@tonic-gate * non-sockaddr style addresses. 9270Sstevel@tonic-gate */ 9280Sstevel@tonic-gate if (name == NULL) { 9290Sstevel@tonic-gate error = EINVAL; 9300Sstevel@tonic-gate eprintsoline(so, error); 9310Sstevel@tonic-gate goto done; 9320Sstevel@tonic-gate } 9330Sstevel@tonic-gate break; 9340Sstevel@tonic-gate } 9350Sstevel@tonic-gate 9368348SEric.Yu@Sun.COM if (namelen > (t_uscalar_t)sti->sti_laddr_maxlen) { 9370Sstevel@tonic-gate error = ENAMETOOLONG; 9380Sstevel@tonic-gate eprintsoline(so, error); 9390Sstevel@tonic-gate goto done; 9400Sstevel@tonic-gate } 9410Sstevel@tonic-gate /* 9420Sstevel@tonic-gate * Save local address. 9430Sstevel@tonic-gate */ 9448348SEric.Yu@Sun.COM sti->sti_laddr_len = (socklen_t)namelen; 9458348SEric.Yu@Sun.COM ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 9468348SEric.Yu@Sun.COM bcopy(name, sti->sti_laddr_sa, namelen); 9478348SEric.Yu@Sun.COM 9488348SEric.Yu@Sun.COM addr = sti->sti_laddr_sa; 9498348SEric.Yu@Sun.COM addrlen = (t_uscalar_t)sti->sti_laddr_len; 9500Sstevel@tonic-gate switch (so->so_family) { 9510Sstevel@tonic-gate case AF_INET6: 9520Sstevel@tonic-gate case AF_INET: 9530Sstevel@tonic-gate break; 9540Sstevel@tonic-gate case AF_UNIX: { 9550Sstevel@tonic-gate struct sockaddr_un *soun = 9568348SEric.Yu@Sun.COM (struct sockaddr_un *)sti->sti_laddr_sa; 9578032SRic.Aleshire@Sun.COM struct vnode *vp, *rvp; 9580Sstevel@tonic-gate struct vattr vattr; 9590Sstevel@tonic-gate 9608348SEric.Yu@Sun.COM ASSERT(sti->sti_ux_bound_vp == NULL); 9610Sstevel@tonic-gate /* 9620Sstevel@tonic-gate * Create vnode for the specified path name. 9638348SEric.Yu@Sun.COM * Keep vnode held with a reference in sti_ux_bound_vp. 9640Sstevel@tonic-gate * Use the vnode pointer as the address used in the 9650Sstevel@tonic-gate * bind with the transport. 9660Sstevel@tonic-gate * 9670Sstevel@tonic-gate * Use the same mode as in BSD. In particular this does 9680Sstevel@tonic-gate * not observe the umask. 9690Sstevel@tonic-gate */ 9700Sstevel@tonic-gate /* MAXPATHLEN + soun_family + nul termination */ 9718348SEric.Yu@Sun.COM if (sti->sti_laddr_len > 9720Sstevel@tonic-gate (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) { 9730Sstevel@tonic-gate error = ENAMETOOLONG; 9740Sstevel@tonic-gate eprintsoline(so, error); 9750Sstevel@tonic-gate goto done; 9760Sstevel@tonic-gate } 9770Sstevel@tonic-gate vattr.va_type = VSOCK; 9783446Smrj vattr.va_mode = 0777 & ~PTOU(curproc)->u_cmask; 9790Sstevel@tonic-gate vattr.va_mask = AT_TYPE|AT_MODE; 9800Sstevel@tonic-gate /* NOTE: holding so_lock */ 9810Sstevel@tonic-gate error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr, 9825240Snordmark EXCL, 0, &vp, CRMKNOD, 0, 0); 9830Sstevel@tonic-gate if (error) { 9840Sstevel@tonic-gate if (error == EEXIST) 9850Sstevel@tonic-gate error = EADDRINUSE; 9860Sstevel@tonic-gate eprintsoline(so, error); 9870Sstevel@tonic-gate goto done; 9880Sstevel@tonic-gate } 9890Sstevel@tonic-gate /* 9900Sstevel@tonic-gate * Establish pointer from the underlying filesystem 9910Sstevel@tonic-gate * vnode to the socket node. 9928348SEric.Yu@Sun.COM * sti_ux_bound_vp and v_stream->sd_vnode form the 9930Sstevel@tonic-gate * cross-linkage between the underlying filesystem 9940Sstevel@tonic-gate * node and the socket node. 9950Sstevel@tonic-gate */ 9968032SRic.Aleshire@Sun.COM 9978032SRic.Aleshire@Sun.COM if ((VOP_REALVP(vp, &rvp, NULL) == 0) && (vp != rvp)) { 9988032SRic.Aleshire@Sun.COM VN_HOLD(rvp); 9998032SRic.Aleshire@Sun.COM VN_RELE(vp); 10008032SRic.Aleshire@Sun.COM vp = rvp; 10018032SRic.Aleshire@Sun.COM } 10028032SRic.Aleshire@Sun.COM 10030Sstevel@tonic-gate ASSERT(SOTOV(so)->v_stream); 10040Sstevel@tonic-gate mutex_enter(&vp->v_lock); 10050Sstevel@tonic-gate vp->v_stream = SOTOV(so)->v_stream; 10068348SEric.Yu@Sun.COM sti->sti_ux_bound_vp = vp; 10070Sstevel@tonic-gate mutex_exit(&vp->v_lock); 10080Sstevel@tonic-gate 10090Sstevel@tonic-gate /* 10100Sstevel@tonic-gate * Use the vnode pointer value as a unique address 10110Sstevel@tonic-gate * (together with the magic number to avoid conflicts 10120Sstevel@tonic-gate * with implicit binds) in the transport provider. 10130Sstevel@tonic-gate */ 10148348SEric.Yu@Sun.COM sti->sti_ux_laddr.soua_vp = 10158348SEric.Yu@Sun.COM (void *)sti->sti_ux_bound_vp; 10168348SEric.Yu@Sun.COM sti->sti_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT; 10178348SEric.Yu@Sun.COM addr = &sti->sti_ux_laddr; 10188348SEric.Yu@Sun.COM addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 10190Sstevel@tonic-gate dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n", 10200Sstevel@tonic-gate addrlen, 10218348SEric.Yu@Sun.COM (void *)((struct so_ux_addr *)addr)->soua_vp)); 10220Sstevel@tonic-gate break; 10230Sstevel@tonic-gate } 10240Sstevel@tonic-gate } /* end switch (so->so_family) */ 10250Sstevel@tonic-gate } 10260Sstevel@tonic-gate 10270Sstevel@tonic-gate /* 10280Sstevel@tonic-gate * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since 10290Sstevel@tonic-gate * the transport can start passing up T_CONN_IND messages 10300Sstevel@tonic-gate * as soon as it receives the bind req and strsock_proto() 10310Sstevel@tonic-gate * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs. 10320Sstevel@tonic-gate */ 10330Sstevel@tonic-gate if (flags & _SOBIND_LISTEN) { 10340Sstevel@tonic-gate if ((so->so_state & SS_ACCEPTCONN) == 0) 10350Sstevel@tonic-gate clear_acceptconn_on_err = B_TRUE; 10360Sstevel@tonic-gate save_so_backlog = so->so_backlog; 10370Sstevel@tonic-gate restore_backlog_on_err = B_TRUE; 10380Sstevel@tonic-gate so->so_state |= SS_ACCEPTCONN; 10390Sstevel@tonic-gate so->so_backlog = backlog; 10400Sstevel@tonic-gate } 10410Sstevel@tonic-gate 10420Sstevel@tonic-gate /* 10430Sstevel@tonic-gate * If NL7C addr(s) have been configured check for addr/port match, 10440Sstevel@tonic-gate * or if an implicit NL7C socket via AF_NCA mark socket as NL7C. 10450Sstevel@tonic-gate * 10460Sstevel@tonic-gate * NL7C supports the TCP transport only so check AF_INET and AF_INET6 10470Sstevel@tonic-gate * family sockets only. If match mark as such. 10480Sstevel@tonic-gate */ 10491974Sbrutus if (nl7c_enabled && ((addr != NULL && 10500Sstevel@tonic-gate (so->so_family == AF_INET || so->so_family == AF_INET6) && 10510Sstevel@tonic-gate (nl7c = nl7c_lookup_addr(addr, addrlen))) || 10528348SEric.Yu@Sun.COM sti->sti_nl7c_flags == NL7C_AF_NCA)) { 10530Sstevel@tonic-gate /* 10540Sstevel@tonic-gate * NL7C is not supported in non-global zones, 10550Sstevel@tonic-gate * we enforce this restriction here. 10560Sstevel@tonic-gate */ 10570Sstevel@tonic-gate if (so->so_zoneid == GLOBAL_ZONEID) { 10580Sstevel@tonic-gate /* An NL7C socket, mark it */ 10598348SEric.Yu@Sun.COM sti->sti_nl7c_flags |= NL7C_ENABLED; 10601974Sbrutus if (nl7c == NULL) { 10611974Sbrutus /* 10621974Sbrutus * Was an AF_NCA bind() so add it to the 10631974Sbrutus * addr list for reporting purposes. 10641974Sbrutus */ 10651974Sbrutus nl7c = nl7c_add_addr(addr, addrlen); 10661974Sbrutus } 10670Sstevel@tonic-gate } else 10680Sstevel@tonic-gate nl7c = NULL; 10690Sstevel@tonic-gate } 10708348SEric.Yu@Sun.COM 10710Sstevel@tonic-gate /* 10720Sstevel@tonic-gate * We send a T_BIND_REQ for TCP/UDP since we know it supports it, 10730Sstevel@tonic-gate * for other transports we will send in a O_T_BIND_REQ. 10740Sstevel@tonic-gate */ 10750Sstevel@tonic-gate if (tcp_udp_xport && 10760Sstevel@tonic-gate (so->so_family == AF_INET || so->so_family == AF_INET6)) 10770Sstevel@tonic-gate PRIM_type = T_BIND_REQ; 10780Sstevel@tonic-gate 10790Sstevel@tonic-gate bind_req.PRIM_type = PRIM_type; 10800Sstevel@tonic-gate bind_req.ADDR_length = addrlen; 10810Sstevel@tonic-gate bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req); 10820Sstevel@tonic-gate bind_req.CONIND_number = backlog; 10830Sstevel@tonic-gate /* NOTE: holding so_lock while sleeping */ 10840Sstevel@tonic-gate mp = soallocproto2(&bind_req, sizeof (bind_req), 10858778SErik.Nordmark@Sun.COM addr, addrlen, 0, _ALLOC_SLEEP, cr); 10868348SEric.Yu@Sun.COM sti->sti_laddr_valid = 0; 10878348SEric.Yu@Sun.COM 10888348SEric.Yu@Sun.COM /* Done using sti_laddr_sa - can drop the lock */ 10890Sstevel@tonic-gate mutex_exit(&so->so_lock); 10900Sstevel@tonic-gate 1091898Skais /* 1092898Skais * Intercept the bind_req message here to check if this <address/port> 1093898Skais * was configured as an SSL proxy server, or if another endpoint was 1094898Skais * already configured to act as a proxy for us. 10951974Sbrutus * 10961974Sbrutus * Note, only if NL7C not enabled for this socket. 1097898Skais */ 10981974Sbrutus if (nl7c == NULL && 10991974Sbrutus (so->so_family == AF_INET || so->so_family == AF_INET6) && 1100898Skais so->so_type == SOCK_STREAM) { 1101898Skais 11028348SEric.Yu@Sun.COM if (sti->sti_kssl_ent != NULL) { 11038348SEric.Yu@Sun.COM kssl_release_ent(sti->sti_kssl_ent, so, 11048348SEric.Yu@Sun.COM sti->sti_kssl_type); 11058348SEric.Yu@Sun.COM sti->sti_kssl_ent = NULL; 1106898Skais } 1107898Skais 11088348SEric.Yu@Sun.COM sti->sti_kssl_type = kssl_check_proxy(mp, so, 11098348SEric.Yu@Sun.COM &sti->sti_kssl_ent); 11108348SEric.Yu@Sun.COM switch (sti->sti_kssl_type) { 1111898Skais case KSSL_NO_PROXY: 1112898Skais break; 1113898Skais 1114898Skais case KSSL_HAS_PROXY: 1115898Skais mutex_enter(&so->so_lock); 1116898Skais goto skip_transport; 1117898Skais 1118898Skais case KSSL_IS_PROXY: 1119898Skais break; 1120898Skais } 1121898Skais } 1122898Skais 11230Sstevel@tonic-gate error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 11245240Snordmark MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 11250Sstevel@tonic-gate if (error) { 11260Sstevel@tonic-gate eprintsoline(so, error); 11270Sstevel@tonic-gate mutex_enter(&so->so_lock); 11280Sstevel@tonic-gate goto done; 11290Sstevel@tonic-gate } 11300Sstevel@tonic-gate 11310Sstevel@tonic-gate mutex_enter(&so->so_lock); 11320Sstevel@tonic-gate error = sowaitprim(so, PRIM_type, T_BIND_ACK, 11330Sstevel@tonic-gate (t_uscalar_t)sizeof (*bind_ack), &mp, 0); 11340Sstevel@tonic-gate if (error) { 11350Sstevel@tonic-gate eprintsoline(so, error); 11360Sstevel@tonic-gate goto done; 11370Sstevel@tonic-gate } 1138898Skais skip_transport: 11390Sstevel@tonic-gate ASSERT(mp); 11400Sstevel@tonic-gate /* 11410Sstevel@tonic-gate * Even if some TPI message (e.g. T_DISCON_IND) was received in 11420Sstevel@tonic-gate * strsock_proto while the lock was dropped above, the bind 11430Sstevel@tonic-gate * is allowed to complete. 11440Sstevel@tonic-gate */ 11450Sstevel@tonic-gate 11460Sstevel@tonic-gate /* Mark as bound. This will be undone if we detect errors below. */ 11470Sstevel@tonic-gate if (flags & _SOBIND_NOXLATE) { 11480Sstevel@tonic-gate ASSERT(so->so_family == AF_UNIX); 11498348SEric.Yu@Sun.COM sti->sti_faddr_noxlate = 1; 11500Sstevel@tonic-gate } 11510Sstevel@tonic-gate ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND)); 11520Sstevel@tonic-gate so->so_state |= SS_ISBOUND; 11538348SEric.Yu@Sun.COM ASSERT(sti->sti_unbind_mp); 11540Sstevel@tonic-gate 11550Sstevel@tonic-gate /* note that we've already set SS_ACCEPTCONN above */ 11560Sstevel@tonic-gate 11570Sstevel@tonic-gate /* 11580Sstevel@tonic-gate * Recompute addrlen - an unspecied bind sent down an 11590Sstevel@tonic-gate * address of length zero but we expect the appropriate length 11600Sstevel@tonic-gate * in return. 11610Sstevel@tonic-gate */ 11620Sstevel@tonic-gate addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ? 11638348SEric.Yu@Sun.COM sizeof (sti->sti_ux_laddr) : sti->sti_laddr_len); 11640Sstevel@tonic-gate 11650Sstevel@tonic-gate bind_ack = (struct T_bind_ack *)mp->b_rptr; 11660Sstevel@tonic-gate /* 11670Sstevel@tonic-gate * The alignment restriction is really too strict but 11680Sstevel@tonic-gate * we want enough alignment to inspect the fields of 11690Sstevel@tonic-gate * a sockaddr_in. 11700Sstevel@tonic-gate */ 11710Sstevel@tonic-gate addr = sogetoff(mp, bind_ack->ADDR_offset, 11725240Snordmark bind_ack->ADDR_length, 11735240Snordmark __TPI_ALIGN_SIZE); 11740Sstevel@tonic-gate if (addr == NULL) { 11750Sstevel@tonic-gate freemsg(mp); 11760Sstevel@tonic-gate error = EPROTO; 11770Sstevel@tonic-gate eprintsoline(so, error); 11780Sstevel@tonic-gate goto done; 11790Sstevel@tonic-gate } 11800Sstevel@tonic-gate if (!(flags & _SOBIND_UNSPEC)) { 11810Sstevel@tonic-gate /* 11820Sstevel@tonic-gate * Verify that the transport didn't return something we 11830Sstevel@tonic-gate * did not want e.g. an address other than what we asked for. 11840Sstevel@tonic-gate * 11850Sstevel@tonic-gate * NOTE: These checks would go away if/when we switch to 11860Sstevel@tonic-gate * using the new TPI (in which the transport would fail 11870Sstevel@tonic-gate * the request instead of assigning a different address). 11880Sstevel@tonic-gate * 11890Sstevel@tonic-gate * NOTE2: For protocols that we don't know (i.e. any 11900Sstevel@tonic-gate * other than AF_INET6, AF_INET and AF_UNIX), we 11910Sstevel@tonic-gate * cannot know if the transport should be expected to 11920Sstevel@tonic-gate * return the same address as that requested. 11930Sstevel@tonic-gate * 11940Sstevel@tonic-gate * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send 11950Sstevel@tonic-gate * down a T_BIND_REQ. We use O_T_BIND_REQ for others. 11960Sstevel@tonic-gate * 11970Sstevel@tonic-gate * For example, in the case of netatalk it may be 11980Sstevel@tonic-gate * inappropriate for the transport to return the 11990Sstevel@tonic-gate * requested address (as it may have allocated a local 12000Sstevel@tonic-gate * port number in behaviour similar to that of an 12010Sstevel@tonic-gate * AF_INET bind request with a port number of zero). 12020Sstevel@tonic-gate * 12030Sstevel@tonic-gate * Given the definition of O_T_BIND_REQ, where the 12040Sstevel@tonic-gate * transport may bind to an address other than the 12050Sstevel@tonic-gate * requested address, it's not possible to determine 12060Sstevel@tonic-gate * whether a returned address that differs from the 12070Sstevel@tonic-gate * requested address is a reason to fail (because the 12080Sstevel@tonic-gate * requested address was not available) or succeed 12090Sstevel@tonic-gate * (because the transport allocated an appropriate 12100Sstevel@tonic-gate * address and/or port). 12110Sstevel@tonic-gate * 12120Sstevel@tonic-gate * sockfs currently requires that the transport return 12130Sstevel@tonic-gate * the requested address in the T_BIND_ACK, unless 12140Sstevel@tonic-gate * there is code here to allow for any discrepancy. 12150Sstevel@tonic-gate * Such code exists for AF_INET and AF_INET6. 12160Sstevel@tonic-gate * 12170Sstevel@tonic-gate * Netatalk chooses to return the requested address 12180Sstevel@tonic-gate * rather than the (correct) allocated address. This 12190Sstevel@tonic-gate * means that netatalk violates the TPI specification 12200Sstevel@tonic-gate * (and would not function correctly if used from a 12210Sstevel@tonic-gate * TLI application), but it does mean that it works 12220Sstevel@tonic-gate * with sockfs. 12230Sstevel@tonic-gate * 12240Sstevel@tonic-gate * As noted above, using the newer XTI bind primitive 12250Sstevel@tonic-gate * (T_BIND_REQ) in preference to O_T_BIND_REQ would 12260Sstevel@tonic-gate * allow sockfs to be more sure about whether or not 12270Sstevel@tonic-gate * the bind request had succeeded (as transports are 12280Sstevel@tonic-gate * not permitted to bind to a different address than 12290Sstevel@tonic-gate * that requested - they must return failure). 12300Sstevel@tonic-gate * Unfortunately, support for T_BIND_REQ may not be 12310Sstevel@tonic-gate * present in all transport implementations (netatalk, 12320Sstevel@tonic-gate * for example, doesn't have it), making the 12330Sstevel@tonic-gate * transition difficult. 12340Sstevel@tonic-gate */ 12350Sstevel@tonic-gate if (bind_ack->ADDR_length != addrlen) { 12360Sstevel@tonic-gate /* Assumes that the requested address was in use */ 12370Sstevel@tonic-gate freemsg(mp); 12380Sstevel@tonic-gate error = EADDRINUSE; 12390Sstevel@tonic-gate eprintsoline(so, error); 12400Sstevel@tonic-gate goto done; 12410Sstevel@tonic-gate } 12420Sstevel@tonic-gate 12430Sstevel@tonic-gate switch (so->so_family) { 12440Sstevel@tonic-gate case AF_INET6: 12450Sstevel@tonic-gate case AF_INET: { 12460Sstevel@tonic-gate sin_t *rname, *aname; 12470Sstevel@tonic-gate 12480Sstevel@tonic-gate rname = (sin_t *)addr; 12498348SEric.Yu@Sun.COM aname = (sin_t *)sti->sti_laddr_sa; 12500Sstevel@tonic-gate 12510Sstevel@tonic-gate /* 12520Sstevel@tonic-gate * Take advantage of the alignment 12530Sstevel@tonic-gate * of sin_port and sin6_port which fall 12540Sstevel@tonic-gate * in the same place in their data structures. 12550Sstevel@tonic-gate * Just use sin_port for either address family. 12560Sstevel@tonic-gate * 12570Sstevel@tonic-gate * This may become a problem if (heaven forbid) 12580Sstevel@tonic-gate * there's a separate ipv6port_reserved... :-P 12590Sstevel@tonic-gate * 12600Sstevel@tonic-gate * Binding to port 0 has the semantics of letting 12610Sstevel@tonic-gate * the transport bind to any port. 12620Sstevel@tonic-gate * 12630Sstevel@tonic-gate * If the transport is TCP or UDP since we had sent 12640Sstevel@tonic-gate * a T_BIND_REQ we would not get a port other than 12650Sstevel@tonic-gate * what we asked for. 12660Sstevel@tonic-gate */ 12670Sstevel@tonic-gate if (tcp_udp_xport) { 12680Sstevel@tonic-gate /* 12690Sstevel@tonic-gate * Pick up the new port number if we bound to 12700Sstevel@tonic-gate * port 0. 12710Sstevel@tonic-gate */ 12720Sstevel@tonic-gate if (aname->sin_port == 0) 12730Sstevel@tonic-gate aname->sin_port = rname->sin_port; 12748348SEric.Yu@Sun.COM sti->sti_laddr_valid = 1; 12750Sstevel@tonic-gate break; 12760Sstevel@tonic-gate } 12770Sstevel@tonic-gate if (aname->sin_port != 0 && 12780Sstevel@tonic-gate aname->sin_port != rname->sin_port) { 12790Sstevel@tonic-gate freemsg(mp); 12800Sstevel@tonic-gate error = EADDRINUSE; 12810Sstevel@tonic-gate eprintsoline(so, error); 12820Sstevel@tonic-gate goto done; 12830Sstevel@tonic-gate } 12840Sstevel@tonic-gate /* 12850Sstevel@tonic-gate * Pick up the new port number if we bound to port 0. 12860Sstevel@tonic-gate */ 12870Sstevel@tonic-gate aname->sin_port = rname->sin_port; 12880Sstevel@tonic-gate 12890Sstevel@tonic-gate /* 12900Sstevel@tonic-gate * Unfortunately, addresses aren't _quite_ the same. 12910Sstevel@tonic-gate */ 12920Sstevel@tonic-gate if (so->so_family == AF_INET) { 12930Sstevel@tonic-gate if (aname->sin_addr.s_addr != 12940Sstevel@tonic-gate rname->sin_addr.s_addr) { 12950Sstevel@tonic-gate freemsg(mp); 12960Sstevel@tonic-gate error = EADDRNOTAVAIL; 12970Sstevel@tonic-gate eprintsoline(so, error); 12980Sstevel@tonic-gate goto done; 12990Sstevel@tonic-gate } 13000Sstevel@tonic-gate } else { 13010Sstevel@tonic-gate sin6_t *rname6 = (sin6_t *)rname; 13020Sstevel@tonic-gate sin6_t *aname6 = (sin6_t *)aname; 13030Sstevel@tonic-gate 13040Sstevel@tonic-gate if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr, 13050Sstevel@tonic-gate &rname6->sin6_addr)) { 13060Sstevel@tonic-gate freemsg(mp); 13070Sstevel@tonic-gate error = EADDRNOTAVAIL; 13080Sstevel@tonic-gate eprintsoline(so, error); 13090Sstevel@tonic-gate goto done; 13100Sstevel@tonic-gate } 13110Sstevel@tonic-gate } 13120Sstevel@tonic-gate break; 13130Sstevel@tonic-gate } 13140Sstevel@tonic-gate case AF_UNIX: 13158348SEric.Yu@Sun.COM if (bcmp(addr, &sti->sti_ux_laddr, addrlen) != 0) { 13160Sstevel@tonic-gate freemsg(mp); 13170Sstevel@tonic-gate error = EADDRINUSE; 13180Sstevel@tonic-gate eprintsoline(so, error); 13190Sstevel@tonic-gate eprintso(so, 13205240Snordmark ("addrlen %d, addr 0x%x, vp %p\n", 13215240Snordmark addrlen, *((int *)addr), 13228348SEric.Yu@Sun.COM (void *)sti->sti_ux_bound_vp)); 13230Sstevel@tonic-gate goto done; 13240Sstevel@tonic-gate } 13258348SEric.Yu@Sun.COM sti->sti_laddr_valid = 1; 13260Sstevel@tonic-gate break; 13270Sstevel@tonic-gate default: 13280Sstevel@tonic-gate /* 13290Sstevel@tonic-gate * NOTE: This assumes that addresses can be 13300Sstevel@tonic-gate * byte-compared for equivalence. 13310Sstevel@tonic-gate */ 13328348SEric.Yu@Sun.COM if (bcmp(addr, sti->sti_laddr_sa, addrlen) != 0) { 13330Sstevel@tonic-gate freemsg(mp); 13340Sstevel@tonic-gate error = EADDRINUSE; 13350Sstevel@tonic-gate eprintsoline(so, error); 13360Sstevel@tonic-gate goto done; 13370Sstevel@tonic-gate } 13380Sstevel@tonic-gate /* 13398348SEric.Yu@Sun.COM * Don't mark sti_laddr_valid, as we cannot be 13400Sstevel@tonic-gate * sure that the returned address is the real 13410Sstevel@tonic-gate * bound address when talking to an unknown 13420Sstevel@tonic-gate * transport. 13430Sstevel@tonic-gate */ 13440Sstevel@tonic-gate break; 13450Sstevel@tonic-gate } 13460Sstevel@tonic-gate } else { 13470Sstevel@tonic-gate /* 13480Sstevel@tonic-gate * Save for returned address for getsockname. 13490Sstevel@tonic-gate * Needed for unspecific bind unless transport supports 13500Sstevel@tonic-gate * the TI_GETMYNAME ioctl. 13510Sstevel@tonic-gate * Do this for AF_INET{,6} even though they do, as 13520Sstevel@tonic-gate * caching info here is much better performance than 13530Sstevel@tonic-gate * a TPI/STREAMS trip to the transport for getsockname. 13540Sstevel@tonic-gate * Any which can't for some reason _must_ _not_ set 13558348SEric.Yu@Sun.COM * sti_laddr_valid here for the caching version of 13568348SEric.Yu@Sun.COM * getsockname to not break; 13570Sstevel@tonic-gate */ 13580Sstevel@tonic-gate switch (so->so_family) { 13590Sstevel@tonic-gate case AF_UNIX: 13600Sstevel@tonic-gate /* 13610Sstevel@tonic-gate * Record the address bound with the transport 13620Sstevel@tonic-gate * for use by socketpair. 13630Sstevel@tonic-gate */ 13648348SEric.Yu@Sun.COM bcopy(addr, &sti->sti_ux_laddr, addrlen); 13658348SEric.Yu@Sun.COM sti->sti_laddr_valid = 1; 13660Sstevel@tonic-gate break; 13670Sstevel@tonic-gate case AF_INET: 13680Sstevel@tonic-gate case AF_INET6: 13698348SEric.Yu@Sun.COM ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 13708348SEric.Yu@Sun.COM bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 13718348SEric.Yu@Sun.COM sti->sti_laddr_valid = 1; 13720Sstevel@tonic-gate break; 13730Sstevel@tonic-gate default: 13740Sstevel@tonic-gate /* 13758348SEric.Yu@Sun.COM * Don't mark sti_laddr_valid, as we cannot be 13760Sstevel@tonic-gate * sure that the returned address is the real 13770Sstevel@tonic-gate * bound address when talking to an unknown 13780Sstevel@tonic-gate * transport. 13790Sstevel@tonic-gate */ 13800Sstevel@tonic-gate break; 13810Sstevel@tonic-gate } 13820Sstevel@tonic-gate } 13830Sstevel@tonic-gate 13840Sstevel@tonic-gate if (nl7c != NULL) { 13851974Sbrutus /* Register listen()er sonode pointer with NL7C */ 13861974Sbrutus nl7c_listener_addr(nl7c, so); 13870Sstevel@tonic-gate } 13880Sstevel@tonic-gate 13890Sstevel@tonic-gate freemsg(mp); 13900Sstevel@tonic-gate 13910Sstevel@tonic-gate done: 13920Sstevel@tonic-gate if (error) { 13930Sstevel@tonic-gate /* reset state & backlog to values held on entry */ 13940Sstevel@tonic-gate if (clear_acceptconn_on_err == B_TRUE) 13950Sstevel@tonic-gate so->so_state &= ~SS_ACCEPTCONN; 13960Sstevel@tonic-gate if (restore_backlog_on_err == B_TRUE) 13970Sstevel@tonic-gate so->so_backlog = save_so_backlog; 13980Sstevel@tonic-gate 13990Sstevel@tonic-gate if (unbind_on_err && so->so_state & SS_ISBOUND) { 14000Sstevel@tonic-gate int err; 14010Sstevel@tonic-gate 14020Sstevel@tonic-gate err = sotpi_unbind(so, 0); 14030Sstevel@tonic-gate /* LINTED - statement has no consequent: if */ 14040Sstevel@tonic-gate if (err) { 14050Sstevel@tonic-gate eprintsoline(so, error); 14060Sstevel@tonic-gate } else { 14070Sstevel@tonic-gate ASSERT(!(so->so_state & SS_ISBOUND)); 14080Sstevel@tonic-gate } 14090Sstevel@tonic-gate } 14100Sstevel@tonic-gate } 14110Sstevel@tonic-gate if (!(flags & _SOBIND_LOCK_HELD)) { 14120Sstevel@tonic-gate so_unlock_single(so, SOLOCKED); 14130Sstevel@tonic-gate mutex_exit(&so->so_lock); 14140Sstevel@tonic-gate } else { 14150Sstevel@tonic-gate ASSERT(MUTEX_HELD(&so->so_lock)); 14160Sstevel@tonic-gate ASSERT(so->so_flag & SOLOCKED); 14170Sstevel@tonic-gate } 14180Sstevel@tonic-gate return (error); 14190Sstevel@tonic-gate } 14200Sstevel@tonic-gate 14210Sstevel@tonic-gate /* bind the socket */ 1422741Smasputra static int 14230Sstevel@tonic-gate sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 14248348SEric.Yu@Sun.COM int flags, struct cred *cr) 14250Sstevel@tonic-gate { 14260Sstevel@tonic-gate if ((flags & _SOBIND_SOCKETPAIR) == 0) 14278348SEric.Yu@Sun.COM return (sotpi_bindlisten(so, name, namelen, 0, flags, cr)); 14280Sstevel@tonic-gate 14290Sstevel@tonic-gate flags &= ~_SOBIND_SOCKETPAIR; 14308348SEric.Yu@Sun.COM return (sotpi_bindlisten(so, name, namelen, 1, flags, cr)); 14310Sstevel@tonic-gate } 14320Sstevel@tonic-gate 14330Sstevel@tonic-gate /* 14340Sstevel@tonic-gate * Unbind a socket - used when bind() fails, when bind() specifies a NULL 14350Sstevel@tonic-gate * address, or when listen needs to unbind and bind. 14360Sstevel@tonic-gate * If the _SOUNBIND_REBIND flag is specified the addresses are retained 14370Sstevel@tonic-gate * so that a sobind can pick them up. 14380Sstevel@tonic-gate */ 14390Sstevel@tonic-gate static int 14400Sstevel@tonic-gate sotpi_unbind(struct sonode *so, int flags) 14410Sstevel@tonic-gate { 14420Sstevel@tonic-gate struct T_unbind_req unbind_req; 14430Sstevel@tonic-gate int error = 0; 14440Sstevel@tonic-gate mblk_t *mp; 14458348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 14460Sstevel@tonic-gate 14470Sstevel@tonic-gate dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n", 14487240Srh87107 (void *)so, flags, pr_state(so->so_state, so->so_mode))); 14490Sstevel@tonic-gate 14500Sstevel@tonic-gate ASSERT(MUTEX_HELD(&so->so_lock)); 14510Sstevel@tonic-gate ASSERT(so->so_flag & SOLOCKED); 14520Sstevel@tonic-gate 14530Sstevel@tonic-gate if (!(so->so_state & SS_ISBOUND)) { 14540Sstevel@tonic-gate error = EINVAL; 14550Sstevel@tonic-gate eprintsoline(so, error); 14560Sstevel@tonic-gate goto done; 14570Sstevel@tonic-gate } 14580Sstevel@tonic-gate 14590Sstevel@tonic-gate mutex_exit(&so->so_lock); 14600Sstevel@tonic-gate 14610Sstevel@tonic-gate /* 14620Sstevel@tonic-gate * Flush the read and write side (except stream head read queue) 14630Sstevel@tonic-gate * and send down T_UNBIND_REQ. 14640Sstevel@tonic-gate */ 14650Sstevel@tonic-gate (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 14660Sstevel@tonic-gate 14670Sstevel@tonic-gate unbind_req.PRIM_type = T_UNBIND_REQ; 14680Sstevel@tonic-gate mp = soallocproto1(&unbind_req, sizeof (unbind_req), 14698778SErik.Nordmark@Sun.COM 0, _ALLOC_SLEEP, CRED()); 14700Sstevel@tonic-gate error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 14715240Snordmark MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 14720Sstevel@tonic-gate mutex_enter(&so->so_lock); 14730Sstevel@tonic-gate if (error) { 14740Sstevel@tonic-gate eprintsoline(so, error); 14750Sstevel@tonic-gate goto done; 14760Sstevel@tonic-gate } 14770Sstevel@tonic-gate 14780Sstevel@tonic-gate error = sowaitokack(so, T_UNBIND_REQ); 14790Sstevel@tonic-gate if (error) { 14800Sstevel@tonic-gate eprintsoline(so, error); 14810Sstevel@tonic-gate goto done; 14820Sstevel@tonic-gate } 14830Sstevel@tonic-gate 14840Sstevel@tonic-gate /* 14850Sstevel@tonic-gate * Even if some TPI message (e.g. T_DISCON_IND) was received in 14860Sstevel@tonic-gate * strsock_proto while the lock was dropped above, the unbind 14870Sstevel@tonic-gate * is allowed to complete. 14880Sstevel@tonic-gate */ 14890Sstevel@tonic-gate if (!(flags & _SOUNBIND_REBIND)) { 14900Sstevel@tonic-gate /* 14910Sstevel@tonic-gate * Clear out bound address. 14920Sstevel@tonic-gate */ 14930Sstevel@tonic-gate vnode_t *vp; 14940Sstevel@tonic-gate 14958348SEric.Yu@Sun.COM if ((vp = sti->sti_ux_bound_vp) != NULL) { 1496898Skais 1497898Skais /* Undo any SSL proxy setup */ 1498898Skais if ((so->so_family == AF_INET || 1499898Skais so->so_family == AF_INET6) && 1500898Skais (so->so_type == SOCK_STREAM) && 15018348SEric.Yu@Sun.COM (sti->sti_kssl_ent != NULL)) { 15028348SEric.Yu@Sun.COM kssl_release_ent(sti->sti_kssl_ent, so, 15038348SEric.Yu@Sun.COM sti->sti_kssl_type); 15048348SEric.Yu@Sun.COM sti->sti_kssl_ent = NULL; 15058348SEric.Yu@Sun.COM sti->sti_kssl_type = KSSL_NO_PROXY; 1506898Skais } 15078348SEric.Yu@Sun.COM sti->sti_ux_bound_vp = NULL; 15080Sstevel@tonic-gate vn_rele_stream(vp); 15090Sstevel@tonic-gate } 15100Sstevel@tonic-gate /* Clear out address */ 15118348SEric.Yu@Sun.COM sti->sti_laddr_len = 0; 15128348SEric.Yu@Sun.COM } 15138348SEric.Yu@Sun.COM so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN); 15148348SEric.Yu@Sun.COM sti->sti_laddr_valid = 0; 15151974Sbrutus 15160Sstevel@tonic-gate done: 1517898Skais 15180Sstevel@tonic-gate /* If the caller held the lock don't release it here */ 15190Sstevel@tonic-gate ASSERT(MUTEX_HELD(&so->so_lock)); 15200Sstevel@tonic-gate ASSERT(so->so_flag & SOLOCKED); 15210Sstevel@tonic-gate 15220Sstevel@tonic-gate return (error); 15230Sstevel@tonic-gate } 15240Sstevel@tonic-gate 15250Sstevel@tonic-gate /* 15260Sstevel@tonic-gate * listen on the socket. 15270Sstevel@tonic-gate * For TPI conforming transports this has to first unbind with the transport 15280Sstevel@tonic-gate * and then bind again using the new backlog. 15290Sstevel@tonic-gate */ 15308348SEric.Yu@Sun.COM /* ARGSUSED */ 15310Sstevel@tonic-gate int 15328348SEric.Yu@Sun.COM sotpi_listen(struct sonode *so, int backlog, struct cred *cr) 15330Sstevel@tonic-gate { 15340Sstevel@tonic-gate int error = 0; 15358348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 15360Sstevel@tonic-gate 15370Sstevel@tonic-gate dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n", 15387240Srh87107 (void *)so, backlog, pr_state(so->so_state, so->so_mode))); 15390Sstevel@tonic-gate 15408348SEric.Yu@Sun.COM if (sti->sti_serv_type == T_CLTS) 15410Sstevel@tonic-gate return (EOPNOTSUPP); 15420Sstevel@tonic-gate 15430Sstevel@tonic-gate /* 15440Sstevel@tonic-gate * If the socket is ready to accept connections already, then 15450Sstevel@tonic-gate * return without doing anything. This avoids a problem where 15460Sstevel@tonic-gate * a second listen() call fails if a connection is pending and 15470Sstevel@tonic-gate * leaves the socket unbound. Only when we are not unbinding 15480Sstevel@tonic-gate * with the transport can we safely increase the backlog. 15490Sstevel@tonic-gate */ 15500Sstevel@tonic-gate if (so->so_state & SS_ACCEPTCONN && 15510Sstevel@tonic-gate !((so->so_family == AF_INET || so->so_family == AF_INET6) && 15525240Snordmark /*CONSTCOND*/ 15535240Snordmark !solisten_tpi_tcp)) 15540Sstevel@tonic-gate return (0); 15550Sstevel@tonic-gate 15560Sstevel@tonic-gate if (so->so_state & SS_ISCONNECTED) 15570Sstevel@tonic-gate return (EINVAL); 15580Sstevel@tonic-gate 15590Sstevel@tonic-gate mutex_enter(&so->so_lock); 15600Sstevel@tonic-gate so_lock_single(so); /* Set SOLOCKED */ 15610Sstevel@tonic-gate 15620Sstevel@tonic-gate /* 15630Sstevel@tonic-gate * If the listen doesn't change the backlog we do nothing. 15640Sstevel@tonic-gate * This avoids an EPROTO error from the transport. 15650Sstevel@tonic-gate */ 15660Sstevel@tonic-gate if ((so->so_state & SS_ACCEPTCONN) && 15670Sstevel@tonic-gate so->so_backlog == backlog) 15680Sstevel@tonic-gate goto done; 15690Sstevel@tonic-gate 15700Sstevel@tonic-gate if (!(so->so_state & SS_ISBOUND)) { 15710Sstevel@tonic-gate /* 15720Sstevel@tonic-gate * Must have been explicitly bound in the UNIX domain. 15730Sstevel@tonic-gate */ 15740Sstevel@tonic-gate if (so->so_family == AF_UNIX) { 15750Sstevel@tonic-gate error = EINVAL; 15760Sstevel@tonic-gate goto done; 15770Sstevel@tonic-gate } 15780Sstevel@tonic-gate error = sotpi_bindlisten(so, NULL, 0, backlog, 15798348SEric.Yu@Sun.COM _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr); 15800Sstevel@tonic-gate } else if (backlog > 0) { 15810Sstevel@tonic-gate /* 15820Sstevel@tonic-gate * AF_INET{,6} hack to avoid losing the port. 15830Sstevel@tonic-gate * Assumes that all AF_INET{,6} transports can handle a 15840Sstevel@tonic-gate * O_T_BIND_REQ with a non-zero CONIND_number when the TPI 15850Sstevel@tonic-gate * has already bound thus it is possible to avoid the unbind. 15860Sstevel@tonic-gate */ 15870Sstevel@tonic-gate if (!((so->so_family == AF_INET || so->so_family == AF_INET6) && 15880Sstevel@tonic-gate /*CONSTCOND*/ 15890Sstevel@tonic-gate !solisten_tpi_tcp)) { 15900Sstevel@tonic-gate error = sotpi_unbind(so, _SOUNBIND_REBIND); 15910Sstevel@tonic-gate if (error) 15920Sstevel@tonic-gate goto done; 15930Sstevel@tonic-gate } 15940Sstevel@tonic-gate error = sotpi_bindlisten(so, NULL, 0, backlog, 15958348SEric.Yu@Sun.COM _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr); 15960Sstevel@tonic-gate } else { 15970Sstevel@tonic-gate so->so_state |= SS_ACCEPTCONN; 15980Sstevel@tonic-gate so->so_backlog = backlog; 15990Sstevel@tonic-gate } 16000Sstevel@tonic-gate if (error) 16010Sstevel@tonic-gate goto done; 16020Sstevel@tonic-gate ASSERT(so->so_state & SS_ACCEPTCONN); 16030Sstevel@tonic-gate done: 16040Sstevel@tonic-gate so_unlock_single(so, SOLOCKED); 16050Sstevel@tonic-gate mutex_exit(&so->so_lock); 16060Sstevel@tonic-gate return (error); 16070Sstevel@tonic-gate } 16080Sstevel@tonic-gate 16090Sstevel@tonic-gate /* 16100Sstevel@tonic-gate * Disconnect either a specified seqno or all (-1). 16110Sstevel@tonic-gate * The former is used on listening sockets only. 16120Sstevel@tonic-gate * 16130Sstevel@tonic-gate * When seqno == -1 sodisconnect could call sotpi_unbind. However, 16140Sstevel@tonic-gate * the current use of sodisconnect(seqno == -1) is only for shutdown 16150Sstevel@tonic-gate * so there is no point (and potentially incorrect) to unbind. 16160Sstevel@tonic-gate */ 16178348SEric.Yu@Sun.COM static int 16180Sstevel@tonic-gate sodisconnect(struct sonode *so, t_scalar_t seqno, int flags) 16190Sstevel@tonic-gate { 16200Sstevel@tonic-gate struct T_discon_req discon_req; 16210Sstevel@tonic-gate int error = 0; 16220Sstevel@tonic-gate mblk_t *mp; 16230Sstevel@tonic-gate 16240Sstevel@tonic-gate dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n", 16257240Srh87107 (void *)so, seqno, flags, pr_state(so->so_state, so->so_mode))); 16260Sstevel@tonic-gate 16270Sstevel@tonic-gate if (!(flags & _SODISCONNECT_LOCK_HELD)) { 16280Sstevel@tonic-gate mutex_enter(&so->so_lock); 16290Sstevel@tonic-gate so_lock_single(so); /* Set SOLOCKED */ 16300Sstevel@tonic-gate } else { 16310Sstevel@tonic-gate ASSERT(MUTEX_HELD(&so->so_lock)); 16320Sstevel@tonic-gate ASSERT(so->so_flag & SOLOCKED); 16330Sstevel@tonic-gate } 16340Sstevel@tonic-gate 16350Sstevel@tonic-gate if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) { 16360Sstevel@tonic-gate error = EINVAL; 16370Sstevel@tonic-gate eprintsoline(so, error); 16380Sstevel@tonic-gate goto done; 16390Sstevel@tonic-gate } 16400Sstevel@tonic-gate 16410Sstevel@tonic-gate mutex_exit(&so->so_lock); 16420Sstevel@tonic-gate /* 16430Sstevel@tonic-gate * Flush the write side (unless this is a listener) 16440Sstevel@tonic-gate * and then send down a T_DISCON_REQ. 16450Sstevel@tonic-gate * (Don't flush on listener since it could flush {O_}T_CONN_RES 16460Sstevel@tonic-gate * and other messages.) 16470Sstevel@tonic-gate */ 16480Sstevel@tonic-gate if (!(so->so_state & SS_ACCEPTCONN)) 16490Sstevel@tonic-gate (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW); 16500Sstevel@tonic-gate 16510Sstevel@tonic-gate discon_req.PRIM_type = T_DISCON_REQ; 16520Sstevel@tonic-gate discon_req.SEQ_number = seqno; 16530Sstevel@tonic-gate mp = soallocproto1(&discon_req, sizeof (discon_req), 16548778SErik.Nordmark@Sun.COM 0, _ALLOC_SLEEP, CRED()); 16550Sstevel@tonic-gate error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 16565240Snordmark MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 16570Sstevel@tonic-gate mutex_enter(&so->so_lock); 16580Sstevel@tonic-gate if (error) { 16590Sstevel@tonic-gate eprintsoline(so, error); 16600Sstevel@tonic-gate goto done; 16610Sstevel@tonic-gate } 16620Sstevel@tonic-gate 16630Sstevel@tonic-gate error = sowaitokack(so, T_DISCON_REQ); 16640Sstevel@tonic-gate if (error) { 16650Sstevel@tonic-gate eprintsoline(so, error); 16660Sstevel@tonic-gate goto done; 16670Sstevel@tonic-gate } 16680Sstevel@tonic-gate /* 16690Sstevel@tonic-gate * Even if some TPI message (e.g. T_DISCON_IND) was received in 16700Sstevel@tonic-gate * strsock_proto while the lock was dropped above, the disconnect 16710Sstevel@tonic-gate * is allowed to complete. However, it is not possible to 16720Sstevel@tonic-gate * assert that SS_ISCONNECTED|SS_ISCONNECTING are set. 16730Sstevel@tonic-gate */ 16748348SEric.Yu@Sun.COM so->so_state &= ~(SS_ISCONNECTED|SS_ISCONNECTING); 16758348SEric.Yu@Sun.COM SOTOTPI(so)->sti_laddr_valid = 0; 16768348SEric.Yu@Sun.COM SOTOTPI(so)->sti_faddr_valid = 0; 16770Sstevel@tonic-gate done: 16780Sstevel@tonic-gate if (!(flags & _SODISCONNECT_LOCK_HELD)) { 16790Sstevel@tonic-gate so_unlock_single(so, SOLOCKED); 16800Sstevel@tonic-gate mutex_exit(&so->so_lock); 16810Sstevel@tonic-gate } else { 16820Sstevel@tonic-gate /* If the caller held the lock don't release it here */ 16830Sstevel@tonic-gate ASSERT(MUTEX_HELD(&so->so_lock)); 16840Sstevel@tonic-gate ASSERT(so->so_flag & SOLOCKED); 16850Sstevel@tonic-gate } 16860Sstevel@tonic-gate return (error); 16870Sstevel@tonic-gate } 16880Sstevel@tonic-gate 16898348SEric.Yu@Sun.COM /* ARGSUSED */ 16900Sstevel@tonic-gate int 16918348SEric.Yu@Sun.COM sotpi_accept(struct sonode *so, int fflag, struct cred *cr, 16928348SEric.Yu@Sun.COM struct sonode **nsop) 16930Sstevel@tonic-gate { 16940Sstevel@tonic-gate struct T_conn_ind *conn_ind; 16950Sstevel@tonic-gate struct T_conn_res *conn_res; 16960Sstevel@tonic-gate int error = 0; 16974379Sja97890 mblk_t *mp, *ctxmp, *ack_mp; 16980Sstevel@tonic-gate struct sonode *nso; 16990Sstevel@tonic-gate vnode_t *nvp; 17000Sstevel@tonic-gate void *src; 17010Sstevel@tonic-gate t_uscalar_t srclen; 17020Sstevel@tonic-gate void *opt; 17030Sstevel@tonic-gate t_uscalar_t optlen; 17040Sstevel@tonic-gate t_scalar_t PRIM_type; 17050Sstevel@tonic-gate t_scalar_t SEQ_number; 17064379Sja97890 size_t sinlen; 17078348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 17088348SEric.Yu@Sun.COM sotpi_info_t *nsti; 17090Sstevel@tonic-gate 17100Sstevel@tonic-gate dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n", 17117240Srh87107 (void *)so, fflag, (void *)nsop, 17127240Srh87107 pr_state(so->so_state, so->so_mode))); 17130Sstevel@tonic-gate 17140Sstevel@tonic-gate /* 17150Sstevel@tonic-gate * Defer single-threading the accepting socket until 17160Sstevel@tonic-gate * the T_CONN_IND has been received and parsed and the 17170Sstevel@tonic-gate * new sonode has been opened. 17180Sstevel@tonic-gate */ 17190Sstevel@tonic-gate 17200Sstevel@tonic-gate /* Check that we are not already connected */ 17210Sstevel@tonic-gate if ((so->so_state & SS_ACCEPTCONN) == 0) 17220Sstevel@tonic-gate goto conn_bad; 17230Sstevel@tonic-gate again: 17240Sstevel@tonic-gate if ((error = sowaitconnind(so, fflag, &mp)) != 0) 17250Sstevel@tonic-gate goto e_bad; 17260Sstevel@tonic-gate 17278348SEric.Yu@Sun.COM ASSERT(mp != NULL); 17280Sstevel@tonic-gate conn_ind = (struct T_conn_ind *)mp->b_rptr; 1729898Skais ctxmp = mp->b_cont; 1730898Skais 17310Sstevel@tonic-gate /* 17320Sstevel@tonic-gate * Save SEQ_number for error paths. 17330Sstevel@tonic-gate */ 17340Sstevel@tonic-gate SEQ_number = conn_ind->SEQ_number; 17350Sstevel@tonic-gate 17360Sstevel@tonic-gate srclen = conn_ind->SRC_length; 17370Sstevel@tonic-gate src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1); 17380Sstevel@tonic-gate if (src == NULL) { 17390Sstevel@tonic-gate error = EPROTO; 17400Sstevel@tonic-gate freemsg(mp); 17410Sstevel@tonic-gate eprintsoline(so, error); 17420Sstevel@tonic-gate goto disconnect_unlocked; 17430Sstevel@tonic-gate } 17440Sstevel@tonic-gate optlen = conn_ind->OPT_length; 17450Sstevel@tonic-gate switch (so->so_family) { 17460Sstevel@tonic-gate case AF_INET: 17470Sstevel@tonic-gate case AF_INET6: 17488348SEric.Yu@Sun.COM if ((optlen == sizeof (intptr_t)) && (sti->sti_direct != 0)) { 17490Sstevel@tonic-gate bcopy(mp->b_rptr + conn_ind->OPT_offset, 17500Sstevel@tonic-gate &opt, conn_ind->OPT_length); 17510Sstevel@tonic-gate } else { 17520Sstevel@tonic-gate /* 17530Sstevel@tonic-gate * The transport (in this case TCP) hasn't sent up 17540Sstevel@tonic-gate * a pointer to an instance for the accept fast-path. 17550Sstevel@tonic-gate * Disable fast-path completely because the call to 17560Sstevel@tonic-gate * sotpi_create() below would otherwise create an 17570Sstevel@tonic-gate * incomplete TCP instance, which would lead to 17580Sstevel@tonic-gate * problems when sockfs sends a normal T_CONN_RES 17590Sstevel@tonic-gate * message down the new stream. 17600Sstevel@tonic-gate */ 17618348SEric.Yu@Sun.COM if (sti->sti_direct) { 1762741Smasputra int rval; 1763741Smasputra /* 1764741Smasputra * For consistency we inform tcp to disable 1765741Smasputra * direct interface on the listener, though 1766741Smasputra * we can certainly live without doing this 1767741Smasputra * because no data will ever travel upstream 1768741Smasputra * on the listening socket. 1769741Smasputra */ 17708348SEric.Yu@Sun.COM sti->sti_direct = 0; 1771741Smasputra (void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK, 17728778SErik.Nordmark@Sun.COM 0, 0, K_TO_K, cr, &rval); 1773741Smasputra } 17740Sstevel@tonic-gate opt = NULL; 17750Sstevel@tonic-gate optlen = 0; 17760Sstevel@tonic-gate } 17770Sstevel@tonic-gate break; 17780Sstevel@tonic-gate case AF_UNIX: 17790Sstevel@tonic-gate default: 17800Sstevel@tonic-gate if (optlen != 0) { 17810Sstevel@tonic-gate opt = sogetoff(mp, conn_ind->OPT_offset, optlen, 17820Sstevel@tonic-gate __TPI_ALIGN_SIZE); 17830Sstevel@tonic-gate if (opt == NULL) { 17840Sstevel@tonic-gate error = EPROTO; 17850Sstevel@tonic-gate freemsg(mp); 17860Sstevel@tonic-gate eprintsoline(so, error); 17870Sstevel@tonic-gate goto disconnect_unlocked; 17880Sstevel@tonic-gate } 17890Sstevel@tonic-gate } 17900Sstevel@tonic-gate if (so->so_family == AF_UNIX) { 17918348SEric.Yu@Sun.COM if (!sti->sti_faddr_noxlate) { 17920Sstevel@tonic-gate src = NULL; 17930Sstevel@tonic-gate srclen = 0; 17940Sstevel@tonic-gate } 17950Sstevel@tonic-gate /* Extract src address from options */ 17960Sstevel@tonic-gate if (optlen != 0) 17970Sstevel@tonic-gate so_getopt_srcaddr(opt, optlen, &src, &srclen); 17980Sstevel@tonic-gate } 17990Sstevel@tonic-gate break; 18000Sstevel@tonic-gate } 18010Sstevel@tonic-gate 18020Sstevel@tonic-gate /* 18030Sstevel@tonic-gate * Create the new socket. 18040Sstevel@tonic-gate */ 18058348SEric.Yu@Sun.COM nso = socket_newconn(so, NULL, NULL, SOCKET_SLEEP, &error); 18060Sstevel@tonic-gate if (nso == NULL) { 18070Sstevel@tonic-gate ASSERT(error != 0); 18080Sstevel@tonic-gate /* 18090Sstevel@tonic-gate * Accept can not fail with ENOBUFS. sotpi_create 18100Sstevel@tonic-gate * sleeps waiting for memory until a signal is caught 18110Sstevel@tonic-gate * so return EINTR. 18120Sstevel@tonic-gate */ 18130Sstevel@tonic-gate freemsg(mp); 18140Sstevel@tonic-gate if (error == ENOBUFS) 18150Sstevel@tonic-gate error = EINTR; 18160Sstevel@tonic-gate goto e_disc_unl; 18170Sstevel@tonic-gate } 18180Sstevel@tonic-gate nvp = SOTOV(nso); 18198348SEric.Yu@Sun.COM nsti = SOTOTPI(nso); 18200Sstevel@tonic-gate 1821898Skais /* 1822898Skais * If the transport sent up an SSL connection context, then attach 1823898Skais * it the new socket, and set the (sd_wputdatafunc)() and 1824898Skais * (sd_rputdatafunc)() stream head hooks to intercept and process 1825898Skais * SSL records. 1826898Skais */ 1827898Skais if (ctxmp != NULL) { 1828898Skais /* 1829898Skais * This kssl_ctx_t is already held for us by the transport. 1830898Skais * So, we don't need to do a kssl_hold_ctx() here. 1831898Skais */ 18328348SEric.Yu@Sun.COM nsti->sti_kssl_ctx = *((kssl_ctx_t *)ctxmp->b_rptr); 1833898Skais freemsg(ctxmp); 1834898Skais mp->b_cont = NULL; 1835898Skais strsetrwputdatahooks(nvp, strsock_kssl_input, 1836898Skais strsock_kssl_output); 1837898Skais } 18380Sstevel@tonic-gate #ifdef DEBUG 18390Sstevel@tonic-gate /* 18400Sstevel@tonic-gate * SO_DEBUG is used to trigger the dprint* and eprint* macros thus 18410Sstevel@tonic-gate * it's inherited early to allow debugging of the accept code itself. 18420Sstevel@tonic-gate */ 18430Sstevel@tonic-gate nso->so_options |= so->so_options & SO_DEBUG; 18440Sstevel@tonic-gate #endif /* DEBUG */ 18450Sstevel@tonic-gate 18460Sstevel@tonic-gate /* 18470Sstevel@tonic-gate * Save the SRC address from the T_CONN_IND 18480Sstevel@tonic-gate * for getpeername to work on AF_UNIX and on transports that do not 18490Sstevel@tonic-gate * support TI_GETPEERNAME. 18500Sstevel@tonic-gate * 18510Sstevel@tonic-gate * NOTE: AF_UNIX NUL termination is ensured by the sender's 18520Sstevel@tonic-gate * copyin_name(). 18530Sstevel@tonic-gate */ 18548348SEric.Yu@Sun.COM if (srclen > (t_uscalar_t)nsti->sti_faddr_maxlen) { 18550Sstevel@tonic-gate error = EINVAL; 18560Sstevel@tonic-gate freemsg(mp); 18570Sstevel@tonic-gate eprintsoline(so, error); 18580Sstevel@tonic-gate goto disconnect_vp_unlocked; 18590Sstevel@tonic-gate } 18608348SEric.Yu@Sun.COM nsti->sti_faddr_len = (socklen_t)srclen; 18618348SEric.Yu@Sun.COM ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 18628348SEric.Yu@Sun.COM bcopy(src, nsti->sti_faddr_sa, srclen); 18638348SEric.Yu@Sun.COM nsti->sti_faddr_valid = 1; 18640Sstevel@tonic-gate 18658778SErik.Nordmark@Sun.COM /* 18668778SErik.Nordmark@Sun.COM * Record so_peercred and so_cpid from a cred in the T_CONN_IND. 18678778SErik.Nordmark@Sun.COM */ 18680Sstevel@tonic-gate if ((DB_REF(mp) > 1) || MBLKSIZE(mp) < 18690Sstevel@tonic-gate (sizeof (struct T_conn_res) + sizeof (intptr_t))) { 18708778SErik.Nordmark@Sun.COM cred_t *cr; 18718778SErik.Nordmark@Sun.COM pid_t cpid; 18728778SErik.Nordmark@Sun.COM 18738778SErik.Nordmark@Sun.COM cr = msg_getcred(mp, &cpid); 18748778SErik.Nordmark@Sun.COM if (cr != NULL) { 18750Sstevel@tonic-gate crhold(cr); 18760Sstevel@tonic-gate nso->so_peercred = cr; 18778778SErik.Nordmark@Sun.COM nso->so_cpid = cpid; 18780Sstevel@tonic-gate } 18790Sstevel@tonic-gate freemsg(mp); 18800Sstevel@tonic-gate 18810Sstevel@tonic-gate mp = soallocproto1(NULL, sizeof (struct T_conn_res) + 18828965SAnders.Persson@Sun.COM sizeof (intptr_t), 0, _ALLOC_INTR, cr); 18830Sstevel@tonic-gate if (mp == NULL) { 18840Sstevel@tonic-gate /* 18850Sstevel@tonic-gate * Accept can not fail with ENOBUFS. 18860Sstevel@tonic-gate * A signal was caught so return EINTR. 18870Sstevel@tonic-gate */ 18880Sstevel@tonic-gate error = EINTR; 18890Sstevel@tonic-gate eprintsoline(so, error); 18900Sstevel@tonic-gate goto disconnect_vp_unlocked; 18910Sstevel@tonic-gate } 18920Sstevel@tonic-gate conn_res = (struct T_conn_res *)mp->b_rptr; 18930Sstevel@tonic-gate } else { 18948778SErik.Nordmark@Sun.COM /* 18958778SErik.Nordmark@Sun.COM * For efficency reasons we use msg_extractcred; no crhold 18968778SErik.Nordmark@Sun.COM * needed since db_credp is cleared (i.e., we move the cred 18978778SErik.Nordmark@Sun.COM * from the message to so_peercred. 18988778SErik.Nordmark@Sun.COM */ 18998778SErik.Nordmark@Sun.COM nso->so_peercred = msg_extractcred(mp, &nso->so_cpid); 19000Sstevel@tonic-gate 19010Sstevel@tonic-gate mp->b_rptr = DB_BASE(mp); 19020Sstevel@tonic-gate conn_res = (struct T_conn_res *)mp->b_rptr; 19030Sstevel@tonic-gate mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res); 19048965SAnders.Persson@Sun.COM 19058965SAnders.Persson@Sun.COM mblk_setcred(mp, cr, curproc->p_pid); 19060Sstevel@tonic-gate } 19070Sstevel@tonic-gate 19080Sstevel@tonic-gate /* 19090Sstevel@tonic-gate * New socket must be bound at least in sockfs and, except for AF_INET, 19100Sstevel@tonic-gate * (or AF_INET6) it also has to be bound in the transport provider. 19114379Sja97890 * We set the local address in the sonode from the T_OK_ACK of the 19124379Sja97890 * T_CONN_RES. For this reason the address we bind to here isn't 19134379Sja97890 * important. 19140Sstevel@tonic-gate */ 19150Sstevel@tonic-gate if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) && 19160Sstevel@tonic-gate /*CONSTCOND*/ 19170Sstevel@tonic-gate nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) { 19180Sstevel@tonic-gate /* 19190Sstevel@tonic-gate * Optimization for AF_INET{,6} transports 19200Sstevel@tonic-gate * that can handle a T_CONN_RES without being bound. 19210Sstevel@tonic-gate */ 19220Sstevel@tonic-gate mutex_enter(&nso->so_lock); 19230Sstevel@tonic-gate so_automatic_bind(nso); 19240Sstevel@tonic-gate mutex_exit(&nso->so_lock); 19250Sstevel@tonic-gate } else { 19260Sstevel@tonic-gate /* Perform NULL bind with the transport provider. */ 19278348SEric.Yu@Sun.COM if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC, 19288348SEric.Yu@Sun.COM cr)) != 0) { 19290Sstevel@tonic-gate ASSERT(error != ENOBUFS); 19300Sstevel@tonic-gate freemsg(mp); 19310Sstevel@tonic-gate eprintsoline(nso, error); 19320Sstevel@tonic-gate goto disconnect_vp_unlocked; 19330Sstevel@tonic-gate } 19340Sstevel@tonic-gate } 19350Sstevel@tonic-gate 19360Sstevel@tonic-gate /* 19370Sstevel@tonic-gate * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES 19380Sstevel@tonic-gate * so that any data arriving on the new socket will cause the 19390Sstevel@tonic-gate * appropriate signals to be delivered for the new socket. 19400Sstevel@tonic-gate * 19410Sstevel@tonic-gate * No other thread (except strsock_proto and strsock_misc) 19420Sstevel@tonic-gate * can access the new socket thus we relax the locking. 19430Sstevel@tonic-gate */ 19440Sstevel@tonic-gate nso->so_pgrp = so->so_pgrp; 19458348SEric.Yu@Sun.COM nso->so_state |= so->so_state & SS_ASYNC; 19468348SEric.Yu@Sun.COM nsti->sti_faddr_noxlate = sti->sti_faddr_noxlate; 19470Sstevel@tonic-gate 19480Sstevel@tonic-gate if (nso->so_pgrp != 0) { 19498778SErik.Nordmark@Sun.COM if ((error = so_set_events(nso, nvp, cr)) != 0) { 19500Sstevel@tonic-gate eprintsoline(nso, error); 19510Sstevel@tonic-gate error = 0; 19520Sstevel@tonic-gate nso->so_pgrp = 0; 19530Sstevel@tonic-gate } 19540Sstevel@tonic-gate } 19550Sstevel@tonic-gate 19560Sstevel@tonic-gate /* 19570Sstevel@tonic-gate * Make note of the socket level options. TCP and IP level options 19580Sstevel@tonic-gate * are already inherited. We could do all this after accept is 19590Sstevel@tonic-gate * successful but doing it here simplifies code and no harm done 19600Sstevel@tonic-gate * for error case. 19610Sstevel@tonic-gate */ 19620Sstevel@tonic-gate nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE| 19630Sstevel@tonic-gate SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 19640Sstevel@tonic-gate SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 19650Sstevel@tonic-gate nso->so_sndbuf = so->so_sndbuf; 19660Sstevel@tonic-gate nso->so_rcvbuf = so->so_rcvbuf; 19670Sstevel@tonic-gate if (nso->so_options & SO_LINGER) 19680Sstevel@tonic-gate nso->so_linger = so->so_linger; 19690Sstevel@tonic-gate 19708348SEric.Yu@Sun.COM /* 19718348SEric.Yu@Sun.COM * Note that the following sti_direct code path should be 19728348SEric.Yu@Sun.COM * removed once we are confident that the direct sockets 19738348SEric.Yu@Sun.COM * do not result in any degradation. 19748348SEric.Yu@Sun.COM */ 19758348SEric.Yu@Sun.COM if (sti->sti_direct) { 19760Sstevel@tonic-gate 19770Sstevel@tonic-gate ASSERT(opt != NULL); 19780Sstevel@tonic-gate 19790Sstevel@tonic-gate conn_res->OPT_length = optlen; 19800Sstevel@tonic-gate conn_res->OPT_offset = MBLKL(mp); 19810Sstevel@tonic-gate bcopy(&opt, mp->b_wptr, optlen); 19820Sstevel@tonic-gate mp->b_wptr += optlen; 19830Sstevel@tonic-gate conn_res->PRIM_type = T_CONN_RES; 19840Sstevel@tonic-gate conn_res->ACCEPTOR_id = 0; 19850Sstevel@tonic-gate PRIM_type = T_CONN_RES; 19860Sstevel@tonic-gate 19870Sstevel@tonic-gate /* Send down the T_CONN_RES on acceptor STREAM */ 19880Sstevel@tonic-gate error = kstrputmsg(SOTOV(nso), mp, NULL, 19890Sstevel@tonic-gate 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 19900Sstevel@tonic-gate if (error) { 19910Sstevel@tonic-gate mutex_enter(&so->so_lock); 19920Sstevel@tonic-gate so_lock_single(so); 19930Sstevel@tonic-gate eprintsoline(so, error); 19940Sstevel@tonic-gate goto disconnect_vp; 19950Sstevel@tonic-gate } 19960Sstevel@tonic-gate mutex_enter(&nso->so_lock); 19970Sstevel@tonic-gate error = sowaitprim(nso, T_CONN_RES, T_OK_ACK, 19980Sstevel@tonic-gate (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 19990Sstevel@tonic-gate if (error) { 20000Sstevel@tonic-gate mutex_exit(&nso->so_lock); 20010Sstevel@tonic-gate mutex_enter(&so->so_lock); 20020Sstevel@tonic-gate so_lock_single(so); 20030Sstevel@tonic-gate eprintsoline(so, error); 20040Sstevel@tonic-gate goto disconnect_vp; 20050Sstevel@tonic-gate } 20060Sstevel@tonic-gate if (nso->so_family == AF_INET) { 20070Sstevel@tonic-gate sin_t *sin; 20080Sstevel@tonic-gate 20090Sstevel@tonic-gate sin = (sin_t *)(ack_mp->b_rptr + 20100Sstevel@tonic-gate sizeof (struct T_ok_ack)); 20118348SEric.Yu@Sun.COM bcopy(sin, nsti->sti_laddr_sa, sizeof (sin_t)); 20128348SEric.Yu@Sun.COM nsti->sti_laddr_len = sizeof (sin_t); 20130Sstevel@tonic-gate } else { 20140Sstevel@tonic-gate sin6_t *sin6; 20150Sstevel@tonic-gate 20160Sstevel@tonic-gate sin6 = (sin6_t *)(ack_mp->b_rptr + 20170Sstevel@tonic-gate sizeof (struct T_ok_ack)); 20188348SEric.Yu@Sun.COM bcopy(sin6, nsti->sti_laddr_sa, sizeof (sin6_t)); 20198348SEric.Yu@Sun.COM nsti->sti_laddr_len = sizeof (sin6_t); 20200Sstevel@tonic-gate } 20210Sstevel@tonic-gate freemsg(ack_mp); 20220Sstevel@tonic-gate 20238348SEric.Yu@Sun.COM nso->so_state |= SS_ISCONNECTED; 20248348SEric.Yu@Sun.COM nso->so_proto_handle = (sock_lower_handle_t)opt; 20258348SEric.Yu@Sun.COM nsti->sti_laddr_valid = 1; 20268348SEric.Yu@Sun.COM 20278348SEric.Yu@Sun.COM if (sti->sti_nl7c_flags & NL7C_ENABLED) { 20280Sstevel@tonic-gate /* 20291974Sbrutus * A NL7C marked listen()er so the new socket 20301974Sbrutus * inherits the listen()er's NL7C state, except 20311974Sbrutus * for NL7C_POLLIN. 20320Sstevel@tonic-gate * 20331974Sbrutus * Only call NL7C to process the new socket if 20341974Sbrutus * the listen socket allows blocking i/o. 20350Sstevel@tonic-gate */ 20368348SEric.Yu@Sun.COM nsti->sti_nl7c_flags = 20378348SEric.Yu@Sun.COM sti->sti_nl7c_flags & (~NL7C_POLLIN); 20381974Sbrutus if (so->so_state & (SS_NONBLOCK|SS_NDELAY)) { 20391974Sbrutus /* 20401974Sbrutus * Nonblocking accept() just make it 20411974Sbrutus * persist to defer processing to the 20421974Sbrutus * read-side syscall (e.g. read). 20431974Sbrutus */ 20448348SEric.Yu@Sun.COM nsti->sti_nl7c_flags |= NL7C_SOPERSIST; 20451974Sbrutus } else if (nl7c_process(nso, B_FALSE)) { 20460Sstevel@tonic-gate /* 20470Sstevel@tonic-gate * NL7C has completed processing on the 20480Sstevel@tonic-gate * socket, close the socket and back to 20490Sstevel@tonic-gate * the top to await the next T_CONN_IND. 20500Sstevel@tonic-gate */ 20510Sstevel@tonic-gate mutex_exit(&nso->so_lock); 20520Sstevel@tonic-gate (void) VOP_CLOSE(nvp, 0, 1, (offset_t)0, 20538778SErik.Nordmark@Sun.COM cr, NULL); 20540Sstevel@tonic-gate VN_RELE(nvp); 20550Sstevel@tonic-gate goto again; 20560Sstevel@tonic-gate } 20570Sstevel@tonic-gate /* Pass the new socket out */ 20580Sstevel@tonic-gate } 20590Sstevel@tonic-gate 20600Sstevel@tonic-gate mutex_exit(&nso->so_lock); 20610Sstevel@tonic-gate 20620Sstevel@tonic-gate /* 20632811Sja97890 * It's possible, through the use of autopush for example, 20648348SEric.Yu@Sun.COM * that the acceptor stream may not support sti_direct 20658348SEric.Yu@Sun.COM * semantics. If the new socket does not support sti_direct 20662811Sja97890 * we issue a _SIOCSOCKFALLBACK to inform the transport 20672811Sja97890 * as we would in the I_PUSH case. 20682811Sja97890 */ 20698348SEric.Yu@Sun.COM if (nsti->sti_direct == 0) { 20702811Sja97890 int rval; 20712811Sja97890 20722811Sja97890 if ((error = strioctl(SOTOV(nso), _SIOCSOCKFALLBACK, 20738778SErik.Nordmark@Sun.COM 0, 0, K_TO_K, cr, &rval)) != 0) { 20742811Sja97890 mutex_enter(&so->so_lock); 20752811Sja97890 so_lock_single(so); 20762811Sja97890 eprintsoline(so, error); 20772811Sja97890 goto disconnect_vp; 20782811Sja97890 } 20792811Sja97890 } 20802811Sja97890 20812811Sja97890 /* 20820Sstevel@tonic-gate * Pass out new socket. 20830Sstevel@tonic-gate */ 20840Sstevel@tonic-gate if (nsop != NULL) 20850Sstevel@tonic-gate *nsop = nso; 20860Sstevel@tonic-gate 20870Sstevel@tonic-gate return (0); 20880Sstevel@tonic-gate } 20890Sstevel@tonic-gate 20900Sstevel@tonic-gate /* 20910Sstevel@tonic-gate * This is the non-performance case for sockets (e.g. AF_UNIX sockets) 20920Sstevel@tonic-gate * which don't support the FireEngine accept fast-path. It is also 20930Sstevel@tonic-gate * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd 20940Sstevel@tonic-gate * again. Neither sockfs nor TCP attempt to find out if some other 20950Sstevel@tonic-gate * random module has been inserted in between (in which case we 20960Sstevel@tonic-gate * should follow TLI accept behaviour). We blindly assume the worst 20970Sstevel@tonic-gate * case and revert back to old behaviour i.e. TCP will not send us 20980Sstevel@tonic-gate * any option (eager) and the accept should happen on the listener 20990Sstevel@tonic-gate * queue. Any queued T_conn_ind have already got their options removed 21000Sstevel@tonic-gate * by so_sock2_stream() when "sockmod" was I_POP'd. 21010Sstevel@tonic-gate */ 21020Sstevel@tonic-gate /* 21030Sstevel@tonic-gate * Fill in the {O_}T_CONN_RES before getting SOLOCKED. 21040Sstevel@tonic-gate */ 21050Sstevel@tonic-gate if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) { 21060Sstevel@tonic-gate #ifdef _ILP32 21070Sstevel@tonic-gate queue_t *q; 21080Sstevel@tonic-gate 21090Sstevel@tonic-gate /* 21100Sstevel@tonic-gate * Find read queue in driver 21110Sstevel@tonic-gate * Can safely do this since we "own" nso/nvp. 21120Sstevel@tonic-gate */ 21130Sstevel@tonic-gate q = strvp2wq(nvp)->q_next; 21140Sstevel@tonic-gate while (SAMESTR(q)) 21150Sstevel@tonic-gate q = q->q_next; 21160Sstevel@tonic-gate q = RD(q); 21170Sstevel@tonic-gate conn_res->ACCEPTOR_id = (t_uscalar_t)q; 21180Sstevel@tonic-gate #else 21190Sstevel@tonic-gate conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev); 21200Sstevel@tonic-gate #endif /* _ILP32 */ 21210Sstevel@tonic-gate conn_res->PRIM_type = O_T_CONN_RES; 21220Sstevel@tonic-gate PRIM_type = O_T_CONN_RES; 21230Sstevel@tonic-gate } else { 21248348SEric.Yu@Sun.COM conn_res->ACCEPTOR_id = nsti->sti_acceptor_id; 21250Sstevel@tonic-gate conn_res->PRIM_type = T_CONN_RES; 21260Sstevel@tonic-gate PRIM_type = T_CONN_RES; 21270Sstevel@tonic-gate } 21280Sstevel@tonic-gate conn_res->SEQ_number = SEQ_number; 21290Sstevel@tonic-gate conn_res->OPT_length = 0; 21300Sstevel@tonic-gate conn_res->OPT_offset = 0; 21310Sstevel@tonic-gate 21320Sstevel@tonic-gate mutex_enter(&so->so_lock); 21330Sstevel@tonic-gate so_lock_single(so); /* Set SOLOCKED */ 21340Sstevel@tonic-gate mutex_exit(&so->so_lock); 21350Sstevel@tonic-gate 21360Sstevel@tonic-gate error = kstrputmsg(SOTOV(so), mp, NULL, 21370Sstevel@tonic-gate 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 21380Sstevel@tonic-gate mutex_enter(&so->so_lock); 21390Sstevel@tonic-gate if (error) { 21400Sstevel@tonic-gate eprintsoline(so, error); 21410Sstevel@tonic-gate goto disconnect_vp; 21420Sstevel@tonic-gate } 21434379Sja97890 error = sowaitprim(so, PRIM_type, T_OK_ACK, 21444379Sja97890 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 21450Sstevel@tonic-gate if (error) { 21460Sstevel@tonic-gate eprintsoline(so, error); 21470Sstevel@tonic-gate goto disconnect_vp; 21480Sstevel@tonic-gate } 21494379Sja97890 /* 21504379Sja97890 * If there is a sin/sin6 appended onto the T_OK_ACK use 21514379Sja97890 * that to set the local address. If this is not present 21524379Sja97890 * then we zero out the address and don't set the 21538348SEric.Yu@Sun.COM * sti_laddr_valid bit. For AF_UNIX endpoints we copy over 21544678Sja97890 * the pathname from the listening socket. 21554379Sja97890 */ 21564379Sja97890 sinlen = (nso->so_family == AF_INET) ? sizeof (sin_t) : sizeof (sin6_t); 21574379Sja97890 if ((nso->so_family == AF_INET) || (nso->so_family == AF_INET6) && 21584379Sja97890 MBLKL(ack_mp) == (sizeof (struct T_ok_ack) + sinlen)) { 21594379Sja97890 ack_mp->b_rptr += sizeof (struct T_ok_ack); 21608348SEric.Yu@Sun.COM bcopy(ack_mp->b_rptr, nsti->sti_laddr_sa, sinlen); 21618348SEric.Yu@Sun.COM nsti->sti_laddr_len = sinlen; 21628348SEric.Yu@Sun.COM nsti->sti_laddr_valid = 1; 21634678Sja97890 } else if (nso->so_family == AF_UNIX) { 21644678Sja97890 ASSERT(so->so_family == AF_UNIX); 21658348SEric.Yu@Sun.COM nsti->sti_laddr_len = sti->sti_laddr_len; 21668348SEric.Yu@Sun.COM ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen); 21678348SEric.Yu@Sun.COM bcopy(sti->sti_laddr_sa, nsti->sti_laddr_sa, 21688348SEric.Yu@Sun.COM nsti->sti_laddr_len); 21698348SEric.Yu@Sun.COM nsti->sti_laddr_valid = 1; 21704379Sja97890 } else { 21718348SEric.Yu@Sun.COM nsti->sti_laddr_len = sti->sti_laddr_len; 21728348SEric.Yu@Sun.COM ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen); 21738348SEric.Yu@Sun.COM bzero(nsti->sti_laddr_sa, nsti->sti_addr_size); 21748348SEric.Yu@Sun.COM nsti->sti_laddr_sa->sa_family = nso->so_family; 21754379Sja97890 } 21764379Sja97890 freemsg(ack_mp); 21774379Sja97890 21780Sstevel@tonic-gate so_unlock_single(so, SOLOCKED); 21790Sstevel@tonic-gate mutex_exit(&so->so_lock); 21800Sstevel@tonic-gate 21810Sstevel@tonic-gate nso->so_state |= SS_ISCONNECTED; 21820Sstevel@tonic-gate 21830Sstevel@tonic-gate /* 21840Sstevel@tonic-gate * Pass out new socket. 21850Sstevel@tonic-gate */ 21860Sstevel@tonic-gate if (nsop != NULL) 21870Sstevel@tonic-gate *nsop = nso; 21880Sstevel@tonic-gate 21890Sstevel@tonic-gate return (0); 21900Sstevel@tonic-gate 21910Sstevel@tonic-gate 21920Sstevel@tonic-gate eproto_disc_unl: 21930Sstevel@tonic-gate error = EPROTO; 21940Sstevel@tonic-gate e_disc_unl: 21950Sstevel@tonic-gate eprintsoline(so, error); 21960Sstevel@tonic-gate goto disconnect_unlocked; 21970Sstevel@tonic-gate 21980Sstevel@tonic-gate pr_disc_vp_unl: 21990Sstevel@tonic-gate eprintsoline(so, error); 22000Sstevel@tonic-gate disconnect_vp_unlocked: 22018778SErik.Nordmark@Sun.COM (void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL); 22020Sstevel@tonic-gate VN_RELE(nvp); 22030Sstevel@tonic-gate disconnect_unlocked: 22040Sstevel@tonic-gate (void) sodisconnect(so, SEQ_number, 0); 22050Sstevel@tonic-gate return (error); 22060Sstevel@tonic-gate 22070Sstevel@tonic-gate pr_disc_vp: 22080Sstevel@tonic-gate eprintsoline(so, error); 22090Sstevel@tonic-gate disconnect_vp: 22100Sstevel@tonic-gate (void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD); 22110Sstevel@tonic-gate so_unlock_single(so, SOLOCKED); 22120Sstevel@tonic-gate mutex_exit(&so->so_lock); 22138778SErik.Nordmark@Sun.COM (void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL); 22140Sstevel@tonic-gate VN_RELE(nvp); 22150Sstevel@tonic-gate return (error); 22160Sstevel@tonic-gate 22170Sstevel@tonic-gate conn_bad: /* Note: SunOS 4/BSD unconditionally returns EINVAL here */ 22180Sstevel@tonic-gate error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) 22190Sstevel@tonic-gate ? EOPNOTSUPP : EINVAL; 22200Sstevel@tonic-gate e_bad: 22210Sstevel@tonic-gate eprintsoline(so, error); 22220Sstevel@tonic-gate return (error); 22230Sstevel@tonic-gate } 22240Sstevel@tonic-gate 22250Sstevel@tonic-gate /* 22260Sstevel@tonic-gate * connect a socket. 22270Sstevel@tonic-gate * 22280Sstevel@tonic-gate * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to 22290Sstevel@tonic-gate * unconnect (by specifying a null address). 22300Sstevel@tonic-gate */ 22310Sstevel@tonic-gate int 22320Sstevel@tonic-gate sotpi_connect(struct sonode *so, 2233*12643SAnders.Persson@Sun.COM struct sockaddr *name, 22340Sstevel@tonic-gate socklen_t namelen, 22350Sstevel@tonic-gate int fflag, 22368348SEric.Yu@Sun.COM int flags, 22378348SEric.Yu@Sun.COM struct cred *cr) 22380Sstevel@tonic-gate { 22390Sstevel@tonic-gate struct T_conn_req conn_req; 22400Sstevel@tonic-gate int error = 0; 22410Sstevel@tonic-gate mblk_t *mp; 22420Sstevel@tonic-gate void *src; 22430Sstevel@tonic-gate socklen_t srclen; 22440Sstevel@tonic-gate void *addr; 22450Sstevel@tonic-gate socklen_t addrlen; 22460Sstevel@tonic-gate boolean_t need_unlock; 22478348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 22480Sstevel@tonic-gate 22490Sstevel@tonic-gate dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n", 22507240Srh87107 (void *)so, (void *)name, namelen, fflag, flags, 22515240Snordmark pr_state(so->so_state, so->so_mode))); 22520Sstevel@tonic-gate 22530Sstevel@tonic-gate /* 22540Sstevel@tonic-gate * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to 22550Sstevel@tonic-gate * avoid sleeping for memory with SOLOCKED held. 22568348SEric.Yu@Sun.COM * We know that the T_CONN_REQ can't be larger than 2 * sti_faddr_maxlen 22570Sstevel@tonic-gate * + sizeof (struct T_opthdr). 22580Sstevel@tonic-gate * (the AF_UNIX so_ux_addr_xlate() does not make the address 22598348SEric.Yu@Sun.COM * exceed sti_faddr_maxlen). 22600Sstevel@tonic-gate */ 22610Sstevel@tonic-gate mp = soallocproto(sizeof (struct T_conn_req) + 22628778SErik.Nordmark@Sun.COM 2 * sti->sti_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR, 22638778SErik.Nordmark@Sun.COM cr); 22640Sstevel@tonic-gate if (mp == NULL) { 22650Sstevel@tonic-gate /* 22660Sstevel@tonic-gate * Connect can not fail with ENOBUFS. A signal was 22670Sstevel@tonic-gate * caught so return EINTR. 22680Sstevel@tonic-gate */ 22690Sstevel@tonic-gate error = EINTR; 22700Sstevel@tonic-gate eprintsoline(so, error); 22710Sstevel@tonic-gate return (error); 22720Sstevel@tonic-gate } 22730Sstevel@tonic-gate 22740Sstevel@tonic-gate mutex_enter(&so->so_lock); 22750Sstevel@tonic-gate /* 22765694Sjprakash * Make sure there is a preallocated T_unbind_req message 22775694Sjprakash * before any binding. This message is allocated when the 22785694Sjprakash * socket is created. Since another thread can consume 22795694Sjprakash * so_unbind_mp by the time we return from so_lock_single(), 22805694Sjprakash * we should check the availability of so_unbind_mp after 22815694Sjprakash * we return from so_lock_single(). 22820Sstevel@tonic-gate */ 22835694Sjprakash 22845694Sjprakash so_lock_single(so); /* Set SOLOCKED */ 22855694Sjprakash need_unlock = B_TRUE; 22865694Sjprakash 22878348SEric.Yu@Sun.COM if (sti->sti_unbind_mp == NULL) { 22880Sstevel@tonic-gate dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n")); 22890Sstevel@tonic-gate /* NOTE: holding so_lock while sleeping */ 22908348SEric.Yu@Sun.COM sti->sti_unbind_mp = 22918778SErik.Nordmark@Sun.COM soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR, cr); 22928348SEric.Yu@Sun.COM if (sti->sti_unbind_mp == NULL) { 22930Sstevel@tonic-gate error = EINTR; 22940Sstevel@tonic-gate goto done; 22950Sstevel@tonic-gate } 22960Sstevel@tonic-gate } 22970Sstevel@tonic-gate 22980Sstevel@tonic-gate /* 22990Sstevel@tonic-gate * Can't have done a listen before connecting. 23000Sstevel@tonic-gate */ 23010Sstevel@tonic-gate if (so->so_state & SS_ACCEPTCONN) { 23020Sstevel@tonic-gate error = EOPNOTSUPP; 23030Sstevel@tonic-gate goto done; 23040Sstevel@tonic-gate } 23050Sstevel@tonic-gate 23060Sstevel@tonic-gate /* 23070Sstevel@tonic-gate * Must be bound with the transport 23080Sstevel@tonic-gate */ 23090Sstevel@tonic-gate if (!(so->so_state & SS_ISBOUND)) { 23100Sstevel@tonic-gate if ((so->so_family == AF_INET || so->so_family == AF_INET6) && 23110Sstevel@tonic-gate /*CONSTCOND*/ 23120Sstevel@tonic-gate so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) { 23130Sstevel@tonic-gate /* 23140Sstevel@tonic-gate * Optimization for AF_INET{,6} transports 23150Sstevel@tonic-gate * that can handle a T_CONN_REQ without being bound. 23160Sstevel@tonic-gate */ 23170Sstevel@tonic-gate so_automatic_bind(so); 23180Sstevel@tonic-gate } else { 23190Sstevel@tonic-gate error = sotpi_bind(so, NULL, 0, 23208348SEric.Yu@Sun.COM _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 23210Sstevel@tonic-gate if (error) 23220Sstevel@tonic-gate goto done; 23230Sstevel@tonic-gate } 23240Sstevel@tonic-gate ASSERT(so->so_state & SS_ISBOUND); 23250Sstevel@tonic-gate flags |= _SOCONNECT_DID_BIND; 23260Sstevel@tonic-gate } 23270Sstevel@tonic-gate 23280Sstevel@tonic-gate /* 23290Sstevel@tonic-gate * Handle a connect to a name parameter of type AF_UNSPEC like a 23300Sstevel@tonic-gate * connect to a null address. This is the portable method to 23310Sstevel@tonic-gate * unconnect a socket. 23320Sstevel@tonic-gate */ 23330Sstevel@tonic-gate if ((namelen >= sizeof (sa_family_t)) && 23340Sstevel@tonic-gate (name->sa_family == AF_UNSPEC)) { 23350Sstevel@tonic-gate name = NULL; 23360Sstevel@tonic-gate namelen = 0; 23370Sstevel@tonic-gate } 23380Sstevel@tonic-gate 23390Sstevel@tonic-gate /* 23400Sstevel@tonic-gate * Check that we are not already connected. 23410Sstevel@tonic-gate * A connection-oriented socket cannot be reconnected. 23420Sstevel@tonic-gate * A connected connection-less socket can be 23430Sstevel@tonic-gate * - connected to a different address by a subsequent connect 23440Sstevel@tonic-gate * - "unconnected" by a connect to the NULL address 23450Sstevel@tonic-gate */ 23460Sstevel@tonic-gate if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) { 23470Sstevel@tonic-gate ASSERT(!(flags & _SOCONNECT_DID_BIND)); 23480Sstevel@tonic-gate if (so->so_mode & SM_CONNREQUIRED) { 23490Sstevel@tonic-gate /* Connection-oriented socket */ 23500Sstevel@tonic-gate error = so->so_state & SS_ISCONNECTED ? 23510Sstevel@tonic-gate EISCONN : EALREADY; 23520Sstevel@tonic-gate goto done; 23530Sstevel@tonic-gate } 23540Sstevel@tonic-gate /* Connection-less socket */ 23550Sstevel@tonic-gate if (name == NULL) { 23560Sstevel@tonic-gate /* 23570Sstevel@tonic-gate * Remove the connected state and clear SO_DGRAM_ERRIND 23580Sstevel@tonic-gate * since it was set when the socket was connected. 23590Sstevel@tonic-gate * If this is UDP also send down a T_DISCON_REQ. 23600Sstevel@tonic-gate */ 23610Sstevel@tonic-gate int val; 23620Sstevel@tonic-gate 23630Sstevel@tonic-gate if ((so->so_family == AF_INET || 23645240Snordmark so->so_family == AF_INET6) && 23650Sstevel@tonic-gate (so->so_type == SOCK_DGRAM || 23665240Snordmark so->so_type == SOCK_RAW) && 23670Sstevel@tonic-gate /*CONSTCOND*/ 23680Sstevel@tonic-gate !soconnect_tpi_udp) { 23690Sstevel@tonic-gate /* XXX What about implicitly unbinding here? */ 23700Sstevel@tonic-gate error = sodisconnect(so, -1, 23715240Snordmark _SODISCONNECT_LOCK_HELD); 23720Sstevel@tonic-gate } else { 23730Sstevel@tonic-gate so->so_state &= 23748348SEric.Yu@Sun.COM ~(SS_ISCONNECTED | SS_ISCONNECTING); 23758348SEric.Yu@Sun.COM sti->sti_faddr_valid = 0; 23768348SEric.Yu@Sun.COM sti->sti_faddr_len = 0; 23770Sstevel@tonic-gate } 23780Sstevel@tonic-gate 23798348SEric.Yu@Sun.COM /* Remove SOLOCKED since setsockopt will grab it */ 23800Sstevel@tonic-gate so_unlock_single(so, SOLOCKED); 23810Sstevel@tonic-gate mutex_exit(&so->so_lock); 23820Sstevel@tonic-gate 23830Sstevel@tonic-gate val = 0; 23848348SEric.Yu@Sun.COM (void) sotpi_setsockopt(so, SOL_SOCKET, 23858348SEric.Yu@Sun.COM SO_DGRAM_ERRIND, &val, (t_uscalar_t)sizeof (val), 23868348SEric.Yu@Sun.COM cr); 23870Sstevel@tonic-gate 23880Sstevel@tonic-gate mutex_enter(&so->so_lock); 23890Sstevel@tonic-gate so_lock_single(so); /* Set SOLOCKED */ 23900Sstevel@tonic-gate goto done; 23910Sstevel@tonic-gate } 23920Sstevel@tonic-gate } 23930Sstevel@tonic-gate ASSERT(so->so_state & SS_ISBOUND); 23940Sstevel@tonic-gate 23950Sstevel@tonic-gate if (name == NULL || namelen == 0) { 23960Sstevel@tonic-gate error = EINVAL; 23970Sstevel@tonic-gate goto done; 23980Sstevel@tonic-gate } 23990Sstevel@tonic-gate /* 24008348SEric.Yu@Sun.COM * Mark the socket if sti_faddr_sa represents the transport level 24010Sstevel@tonic-gate * address. 24020Sstevel@tonic-gate */ 24030Sstevel@tonic-gate if (flags & _SOCONNECT_NOXLATE) { 24040Sstevel@tonic-gate struct sockaddr_ux *soaddr_ux; 24050Sstevel@tonic-gate 24060Sstevel@tonic-gate ASSERT(so->so_family == AF_UNIX); 24070Sstevel@tonic-gate if (namelen != sizeof (struct sockaddr_ux)) { 24080Sstevel@tonic-gate error = EINVAL; 24090Sstevel@tonic-gate goto done; 24100Sstevel@tonic-gate } 24110Sstevel@tonic-gate soaddr_ux = (struct sockaddr_ux *)name; 24120Sstevel@tonic-gate name = (struct sockaddr *)&soaddr_ux->sou_addr; 24130Sstevel@tonic-gate namelen = sizeof (soaddr_ux->sou_addr); 24148348SEric.Yu@Sun.COM sti->sti_faddr_noxlate = 1; 24150Sstevel@tonic-gate } 24160Sstevel@tonic-gate 24170Sstevel@tonic-gate /* 24180Sstevel@tonic-gate * Length and family checks. 24190Sstevel@tonic-gate */ 24200Sstevel@tonic-gate error = so_addr_verify(so, name, namelen); 24210Sstevel@tonic-gate if (error) 24220Sstevel@tonic-gate goto bad; 24230Sstevel@tonic-gate 24240Sstevel@tonic-gate /* 24250Sstevel@tonic-gate * Save foreign address. Needed for AF_UNIX as well as 24260Sstevel@tonic-gate * transport providers that do not support TI_GETPEERNAME. 24270Sstevel@tonic-gate * Also used for cached foreign address for TCP and UDP. 24280Sstevel@tonic-gate */ 24298348SEric.Yu@Sun.COM if (namelen > (t_uscalar_t)sti->sti_faddr_maxlen) { 24300Sstevel@tonic-gate error = EINVAL; 24310Sstevel@tonic-gate goto done; 24320Sstevel@tonic-gate } 24338348SEric.Yu@Sun.COM sti->sti_faddr_len = (socklen_t)namelen; 24348348SEric.Yu@Sun.COM ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 24358348SEric.Yu@Sun.COM bcopy(name, sti->sti_faddr_sa, namelen); 24368348SEric.Yu@Sun.COM sti->sti_faddr_valid = 1; 24370Sstevel@tonic-gate 24380Sstevel@tonic-gate if (so->so_family == AF_UNIX) { 24398348SEric.Yu@Sun.COM if (sti->sti_faddr_noxlate) { 24400Sstevel@tonic-gate /* 24410Sstevel@tonic-gate * Already have a transport internal address. Do not 24420Sstevel@tonic-gate * pass any (transport internal) source address. 24430Sstevel@tonic-gate */ 24448348SEric.Yu@Sun.COM addr = sti->sti_faddr_sa; 24458348SEric.Yu@Sun.COM addrlen = (t_uscalar_t)sti->sti_faddr_len; 24460Sstevel@tonic-gate src = NULL; 24470Sstevel@tonic-gate srclen = 0; 24480Sstevel@tonic-gate } else { 24490Sstevel@tonic-gate /* 24500Sstevel@tonic-gate * Pass the sockaddr_un source address as an option 24510Sstevel@tonic-gate * and translate the remote address. 24528348SEric.Yu@Sun.COM * Holding so_lock thus sti_laddr_sa can not change. 24530Sstevel@tonic-gate */ 24548348SEric.Yu@Sun.COM src = sti->sti_laddr_sa; 24558348SEric.Yu@Sun.COM srclen = (t_uscalar_t)sti->sti_laddr_len; 24560Sstevel@tonic-gate dprintso(so, 1, 24575240Snordmark ("sotpi_connect UNIX: srclen %d, src %p\n", 24585240Snordmark srclen, src)); 24590Sstevel@tonic-gate error = so_ux_addr_xlate(so, 24608348SEric.Yu@Sun.COM sti->sti_faddr_sa, (socklen_t)sti->sti_faddr_len, 24615240Snordmark (flags & _SOCONNECT_XPG4_2), 24625240Snordmark &addr, &addrlen); 24630Sstevel@tonic-gate if (error) 24640Sstevel@tonic-gate goto bad; 24650Sstevel@tonic-gate } 24660Sstevel@tonic-gate } else { 24678348SEric.Yu@Sun.COM addr = sti->sti_faddr_sa; 24688348SEric.Yu@Sun.COM addrlen = (t_uscalar_t)sti->sti_faddr_len; 24690Sstevel@tonic-gate src = NULL; 24700Sstevel@tonic-gate srclen = 0; 24710Sstevel@tonic-gate } 24720Sstevel@tonic-gate /* 24730Sstevel@tonic-gate * When connecting a datagram socket we issue the SO_DGRAM_ERRIND 24740Sstevel@tonic-gate * option which asks the transport provider to send T_UDERR_IND 24750Sstevel@tonic-gate * messages. These T_UDERR_IND messages are used to return connected 24760Sstevel@tonic-gate * style errors (e.g. ECONNRESET) for connected datagram sockets. 24770Sstevel@tonic-gate * 24780Sstevel@tonic-gate * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets) 24790Sstevel@tonic-gate * we send down a T_CONN_REQ. This is needed to let the 24800Sstevel@tonic-gate * transport assign a local address that is consistent with 24810Sstevel@tonic-gate * the remote address. Applications depend on a getsockname() 24820Sstevel@tonic-gate * after a connect() to retrieve the "source" IP address for 24830Sstevel@tonic-gate * the connected socket. Invalidate the cached local address 24840Sstevel@tonic-gate * to force getsockname() to enquire of the transport. 24850Sstevel@tonic-gate */ 24860Sstevel@tonic-gate if (!(so->so_mode & SM_CONNREQUIRED)) { 24870Sstevel@tonic-gate /* 24880Sstevel@tonic-gate * Datagram socket. 24890Sstevel@tonic-gate */ 24900Sstevel@tonic-gate int32_t val; 24910Sstevel@tonic-gate 24920Sstevel@tonic-gate so_unlock_single(so, SOLOCKED); 24930Sstevel@tonic-gate mutex_exit(&so->so_lock); 24940Sstevel@tonic-gate 24950Sstevel@tonic-gate val = 1; 24960Sstevel@tonic-gate (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 24978348SEric.Yu@Sun.COM &val, (t_uscalar_t)sizeof (val), cr); 24980Sstevel@tonic-gate 24990Sstevel@tonic-gate mutex_enter(&so->so_lock); 25000Sstevel@tonic-gate so_lock_single(so); /* Set SOLOCKED */ 25010Sstevel@tonic-gate if ((so->so_family != AF_INET && so->so_family != AF_INET6) || 25020Sstevel@tonic-gate (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) || 25030Sstevel@tonic-gate soconnect_tpi_udp) { 25040Sstevel@tonic-gate soisconnected(so); 25050Sstevel@tonic-gate goto done; 25060Sstevel@tonic-gate } 25070Sstevel@tonic-gate /* 25080Sstevel@tonic-gate * Send down T_CONN_REQ etc. 25090Sstevel@tonic-gate * Clear fflag to avoid returning EWOULDBLOCK. 25100Sstevel@tonic-gate */ 25110Sstevel@tonic-gate fflag = 0; 25120Sstevel@tonic-gate ASSERT(so->so_family != AF_UNIX); 25138348SEric.Yu@Sun.COM sti->sti_laddr_valid = 0; 25148348SEric.Yu@Sun.COM } else if (sti->sti_laddr_len != 0) { 25150Sstevel@tonic-gate /* 25160Sstevel@tonic-gate * If the local address or port was "any" then it may be 25170Sstevel@tonic-gate * changed by the transport as a result of the 25180Sstevel@tonic-gate * connect. Invalidate the cached version if we have one. 25190Sstevel@tonic-gate */ 25200Sstevel@tonic-gate switch (so->so_family) { 25210Sstevel@tonic-gate case AF_INET: 25228348SEric.Yu@Sun.COM ASSERT(sti->sti_laddr_len == (socklen_t)sizeof (sin_t)); 25238348SEric.Yu@Sun.COM if (((sin_t *)sti->sti_laddr_sa)->sin_addr.s_addr == 25240Sstevel@tonic-gate INADDR_ANY || 25258348SEric.Yu@Sun.COM ((sin_t *)sti->sti_laddr_sa)->sin_port == 0) 25268348SEric.Yu@Sun.COM sti->sti_laddr_valid = 0; 25270Sstevel@tonic-gate break; 25280Sstevel@tonic-gate 25290Sstevel@tonic-gate case AF_INET6: 25308348SEric.Yu@Sun.COM ASSERT(sti->sti_laddr_len == 25318348SEric.Yu@Sun.COM (socklen_t)sizeof (sin6_t)); 25320Sstevel@tonic-gate if (IN6_IS_ADDR_UNSPECIFIED( 25338348SEric.Yu@Sun.COM &((sin6_t *)sti->sti_laddr_sa) ->sin6_addr) || 25340Sstevel@tonic-gate IN6_IS_ADDR_V4MAPPED_ANY( 25358348SEric.Yu@Sun.COM &((sin6_t *)sti->sti_laddr_sa)->sin6_addr) || 25368348SEric.Yu@Sun.COM ((sin6_t *)sti->sti_laddr_sa)->sin6_port == 0) 25378348SEric.Yu@Sun.COM sti->sti_laddr_valid = 0; 25380Sstevel@tonic-gate break; 25390Sstevel@tonic-gate 25400Sstevel@tonic-gate default: 25410Sstevel@tonic-gate break; 25420Sstevel@tonic-gate } 25430Sstevel@tonic-gate } 25440Sstevel@tonic-gate 25450Sstevel@tonic-gate /* 25460Sstevel@tonic-gate * Check for failure of an earlier call 25470Sstevel@tonic-gate */ 25480Sstevel@tonic-gate if (so->so_error != 0) 25490Sstevel@tonic-gate goto so_bad; 25500Sstevel@tonic-gate 25510Sstevel@tonic-gate /* 25520Sstevel@tonic-gate * Send down T_CONN_REQ. Message was allocated above. 25530Sstevel@tonic-gate */ 25540Sstevel@tonic-gate conn_req.PRIM_type = T_CONN_REQ; 25550Sstevel@tonic-gate conn_req.DEST_length = addrlen; 25560Sstevel@tonic-gate conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req); 25570Sstevel@tonic-gate if (srclen == 0) { 25580Sstevel@tonic-gate conn_req.OPT_length = 0; 25590Sstevel@tonic-gate conn_req.OPT_offset = 0; 25600Sstevel@tonic-gate soappendmsg(mp, &conn_req, sizeof (conn_req)); 25610Sstevel@tonic-gate soappendmsg(mp, addr, addrlen); 25620Sstevel@tonic-gate } else { 25630Sstevel@tonic-gate /* 25640Sstevel@tonic-gate * There is a AF_UNIX sockaddr_un to include as a source 25650Sstevel@tonic-gate * address option. 25660Sstevel@tonic-gate */ 25670Sstevel@tonic-gate struct T_opthdr toh; 25680Sstevel@tonic-gate 25690Sstevel@tonic-gate toh.level = SOL_SOCKET; 25700Sstevel@tonic-gate toh.name = SO_SRCADDR; 25710Sstevel@tonic-gate toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 25720Sstevel@tonic-gate toh.status = 0; 25730Sstevel@tonic-gate conn_req.OPT_length = 25745240Snordmark (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen)); 25750Sstevel@tonic-gate conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) + 25765240Snordmark _TPI_ALIGN_TOPT(addrlen)); 25770Sstevel@tonic-gate 25780Sstevel@tonic-gate soappendmsg(mp, &conn_req, sizeof (conn_req)); 25790Sstevel@tonic-gate soappendmsg(mp, addr, addrlen); 25800Sstevel@tonic-gate mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 25810Sstevel@tonic-gate soappendmsg(mp, &toh, sizeof (toh)); 25820Sstevel@tonic-gate soappendmsg(mp, src, srclen); 25830Sstevel@tonic-gate mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 25840Sstevel@tonic-gate ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 25850Sstevel@tonic-gate } 25860Sstevel@tonic-gate /* 25870Sstevel@tonic-gate * Set SS_ISCONNECTING before sending down the T_CONN_REQ 25880Sstevel@tonic-gate * in order to have the right state when the T_CONN_CON shows up. 25890Sstevel@tonic-gate */ 25900Sstevel@tonic-gate soisconnecting(so); 25910Sstevel@tonic-gate mutex_exit(&so->so_lock); 25920Sstevel@tonic-gate 259311861SMarek.Pospisil@Sun.COM if (AU_AUDITING()) 25940Sstevel@tonic-gate audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0); 25950Sstevel@tonic-gate 25960Sstevel@tonic-gate error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 25975240Snordmark MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 25980Sstevel@tonic-gate mp = NULL; 25990Sstevel@tonic-gate mutex_enter(&so->so_lock); 26000Sstevel@tonic-gate if (error != 0) 26010Sstevel@tonic-gate goto bad; 26020Sstevel@tonic-gate 26030Sstevel@tonic-gate if ((error = sowaitokack(so, T_CONN_REQ)) != 0) 26040Sstevel@tonic-gate goto bad; 26050Sstevel@tonic-gate 26060Sstevel@tonic-gate /* Allow other threads to access the socket */ 26070Sstevel@tonic-gate so_unlock_single(so, SOLOCKED); 26080Sstevel@tonic-gate need_unlock = B_FALSE; 26090Sstevel@tonic-gate 26100Sstevel@tonic-gate /* 26110Sstevel@tonic-gate * Wait until we get a T_CONN_CON or an error 26120Sstevel@tonic-gate */ 26130Sstevel@tonic-gate if ((error = sowaitconnected(so, fflag, 0)) != 0) { 26140Sstevel@tonic-gate so_lock_single(so); /* Set SOLOCKED */ 26150Sstevel@tonic-gate need_unlock = B_TRUE; 26160Sstevel@tonic-gate } 26170Sstevel@tonic-gate 26180Sstevel@tonic-gate done: 26190Sstevel@tonic-gate freemsg(mp); 26200Sstevel@tonic-gate switch (error) { 26210Sstevel@tonic-gate case EINPROGRESS: 26220Sstevel@tonic-gate case EALREADY: 26230Sstevel@tonic-gate case EISCONN: 26240Sstevel@tonic-gate case EINTR: 26250Sstevel@tonic-gate /* Non-fatal errors */ 26268348SEric.Yu@Sun.COM sti->sti_laddr_valid = 0; 26270Sstevel@tonic-gate /* FALLTHRU */ 26280Sstevel@tonic-gate case 0: 26290Sstevel@tonic-gate break; 26300Sstevel@tonic-gate default: 26310Sstevel@tonic-gate ASSERT(need_unlock); 26320Sstevel@tonic-gate /* 26330Sstevel@tonic-gate * Fatal errors: clear SS_ISCONNECTING in case it was set, 26340Sstevel@tonic-gate * and invalidate local-address cache 26350Sstevel@tonic-gate */ 26368348SEric.Yu@Sun.COM so->so_state &= ~SS_ISCONNECTING; 26378348SEric.Yu@Sun.COM sti->sti_laddr_valid = 0; 26380Sstevel@tonic-gate /* A discon_ind might have already unbound us */ 26390Sstevel@tonic-gate if ((flags & _SOCONNECT_DID_BIND) && 26400Sstevel@tonic-gate (so->so_state & SS_ISBOUND)) { 26410Sstevel@tonic-gate int err; 26420Sstevel@tonic-gate 26430Sstevel@tonic-gate err = sotpi_unbind(so, 0); 26440Sstevel@tonic-gate /* LINTED - statement has no conseq */ 26450Sstevel@tonic-gate if (err) { 26460Sstevel@tonic-gate eprintsoline(so, err); 26470Sstevel@tonic-gate } 26480Sstevel@tonic-gate } 26490Sstevel@tonic-gate break; 26500Sstevel@tonic-gate } 26510Sstevel@tonic-gate if (need_unlock) 26520Sstevel@tonic-gate so_unlock_single(so, SOLOCKED); 26530Sstevel@tonic-gate mutex_exit(&so->so_lock); 26540Sstevel@tonic-gate return (error); 26550Sstevel@tonic-gate 26568348SEric.Yu@Sun.COM so_bad: error = sogeterr(so, B_TRUE); 26570Sstevel@tonic-gate bad: eprintsoline(so, error); 26580Sstevel@tonic-gate goto done; 26590Sstevel@tonic-gate } 26600Sstevel@tonic-gate 26618348SEric.Yu@Sun.COM /* ARGSUSED */ 26620Sstevel@tonic-gate int 26638348SEric.Yu@Sun.COM sotpi_shutdown(struct sonode *so, int how, struct cred *cr) 26640Sstevel@tonic-gate { 26650Sstevel@tonic-gate struct T_ordrel_req ordrel_req; 26660Sstevel@tonic-gate mblk_t *mp; 26670Sstevel@tonic-gate uint_t old_state, state_change; 26680Sstevel@tonic-gate int error = 0; 26698348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 26700Sstevel@tonic-gate 26710Sstevel@tonic-gate dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n", 26727240Srh87107 (void *)so, how, pr_state(so->so_state, so->so_mode))); 26730Sstevel@tonic-gate 26740Sstevel@tonic-gate mutex_enter(&so->so_lock); 26750Sstevel@tonic-gate so_lock_single(so); /* Set SOLOCKED */ 26760Sstevel@tonic-gate 26770Sstevel@tonic-gate /* 26780Sstevel@tonic-gate * SunOS 4.X has no check for datagram sockets. 26790Sstevel@tonic-gate * 5.X checks that it is connected (ENOTCONN) 26800Sstevel@tonic-gate * X/Open requires that we check the connected state. 26810Sstevel@tonic-gate */ 26820Sstevel@tonic-gate if (!(so->so_state & SS_ISCONNECTED)) { 26830Sstevel@tonic-gate if (!xnet_skip_checks) { 26840Sstevel@tonic-gate error = ENOTCONN; 26850Sstevel@tonic-gate if (xnet_check_print) { 26860Sstevel@tonic-gate printf("sockfs: X/Open shutdown check " 26875240Snordmark "caused ENOTCONN\n"); 26880Sstevel@tonic-gate } 26890Sstevel@tonic-gate } 26900Sstevel@tonic-gate goto done; 26910Sstevel@tonic-gate } 26920Sstevel@tonic-gate /* 26930Sstevel@tonic-gate * Record the current state and then perform any state changes. 26940Sstevel@tonic-gate * Then use the difference between the old and new states to 26950Sstevel@tonic-gate * determine which messages need to be sent. 26960Sstevel@tonic-gate * This prevents e.g. duplicate T_ORDREL_REQ when there are 26970Sstevel@tonic-gate * duplicate calls to shutdown(). 26980Sstevel@tonic-gate */ 26990Sstevel@tonic-gate old_state = so->so_state; 27000Sstevel@tonic-gate 27010Sstevel@tonic-gate switch (how) { 27020Sstevel@tonic-gate case 0: 27030Sstevel@tonic-gate socantrcvmore(so); 27040Sstevel@tonic-gate break; 27050Sstevel@tonic-gate case 1: 27060Sstevel@tonic-gate socantsendmore(so); 27070Sstevel@tonic-gate break; 27080Sstevel@tonic-gate case 2: 27090Sstevel@tonic-gate socantsendmore(so); 27100Sstevel@tonic-gate socantrcvmore(so); 27110Sstevel@tonic-gate break; 27120Sstevel@tonic-gate default: 27130Sstevel@tonic-gate error = EINVAL; 27140Sstevel@tonic-gate goto done; 27150Sstevel@tonic-gate } 27160Sstevel@tonic-gate 27170Sstevel@tonic-gate /* 27180Sstevel@tonic-gate * Assumes that the SS_CANT* flags are never cleared in the above code. 27190Sstevel@tonic-gate */ 27200Sstevel@tonic-gate state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) - 27215240Snordmark (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)); 27220Sstevel@tonic-gate ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0); 27230Sstevel@tonic-gate 27240Sstevel@tonic-gate switch (state_change) { 27250Sstevel@tonic-gate case 0: 27260Sstevel@tonic-gate dprintso(so, 1, 27270Sstevel@tonic-gate ("sotpi_shutdown: nothing to send in state 0x%x\n", 27280Sstevel@tonic-gate so->so_state)); 27290Sstevel@tonic-gate goto done; 27300Sstevel@tonic-gate 27310Sstevel@tonic-gate case SS_CANTRCVMORE: 27320Sstevel@tonic-gate mutex_exit(&so->so_lock); 27330Sstevel@tonic-gate strseteof(SOTOV(so), 1); 27340Sstevel@tonic-gate /* 27350Sstevel@tonic-gate * strseteof takes care of read side wakeups, 27360Sstevel@tonic-gate * pollwakeups, and signals. 27370Sstevel@tonic-gate */ 27380Sstevel@tonic-gate /* 27390Sstevel@tonic-gate * Get the read lock before flushing data to avoid problems 27400Sstevel@tonic-gate * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 27410Sstevel@tonic-gate */ 27420Sstevel@tonic-gate mutex_enter(&so->so_lock); 27430Sstevel@tonic-gate (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 27440Sstevel@tonic-gate mutex_exit(&so->so_lock); 27450Sstevel@tonic-gate 27460Sstevel@tonic-gate /* Flush read side queue */ 27470Sstevel@tonic-gate strflushrq(SOTOV(so), FLUSHALL); 27480Sstevel@tonic-gate 27490Sstevel@tonic-gate mutex_enter(&so->so_lock); 27500Sstevel@tonic-gate so_unlock_read(so); /* Clear SOREADLOCKED */ 27510Sstevel@tonic-gate break; 27520Sstevel@tonic-gate 27530Sstevel@tonic-gate case SS_CANTSENDMORE: 27540Sstevel@tonic-gate mutex_exit(&so->so_lock); 27550Sstevel@tonic-gate strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 27560Sstevel@tonic-gate mutex_enter(&so->so_lock); 27570Sstevel@tonic-gate break; 27580Sstevel@tonic-gate 27590Sstevel@tonic-gate case SS_CANTSENDMORE|SS_CANTRCVMORE: 27600Sstevel@tonic-gate mutex_exit(&so->so_lock); 27610Sstevel@tonic-gate strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 27620Sstevel@tonic-gate strseteof(SOTOV(so), 1); 27630Sstevel@tonic-gate /* 27640Sstevel@tonic-gate * strseteof takes care of read side wakeups, 27650Sstevel@tonic-gate * pollwakeups, and signals. 27660Sstevel@tonic-gate */ 27670Sstevel@tonic-gate /* 27680Sstevel@tonic-gate * Get the read lock before flushing data to avoid problems 27690Sstevel@tonic-gate * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 27700Sstevel@tonic-gate */ 27710Sstevel@tonic-gate mutex_enter(&so->so_lock); 27720Sstevel@tonic-gate (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 27730Sstevel@tonic-gate mutex_exit(&so->so_lock); 27740Sstevel@tonic-gate 27750Sstevel@tonic-gate /* Flush read side queue */ 27760Sstevel@tonic-gate strflushrq(SOTOV(so), FLUSHALL); 27770Sstevel@tonic-gate 27780Sstevel@tonic-gate mutex_enter(&so->so_lock); 27790Sstevel@tonic-gate so_unlock_read(so); /* Clear SOREADLOCKED */ 27800Sstevel@tonic-gate break; 27810Sstevel@tonic-gate } 27820Sstevel@tonic-gate 27830Sstevel@tonic-gate ASSERT(MUTEX_HELD(&so->so_lock)); 27840Sstevel@tonic-gate 27850Sstevel@tonic-gate /* 27860Sstevel@tonic-gate * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them 27870Sstevel@tonic-gate * was set due to this call and the new state has both of them set: 27880Sstevel@tonic-gate * Send the AF_UNIX close indication 27890Sstevel@tonic-gate * For T_COTS send a discon_ind 27900Sstevel@tonic-gate * 27910Sstevel@tonic-gate * If cantsend was set due to this call: 27920Sstevel@tonic-gate * For T_COTSORD send an ordrel_ind 27930Sstevel@tonic-gate * 27940Sstevel@tonic-gate * Note that for T_CLTS there is no message sent here. 27950Sstevel@tonic-gate */ 27960Sstevel@tonic-gate if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) == 27970Sstevel@tonic-gate (SS_CANTRCVMORE|SS_CANTSENDMORE)) { 27980Sstevel@tonic-gate /* 27990Sstevel@tonic-gate * For SunOS 4.X compatibility we tell the other end 28000Sstevel@tonic-gate * that we are unable to receive at this point. 28010Sstevel@tonic-gate */ 28028348SEric.Yu@Sun.COM if (so->so_family == AF_UNIX && sti->sti_serv_type != T_CLTS) 28030Sstevel@tonic-gate so_unix_close(so); 28040Sstevel@tonic-gate 28058348SEric.Yu@Sun.COM if (sti->sti_serv_type == T_COTS) 28060Sstevel@tonic-gate error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD); 28070Sstevel@tonic-gate } 28080Sstevel@tonic-gate if ((state_change & SS_CANTSENDMORE) && 28098348SEric.Yu@Sun.COM (sti->sti_serv_type == T_COTS_ORD)) { 28100Sstevel@tonic-gate /* Send an orderly release */ 28110Sstevel@tonic-gate ordrel_req.PRIM_type = T_ORDREL_REQ; 28120Sstevel@tonic-gate 28130Sstevel@tonic-gate mutex_exit(&so->so_lock); 28140Sstevel@tonic-gate mp = soallocproto1(&ordrel_req, sizeof (ordrel_req), 28158778SErik.Nordmark@Sun.COM 0, _ALLOC_SLEEP, cr); 28160Sstevel@tonic-gate /* 28170Sstevel@tonic-gate * Send down the T_ORDREL_REQ even if there is flow control. 28180Sstevel@tonic-gate * This prevents shutdown from blocking. 28190Sstevel@tonic-gate * Note that there is no T_OK_ACK for ordrel_req. 28200Sstevel@tonic-gate */ 28210Sstevel@tonic-gate error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 28225240Snordmark MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 28230Sstevel@tonic-gate mutex_enter(&so->so_lock); 28240Sstevel@tonic-gate if (error) { 28250Sstevel@tonic-gate eprintsoline(so, error); 28260Sstevel@tonic-gate goto done; 28270Sstevel@tonic-gate } 28280Sstevel@tonic-gate } 28290Sstevel@tonic-gate 28300Sstevel@tonic-gate done: 28310Sstevel@tonic-gate so_unlock_single(so, SOLOCKED); 28320Sstevel@tonic-gate mutex_exit(&so->so_lock); 28330Sstevel@tonic-gate return (error); 28340Sstevel@tonic-gate } 28350Sstevel@tonic-gate 28360Sstevel@tonic-gate /* 28370Sstevel@tonic-gate * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send 28380Sstevel@tonic-gate * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer 28390Sstevel@tonic-gate * that we have closed. 28400Sstevel@tonic-gate * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length 28410Sstevel@tonic-gate * T_UNITDATA_REQ containing the same option. 28420Sstevel@tonic-gate * 28430Sstevel@tonic-gate * For SOCK_DGRAM half-connections (somebody connected to this end 28440Sstevel@tonic-gate * but this end is not connect) we don't know where to send any 28450Sstevel@tonic-gate * SO_UNIX_CLOSE. 28460Sstevel@tonic-gate * 28470Sstevel@tonic-gate * We have to ignore stream head errors just in case there has been 28480Sstevel@tonic-gate * a shutdown(output). 28490Sstevel@tonic-gate * Ignore any flow control to try to get the message more quickly to the peer. 28500Sstevel@tonic-gate * While locally ignoring flow control solves the problem when there 28510Sstevel@tonic-gate * is only the loopback transport on the stream it would not provide 28520Sstevel@tonic-gate * the correct AF_UNIX socket semantics when one or more modules have 28530Sstevel@tonic-gate * been pushed. 28540Sstevel@tonic-gate */ 28550Sstevel@tonic-gate void 28560Sstevel@tonic-gate so_unix_close(struct sonode *so) 28570Sstevel@tonic-gate { 28580Sstevel@tonic-gate int error; 28590Sstevel@tonic-gate struct T_opthdr toh; 28600Sstevel@tonic-gate mblk_t *mp; 28618348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 28620Sstevel@tonic-gate 28630Sstevel@tonic-gate ASSERT(MUTEX_HELD(&so->so_lock)); 28640Sstevel@tonic-gate 28650Sstevel@tonic-gate ASSERT(so->so_family == AF_UNIX); 28660Sstevel@tonic-gate 28670Sstevel@tonic-gate if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) != 28680Sstevel@tonic-gate (SS_ISCONNECTED|SS_ISBOUND)) 28690Sstevel@tonic-gate return; 28700Sstevel@tonic-gate 28710Sstevel@tonic-gate dprintso(so, 1, ("so_unix_close(%p) %s\n", 28727240Srh87107 (void *)so, pr_state(so->so_state, so->so_mode))); 28730Sstevel@tonic-gate 28740Sstevel@tonic-gate toh.level = SOL_SOCKET; 28750Sstevel@tonic-gate toh.name = SO_UNIX_CLOSE; 28760Sstevel@tonic-gate 28770Sstevel@tonic-gate /* zero length + header */ 28780Sstevel@tonic-gate toh.len = (t_uscalar_t)sizeof (struct T_opthdr); 28790Sstevel@tonic-gate toh.status = 0; 28800Sstevel@tonic-gate 28810Sstevel@tonic-gate if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) { 28820Sstevel@tonic-gate struct T_optdata_req tdr; 28830Sstevel@tonic-gate 28840Sstevel@tonic-gate tdr.PRIM_type = T_OPTDATA_REQ; 28850Sstevel@tonic-gate tdr.DATA_flag = 0; 28860Sstevel@tonic-gate 28870Sstevel@tonic-gate tdr.OPT_length = (t_scalar_t)sizeof (toh); 28880Sstevel@tonic-gate tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 28890Sstevel@tonic-gate 28900Sstevel@tonic-gate /* NOTE: holding so_lock while sleeping */ 28910Sstevel@tonic-gate mp = soallocproto2(&tdr, sizeof (tdr), 28928778SErik.Nordmark@Sun.COM &toh, sizeof (toh), 0, _ALLOC_SLEEP, CRED()); 28930Sstevel@tonic-gate } else { 28940Sstevel@tonic-gate struct T_unitdata_req tudr; 28950Sstevel@tonic-gate void *addr; 28960Sstevel@tonic-gate socklen_t addrlen; 28970Sstevel@tonic-gate void *src; 28980Sstevel@tonic-gate socklen_t srclen; 28990Sstevel@tonic-gate struct T_opthdr toh2; 29000Sstevel@tonic-gate t_scalar_t size; 29010Sstevel@tonic-gate 29020Sstevel@tonic-gate /* Connecteded DGRAM socket */ 29030Sstevel@tonic-gate 29040Sstevel@tonic-gate /* 29050Sstevel@tonic-gate * For AF_UNIX the destination address is translated to 29060Sstevel@tonic-gate * an internal name and the source address is passed as 29070Sstevel@tonic-gate * an option. 29080Sstevel@tonic-gate */ 29090Sstevel@tonic-gate /* 29100Sstevel@tonic-gate * Length and family checks. 29110Sstevel@tonic-gate */ 29128348SEric.Yu@Sun.COM error = so_addr_verify(so, sti->sti_faddr_sa, 29138348SEric.Yu@Sun.COM (t_uscalar_t)sti->sti_faddr_len); 29140Sstevel@tonic-gate if (error) { 29150Sstevel@tonic-gate eprintsoline(so, error); 29160Sstevel@tonic-gate return; 29170Sstevel@tonic-gate } 29188348SEric.Yu@Sun.COM if (sti->sti_faddr_noxlate) { 29190Sstevel@tonic-gate /* 29200Sstevel@tonic-gate * Already have a transport internal address. Do not 29210Sstevel@tonic-gate * pass any (transport internal) source address. 29220Sstevel@tonic-gate */ 29238348SEric.Yu@Sun.COM addr = sti->sti_faddr_sa; 29248348SEric.Yu@Sun.COM addrlen = (t_uscalar_t)sti->sti_faddr_len; 29250Sstevel@tonic-gate src = NULL; 29260Sstevel@tonic-gate srclen = 0; 29270Sstevel@tonic-gate } else { 29280Sstevel@tonic-gate /* 29290Sstevel@tonic-gate * Pass the sockaddr_un source address as an option 29300Sstevel@tonic-gate * and translate the remote address. 29318348SEric.Yu@Sun.COM * Holding so_lock thus sti_laddr_sa can not change. 29320Sstevel@tonic-gate */ 29338348SEric.Yu@Sun.COM src = sti->sti_laddr_sa; 29348348SEric.Yu@Sun.COM srclen = (socklen_t)sti->sti_laddr_len; 29350Sstevel@tonic-gate dprintso(so, 1, 29365240Snordmark ("so_ux_close: srclen %d, src %p\n", 29375240Snordmark srclen, src)); 29380Sstevel@tonic-gate error = so_ux_addr_xlate(so, 29398348SEric.Yu@Sun.COM sti->sti_faddr_sa, 29408348SEric.Yu@Sun.COM (socklen_t)sti->sti_faddr_len, 0, 29415240Snordmark &addr, &addrlen); 29420Sstevel@tonic-gate if (error) { 29430Sstevel@tonic-gate eprintsoline(so, error); 29440Sstevel@tonic-gate return; 29450Sstevel@tonic-gate } 29460Sstevel@tonic-gate } 29470Sstevel@tonic-gate tudr.PRIM_type = T_UNITDATA_REQ; 29480Sstevel@tonic-gate tudr.DEST_length = addrlen; 29490Sstevel@tonic-gate tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 29500Sstevel@tonic-gate if (srclen == 0) { 29510Sstevel@tonic-gate tudr.OPT_length = (t_scalar_t)sizeof (toh); 29520Sstevel@tonic-gate tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 29535240Snordmark _TPI_ALIGN_TOPT(addrlen)); 29540Sstevel@tonic-gate 29550Sstevel@tonic-gate size = tudr.OPT_offset + tudr.OPT_length; 29560Sstevel@tonic-gate /* NOTE: holding so_lock while sleeping */ 29570Sstevel@tonic-gate mp = soallocproto2(&tudr, sizeof (tudr), 29588778SErik.Nordmark@Sun.COM addr, addrlen, size, _ALLOC_SLEEP, CRED()); 29590Sstevel@tonic-gate mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen); 29600Sstevel@tonic-gate soappendmsg(mp, &toh, sizeof (toh)); 29610Sstevel@tonic-gate } else { 29620Sstevel@tonic-gate /* 29630Sstevel@tonic-gate * There is a AF_UNIX sockaddr_un to include as a 29640Sstevel@tonic-gate * source address option. 29650Sstevel@tonic-gate */ 29660Sstevel@tonic-gate tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) + 29670Sstevel@tonic-gate _TPI_ALIGN_TOPT(srclen)); 29680Sstevel@tonic-gate tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 29690Sstevel@tonic-gate _TPI_ALIGN_TOPT(addrlen)); 29700Sstevel@tonic-gate 29710Sstevel@tonic-gate toh2.level = SOL_SOCKET; 29720Sstevel@tonic-gate toh2.name = SO_SRCADDR; 29730Sstevel@tonic-gate toh2.len = (t_uscalar_t)(srclen + 29745240Snordmark sizeof (struct T_opthdr)); 29750Sstevel@tonic-gate toh2.status = 0; 29760Sstevel@tonic-gate 29770Sstevel@tonic-gate size = tudr.OPT_offset + tudr.OPT_length; 29780Sstevel@tonic-gate 29790Sstevel@tonic-gate /* NOTE: holding so_lock while sleeping */ 29800Sstevel@tonic-gate mp = soallocproto2(&tudr, sizeof (tudr), 29818778SErik.Nordmark@Sun.COM addr, addrlen, size, _ALLOC_SLEEP, CRED()); 29820Sstevel@tonic-gate mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 29830Sstevel@tonic-gate soappendmsg(mp, &toh, sizeof (toh)); 29840Sstevel@tonic-gate soappendmsg(mp, &toh2, sizeof (toh2)); 29850Sstevel@tonic-gate soappendmsg(mp, src, srclen); 29860Sstevel@tonic-gate mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 29870Sstevel@tonic-gate } 29880Sstevel@tonic-gate ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 29890Sstevel@tonic-gate } 29900Sstevel@tonic-gate mutex_exit(&so->so_lock); 29910Sstevel@tonic-gate error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 29925240Snordmark MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 29930Sstevel@tonic-gate mutex_enter(&so->so_lock); 29940Sstevel@tonic-gate } 29950Sstevel@tonic-gate 29960Sstevel@tonic-gate /* 29970Sstevel@tonic-gate * Called by sotpi_recvmsg when reading a non-zero amount of data. 29980Sstevel@tonic-gate * In addition, the caller typically verifies that there is some 29990Sstevel@tonic-gate * potential state to clear by checking 30000Sstevel@tonic-gate * if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) 30010Sstevel@tonic-gate * before calling this routine. 30020Sstevel@tonic-gate * Note that such a check can be made without holding so_lock since 30030Sstevel@tonic-gate * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg 30048348SEric.Yu@Sun.COM * decrements sti_oobsigcnt. 30050Sstevel@tonic-gate * 30060Sstevel@tonic-gate * When data is read *after* the point that all pending 30070Sstevel@tonic-gate * oob data has been consumed the oob indication is cleared. 30080Sstevel@tonic-gate * 30090Sstevel@tonic-gate * This logic keeps select/poll returning POLLRDBAND and 30100Sstevel@tonic-gate * SIOCATMARK returning true until we have read past 30110Sstevel@tonic-gate * the mark. 30120Sstevel@tonic-gate */ 30130Sstevel@tonic-gate static void 30140Sstevel@tonic-gate sorecv_update_oobstate(struct sonode *so) 30150Sstevel@tonic-gate { 30168348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 30178348SEric.Yu@Sun.COM 30180Sstevel@tonic-gate mutex_enter(&so->so_lock); 30190Sstevel@tonic-gate ASSERT(so_verify_oobstate(so)); 30200Sstevel@tonic-gate dprintso(so, 1, 30215240Snordmark ("sorecv_update_oobstate: counts %d/%d state %s\n", 30228348SEric.Yu@Sun.COM sti->sti_oobsigcnt, 30238348SEric.Yu@Sun.COM sti->sti_oobcnt, pr_state(so->so_state, so->so_mode))); 30248348SEric.Yu@Sun.COM if (sti->sti_oobsigcnt == 0) { 30250Sstevel@tonic-gate /* No more pending oob indications */ 30260Sstevel@tonic-gate so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK); 30270Sstevel@tonic-gate freemsg(so->so_oobmsg); 30280Sstevel@tonic-gate so->so_oobmsg = NULL; 30290Sstevel@tonic-gate } 30300Sstevel@tonic-gate ASSERT(so_verify_oobstate(so)); 30310Sstevel@tonic-gate mutex_exit(&so->so_lock); 30320Sstevel@tonic-gate } 30330Sstevel@tonic-gate 30340Sstevel@tonic-gate /* 30350Sstevel@tonic-gate * Handle recv* calls for an so which has NL7C saved recv mblk_t(s). 30360Sstevel@tonic-gate */ 30370Sstevel@tonic-gate static int 30380Sstevel@tonic-gate nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp) 30390Sstevel@tonic-gate { 30408348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 30410Sstevel@tonic-gate int error = 0; 30420Sstevel@tonic-gate mblk_t *tmp = NULL; 30430Sstevel@tonic-gate mblk_t *pmp = NULL; 30448348SEric.Yu@Sun.COM mblk_t *nmp = sti->sti_nl7c_rcv_mp; 30450Sstevel@tonic-gate 30460Sstevel@tonic-gate ASSERT(nmp != NULL); 30470Sstevel@tonic-gate 30480Sstevel@tonic-gate while (nmp != NULL && uiop->uio_resid > 0) { 30490Sstevel@tonic-gate ssize_t n; 30500Sstevel@tonic-gate 30510Sstevel@tonic-gate if (DB_TYPE(nmp) == M_DATA) { 30520Sstevel@tonic-gate /* 30530Sstevel@tonic-gate * We have some data, uiomove up to resid bytes. 30540Sstevel@tonic-gate */ 30550Sstevel@tonic-gate n = MIN(MBLKL(nmp), uiop->uio_resid); 30560Sstevel@tonic-gate if (n > 0) 30570Sstevel@tonic-gate error = uiomove(nmp->b_rptr, n, UIO_READ, uiop); 30580Sstevel@tonic-gate nmp->b_rptr += n; 30590Sstevel@tonic-gate if (nmp->b_rptr == nmp->b_wptr) { 30600Sstevel@tonic-gate pmp = nmp; 30610Sstevel@tonic-gate nmp = nmp->b_cont; 30620Sstevel@tonic-gate } 30631974Sbrutus if (error) 30641974Sbrutus break; 30650Sstevel@tonic-gate } else { 30660Sstevel@tonic-gate /* 30670Sstevel@tonic-gate * We only handle data, save for caller to handle. 30680Sstevel@tonic-gate */ 30690Sstevel@tonic-gate if (pmp != NULL) { 30700Sstevel@tonic-gate pmp->b_cont = nmp->b_cont; 30710Sstevel@tonic-gate } 30720Sstevel@tonic-gate nmp->b_cont = NULL; 30730Sstevel@tonic-gate if (*rmp == NULL) { 30740Sstevel@tonic-gate *rmp = nmp; 30750Sstevel@tonic-gate } else { 30761974Sbrutus tmp->b_cont = nmp; 30770Sstevel@tonic-gate } 30780Sstevel@tonic-gate nmp = nmp->b_cont; 30790Sstevel@tonic-gate tmp = nmp; 30800Sstevel@tonic-gate } 30810Sstevel@tonic-gate } 30820Sstevel@tonic-gate if (pmp != NULL) { 30830Sstevel@tonic-gate /* Free any mblk_t(s) which we have consumed */ 30840Sstevel@tonic-gate pmp->b_cont = NULL; 30858348SEric.Yu@Sun.COM freemsg(sti->sti_nl7c_rcv_mp); 30868348SEric.Yu@Sun.COM } 30878348SEric.Yu@Sun.COM if ((sti->sti_nl7c_rcv_mp = nmp) == NULL) { 30881974Sbrutus /* Last mblk_t so return the saved kstrgetmsg() rval/error */ 30891974Sbrutus if (error == 0) { 30908348SEric.Yu@Sun.COM rval_t *p = (rval_t *)&sti->sti_nl7c_rcv_rval; 30911974Sbrutus 30921974Sbrutus error = p->r_v.r_v2; 30931974Sbrutus p->r_v.r_v2 = 0; 30941974Sbrutus } 30958348SEric.Yu@Sun.COM rp->r_vals = sti->sti_nl7c_rcv_rval; 30968348SEric.Yu@Sun.COM sti->sti_nl7c_rcv_rval = 0; 30970Sstevel@tonic-gate } else { 30980Sstevel@tonic-gate /* More mblk_t(s) to process so no rval to return */ 30990Sstevel@tonic-gate rp->r_vals = 0; 31000Sstevel@tonic-gate } 31010Sstevel@tonic-gate return (error); 31020Sstevel@tonic-gate } 31030Sstevel@tonic-gate /* 31040Sstevel@tonic-gate * Receive the next message on the queue. 31050Sstevel@tonic-gate * If msg_controllen is non-zero when called the caller is interested in 31060Sstevel@tonic-gate * any received control info (options). 31070Sstevel@tonic-gate * If msg_namelen is non-zero when called the caller is interested in 31080Sstevel@tonic-gate * any received source address. 31090Sstevel@tonic-gate * The routine returns with msg_control and msg_name pointing to 31100Sstevel@tonic-gate * kmem_alloc'ed memory which the caller has to free. 31110Sstevel@tonic-gate */ 31128348SEric.Yu@Sun.COM /* ARGSUSED */ 31130Sstevel@tonic-gate int 31148348SEric.Yu@Sun.COM sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 31158348SEric.Yu@Sun.COM struct cred *cr) 31160Sstevel@tonic-gate { 31170Sstevel@tonic-gate union T_primitives *tpr; 31180Sstevel@tonic-gate mblk_t *mp; 31190Sstevel@tonic-gate uchar_t pri; 31200Sstevel@tonic-gate int pflag, opflag; 31210Sstevel@tonic-gate void *control; 31220Sstevel@tonic-gate t_uscalar_t controllen; 31230Sstevel@tonic-gate t_uscalar_t namelen; 31240Sstevel@tonic-gate int so_state = so->so_state; /* Snapshot */ 31250Sstevel@tonic-gate ssize_t saved_resid; 31260Sstevel@tonic-gate rval_t rval; 31270Sstevel@tonic-gate int flags; 31280Sstevel@tonic-gate clock_t timout; 31296707Sbrutus int error = 0; 31308348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 31310Sstevel@tonic-gate 31320Sstevel@tonic-gate flags = msg->msg_flags; 31330Sstevel@tonic-gate msg->msg_flags = 0; 31340Sstevel@tonic-gate 31350Sstevel@tonic-gate dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n", 31367240Srh87107 (void *)so, (void *)msg, flags, 31375240Snordmark pr_state(so->so_state, so->so_mode), so->so_error)); 31380Sstevel@tonic-gate 31398348SEric.Yu@Sun.COM if (so->so_version == SOV_STREAM) { 31408348SEric.Yu@Sun.COM so_update_attrs(so, SOACC); 31418348SEric.Yu@Sun.COM /* The imaginary "sockmod" has been popped - act as a stream */ 31428348SEric.Yu@Sun.COM return (strread(SOTOV(so), uiop, cr)); 31438348SEric.Yu@Sun.COM } 31448348SEric.Yu@Sun.COM 31450Sstevel@tonic-gate /* 31460Sstevel@tonic-gate * If we are not connected because we have never been connected 31470Sstevel@tonic-gate * we return ENOTCONN. If we have been connected (but are no longer 31480Sstevel@tonic-gate * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return 31490Sstevel@tonic-gate * the EOF. 31500Sstevel@tonic-gate * 31510Sstevel@tonic-gate * An alternative would be to post an ENOTCONN error in stream head 31520Sstevel@tonic-gate * (read+write) and clear it when we're connected. However, that error 31530Sstevel@tonic-gate * would cause incorrect poll/select behavior! 31540Sstevel@tonic-gate */ 31550Sstevel@tonic-gate if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 31560Sstevel@tonic-gate (so->so_mode & SM_CONNREQUIRED)) { 31570Sstevel@tonic-gate return (ENOTCONN); 31580Sstevel@tonic-gate } 31590Sstevel@tonic-gate 31600Sstevel@tonic-gate /* 31610Sstevel@tonic-gate * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but 31620Sstevel@tonic-gate * after checking that the read queue is empty) and returns zero. 31630Sstevel@tonic-gate * This implementation will sleep (in kstrgetmsg) even if uio_resid 31640Sstevel@tonic-gate * is zero. 31650Sstevel@tonic-gate */ 31660Sstevel@tonic-gate 31670Sstevel@tonic-gate if (flags & MSG_OOB) { 31680Sstevel@tonic-gate /* Check that the transport supports OOB */ 31690Sstevel@tonic-gate if (!(so->so_mode & SM_EXDATA)) 31700Sstevel@tonic-gate return (EOPNOTSUPP); 31718348SEric.Yu@Sun.COM so_update_attrs(so, SOACC); 31728348SEric.Yu@Sun.COM return (sorecvoob(so, msg, uiop, flags, 31738348SEric.Yu@Sun.COM (so->so_options & SO_OOBINLINE))); 31748348SEric.Yu@Sun.COM } 31758348SEric.Yu@Sun.COM 31768348SEric.Yu@Sun.COM so_update_attrs(so, SOACC); 31770Sstevel@tonic-gate 31780Sstevel@tonic-gate /* 31790Sstevel@tonic-gate * Set msg_controllen and msg_namelen to zero here to make it 31800Sstevel@tonic-gate * simpler in the cases that no control or name is returned. 31810Sstevel@tonic-gate */ 31820Sstevel@tonic-gate controllen = msg->msg_controllen; 31830Sstevel@tonic-gate namelen = msg->msg_namelen; 31840Sstevel@tonic-gate msg->msg_controllen = 0; 31850Sstevel@tonic-gate msg->msg_namelen = 0; 31860Sstevel@tonic-gate 31870Sstevel@tonic-gate dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n", 31885240Snordmark namelen, controllen)); 31890Sstevel@tonic-gate 31901974Sbrutus mutex_enter(&so->so_lock); 31910Sstevel@tonic-gate /* 31920Sstevel@tonic-gate * If an NL7C enabled socket and not waiting for write data. 31930Sstevel@tonic-gate */ 31948348SEric.Yu@Sun.COM if ((sti->sti_nl7c_flags & (NL7C_ENABLED | NL7C_WAITWRITE)) == 31950Sstevel@tonic-gate NL7C_ENABLED) { 31968348SEric.Yu@Sun.COM if (sti->sti_nl7c_uri) { 31971974Sbrutus /* Close uri processing for a previous request */ 31980Sstevel@tonic-gate nl7c_close(so); 31990Sstevel@tonic-gate } 32008348SEric.Yu@Sun.COM if ((so_state & SS_CANTRCVMORE) && 32018348SEric.Yu@Sun.COM sti->sti_nl7c_rcv_mp == NULL) { 32021974Sbrutus /* Nothing to process, EOF */ 32031974Sbrutus mutex_exit(&so->so_lock); 32041974Sbrutus return (0); 32058348SEric.Yu@Sun.COM } else if (sti->sti_nl7c_flags & NL7C_SOPERSIST) { 32061974Sbrutus /* Persistent NL7C socket, try to process request */ 32071974Sbrutus boolean_t ret; 32081974Sbrutus 32091974Sbrutus ret = nl7c_process(so, 32101974Sbrutus (so->so_state & (SS_NONBLOCK|SS_NDELAY))); 32118348SEric.Yu@Sun.COM rval.r_vals = sti->sti_nl7c_rcv_rval; 32121974Sbrutus error = rval.r_v.r_v2; 32131974Sbrutus if (error) { 32141974Sbrutus /* Error of some sort, return it */ 32151974Sbrutus mutex_exit(&so->so_lock); 32161974Sbrutus return (error); 32171974Sbrutus } 32188348SEric.Yu@Sun.COM if (sti->sti_nl7c_flags && 32198348SEric.Yu@Sun.COM ! (sti->sti_nl7c_flags & NL7C_WAITWRITE)) { 32201974Sbrutus /* 32211974Sbrutus * Still an NL7C socket and no data 32221974Sbrutus * to pass up to the caller. 32231974Sbrutus */ 32241974Sbrutus mutex_exit(&so->so_lock); 32251974Sbrutus if (ret) { 32261974Sbrutus /* EOF */ 32271974Sbrutus return (0); 32281974Sbrutus } else { 32291974Sbrutus /* Need more data */ 32301974Sbrutus return (EAGAIN); 32311974Sbrutus } 32321974Sbrutus } 32331974Sbrutus } else { 32340Sstevel@tonic-gate /* 32351974Sbrutus * Not persistent so no further NL7C processing. 32360Sstevel@tonic-gate */ 32378348SEric.Yu@Sun.COM sti->sti_nl7c_flags = 0; 32380Sstevel@tonic-gate } 32390Sstevel@tonic-gate } 32400Sstevel@tonic-gate /* 32410Sstevel@tonic-gate * Only one reader is allowed at any given time. This is needed 32420Sstevel@tonic-gate * for T_EXDATA handling and, in the future, MSG_WAITALL. 32430Sstevel@tonic-gate * 32440Sstevel@tonic-gate * This is slightly different that BSD behavior in that it fails with 32450Sstevel@tonic-gate * EWOULDBLOCK when using nonblocking io. In BSD the read queue access 32460Sstevel@tonic-gate * is single-threaded using sblock(), which is dropped while waiting 32470Sstevel@tonic-gate * for data to appear. The difference shows up e.g. if one 32480Sstevel@tonic-gate * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor 32490Sstevel@tonic-gate * does use nonblocking io and different threads are reading each 32500Sstevel@tonic-gate * file descriptor. In BSD there would never be an EWOULDBLOCK error 32510Sstevel@tonic-gate * in this case as long as the read queue doesn't get empty. 32520Sstevel@tonic-gate * In this implementation the thread using nonblocking io can 32530Sstevel@tonic-gate * get an EWOULDBLOCK error due to the blocking thread executing 32540Sstevel@tonic-gate * e.g. in the uiomove in kstrgetmsg. 32550Sstevel@tonic-gate * This difference is not believed to be significant. 32560Sstevel@tonic-gate */ 32573749Sethindra /* Set SOREADLOCKED */ 32583749Sethindra error = so_lock_read_intr(so, 32593749Sethindra uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0)); 32600Sstevel@tonic-gate mutex_exit(&so->so_lock); 32610Sstevel@tonic-gate if (error) 32620Sstevel@tonic-gate return (error); 32630Sstevel@tonic-gate 32640Sstevel@tonic-gate /* 32650Sstevel@tonic-gate * Tell kstrgetmsg to not inspect the stream head errors until all 32660Sstevel@tonic-gate * queued data has been consumed. 32670Sstevel@tonic-gate * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set. 32680Sstevel@tonic-gate * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block. 32690Sstevel@tonic-gate * 32700Sstevel@tonic-gate * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and 32710Sstevel@tonic-gate * to T_OPTDATA_IND that do not contain any user-visible control msg. 32720Sstevel@tonic-gate * Note that MSG_WAITALL set with MSG_PEEK is a noop. 32730Sstevel@tonic-gate */ 32740Sstevel@tonic-gate pflag = MSG_ANY | MSG_DELAYERROR; 32750Sstevel@tonic-gate if (flags & MSG_PEEK) { 32760Sstevel@tonic-gate pflag |= MSG_IPEEK; 32770Sstevel@tonic-gate flags &= ~MSG_WAITALL; 32780Sstevel@tonic-gate } 32790Sstevel@tonic-gate if (so->so_mode & SM_ATOMIC) 32800Sstevel@tonic-gate pflag |= MSG_DISCARDTAIL; 32810Sstevel@tonic-gate 32820Sstevel@tonic-gate if (flags & MSG_DONTWAIT) 32830Sstevel@tonic-gate timout = 0; 32840Sstevel@tonic-gate else 32850Sstevel@tonic-gate timout = -1; 32860Sstevel@tonic-gate opflag = pflag; 32870Sstevel@tonic-gate retry: 32880Sstevel@tonic-gate saved_resid = uiop->uio_resid; 32890Sstevel@tonic-gate pri = 0; 32900Sstevel@tonic-gate mp = NULL; 32918348SEric.Yu@Sun.COM if (sti->sti_nl7c_rcv_mp != NULL) { 32921974Sbrutus /* Already kstrgetmsg()ed saved mblk(s) from NL7C */ 32930Sstevel@tonic-gate error = nl7c_sorecv(so, &mp, uiop, &rval); 32940Sstevel@tonic-gate } else { 32950Sstevel@tonic-gate error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag, 32960Sstevel@tonic-gate timout, &rval); 32970Sstevel@tonic-gate } 32988348SEric.Yu@Sun.COM if (error != 0) { 32998348SEric.Yu@Sun.COM /* kstrgetmsg returns ETIME when timeout expires */ 33008348SEric.Yu@Sun.COM if (error == ETIME) 33018348SEric.Yu@Sun.COM error = EWOULDBLOCK; 33026707Sbrutus goto out; 33030Sstevel@tonic-gate } 33040Sstevel@tonic-gate /* 33050Sstevel@tonic-gate * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 33060Sstevel@tonic-gate * For non-datagrams MOREDATA is used to set MSG_EOR. 33070Sstevel@tonic-gate */ 33080Sstevel@tonic-gate ASSERT(!(rval.r_val1 & MORECTL)); 33090Sstevel@tonic-gate if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 33100Sstevel@tonic-gate msg->msg_flags |= MSG_TRUNC; 33110Sstevel@tonic-gate 33120Sstevel@tonic-gate if (mp == NULL) { 33130Sstevel@tonic-gate dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n")); 33140Sstevel@tonic-gate /* 33150Sstevel@tonic-gate * 4.3BSD and 4.4BSD clears the mark when peeking across it. 33160Sstevel@tonic-gate * The draft Posix socket spec states that the mark should 33170Sstevel@tonic-gate * not be cleared when peeking. We follow the latter. 33180Sstevel@tonic-gate */ 33190Sstevel@tonic-gate if ((so->so_state & 33200Sstevel@tonic-gate (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 33210Sstevel@tonic-gate (uiop->uio_resid != saved_resid) && 33220Sstevel@tonic-gate !(flags & MSG_PEEK)) { 33230Sstevel@tonic-gate sorecv_update_oobstate(so); 33240Sstevel@tonic-gate } 33250Sstevel@tonic-gate 33260Sstevel@tonic-gate mutex_enter(&so->so_lock); 33270Sstevel@tonic-gate /* Set MSG_EOR based on MOREDATA */ 33280Sstevel@tonic-gate if (!(rval.r_val1 & MOREDATA)) { 33290Sstevel@tonic-gate if (so->so_state & SS_SAVEDEOR) { 33300Sstevel@tonic-gate msg->msg_flags |= MSG_EOR; 33310Sstevel@tonic-gate so->so_state &= ~SS_SAVEDEOR; 33320Sstevel@tonic-gate } 33330Sstevel@tonic-gate } 33340Sstevel@tonic-gate /* 33350Sstevel@tonic-gate * If some data was received (i.e. not EOF) and the 33360Sstevel@tonic-gate * read/recv* has not been satisfied wait for some more. 33370Sstevel@tonic-gate */ 33380Sstevel@tonic-gate if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 33390Sstevel@tonic-gate uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 33400Sstevel@tonic-gate mutex_exit(&so->so_lock); 33410Sstevel@tonic-gate pflag = opflag | MSG_NOMARK; 33420Sstevel@tonic-gate goto retry; 33430Sstevel@tonic-gate } 33446707Sbrutus goto out_locked; 33450Sstevel@tonic-gate } 33460Sstevel@tonic-gate 33470Sstevel@tonic-gate /* strsock_proto has already verified length and alignment */ 33480Sstevel@tonic-gate tpr = (union T_primitives *)mp->b_rptr; 33490Sstevel@tonic-gate dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type)); 33500Sstevel@tonic-gate 33510Sstevel@tonic-gate switch (tpr->type) { 33520Sstevel@tonic-gate case T_DATA_IND: { 33530Sstevel@tonic-gate if ((so->so_state & 33540Sstevel@tonic-gate (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 33550Sstevel@tonic-gate (uiop->uio_resid != saved_resid) && 33560Sstevel@tonic-gate !(flags & MSG_PEEK)) { 33570Sstevel@tonic-gate sorecv_update_oobstate(so); 33580Sstevel@tonic-gate } 33590Sstevel@tonic-gate 33600Sstevel@tonic-gate /* 33610Sstevel@tonic-gate * Set msg_flags to MSG_EOR based on 33620Sstevel@tonic-gate * MORE_flag and MOREDATA. 33630Sstevel@tonic-gate */ 33640Sstevel@tonic-gate mutex_enter(&so->so_lock); 33650Sstevel@tonic-gate so->so_state &= ~SS_SAVEDEOR; 33660Sstevel@tonic-gate if (!(tpr->data_ind.MORE_flag & 1)) { 33670Sstevel@tonic-gate if (!(rval.r_val1 & MOREDATA)) 33680Sstevel@tonic-gate msg->msg_flags |= MSG_EOR; 33690Sstevel@tonic-gate else 33700Sstevel@tonic-gate so->so_state |= SS_SAVEDEOR; 33710Sstevel@tonic-gate } 33720Sstevel@tonic-gate freemsg(mp); 33730Sstevel@tonic-gate /* 33740Sstevel@tonic-gate * If some data was received (i.e. not EOF) and the 33750Sstevel@tonic-gate * read/recv* has not been satisfied wait for some more. 33760Sstevel@tonic-gate */ 33770Sstevel@tonic-gate if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 33780Sstevel@tonic-gate uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 33790Sstevel@tonic-gate mutex_exit(&so->so_lock); 33800Sstevel@tonic-gate pflag = opflag | MSG_NOMARK; 33810Sstevel@tonic-gate goto retry; 33820Sstevel@tonic-gate } 33836707Sbrutus goto out_locked; 33840Sstevel@tonic-gate } 33850Sstevel@tonic-gate case T_UNITDATA_IND: { 33860Sstevel@tonic-gate void *addr; 33870Sstevel@tonic-gate t_uscalar_t addrlen; 33880Sstevel@tonic-gate void *abuf; 33890Sstevel@tonic-gate t_uscalar_t optlen; 33900Sstevel@tonic-gate void *opt; 33910Sstevel@tonic-gate 33920Sstevel@tonic-gate if ((so->so_state & 33930Sstevel@tonic-gate (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 33940Sstevel@tonic-gate (uiop->uio_resid != saved_resid) && 33950Sstevel@tonic-gate !(flags & MSG_PEEK)) { 33960Sstevel@tonic-gate sorecv_update_oobstate(so); 33970Sstevel@tonic-gate } 33980Sstevel@tonic-gate 33990Sstevel@tonic-gate if (namelen != 0) { 34000Sstevel@tonic-gate /* Caller wants source address */ 34010Sstevel@tonic-gate addrlen = tpr->unitdata_ind.SRC_length; 34020Sstevel@tonic-gate addr = sogetoff(mp, 34035240Snordmark tpr->unitdata_ind.SRC_offset, 34045240Snordmark addrlen, 1); 34050Sstevel@tonic-gate if (addr == NULL) { 34060Sstevel@tonic-gate freemsg(mp); 34070Sstevel@tonic-gate error = EPROTO; 34080Sstevel@tonic-gate eprintsoline(so, error); 34096707Sbrutus goto out; 34100Sstevel@tonic-gate } 34110Sstevel@tonic-gate if (so->so_family == AF_UNIX) { 34120Sstevel@tonic-gate /* 34130Sstevel@tonic-gate * Can not use the transport level address. 34140Sstevel@tonic-gate * If there is a SO_SRCADDR option carrying 34150Sstevel@tonic-gate * the socket level address it will be 34160Sstevel@tonic-gate * extracted below. 34170Sstevel@tonic-gate */ 34180Sstevel@tonic-gate addr = NULL; 34190Sstevel@tonic-gate addrlen = 0; 34200Sstevel@tonic-gate } 34210Sstevel@tonic-gate } 34220Sstevel@tonic-gate optlen = tpr->unitdata_ind.OPT_length; 34230Sstevel@tonic-gate if (optlen != 0) { 34240Sstevel@tonic-gate t_uscalar_t ncontrollen; 34250Sstevel@tonic-gate 34260Sstevel@tonic-gate /* 34270Sstevel@tonic-gate * Extract any source address option. 34280Sstevel@tonic-gate * Determine how large cmsg buffer is needed. 34290Sstevel@tonic-gate */ 34300Sstevel@tonic-gate opt = sogetoff(mp, 34315240Snordmark tpr->unitdata_ind.OPT_offset, 34325240Snordmark optlen, __TPI_ALIGN_SIZE); 34330Sstevel@tonic-gate 34340Sstevel@tonic-gate if (opt == NULL) { 34350Sstevel@tonic-gate freemsg(mp); 34360Sstevel@tonic-gate error = EPROTO; 34370Sstevel@tonic-gate eprintsoline(so, error); 34386707Sbrutus goto out; 34390Sstevel@tonic-gate } 34400Sstevel@tonic-gate if (so->so_family == AF_UNIX) 34410Sstevel@tonic-gate so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 34420Sstevel@tonic-gate ncontrollen = so_cmsglen(mp, opt, optlen, 34435240Snordmark !(flags & MSG_XPG4_2)); 34440Sstevel@tonic-gate if (controllen != 0) 34450Sstevel@tonic-gate controllen = ncontrollen; 34460Sstevel@tonic-gate else if (ncontrollen != 0) 34470Sstevel@tonic-gate msg->msg_flags |= MSG_CTRUNC; 34480Sstevel@tonic-gate } else { 34490Sstevel@tonic-gate controllen = 0; 34500Sstevel@tonic-gate } 34510Sstevel@tonic-gate 34520Sstevel@tonic-gate if (namelen != 0) { 34530Sstevel@tonic-gate /* 34540Sstevel@tonic-gate * Return address to caller. 34550Sstevel@tonic-gate * Caller handles truncation if length 34560Sstevel@tonic-gate * exceeds msg_namelen. 34570Sstevel@tonic-gate * NOTE: AF_UNIX NUL termination is ensured by 34580Sstevel@tonic-gate * the sender's copyin_name(). 34590Sstevel@tonic-gate */ 34600Sstevel@tonic-gate abuf = kmem_alloc(addrlen, KM_SLEEP); 34610Sstevel@tonic-gate 34620Sstevel@tonic-gate bcopy(addr, abuf, addrlen); 34630Sstevel@tonic-gate msg->msg_name = abuf; 34640Sstevel@tonic-gate msg->msg_namelen = addrlen; 34650Sstevel@tonic-gate } 34660Sstevel@tonic-gate 34670Sstevel@tonic-gate if (controllen != 0) { 34680Sstevel@tonic-gate /* 34690Sstevel@tonic-gate * Return control msg to caller. 34700Sstevel@tonic-gate * Caller handles truncation if length 34710Sstevel@tonic-gate * exceeds msg_controllen. 34720Sstevel@tonic-gate */ 34734420Samehta control = kmem_zalloc(controllen, KM_SLEEP); 34740Sstevel@tonic-gate 34750Sstevel@tonic-gate error = so_opt2cmsg(mp, opt, optlen, 34765240Snordmark !(flags & MSG_XPG4_2), 34775240Snordmark control, controllen); 34780Sstevel@tonic-gate if (error) { 34790Sstevel@tonic-gate freemsg(mp); 34800Sstevel@tonic-gate if (msg->msg_namelen != 0) 34810Sstevel@tonic-gate kmem_free(msg->msg_name, 34825240Snordmark msg->msg_namelen); 34830Sstevel@tonic-gate kmem_free(control, controllen); 34840Sstevel@tonic-gate eprintsoline(so, error); 34856707Sbrutus goto out; 34860Sstevel@tonic-gate } 34870Sstevel@tonic-gate msg->msg_control = control; 34880Sstevel@tonic-gate msg->msg_controllen = controllen; 34890Sstevel@tonic-gate } 34900Sstevel@tonic-gate 34910Sstevel@tonic-gate freemsg(mp); 34926707Sbrutus goto out; 34930Sstevel@tonic-gate } 34940Sstevel@tonic-gate case T_OPTDATA_IND: { 34950Sstevel@tonic-gate struct T_optdata_req *tdr; 34960Sstevel@tonic-gate void *opt; 34970Sstevel@tonic-gate t_uscalar_t optlen; 34980Sstevel@tonic-gate 34990Sstevel@tonic-gate if ((so->so_state & 35000Sstevel@tonic-gate (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 35010Sstevel@tonic-gate (uiop->uio_resid != saved_resid) && 35020Sstevel@tonic-gate !(flags & MSG_PEEK)) { 35030Sstevel@tonic-gate sorecv_update_oobstate(so); 35040Sstevel@tonic-gate } 35050Sstevel@tonic-gate 35060Sstevel@tonic-gate tdr = (struct T_optdata_req *)mp->b_rptr; 35070Sstevel@tonic-gate optlen = tdr->OPT_length; 35080Sstevel@tonic-gate if (optlen != 0) { 35090Sstevel@tonic-gate t_uscalar_t ncontrollen; 35100Sstevel@tonic-gate /* 35110Sstevel@tonic-gate * Determine how large cmsg buffer is needed. 35120Sstevel@tonic-gate */ 35130Sstevel@tonic-gate opt = sogetoff(mp, 35145240Snordmark tpr->optdata_ind.OPT_offset, 35155240Snordmark optlen, __TPI_ALIGN_SIZE); 35160Sstevel@tonic-gate 35170Sstevel@tonic-gate if (opt == NULL) { 35180Sstevel@tonic-gate freemsg(mp); 35190Sstevel@tonic-gate error = EPROTO; 35200Sstevel@tonic-gate eprintsoline(so, error); 35216707Sbrutus goto out; 35220Sstevel@tonic-gate } 35230Sstevel@tonic-gate 35240Sstevel@tonic-gate ncontrollen = so_cmsglen(mp, opt, optlen, 35255240Snordmark !(flags & MSG_XPG4_2)); 35260Sstevel@tonic-gate if (controllen != 0) 35270Sstevel@tonic-gate controllen = ncontrollen; 35280Sstevel@tonic-gate else if (ncontrollen != 0) 35290Sstevel@tonic-gate msg->msg_flags |= MSG_CTRUNC; 35300Sstevel@tonic-gate } else { 35310Sstevel@tonic-gate controllen = 0; 35320Sstevel@tonic-gate } 35330Sstevel@tonic-gate 35340Sstevel@tonic-gate if (controllen != 0) { 35350Sstevel@tonic-gate /* 35360Sstevel@tonic-gate * Return control msg to caller. 35370Sstevel@tonic-gate * Caller handles truncation if length 35380Sstevel@tonic-gate * exceeds msg_controllen. 35390Sstevel@tonic-gate */ 35404420Samehta control = kmem_zalloc(controllen, KM_SLEEP); 35410Sstevel@tonic-gate 35420Sstevel@tonic-gate error = so_opt2cmsg(mp, opt, optlen, 35435240Snordmark !(flags & MSG_XPG4_2), 35445240Snordmark control, controllen); 35450Sstevel@tonic-gate if (error) { 35460Sstevel@tonic-gate freemsg(mp); 35470Sstevel@tonic-gate kmem_free(control, controllen); 35480Sstevel@tonic-gate eprintsoline(so, error); 35496707Sbrutus goto out; 35500Sstevel@tonic-gate } 35510Sstevel@tonic-gate msg->msg_control = control; 35520Sstevel@tonic-gate msg->msg_controllen = controllen; 35530Sstevel@tonic-gate } 35540Sstevel@tonic-gate 35550Sstevel@tonic-gate /* 35560Sstevel@tonic-gate * Set msg_flags to MSG_EOR based on 35570Sstevel@tonic-gate * DATA_flag and MOREDATA. 35580Sstevel@tonic-gate */ 35590Sstevel@tonic-gate mutex_enter(&so->so_lock); 35600Sstevel@tonic-gate so->so_state &= ~SS_SAVEDEOR; 35610Sstevel@tonic-gate if (!(tpr->data_ind.MORE_flag & 1)) { 35620Sstevel@tonic-gate if (!(rval.r_val1 & MOREDATA)) 35630Sstevel@tonic-gate msg->msg_flags |= MSG_EOR; 35640Sstevel@tonic-gate else 35650Sstevel@tonic-gate so->so_state |= SS_SAVEDEOR; 35660Sstevel@tonic-gate } 35670Sstevel@tonic-gate freemsg(mp); 35680Sstevel@tonic-gate /* 35690Sstevel@tonic-gate * If some data was received (i.e. not EOF) and the 35700Sstevel@tonic-gate * read/recv* has not been satisfied wait for some more. 35710Sstevel@tonic-gate * Not possible to wait if control info was received. 35720Sstevel@tonic-gate */ 35730Sstevel@tonic-gate if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 35740Sstevel@tonic-gate controllen == 0 && 35750Sstevel@tonic-gate uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 35760Sstevel@tonic-gate mutex_exit(&so->so_lock); 35770Sstevel@tonic-gate pflag = opflag | MSG_NOMARK; 35780Sstevel@tonic-gate goto retry; 35790Sstevel@tonic-gate } 35806707Sbrutus goto out_locked; 35810Sstevel@tonic-gate } 35820Sstevel@tonic-gate case T_EXDATA_IND: { 35830Sstevel@tonic-gate dprintso(so, 1, 35845240Snordmark ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld " 35855240Snordmark "state %s\n", 35868348SEric.Yu@Sun.COM sti->sti_oobsigcnt, sti->sti_oobcnt, 35875240Snordmark saved_resid - uiop->uio_resid, 35885240Snordmark pr_state(so->so_state, so->so_mode))); 35890Sstevel@tonic-gate /* 35900Sstevel@tonic-gate * kstrgetmsg handles MSGMARK so there is nothing to 35910Sstevel@tonic-gate * inspect in the T_EXDATA_IND. 35920Sstevel@tonic-gate * strsock_proto makes the stream head queue the T_EXDATA_IND 35930Sstevel@tonic-gate * as a separate message with no M_DATA component. Furthermore, 35940Sstevel@tonic-gate * the stream head does not consolidate M_DATA messages onto 35950Sstevel@tonic-gate * an MSGMARK'ed message ensuring that the T_EXDATA_IND 35960Sstevel@tonic-gate * remains a message by itself. This is needed since MSGMARK 35970Sstevel@tonic-gate * marks both the whole message as well as the last byte 35980Sstevel@tonic-gate * of the message. 35990Sstevel@tonic-gate */ 36000Sstevel@tonic-gate freemsg(mp); 36010Sstevel@tonic-gate ASSERT(uiop->uio_resid == saved_resid); /* No data */ 36020Sstevel@tonic-gate if (flags & MSG_PEEK) { 36030Sstevel@tonic-gate /* 36040Sstevel@tonic-gate * Even though we are peeking we consume the 36050Sstevel@tonic-gate * T_EXDATA_IND thereby moving the mark information 36060Sstevel@tonic-gate * to SS_RCVATMARK. Then the oob code below will 36070Sstevel@tonic-gate * retry the peeking kstrgetmsg. 36080Sstevel@tonic-gate * Note that the stream head read queue is 36090Sstevel@tonic-gate * never flushed without holding SOREADLOCKED 36100Sstevel@tonic-gate * thus the T_EXDATA_IND can not disappear 36110Sstevel@tonic-gate * underneath us. 36120Sstevel@tonic-gate */ 36130Sstevel@tonic-gate dprintso(so, 1, 36145240Snordmark ("sotpi_recvmsg: consume EXDATA_IND " 36155240Snordmark "counts %d/%d state %s\n", 36168348SEric.Yu@Sun.COM sti->sti_oobsigcnt, 36178348SEric.Yu@Sun.COM sti->sti_oobcnt, 36185240Snordmark pr_state(so->so_state, so->so_mode))); 36190Sstevel@tonic-gate 36200Sstevel@tonic-gate pflag = MSG_ANY | MSG_DELAYERROR; 36210Sstevel@tonic-gate if (so->so_mode & SM_ATOMIC) 36220Sstevel@tonic-gate pflag |= MSG_DISCARDTAIL; 36230Sstevel@tonic-gate 36240Sstevel@tonic-gate pri = 0; 36250Sstevel@tonic-gate mp = NULL; 36260Sstevel@tonic-gate 36270Sstevel@tonic-gate error = kstrgetmsg(SOTOV(so), &mp, uiop, 36285240Snordmark &pri, &pflag, (clock_t)-1, &rval); 36290Sstevel@tonic-gate ASSERT(uiop->uio_resid == saved_resid); 36300Sstevel@tonic-gate 36310Sstevel@tonic-gate if (error) { 36320Sstevel@tonic-gate #ifdef SOCK_DEBUG 36330Sstevel@tonic-gate if (error != EWOULDBLOCK && error != EINTR) { 36340Sstevel@tonic-gate eprintsoline(so, error); 36350Sstevel@tonic-gate } 36360Sstevel@tonic-gate #endif /* SOCK_DEBUG */ 36376707Sbrutus goto out; 36380Sstevel@tonic-gate } 36390Sstevel@tonic-gate ASSERT(mp); 36400Sstevel@tonic-gate tpr = (union T_primitives *)mp->b_rptr; 36410Sstevel@tonic-gate ASSERT(tpr->type == T_EXDATA_IND); 36420Sstevel@tonic-gate freemsg(mp); 36430Sstevel@tonic-gate } /* end "if (flags & MSG_PEEK)" */ 36440Sstevel@tonic-gate 36450Sstevel@tonic-gate /* 36460Sstevel@tonic-gate * Decrement the number of queued and pending oob. 36470Sstevel@tonic-gate * 36480Sstevel@tonic-gate * SS_RCVATMARK is cleared when we read past a mark. 36490Sstevel@tonic-gate * SS_HAVEOOBDATA is cleared when we've read past the 36500Sstevel@tonic-gate * last mark. 36510Sstevel@tonic-gate * SS_OOBPEND is cleared if we've read past the last 36520Sstevel@tonic-gate * mark and no (new) SIGURG has been posted. 36530Sstevel@tonic-gate */ 36540Sstevel@tonic-gate mutex_enter(&so->so_lock); 36550Sstevel@tonic-gate ASSERT(so_verify_oobstate(so)); 36568348SEric.Yu@Sun.COM ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt); 36578348SEric.Yu@Sun.COM ASSERT(sti->sti_oobsigcnt > 0); 36588348SEric.Yu@Sun.COM sti->sti_oobsigcnt--; 36598348SEric.Yu@Sun.COM ASSERT(sti->sti_oobcnt > 0); 36608348SEric.Yu@Sun.COM sti->sti_oobcnt--; 36610Sstevel@tonic-gate /* 36620Sstevel@tonic-gate * Since the T_EXDATA_IND has been removed from the stream 36630Sstevel@tonic-gate * head, but we have not read data past the mark, 36640Sstevel@tonic-gate * sockfs needs to track that the socket is still at the mark. 36650Sstevel@tonic-gate * 36660Sstevel@tonic-gate * Since no data was received call kstrgetmsg again to wait 36670Sstevel@tonic-gate * for data. 36680Sstevel@tonic-gate */ 36690Sstevel@tonic-gate so->so_state |= SS_RCVATMARK; 36700Sstevel@tonic-gate mutex_exit(&so->so_lock); 36710Sstevel@tonic-gate dprintso(so, 1, 36720Sstevel@tonic-gate ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n", 36738348SEric.Yu@Sun.COM sti->sti_oobsigcnt, sti->sti_oobcnt, 36740Sstevel@tonic-gate pr_state(so->so_state, so->so_mode))); 36750Sstevel@tonic-gate pflag = opflag; 36760Sstevel@tonic-gate goto retry; 36770Sstevel@tonic-gate } 36780Sstevel@tonic-gate default: 36798348SEric.Yu@Sun.COM cmn_err(CE_CONT, "sotpi_recvmsg: so %p prim %d mp %p\n", 36808348SEric.Yu@Sun.COM (void *)so, tpr->type, (void *)mp); 36810Sstevel@tonic-gate ASSERT(0); 36820Sstevel@tonic-gate freemsg(mp); 36830Sstevel@tonic-gate error = EPROTO; 36840Sstevel@tonic-gate eprintsoline(so, error); 36856707Sbrutus goto out; 36860Sstevel@tonic-gate } 36870Sstevel@tonic-gate /* NOTREACHED */ 36886707Sbrutus out: 36890Sstevel@tonic-gate mutex_enter(&so->so_lock); 36906707Sbrutus out_locked: 36910Sstevel@tonic-gate so_unlock_read(so); /* Clear SOREADLOCKED */ 36920Sstevel@tonic-gate mutex_exit(&so->so_lock); 36930Sstevel@tonic-gate return (error); 36940Sstevel@tonic-gate } 36950Sstevel@tonic-gate 36960Sstevel@tonic-gate /* 36970Sstevel@tonic-gate * Sending data with options on a datagram socket. 36980Sstevel@tonic-gate * Assumes caller has verified that SS_ISBOUND etc. are set. 36990Sstevel@tonic-gate */ 37000Sstevel@tonic-gate static int 3701741Smasputra sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3702741Smasputra struct uio *uiop, void *control, t_uscalar_t controllen, int flags) 37030Sstevel@tonic-gate { 37040Sstevel@tonic-gate struct T_unitdata_req tudr; 37050Sstevel@tonic-gate mblk_t *mp; 37060Sstevel@tonic-gate int error; 37070Sstevel@tonic-gate void *addr; 37080Sstevel@tonic-gate socklen_t addrlen; 37090Sstevel@tonic-gate void *src; 37100Sstevel@tonic-gate socklen_t srclen; 37110Sstevel@tonic-gate ssize_t len; 37120Sstevel@tonic-gate int size; 37130Sstevel@tonic-gate struct T_opthdr toh; 37140Sstevel@tonic-gate struct fdbuf *fdbuf; 37150Sstevel@tonic-gate t_uscalar_t optlen; 37160Sstevel@tonic-gate void *fds; 37170Sstevel@tonic-gate int fdlen; 37188348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 37190Sstevel@tonic-gate 37200Sstevel@tonic-gate ASSERT(name && namelen); 37210Sstevel@tonic-gate ASSERT(control && controllen); 37220Sstevel@tonic-gate 37230Sstevel@tonic-gate len = uiop->uio_resid; 37248348SEric.Yu@Sun.COM if (len > (ssize_t)sti->sti_tidu_size) { 37250Sstevel@tonic-gate return (EMSGSIZE); 37260Sstevel@tonic-gate } 37270Sstevel@tonic-gate 37280Sstevel@tonic-gate /* 37290Sstevel@tonic-gate * For AF_UNIX the destination address is translated to an internal 37300Sstevel@tonic-gate * name and the source address is passed as an option. 37310Sstevel@tonic-gate * Also, file descriptors are passed as file pointers in an 37320Sstevel@tonic-gate * option. 37330Sstevel@tonic-gate */ 37340Sstevel@tonic-gate 37350Sstevel@tonic-gate /* 37360Sstevel@tonic-gate * Length and family checks. 37370Sstevel@tonic-gate */ 37380Sstevel@tonic-gate error = so_addr_verify(so, name, namelen); 37390Sstevel@tonic-gate if (error) { 37400Sstevel@tonic-gate eprintsoline(so, error); 37410Sstevel@tonic-gate return (error); 37420Sstevel@tonic-gate } 37430Sstevel@tonic-gate if (so->so_family == AF_UNIX) { 37448348SEric.Yu@Sun.COM if (sti->sti_faddr_noxlate) { 37450Sstevel@tonic-gate /* 37460Sstevel@tonic-gate * Already have a transport internal address. Do not 37470Sstevel@tonic-gate * pass any (transport internal) source address. 37480Sstevel@tonic-gate */ 37490Sstevel@tonic-gate addr = name; 37500Sstevel@tonic-gate addrlen = namelen; 37510Sstevel@tonic-gate src = NULL; 37520Sstevel@tonic-gate srclen = 0; 37530Sstevel@tonic-gate } else { 37540Sstevel@tonic-gate /* 37550Sstevel@tonic-gate * Pass the sockaddr_un source address as an option 37560Sstevel@tonic-gate * and translate the remote address. 37570Sstevel@tonic-gate * 37588348SEric.Yu@Sun.COM * Note that this code does not prevent sti_laddr_sa 37590Sstevel@tonic-gate * from changing while it is being used. Thus 37600Sstevel@tonic-gate * if an unbind+bind occurs concurrently with this 37610Sstevel@tonic-gate * send the peer might see a partially new and a 37620Sstevel@tonic-gate * partially old "from" address. 37630Sstevel@tonic-gate */ 37648348SEric.Yu@Sun.COM src = sti->sti_laddr_sa; 37658348SEric.Yu@Sun.COM srclen = (t_uscalar_t)sti->sti_laddr_len; 37660Sstevel@tonic-gate dprintso(so, 1, 37670Sstevel@tonic-gate ("sosend_dgramcmsg UNIX: srclen %d, src %p\n", 37680Sstevel@tonic-gate srclen, src)); 37690Sstevel@tonic-gate error = so_ux_addr_xlate(so, name, namelen, 37705240Snordmark (flags & MSG_XPG4_2), 37715240Snordmark &addr, &addrlen); 37720Sstevel@tonic-gate if (error) { 37730Sstevel@tonic-gate eprintsoline(so, error); 37740Sstevel@tonic-gate return (error); 37750Sstevel@tonic-gate } 37760Sstevel@tonic-gate } 37770Sstevel@tonic-gate } else { 37780Sstevel@tonic-gate addr = name; 37790Sstevel@tonic-gate addrlen = namelen; 37800Sstevel@tonic-gate src = NULL; 37810Sstevel@tonic-gate srclen = 0; 37820Sstevel@tonic-gate } 37830Sstevel@tonic-gate optlen = so_optlen(control, controllen, 37845240Snordmark !(flags & MSG_XPG4_2)); 37850Sstevel@tonic-gate tudr.PRIM_type = T_UNITDATA_REQ; 37860Sstevel@tonic-gate tudr.DEST_length = addrlen; 37870Sstevel@tonic-gate tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 37880Sstevel@tonic-gate if (srclen != 0) 37890Sstevel@tonic-gate tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) + 37900Sstevel@tonic-gate _TPI_ALIGN_TOPT(srclen)); 37910Sstevel@tonic-gate else 37920Sstevel@tonic-gate tudr.OPT_length = optlen; 37930Sstevel@tonic-gate tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 37945240Snordmark _TPI_ALIGN_TOPT(addrlen)); 37950Sstevel@tonic-gate 37960Sstevel@tonic-gate size = tudr.OPT_offset + tudr.OPT_length; 37970Sstevel@tonic-gate 37980Sstevel@tonic-gate /* 37990Sstevel@tonic-gate * File descriptors only when SM_FDPASSING set. 38000Sstevel@tonic-gate */ 38010Sstevel@tonic-gate error = so_getfdopt(control, controllen, 38025240Snordmark !(flags & MSG_XPG4_2), &fds, &fdlen); 38030Sstevel@tonic-gate if (error) 38040Sstevel@tonic-gate return (error); 38050Sstevel@tonic-gate if (fdlen != -1) { 38060Sstevel@tonic-gate if (!(so->so_mode & SM_FDPASSING)) 38070Sstevel@tonic-gate return (EOPNOTSUPP); 38080Sstevel@tonic-gate 38090Sstevel@tonic-gate error = fdbuf_create(fds, fdlen, &fdbuf); 38100Sstevel@tonic-gate if (error) 38110Sstevel@tonic-gate return (error); 38120Sstevel@tonic-gate mp = fdbuf_allocmsg(size, fdbuf); 38130Sstevel@tonic-gate } else { 38148778SErik.Nordmark@Sun.COM mp = soallocproto(size, _ALLOC_INTR, CRED()); 3815455Smeem if (mp == NULL) { 3816455Smeem /* 3817455Smeem * Caught a signal waiting for memory. 3818455Smeem * Let send* return EINTR. 3819455Smeem */ 3820455Smeem return (EINTR); 3821455Smeem } 38220Sstevel@tonic-gate } 38230Sstevel@tonic-gate soappendmsg(mp, &tudr, sizeof (tudr)); 38240Sstevel@tonic-gate soappendmsg(mp, addr, addrlen); 38250Sstevel@tonic-gate mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 38260Sstevel@tonic-gate 38270Sstevel@tonic-gate if (fdlen != -1) { 38280Sstevel@tonic-gate ASSERT(fdbuf != NULL); 38290Sstevel@tonic-gate toh.level = SOL_SOCKET; 38300Sstevel@tonic-gate toh.name = SO_FILEP; 38310Sstevel@tonic-gate toh.len = fdbuf->fd_size + 38325240Snordmark (t_uscalar_t)sizeof (struct T_opthdr); 38330Sstevel@tonic-gate toh.status = 0; 38340Sstevel@tonic-gate soappendmsg(mp, &toh, sizeof (toh)); 38350Sstevel@tonic-gate soappendmsg(mp, fdbuf, fdbuf->fd_size); 38360Sstevel@tonic-gate ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 38370Sstevel@tonic-gate } 38380Sstevel@tonic-gate if (srclen != 0) { 38390Sstevel@tonic-gate /* 38400Sstevel@tonic-gate * There is a AF_UNIX sockaddr_un to include as a source 38410Sstevel@tonic-gate * address option. 38420Sstevel@tonic-gate */ 38430Sstevel@tonic-gate toh.level = SOL_SOCKET; 38440Sstevel@tonic-gate toh.name = SO_SRCADDR; 38450Sstevel@tonic-gate toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 38460Sstevel@tonic-gate toh.status = 0; 38470Sstevel@tonic-gate soappendmsg(mp, &toh, sizeof (toh)); 38480Sstevel@tonic-gate soappendmsg(mp, src, srclen); 38490Sstevel@tonic-gate mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 38500Sstevel@tonic-gate ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 38510Sstevel@tonic-gate } 38520Sstevel@tonic-gate ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 38530Sstevel@tonic-gate so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 38540Sstevel@tonic-gate /* At most 3 bytes left in the message */ 38550Sstevel@tonic-gate ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 38560Sstevel@tonic-gate ASSERT(MBLKL(mp) <= (ssize_t)size); 38570Sstevel@tonic-gate 38580Sstevel@tonic-gate ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 385911861SMarek.Pospisil@Sun.COM if (AU_AUDITING()) 38600Sstevel@tonic-gate audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 38610Sstevel@tonic-gate 38620Sstevel@tonic-gate error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 38630Sstevel@tonic-gate #ifdef SOCK_DEBUG 38640Sstevel@tonic-gate if (error) { 38650Sstevel@tonic-gate eprintsoline(so, error); 38660Sstevel@tonic-gate } 38670Sstevel@tonic-gate #endif /* SOCK_DEBUG */ 38680Sstevel@tonic-gate return (error); 38690Sstevel@tonic-gate } 38700Sstevel@tonic-gate 38710Sstevel@tonic-gate /* 38720Sstevel@tonic-gate * Sending data with options on a connected stream socket. 38730Sstevel@tonic-gate * Assumes caller has verified that SS_ISCONNECTED is set. 38740Sstevel@tonic-gate */ 38750Sstevel@tonic-gate static int 38768348SEric.Yu@Sun.COM sosend_svccmsg(struct sonode *so, struct uio *uiop, int more, void *control, 38778348SEric.Yu@Sun.COM t_uscalar_t controllen, int flags) 38780Sstevel@tonic-gate { 38790Sstevel@tonic-gate struct T_optdata_req tdr; 38800Sstevel@tonic-gate mblk_t *mp; 38810Sstevel@tonic-gate int error; 38820Sstevel@tonic-gate ssize_t iosize; 38830Sstevel@tonic-gate int size; 38840Sstevel@tonic-gate struct fdbuf *fdbuf; 38850Sstevel@tonic-gate t_uscalar_t optlen; 38860Sstevel@tonic-gate void *fds; 38870Sstevel@tonic-gate int fdlen; 38880Sstevel@tonic-gate struct T_opthdr toh; 38898348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 38900Sstevel@tonic-gate 38910Sstevel@tonic-gate dprintso(so, 1, 38925240Snordmark ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid)); 38930Sstevel@tonic-gate 38940Sstevel@tonic-gate /* 38950Sstevel@tonic-gate * Has to be bound and connected. However, since no locks are 38960Sstevel@tonic-gate * held the state could have changed after sotpi_sendmsg checked it 38970Sstevel@tonic-gate * thus it is not possible to ASSERT on the state. 38980Sstevel@tonic-gate */ 38990Sstevel@tonic-gate 39000Sstevel@tonic-gate /* Options on connection-oriented only when SM_OPTDATA set. */ 39010Sstevel@tonic-gate if (!(so->so_mode & SM_OPTDATA)) 39020Sstevel@tonic-gate return (EOPNOTSUPP); 39030Sstevel@tonic-gate 39040Sstevel@tonic-gate do { 39050Sstevel@tonic-gate /* 39060Sstevel@tonic-gate * Set the MORE flag if uio_resid does not fit in this 39070Sstevel@tonic-gate * message or if the caller passed in "more". 39080Sstevel@tonic-gate * Error for transports with zero tidu_size. 39090Sstevel@tonic-gate */ 39100Sstevel@tonic-gate tdr.PRIM_type = T_OPTDATA_REQ; 39118348SEric.Yu@Sun.COM iosize = sti->sti_tidu_size; 39120Sstevel@tonic-gate if (iosize <= 0) 39130Sstevel@tonic-gate return (EMSGSIZE); 39140Sstevel@tonic-gate if (uiop->uio_resid > iosize) { 39150Sstevel@tonic-gate tdr.DATA_flag = 1; 39160Sstevel@tonic-gate } else { 39170Sstevel@tonic-gate if (more) 39180Sstevel@tonic-gate tdr.DATA_flag = 1; 39190Sstevel@tonic-gate else 39200Sstevel@tonic-gate tdr.DATA_flag = 0; 39210Sstevel@tonic-gate iosize = uiop->uio_resid; 39220Sstevel@tonic-gate } 39230Sstevel@tonic-gate dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n", 39245240Snordmark tdr.DATA_flag, iosize)); 39250Sstevel@tonic-gate 39260Sstevel@tonic-gate optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2)); 39270Sstevel@tonic-gate tdr.OPT_length = optlen; 39280Sstevel@tonic-gate tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 39290Sstevel@tonic-gate 39300Sstevel@tonic-gate size = (int)sizeof (tdr) + optlen; 39310Sstevel@tonic-gate /* 39320Sstevel@tonic-gate * File descriptors only when SM_FDPASSING set. 39330Sstevel@tonic-gate */ 39340Sstevel@tonic-gate error = so_getfdopt(control, controllen, 39355240Snordmark !(flags & MSG_XPG4_2), &fds, &fdlen); 39360Sstevel@tonic-gate if (error) 39370Sstevel@tonic-gate return (error); 39380Sstevel@tonic-gate if (fdlen != -1) { 39390Sstevel@tonic-gate if (!(so->so_mode & SM_FDPASSING)) 39400Sstevel@tonic-gate return (EOPNOTSUPP); 39410Sstevel@tonic-gate 39420Sstevel@tonic-gate error = fdbuf_create(fds, fdlen, &fdbuf); 39430Sstevel@tonic-gate if (error) 39440Sstevel@tonic-gate return (error); 39450Sstevel@tonic-gate mp = fdbuf_allocmsg(size, fdbuf); 39460Sstevel@tonic-gate } else { 39478778SErik.Nordmark@Sun.COM mp = soallocproto(size, _ALLOC_INTR, CRED()); 3948455Smeem if (mp == NULL) { 3949455Smeem /* 3950455Smeem * Caught a signal waiting for memory. 3951455Smeem * Let send* return EINTR. 3952455Smeem */ 39538348SEric.Yu@Sun.COM return (EINTR); 3954455Smeem } 39550Sstevel@tonic-gate } 39560Sstevel@tonic-gate soappendmsg(mp, &tdr, sizeof (tdr)); 39570Sstevel@tonic-gate 39580Sstevel@tonic-gate if (fdlen != -1) { 39590Sstevel@tonic-gate ASSERT(fdbuf != NULL); 39600Sstevel@tonic-gate toh.level = SOL_SOCKET; 39610Sstevel@tonic-gate toh.name = SO_FILEP; 39620Sstevel@tonic-gate toh.len = fdbuf->fd_size + 39635240Snordmark (t_uscalar_t)sizeof (struct T_opthdr); 39640Sstevel@tonic-gate toh.status = 0; 39650Sstevel@tonic-gate soappendmsg(mp, &toh, sizeof (toh)); 39660Sstevel@tonic-gate soappendmsg(mp, fdbuf, fdbuf->fd_size); 39670Sstevel@tonic-gate ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 39680Sstevel@tonic-gate } 39690Sstevel@tonic-gate so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 39700Sstevel@tonic-gate /* At most 3 bytes left in the message */ 39710Sstevel@tonic-gate ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 39720Sstevel@tonic-gate ASSERT(MBLKL(mp) <= (ssize_t)size); 39730Sstevel@tonic-gate 39740Sstevel@tonic-gate ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 39750Sstevel@tonic-gate 39760Sstevel@tonic-gate error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 39775240Snordmark 0, MSG_BAND, 0); 39780Sstevel@tonic-gate if (error) { 39790Sstevel@tonic-gate eprintsoline(so, error); 39800Sstevel@tonic-gate return (error); 39810Sstevel@tonic-gate } 39820Sstevel@tonic-gate control = NULL; 39830Sstevel@tonic-gate if (uiop->uio_resid > 0) { 39840Sstevel@tonic-gate /* 39850Sstevel@tonic-gate * Recheck for fatal errors. Fail write even though 39860Sstevel@tonic-gate * some data have been written. This is consistent 39870Sstevel@tonic-gate * with strwrite semantics and BSD sockets semantics. 39880Sstevel@tonic-gate */ 39890Sstevel@tonic-gate if (so->so_state & SS_CANTSENDMORE) { 39900Sstevel@tonic-gate eprintsoline(so, error); 39910Sstevel@tonic-gate return (EPIPE); 39920Sstevel@tonic-gate } 39930Sstevel@tonic-gate if (so->so_error != 0) { 39940Sstevel@tonic-gate mutex_enter(&so->so_lock); 39958348SEric.Yu@Sun.COM error = sogeterr(so, B_TRUE); 39960Sstevel@tonic-gate mutex_exit(&so->so_lock); 39970Sstevel@tonic-gate if (error != 0) { 39980Sstevel@tonic-gate eprintsoline(so, error); 39990Sstevel@tonic-gate return (error); 40000Sstevel@tonic-gate } 40010Sstevel@tonic-gate } 40020Sstevel@tonic-gate } 40030Sstevel@tonic-gate } while (uiop->uio_resid > 0); 40040Sstevel@tonic-gate return (0); 40050Sstevel@tonic-gate } 40060Sstevel@tonic-gate 40070Sstevel@tonic-gate /* 40080Sstevel@tonic-gate * Sending data on a datagram socket. 40090Sstevel@tonic-gate * Assumes caller has verified that SS_ISBOUND etc. are set. 40100Sstevel@tonic-gate * 40110Sstevel@tonic-gate * For AF_UNIX the destination address is translated to an internal 40120Sstevel@tonic-gate * name and the source address is passed as an option. 40130Sstevel@tonic-gate */ 40140Sstevel@tonic-gate int 4015741Smasputra sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen, 4016741Smasputra struct uio *uiop, int flags) 40170Sstevel@tonic-gate { 40180Sstevel@tonic-gate struct T_unitdata_req tudr; 40190Sstevel@tonic-gate mblk_t *mp; 40200Sstevel@tonic-gate int error; 40210Sstevel@tonic-gate void *addr; 40220Sstevel@tonic-gate socklen_t addrlen; 40230Sstevel@tonic-gate void *src; 40240Sstevel@tonic-gate socklen_t srclen; 40250Sstevel@tonic-gate ssize_t len; 40268348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 40270Sstevel@tonic-gate 4028741Smasputra ASSERT(name != NULL && namelen != 0); 40290Sstevel@tonic-gate 40300Sstevel@tonic-gate len = uiop->uio_resid; 40318348SEric.Yu@Sun.COM if (len > sti->sti_tidu_size) { 40320Sstevel@tonic-gate error = EMSGSIZE; 40330Sstevel@tonic-gate goto done; 40340Sstevel@tonic-gate } 40350Sstevel@tonic-gate 4036741Smasputra /* Length and family checks */ 40370Sstevel@tonic-gate error = so_addr_verify(so, name, namelen); 4038741Smasputra if (error != 0) 40390Sstevel@tonic-gate goto done; 4040741Smasputra 40418348SEric.Yu@Sun.COM if (sti->sti_direct) 4042741Smasputra return (sodgram_direct(so, name, namelen, uiop, flags)); 4043741Smasputra 40440Sstevel@tonic-gate if (so->so_family == AF_UNIX) { 40458348SEric.Yu@Sun.COM if (sti->sti_faddr_noxlate) { 40460Sstevel@tonic-gate /* 40470Sstevel@tonic-gate * Already have a transport internal address. Do not 40480Sstevel@tonic-gate * pass any (transport internal) source address. 40490Sstevel@tonic-gate */ 40500Sstevel@tonic-gate addr = name; 40510Sstevel@tonic-gate addrlen = namelen; 40520Sstevel@tonic-gate src = NULL; 40530Sstevel@tonic-gate srclen = 0; 40540Sstevel@tonic-gate } else { 40550Sstevel@tonic-gate /* 40560Sstevel@tonic-gate * Pass the sockaddr_un source address as an option 40570Sstevel@tonic-gate * and translate the remote address. 40580Sstevel@tonic-gate * 40598348SEric.Yu@Sun.COM * Note that this code does not prevent sti_laddr_sa 40600Sstevel@tonic-gate * from changing while it is being used. Thus 40610Sstevel@tonic-gate * if an unbind+bind occurs concurrently with this 40620Sstevel@tonic-gate * send the peer might see a partially new and a 40630Sstevel@tonic-gate * partially old "from" address. 40640Sstevel@tonic-gate */ 40658348SEric.Yu@Sun.COM src = sti->sti_laddr_sa; 40668348SEric.Yu@Sun.COM srclen = (socklen_t)sti->sti_laddr_len; 40670Sstevel@tonic-gate dprintso(so, 1, 40685240Snordmark ("sosend_dgram UNIX: srclen %d, src %p\n", 40695240Snordmark srclen, src)); 40700Sstevel@tonic-gate error = so_ux_addr_xlate(so, name, namelen, 40715240Snordmark (flags & MSG_XPG4_2), 40725240Snordmark &addr, &addrlen); 40730Sstevel@tonic-gate if (error) { 40740Sstevel@tonic-gate eprintsoline(so, error); 40750Sstevel@tonic-gate goto done; 40760Sstevel@tonic-gate } 40770Sstevel@tonic-gate } 40780Sstevel@tonic-gate } else { 40790Sstevel@tonic-gate addr = name; 40800Sstevel@tonic-gate addrlen = namelen; 40810Sstevel@tonic-gate src = NULL; 40820Sstevel@tonic-gate srclen = 0; 40830Sstevel@tonic-gate } 40840Sstevel@tonic-gate tudr.PRIM_type = T_UNITDATA_REQ; 40850Sstevel@tonic-gate tudr.DEST_length = addrlen; 40860Sstevel@tonic-gate tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 40870Sstevel@tonic-gate if (srclen == 0) { 40880Sstevel@tonic-gate tudr.OPT_length = 0; 40890Sstevel@tonic-gate tudr.OPT_offset = 0; 40900Sstevel@tonic-gate 40910Sstevel@tonic-gate mp = soallocproto2(&tudr, sizeof (tudr), 40928778SErik.Nordmark@Sun.COM addr, addrlen, 0, _ALLOC_INTR, CRED()); 40930Sstevel@tonic-gate if (mp == NULL) { 40940Sstevel@tonic-gate /* 40950Sstevel@tonic-gate * Caught a signal waiting for memory. 40960Sstevel@tonic-gate * Let send* return EINTR. 40970Sstevel@tonic-gate */ 40980Sstevel@tonic-gate error = EINTR; 40990Sstevel@tonic-gate goto done; 41000Sstevel@tonic-gate } 41010Sstevel@tonic-gate } else { 41020Sstevel@tonic-gate /* 41030Sstevel@tonic-gate * There is a AF_UNIX sockaddr_un to include as a source 41040Sstevel@tonic-gate * address option. 41050Sstevel@tonic-gate */ 41060Sstevel@tonic-gate struct T_opthdr toh; 41070Sstevel@tonic-gate ssize_t size; 41080Sstevel@tonic-gate 41090Sstevel@tonic-gate tudr.OPT_length = (t_scalar_t)(sizeof (toh) + 41105240Snordmark _TPI_ALIGN_TOPT(srclen)); 41110Sstevel@tonic-gate tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 41125240Snordmark _TPI_ALIGN_TOPT(addrlen)); 41130Sstevel@tonic-gate 41140Sstevel@tonic-gate toh.level = SOL_SOCKET; 41150Sstevel@tonic-gate toh.name = SO_SRCADDR; 41160Sstevel@tonic-gate toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 41170Sstevel@tonic-gate toh.status = 0; 41180Sstevel@tonic-gate 41190Sstevel@tonic-gate size = tudr.OPT_offset + tudr.OPT_length; 41200Sstevel@tonic-gate mp = soallocproto2(&tudr, sizeof (tudr), 41218778SErik.Nordmark@Sun.COM addr, addrlen, size, _ALLOC_INTR, CRED()); 41220Sstevel@tonic-gate if (mp == NULL) { 41230Sstevel@tonic-gate /* 41240Sstevel@tonic-gate * Caught a signal waiting for memory. 41250Sstevel@tonic-gate * Let send* return EINTR. 41260Sstevel@tonic-gate */ 41270Sstevel@tonic-gate error = EINTR; 41280Sstevel@tonic-gate goto done; 41290Sstevel@tonic-gate } 41300Sstevel@tonic-gate mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 41310Sstevel@tonic-gate soappendmsg(mp, &toh, sizeof (toh)); 41320Sstevel@tonic-gate soappendmsg(mp, src, srclen); 41330Sstevel@tonic-gate mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 41340Sstevel@tonic-gate ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 41350Sstevel@tonic-gate } 41360Sstevel@tonic-gate 413711861SMarek.Pospisil@Sun.COM if (AU_AUDITING()) 41380Sstevel@tonic-gate audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 41390Sstevel@tonic-gate 41400Sstevel@tonic-gate error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 41410Sstevel@tonic-gate done: 41420Sstevel@tonic-gate #ifdef SOCK_DEBUG 41430Sstevel@tonic-gate if (error) { 41440Sstevel@tonic-gate eprintsoline(so, error); 41450Sstevel@tonic-gate } 41460Sstevel@tonic-gate #endif /* SOCK_DEBUG */ 41470Sstevel@tonic-gate return (error); 41480Sstevel@tonic-gate } 41490Sstevel@tonic-gate 41500Sstevel@tonic-gate /* 41510Sstevel@tonic-gate * Sending data on a connected stream socket. 41520Sstevel@tonic-gate * Assumes caller has verified that SS_ISCONNECTED is set. 41530Sstevel@tonic-gate */ 41540Sstevel@tonic-gate int 41558348SEric.Yu@Sun.COM sosend_svc(struct sonode *so, struct uio *uiop, t_scalar_t prim, int more, 41568348SEric.Yu@Sun.COM int sflag) 41570Sstevel@tonic-gate { 41580Sstevel@tonic-gate struct T_data_req tdr; 41590Sstevel@tonic-gate mblk_t *mp; 41600Sstevel@tonic-gate int error; 41610Sstevel@tonic-gate ssize_t iosize; 41628348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 41630Sstevel@tonic-gate 41640Sstevel@tonic-gate dprintso(so, 1, 41655240Snordmark ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n", 41667240Srh87107 (void *)so, uiop->uio_resid, prim, sflag)); 41670Sstevel@tonic-gate 41680Sstevel@tonic-gate /* 41690Sstevel@tonic-gate * Has to be bound and connected. However, since no locks are 41700Sstevel@tonic-gate * held the state could have changed after sotpi_sendmsg checked it 41710Sstevel@tonic-gate * thus it is not possible to ASSERT on the state. 41720Sstevel@tonic-gate */ 41730Sstevel@tonic-gate 41740Sstevel@tonic-gate do { 41750Sstevel@tonic-gate /* 41760Sstevel@tonic-gate * Set the MORE flag if uio_resid does not fit in this 41770Sstevel@tonic-gate * message or if the caller passed in "more". 41780Sstevel@tonic-gate * Error for transports with zero tidu_size. 41790Sstevel@tonic-gate */ 41800Sstevel@tonic-gate tdr.PRIM_type = prim; 41818348SEric.Yu@Sun.COM iosize = sti->sti_tidu_size; 41820Sstevel@tonic-gate if (iosize <= 0) 41830Sstevel@tonic-gate return (EMSGSIZE); 41840Sstevel@tonic-gate if (uiop->uio_resid > iosize) { 41850Sstevel@tonic-gate tdr.MORE_flag = 1; 41860Sstevel@tonic-gate } else { 41870Sstevel@tonic-gate if (more) 41880Sstevel@tonic-gate tdr.MORE_flag = 1; 41890Sstevel@tonic-gate else 41900Sstevel@tonic-gate tdr.MORE_flag = 0; 41910Sstevel@tonic-gate iosize = uiop->uio_resid; 41920Sstevel@tonic-gate } 41930Sstevel@tonic-gate dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n", 41945240Snordmark prim, tdr.MORE_flag, iosize)); 41958778SErik.Nordmark@Sun.COM mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR, CRED()); 41960Sstevel@tonic-gate if (mp == NULL) { 41970Sstevel@tonic-gate /* 41980Sstevel@tonic-gate * Caught a signal waiting for memory. 41990Sstevel@tonic-gate * Let send* return EINTR. 42000Sstevel@tonic-gate */ 42018348SEric.Yu@Sun.COM return (EINTR); 42020Sstevel@tonic-gate } 42030Sstevel@tonic-gate 42040Sstevel@tonic-gate error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 42055240Snordmark 0, sflag | MSG_BAND, 0); 42060Sstevel@tonic-gate if (error) { 42070Sstevel@tonic-gate eprintsoline(so, error); 42080Sstevel@tonic-gate return (error); 42090Sstevel@tonic-gate } 42100Sstevel@tonic-gate if (uiop->uio_resid > 0) { 42110Sstevel@tonic-gate /* 42120Sstevel@tonic-gate * Recheck for fatal errors. Fail write even though 42130Sstevel@tonic-gate * some data have been written. This is consistent 42140Sstevel@tonic-gate * with strwrite semantics and BSD sockets semantics. 42150Sstevel@tonic-gate */ 42160Sstevel@tonic-gate if (so->so_state & SS_CANTSENDMORE) { 42170Sstevel@tonic-gate eprintsoline(so, error); 42180Sstevel@tonic-gate return (EPIPE); 42190Sstevel@tonic-gate } 42200Sstevel@tonic-gate if (so->so_error != 0) { 42210Sstevel@tonic-gate mutex_enter(&so->so_lock); 42228348SEric.Yu@Sun.COM error = sogeterr(so, B_TRUE); 42230Sstevel@tonic-gate mutex_exit(&so->so_lock); 42240Sstevel@tonic-gate if (error != 0) { 42250Sstevel@tonic-gate eprintsoline(so, error); 42260Sstevel@tonic-gate return (error); 42270Sstevel@tonic-gate } 42280Sstevel@tonic-gate } 42290Sstevel@tonic-gate } 42300Sstevel@tonic-gate } while (uiop->uio_resid > 0); 42310Sstevel@tonic-gate return (0); 42320Sstevel@tonic-gate } 42330Sstevel@tonic-gate 42340Sstevel@tonic-gate /* 42350Sstevel@tonic-gate * Check the state for errors and call the appropriate send function. 42360Sstevel@tonic-gate * 42370Sstevel@tonic-gate * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set) 42380Sstevel@tonic-gate * this function issues a setsockopt to toggle SO_DONTROUTE before and 42390Sstevel@tonic-gate * after sending the message. 42400Sstevel@tonic-gate */ 42410Sstevel@tonic-gate static int 42428348SEric.Yu@Sun.COM sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 42438348SEric.Yu@Sun.COM struct cred *cr) 42440Sstevel@tonic-gate { 42450Sstevel@tonic-gate int so_state; 42460Sstevel@tonic-gate int so_mode; 42470Sstevel@tonic-gate int error; 42480Sstevel@tonic-gate struct sockaddr *name; 42490Sstevel@tonic-gate t_uscalar_t namelen; 42500Sstevel@tonic-gate int dontroute; 42510Sstevel@tonic-gate int flags; 42528348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 42530Sstevel@tonic-gate 42540Sstevel@tonic-gate dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n", 42557240Srh87107 (void *)so, (void *)msg, msg->msg_flags, 42565240Snordmark pr_state(so->so_state, so->so_mode), so->so_error)); 42570Sstevel@tonic-gate 42588348SEric.Yu@Sun.COM if (so->so_version == SOV_STREAM) { 42598348SEric.Yu@Sun.COM /* The imaginary "sockmod" has been popped - act as a stream */ 42608348SEric.Yu@Sun.COM so_update_attrs(so, SOMOD); 42618348SEric.Yu@Sun.COM return (strwrite(SOTOV(so), uiop, cr)); 42628348SEric.Yu@Sun.COM } 42638348SEric.Yu@Sun.COM 42640Sstevel@tonic-gate mutex_enter(&so->so_lock); 42650Sstevel@tonic-gate so_state = so->so_state; 42660Sstevel@tonic-gate 42670Sstevel@tonic-gate if (so_state & SS_CANTSENDMORE) { 42680Sstevel@tonic-gate mutex_exit(&so->so_lock); 42690Sstevel@tonic-gate return (EPIPE); 42700Sstevel@tonic-gate } 42710Sstevel@tonic-gate 42720Sstevel@tonic-gate if (so->so_error != 0) { 42738348SEric.Yu@Sun.COM error = sogeterr(so, B_TRUE); 42740Sstevel@tonic-gate if (error != 0) { 42750Sstevel@tonic-gate mutex_exit(&so->so_lock); 42760Sstevel@tonic-gate return (error); 42770Sstevel@tonic-gate } 42780Sstevel@tonic-gate } 42790Sstevel@tonic-gate 42800Sstevel@tonic-gate name = (struct sockaddr *)msg->msg_name; 42810Sstevel@tonic-gate namelen = msg->msg_namelen; 42820Sstevel@tonic-gate 42830Sstevel@tonic-gate so_mode = so->so_mode; 42840Sstevel@tonic-gate 42850Sstevel@tonic-gate if (name == NULL) { 42860Sstevel@tonic-gate if (!(so_state & SS_ISCONNECTED)) { 42870Sstevel@tonic-gate mutex_exit(&so->so_lock); 42880Sstevel@tonic-gate if (so_mode & SM_CONNREQUIRED) 42890Sstevel@tonic-gate return (ENOTCONN); 42900Sstevel@tonic-gate else 42910Sstevel@tonic-gate return (EDESTADDRREQ); 42920Sstevel@tonic-gate } 42930Sstevel@tonic-gate if (so_mode & SM_CONNREQUIRED) { 42940Sstevel@tonic-gate name = NULL; 42950Sstevel@tonic-gate namelen = 0; 42960Sstevel@tonic-gate } else { 42970Sstevel@tonic-gate /* 42988348SEric.Yu@Sun.COM * Note that this code does not prevent sti_faddr_sa 42990Sstevel@tonic-gate * from changing while it is being used. Thus 43000Sstevel@tonic-gate * if an "unconnect"+connect occurs concurrently with 43010Sstevel@tonic-gate * this send the datagram might be delivered to a 43020Sstevel@tonic-gate * garbaled address. 43030Sstevel@tonic-gate */ 43048348SEric.Yu@Sun.COM ASSERT(sti->sti_faddr_sa); 43058348SEric.Yu@Sun.COM name = sti->sti_faddr_sa; 43068348SEric.Yu@Sun.COM namelen = (t_uscalar_t)sti->sti_faddr_len; 43070Sstevel@tonic-gate } 43080Sstevel@tonic-gate } else { 43090Sstevel@tonic-gate if (!(so_state & SS_ISCONNECTED) && 43100Sstevel@tonic-gate (so_mode & SM_CONNREQUIRED)) { 43110Sstevel@tonic-gate /* Required but not connected */ 43120Sstevel@tonic-gate mutex_exit(&so->so_lock); 43130Sstevel@tonic-gate return (ENOTCONN); 43140Sstevel@tonic-gate } 43150Sstevel@tonic-gate /* 43160Sstevel@tonic-gate * Ignore the address on connection-oriented sockets. 43170Sstevel@tonic-gate * Just like BSD this code does not generate an error for 43180Sstevel@tonic-gate * TCP (a CONNREQUIRED socket) when sending to an address 43190Sstevel@tonic-gate * passed in with sendto/sendmsg. Instead the data is 43200Sstevel@tonic-gate * delivered on the connection as if no address had been 43210Sstevel@tonic-gate * supplied. 43220Sstevel@tonic-gate */ 43230Sstevel@tonic-gate if ((so_state & SS_ISCONNECTED) && 43240Sstevel@tonic-gate !(so_mode & SM_CONNREQUIRED)) { 43250Sstevel@tonic-gate mutex_exit(&so->so_lock); 43260Sstevel@tonic-gate return (EISCONN); 43270Sstevel@tonic-gate } 43280Sstevel@tonic-gate if (!(so_state & SS_ISBOUND)) { 43290Sstevel@tonic-gate so_lock_single(so); /* Set SOLOCKED */ 43300Sstevel@tonic-gate error = sotpi_bind(so, NULL, 0, 43318348SEric.Yu@Sun.COM _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 43320Sstevel@tonic-gate so_unlock_single(so, SOLOCKED); 43330Sstevel@tonic-gate if (error) { 43340Sstevel@tonic-gate mutex_exit(&so->so_lock); 43350Sstevel@tonic-gate eprintsoline(so, error); 43360Sstevel@tonic-gate return (error); 43370Sstevel@tonic-gate } 43380Sstevel@tonic-gate } 43390Sstevel@tonic-gate /* 43400Sstevel@tonic-gate * Handle delayed datagram errors. These are only queued 43410Sstevel@tonic-gate * when the application sets SO_DGRAM_ERRIND. 43420Sstevel@tonic-gate * Return the error if we are sending to the address 43430Sstevel@tonic-gate * that was returned in the last T_UDERROR_IND. 43440Sstevel@tonic-gate * If sending to some other address discard the delayed 43450Sstevel@tonic-gate * error indication. 43460Sstevel@tonic-gate */ 43478348SEric.Yu@Sun.COM if (sti->sti_delayed_error) { 43480Sstevel@tonic-gate struct T_uderror_ind *tudi; 43490Sstevel@tonic-gate void *addr; 43500Sstevel@tonic-gate t_uscalar_t addrlen; 43510Sstevel@tonic-gate boolean_t match = B_FALSE; 43520Sstevel@tonic-gate 43538348SEric.Yu@Sun.COM ASSERT(sti->sti_eaddr_mp); 43548348SEric.Yu@Sun.COM error = sti->sti_delayed_error; 43558348SEric.Yu@Sun.COM sti->sti_delayed_error = 0; 43568348SEric.Yu@Sun.COM tudi = 43578348SEric.Yu@Sun.COM (struct T_uderror_ind *)sti->sti_eaddr_mp->b_rptr; 43580Sstevel@tonic-gate addrlen = tudi->DEST_length; 43598348SEric.Yu@Sun.COM addr = sogetoff(sti->sti_eaddr_mp, 43608348SEric.Yu@Sun.COM tudi->DEST_offset, addrlen, 1); 43610Sstevel@tonic-gate ASSERT(addr); /* Checked by strsock_proto */ 43620Sstevel@tonic-gate switch (so->so_family) { 43630Sstevel@tonic-gate case AF_INET: { 43640Sstevel@tonic-gate /* Compare just IP address and port */ 43650Sstevel@tonic-gate sin_t *sin1 = (sin_t *)name; 43660Sstevel@tonic-gate sin_t *sin2 = (sin_t *)addr; 43670Sstevel@tonic-gate 43680Sstevel@tonic-gate if (addrlen == sizeof (sin_t) && 43690Sstevel@tonic-gate namelen == addrlen && 43700Sstevel@tonic-gate sin1->sin_port == sin2->sin_port && 43710Sstevel@tonic-gate sin1->sin_addr.s_addr == 43720Sstevel@tonic-gate sin2->sin_addr.s_addr) 43730Sstevel@tonic-gate match = B_TRUE; 43740Sstevel@tonic-gate break; 43750Sstevel@tonic-gate } 43760Sstevel@tonic-gate case AF_INET6: { 43770Sstevel@tonic-gate /* Compare just IP address and port. Not flow */ 43780Sstevel@tonic-gate sin6_t *sin1 = (sin6_t *)name; 43790Sstevel@tonic-gate sin6_t *sin2 = (sin6_t *)addr; 43800Sstevel@tonic-gate 43810Sstevel@tonic-gate if (addrlen == sizeof (sin6_t) && 43820Sstevel@tonic-gate namelen == addrlen && 43830Sstevel@tonic-gate sin1->sin6_port == sin2->sin6_port && 43840Sstevel@tonic-gate IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 43855240Snordmark &sin2->sin6_addr)) 43860Sstevel@tonic-gate match = B_TRUE; 43870Sstevel@tonic-gate break; 43880Sstevel@tonic-gate } 43890Sstevel@tonic-gate case AF_UNIX: 43900Sstevel@tonic-gate default: 43910Sstevel@tonic-gate if (namelen == addrlen && 43920Sstevel@tonic-gate bcmp(name, addr, namelen) == 0) 43930Sstevel@tonic-gate match = B_TRUE; 43940Sstevel@tonic-gate } 43950Sstevel@tonic-gate if (match) { 43968348SEric.Yu@Sun.COM freemsg(sti->sti_eaddr_mp); 43978348SEric.Yu@Sun.COM sti->sti_eaddr_mp = NULL; 43980Sstevel@tonic-gate mutex_exit(&so->so_lock); 43990Sstevel@tonic-gate #ifdef DEBUG 44000Sstevel@tonic-gate dprintso(so, 0, 44015240Snordmark ("sockfs delayed error %d for %s\n", 44025240Snordmark error, 44035240Snordmark pr_addr(so->so_family, name, namelen))); 44040Sstevel@tonic-gate #endif /* DEBUG */ 44050Sstevel@tonic-gate return (error); 44060Sstevel@tonic-gate } 44078348SEric.Yu@Sun.COM freemsg(sti->sti_eaddr_mp); 44088348SEric.Yu@Sun.COM sti->sti_eaddr_mp = NULL; 44090Sstevel@tonic-gate } 44100Sstevel@tonic-gate } 44110Sstevel@tonic-gate mutex_exit(&so->so_lock); 44120Sstevel@tonic-gate 44130Sstevel@tonic-gate flags = msg->msg_flags; 44140Sstevel@tonic-gate dontroute = 0; 44150Sstevel@tonic-gate if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) { 44160Sstevel@tonic-gate uint32_t val; 44170Sstevel@tonic-gate 44180Sstevel@tonic-gate val = 1; 44190Sstevel@tonic-gate error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 44208348SEric.Yu@Sun.COM &val, (t_uscalar_t)sizeof (val), cr); 44210Sstevel@tonic-gate if (error) 44220Sstevel@tonic-gate return (error); 44230Sstevel@tonic-gate dontroute = 1; 44240Sstevel@tonic-gate } 44250Sstevel@tonic-gate 44260Sstevel@tonic-gate if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) { 44270Sstevel@tonic-gate error = EOPNOTSUPP; 44280Sstevel@tonic-gate goto done; 44290Sstevel@tonic-gate } 44300Sstevel@tonic-gate if (msg->msg_controllen != 0) { 44310Sstevel@tonic-gate if (!(so_mode & SM_CONNREQUIRED)) { 44328348SEric.Yu@Sun.COM so_update_attrs(so, SOMOD); 44330Sstevel@tonic-gate error = sosend_dgramcmsg(so, name, namelen, uiop, 4434741Smasputra msg->msg_control, msg->msg_controllen, flags); 44350Sstevel@tonic-gate } else { 44360Sstevel@tonic-gate if (flags & MSG_OOB) { 44370Sstevel@tonic-gate /* Can't generate T_EXDATA_REQ with options */ 44380Sstevel@tonic-gate error = EOPNOTSUPP; 44390Sstevel@tonic-gate goto done; 44400Sstevel@tonic-gate } 44418348SEric.Yu@Sun.COM so_update_attrs(so, SOMOD); 44420Sstevel@tonic-gate error = sosend_svccmsg(so, uiop, 44435240Snordmark !(flags & MSG_EOR), 44445240Snordmark msg->msg_control, msg->msg_controllen, 44455240Snordmark flags); 44460Sstevel@tonic-gate } 44470Sstevel@tonic-gate goto done; 44480Sstevel@tonic-gate } 44490Sstevel@tonic-gate 44508348SEric.Yu@Sun.COM so_update_attrs(so, SOMOD); 44510Sstevel@tonic-gate if (!(so_mode & SM_CONNREQUIRED)) { 44520Sstevel@tonic-gate /* 44530Sstevel@tonic-gate * If there is no SO_DONTROUTE to turn off return immediately 4454741Smasputra * from send_dgram. This can allow tail-call optimizations. 44550Sstevel@tonic-gate */ 44560Sstevel@tonic-gate if (!dontroute) { 44570Sstevel@tonic-gate return (sosend_dgram(so, name, namelen, uiop, flags)); 44580Sstevel@tonic-gate } 44590Sstevel@tonic-gate error = sosend_dgram(so, name, namelen, uiop, flags); 44600Sstevel@tonic-gate } else { 44610Sstevel@tonic-gate t_scalar_t prim; 44620Sstevel@tonic-gate int sflag; 44630Sstevel@tonic-gate 44640Sstevel@tonic-gate /* Ignore msg_name in the connected state */ 44650Sstevel@tonic-gate if (flags & MSG_OOB) { 44660Sstevel@tonic-gate prim = T_EXDATA_REQ; 44670Sstevel@tonic-gate /* 44680Sstevel@tonic-gate * Send down T_EXDATA_REQ even if there is flow 44690Sstevel@tonic-gate * control for data. 44700Sstevel@tonic-gate */ 44710Sstevel@tonic-gate sflag = MSG_IGNFLOW; 44720Sstevel@tonic-gate } else { 44730Sstevel@tonic-gate if (so_mode & SM_BYTESTREAM) { 44740Sstevel@tonic-gate /* Byte stream transport - use write */ 44750Sstevel@tonic-gate dprintso(so, 1, ("sotpi_sendmsg: write\n")); 44768348SEric.Yu@Sun.COM 44778348SEric.Yu@Sun.COM /* Send M_DATA messages */ 44788348SEric.Yu@Sun.COM if ((sti->sti_nl7c_flags & NL7C_ENABLED) && 44798348SEric.Yu@Sun.COM (error = nl7c_data(so, uiop)) >= 0) { 44808348SEric.Yu@Sun.COM /* NL7C consumed the data */ 44818348SEric.Yu@Sun.COM return (error); 44828348SEric.Yu@Sun.COM } 44830Sstevel@tonic-gate /* 4484741Smasputra * If there is no SO_DONTROUTE to turn off, 44858348SEric.Yu@Sun.COM * sti_direct is on, and there is no flow 4486741Smasputra * control, we can take the fast path. 44870Sstevel@tonic-gate */ 44888348SEric.Yu@Sun.COM if (!dontroute && sti->sti_direct != 0 && 4489741Smasputra canputnext(SOTOV(so)->v_stream->sd_wrq)) { 4490741Smasputra return (sostream_direct(so, uiop, 44918348SEric.Yu@Sun.COM NULL, cr)); 4492741Smasputra } 44938348SEric.Yu@Sun.COM error = strwrite(SOTOV(so), uiop, cr); 44940Sstevel@tonic-gate goto done; 44950Sstevel@tonic-gate } 44960Sstevel@tonic-gate prim = T_DATA_REQ; 44970Sstevel@tonic-gate sflag = 0; 44980Sstevel@tonic-gate } 44990Sstevel@tonic-gate /* 45000Sstevel@tonic-gate * If there is no SO_DONTROUTE to turn off return immediately 45010Sstevel@tonic-gate * from sosend_svc. This can allow tail-call optimizations. 45020Sstevel@tonic-gate */ 45030Sstevel@tonic-gate if (!dontroute) 45040Sstevel@tonic-gate return (sosend_svc(so, uiop, prim, 45055240Snordmark !(flags & MSG_EOR), sflag)); 45060Sstevel@tonic-gate error = sosend_svc(so, uiop, prim, 45075240Snordmark !(flags & MSG_EOR), sflag); 45080Sstevel@tonic-gate } 45090Sstevel@tonic-gate ASSERT(dontroute); 45100Sstevel@tonic-gate done: 45110Sstevel@tonic-gate if (dontroute) { 45120Sstevel@tonic-gate uint32_t val; 45130Sstevel@tonic-gate 45140Sstevel@tonic-gate val = 0; 45150Sstevel@tonic-gate (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 45168348SEric.Yu@Sun.COM &val, (t_uscalar_t)sizeof (val), cr); 45178348SEric.Yu@Sun.COM } 45188348SEric.Yu@Sun.COM return (error); 45198348SEric.Yu@Sun.COM } 45208348SEric.Yu@Sun.COM 45218348SEric.Yu@Sun.COM /* 45228348SEric.Yu@Sun.COM * kstrwritemp() has very similar semantics as that of strwrite(). 45238348SEric.Yu@Sun.COM * The main difference is it obtains mblks from the caller and also 45248348SEric.Yu@Sun.COM * does not do any copy as done in strwrite() from user buffers to 45258348SEric.Yu@Sun.COM * kernel buffers. 45268348SEric.Yu@Sun.COM * 45278348SEric.Yu@Sun.COM * Currently, this routine is used by sendfile to send data allocated 45288348SEric.Yu@Sun.COM * within the kernel without any copying. This interface does not use the 45298348SEric.Yu@Sun.COM * synchronous stream interface as synch. stream interface implies 45308348SEric.Yu@Sun.COM * copying. 45318348SEric.Yu@Sun.COM */ 45328348SEric.Yu@Sun.COM int 45338348SEric.Yu@Sun.COM kstrwritemp(struct vnode *vp, mblk_t *mp, ushort_t fmode) 45348348SEric.Yu@Sun.COM { 45358348SEric.Yu@Sun.COM struct stdata *stp; 45368348SEric.Yu@Sun.COM struct queue *wqp; 45378348SEric.Yu@Sun.COM mblk_t *newmp; 45388348SEric.Yu@Sun.COM char waitflag; 45398348SEric.Yu@Sun.COM int tempmode; 45408348SEric.Yu@Sun.COM int error = 0; 45418348SEric.Yu@Sun.COM int done = 0; 45428348SEric.Yu@Sun.COM struct sonode *so; 45438348SEric.Yu@Sun.COM boolean_t direct; 45448348SEric.Yu@Sun.COM 45458348SEric.Yu@Sun.COM ASSERT(vp->v_stream); 45468348SEric.Yu@Sun.COM stp = vp->v_stream; 45478348SEric.Yu@Sun.COM 45488348SEric.Yu@Sun.COM so = VTOSO(vp); 45498348SEric.Yu@Sun.COM direct = _SOTOTPI(so)->sti_direct; 45508348SEric.Yu@Sun.COM 45518348SEric.Yu@Sun.COM /* 45528348SEric.Yu@Sun.COM * This is the sockfs direct fast path. canputnext() need 45538348SEric.Yu@Sun.COM * not be accurate so we don't grab the sd_lock here. If 45548348SEric.Yu@Sun.COM * we get flow-controlled, we grab sd_lock just before the 45558348SEric.Yu@Sun.COM * do..while loop below to emulate what strwrite() does. 45568348SEric.Yu@Sun.COM */ 45578348SEric.Yu@Sun.COM wqp = stp->sd_wrq; 45588348SEric.Yu@Sun.COM if (canputnext(wqp) && direct && 45598348SEric.Yu@Sun.COM !(stp->sd_flag & (STWRERR|STRHUP|STPLEX))) { 45608348SEric.Yu@Sun.COM return (sostream_direct(so, NULL, mp, CRED())); 45618348SEric.Yu@Sun.COM } else if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { 45628348SEric.Yu@Sun.COM /* Fast check of flags before acquiring the lock */ 45638348SEric.Yu@Sun.COM mutex_enter(&stp->sd_lock); 45648348SEric.Yu@Sun.COM error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0); 45658348SEric.Yu@Sun.COM mutex_exit(&stp->sd_lock); 45668348SEric.Yu@Sun.COM if (error != 0) { 45678348SEric.Yu@Sun.COM if (!(stp->sd_flag & STPLEX) && 45688348SEric.Yu@Sun.COM (stp->sd_wput_opt & SW_SIGPIPE)) { 45698348SEric.Yu@Sun.COM error = EPIPE; 45708348SEric.Yu@Sun.COM } 45718348SEric.Yu@Sun.COM return (error); 45728348SEric.Yu@Sun.COM } 45738348SEric.Yu@Sun.COM } 45748348SEric.Yu@Sun.COM 45758348SEric.Yu@Sun.COM waitflag = WRITEWAIT; 45768348SEric.Yu@Sun.COM if (stp->sd_flag & OLDNDELAY) 45778348SEric.Yu@Sun.COM tempmode = fmode & ~FNDELAY; 45788348SEric.Yu@Sun.COM else 45798348SEric.Yu@Sun.COM tempmode = fmode; 45808348SEric.Yu@Sun.COM 45818348SEric.Yu@Sun.COM mutex_enter(&stp->sd_lock); 45828348SEric.Yu@Sun.COM do { 45838348SEric.Yu@Sun.COM if (canputnext(wqp)) { 45848348SEric.Yu@Sun.COM mutex_exit(&stp->sd_lock); 45858348SEric.Yu@Sun.COM if (stp->sd_wputdatafunc != NULL) { 45868348SEric.Yu@Sun.COM newmp = (stp->sd_wputdatafunc)(vp, mp, NULL, 45878348SEric.Yu@Sun.COM NULL, NULL, NULL); 45888348SEric.Yu@Sun.COM if (newmp == NULL) { 45898348SEric.Yu@Sun.COM /* The caller will free mp */ 45908348SEric.Yu@Sun.COM return (ECOMM); 45918348SEric.Yu@Sun.COM } 45928348SEric.Yu@Sun.COM mp = newmp; 45938348SEric.Yu@Sun.COM } 45948348SEric.Yu@Sun.COM putnext(wqp, mp); 45958348SEric.Yu@Sun.COM return (0); 45968348SEric.Yu@Sun.COM } 45978348SEric.Yu@Sun.COM error = strwaitq(stp, waitflag, (ssize_t)0, tempmode, -1, 45988348SEric.Yu@Sun.COM &done); 45998348SEric.Yu@Sun.COM } while (error == 0 && !done); 46008348SEric.Yu@Sun.COM 46018348SEric.Yu@Sun.COM mutex_exit(&stp->sd_lock); 46028348SEric.Yu@Sun.COM /* 46038348SEric.Yu@Sun.COM * EAGAIN tells the application to try again. ENOMEM 46048348SEric.Yu@Sun.COM * is returned only if the memory allocation size 46058348SEric.Yu@Sun.COM * exceeds the physical limits of the system. ENOMEM 46068348SEric.Yu@Sun.COM * can't be true here. 46078348SEric.Yu@Sun.COM */ 46088348SEric.Yu@Sun.COM if (error == ENOMEM) 46098348SEric.Yu@Sun.COM error = EAGAIN; 46108348SEric.Yu@Sun.COM return (error); 46118348SEric.Yu@Sun.COM } 46128348SEric.Yu@Sun.COM 46138348SEric.Yu@Sun.COM /* ARGSUSED */ 46148348SEric.Yu@Sun.COM static int 46158348SEric.Yu@Sun.COM sotpi_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag, 46168348SEric.Yu@Sun.COM struct cred *cr, mblk_t **mpp) 46178348SEric.Yu@Sun.COM { 46188348SEric.Yu@Sun.COM int error; 46198348SEric.Yu@Sun.COM 46208348SEric.Yu@Sun.COM if (so->so_family != AF_INET && so->so_family != AF_INET6) 46218348SEric.Yu@Sun.COM return (EAFNOSUPPORT); 46228348SEric.Yu@Sun.COM 46238348SEric.Yu@Sun.COM if (so->so_state & SS_CANTSENDMORE) 46248348SEric.Yu@Sun.COM return (EPIPE); 46258348SEric.Yu@Sun.COM 46268348SEric.Yu@Sun.COM if (so->so_type != SOCK_STREAM) 46278348SEric.Yu@Sun.COM return (EOPNOTSUPP); 46288348SEric.Yu@Sun.COM 46298348SEric.Yu@Sun.COM if ((so->so_state & SS_ISCONNECTED) == 0) 46308348SEric.Yu@Sun.COM return (ENOTCONN); 46318348SEric.Yu@Sun.COM 46328348SEric.Yu@Sun.COM error = kstrwritemp(so->so_vnode, *mpp, fflag); 46338348SEric.Yu@Sun.COM if (error == 0) 46348348SEric.Yu@Sun.COM *mpp = NULL; 46350Sstevel@tonic-gate return (error); 46360Sstevel@tonic-gate } 46370Sstevel@tonic-gate 46380Sstevel@tonic-gate /* 4639741Smasputra * Sending data on a datagram socket. 4640741Smasputra * Assumes caller has verified that SS_ISBOUND etc. are set. 4641741Smasputra */ 4642741Smasputra /* ARGSUSED */ 4643741Smasputra static int 4644741Smasputra sodgram_direct(struct sonode *so, struct sockaddr *name, 4645741Smasputra socklen_t namelen, struct uio *uiop, int flags) 4646741Smasputra { 4647741Smasputra struct T_unitdata_req tudr; 46485240Snordmark mblk_t *mp = NULL; 4649741Smasputra int error = 0; 4650741Smasputra void *addr; 4651741Smasputra socklen_t addrlen; 4652741Smasputra ssize_t len; 4653741Smasputra struct stdata *stp = SOTOV(so)->v_stream; 4654741Smasputra int so_state; 4655741Smasputra queue_t *udp_wq; 46565240Snordmark boolean_t connected; 46575240Snordmark mblk_t *mpdata = NULL; 46588348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 465911861SMarek.Pospisil@Sun.COM uint32_t auditing = AU_AUDITING(); 4660741Smasputra 4661741Smasputra ASSERT(name != NULL && namelen != 0); 4662741Smasputra ASSERT(!(so->so_mode & SM_CONNREQUIRED)); 4663741Smasputra ASSERT(!(so->so_mode & SM_EXDATA)); 4664741Smasputra ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 4665741Smasputra ASSERT(SOTOV(so)->v_type == VSOCK); 4666741Smasputra 4667741Smasputra /* Caller checked for proper length */ 4668741Smasputra len = uiop->uio_resid; 46698348SEric.Yu@Sun.COM ASSERT(len <= sti->sti_tidu_size); 4670741Smasputra 4671741Smasputra /* Length and family checks have been done by caller */ 4672741Smasputra ASSERT(name->sa_family == so->so_family); 4673741Smasputra ASSERT(so->so_family == AF_INET || 4674741Smasputra (namelen == (socklen_t)sizeof (struct sockaddr_in6))); 4675741Smasputra ASSERT(so->so_family == AF_INET6 || 4676741Smasputra (namelen == (socklen_t)sizeof (struct sockaddr_in))); 4677741Smasputra 4678741Smasputra addr = name; 4679741Smasputra addrlen = namelen; 4680741Smasputra 4681741Smasputra if (stp->sd_sidp != NULL && 4682741Smasputra (error = straccess(stp, JCWRITE)) != 0) 4683741Smasputra goto done; 4684741Smasputra 4685741Smasputra so_state = so->so_state; 4686741Smasputra 46875240Snordmark connected = so_state & SS_ISCONNECTED; 46885240Snordmark if (!connected) { 46895240Snordmark tudr.PRIM_type = T_UNITDATA_REQ; 46905240Snordmark tudr.DEST_length = addrlen; 46915240Snordmark tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 46925240Snordmark tudr.OPT_length = 0; 46935240Snordmark tudr.OPT_offset = 0; 46945240Snordmark 46955240Snordmark mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0, 46968778SErik.Nordmark@Sun.COM _ALLOC_INTR, CRED()); 46975240Snordmark if (mp == NULL) { 46985240Snordmark /* 46995240Snordmark * Caught a signal waiting for memory. 47005240Snordmark * Let send* return EINTR. 47015240Snordmark */ 47025240Snordmark error = EINTR; 47035240Snordmark goto done; 47045240Snordmark } 47055240Snordmark } 47065240Snordmark 4707741Smasputra /* 4708741Smasputra * For UDP we don't break up the copyin into smaller pieces 4709741Smasputra * as in the TCP case. That means if ENOMEM is returned by 4710741Smasputra * mcopyinuio() then the uio vector has not been modified at 4711741Smasputra * all and we fallback to either strwrite() or kstrputmsg() 4712741Smasputra * below. Note also that we never generate priority messages 4713741Smasputra * from here. 4714741Smasputra */ 4715741Smasputra udp_wq = stp->sd_wrq->q_next; 4716741Smasputra if (canput(udp_wq) && 47175240Snordmark (mpdata = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) { 47185240Snordmark ASSERT(DB_TYPE(mpdata) == M_DATA); 4719741Smasputra ASSERT(uiop->uio_resid == 0); 47205240Snordmark if (!connected) 47215240Snordmark linkb(mp, mpdata); 47225240Snordmark else 47235240Snordmark mp = mpdata; 472411861SMarek.Pospisil@Sun.COM if (auditing) 4725741Smasputra audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 47265240Snordmark 47275240Snordmark udp_wput(udp_wq, mp); 4728741Smasputra return (0); 4729741Smasputra } 47305240Snordmark 47315240Snordmark ASSERT(mpdata == NULL); 47325240Snordmark if (error != 0 && error != ENOMEM) { 47335240Snordmark freemsg(mp); 4734741Smasputra return (error); 47355240Snordmark } 4736741Smasputra 4737741Smasputra /* 4738741Smasputra * For connected, let strwrite() handle the blocking case. 4739741Smasputra * Otherwise we fall thru and use kstrputmsg(). 4740741Smasputra */ 47415240Snordmark if (connected) 4742741Smasputra return (strwrite(SOTOV(so), uiop, CRED())); 4743741Smasputra 474411861SMarek.Pospisil@Sun.COM if (auditing) 4745741Smasputra audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4746741Smasputra 4747741Smasputra error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4748741Smasputra done: 4749741Smasputra #ifdef SOCK_DEBUG 4750741Smasputra if (error != 0) { 4751741Smasputra eprintsoline(so, error); 4752741Smasputra } 4753741Smasputra #endif /* SOCK_DEBUG */ 4754741Smasputra return (error); 4755741Smasputra } 4756741Smasputra 4757741Smasputra int 4758741Smasputra sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr) 4759741Smasputra { 4760741Smasputra struct stdata *stp = SOTOV(so)->v_stream; 4761741Smasputra ssize_t iosize, rmax, maxblk; 4762741Smasputra queue_t *tcp_wq = stp->sd_wrq->q_next; 4763898Skais mblk_t *newmp; 4764741Smasputra int error = 0, wflag = 0; 4765741Smasputra 4766741Smasputra ASSERT(so->so_mode & SM_BYTESTREAM); 4767741Smasputra ASSERT(SOTOV(so)->v_type == VSOCK); 4768741Smasputra 4769741Smasputra if (stp->sd_sidp != NULL && 4770741Smasputra (error = straccess(stp, JCWRITE)) != 0) 4771741Smasputra return (error); 4772741Smasputra 4773741Smasputra if (uiop == NULL) { 4774741Smasputra /* 4775741Smasputra * kstrwritemp() should have checked sd_flag and 4776741Smasputra * flow-control before coming here. If we end up 4777741Smasputra * here it means that we can simply pass down the 4778741Smasputra * data to tcp. 4779741Smasputra */ 4780741Smasputra ASSERT(mp != NULL); 4781898Skais if (stp->sd_wputdatafunc != NULL) { 4782898Skais newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4783898Skais NULL, NULL, NULL); 4784898Skais if (newmp == NULL) { 4785898Skais /* The caller will free mp */ 4786898Skais return (ECOMM); 4787898Skais } 4788898Skais mp = newmp; 4789898Skais } 4790741Smasputra tcp_wput(tcp_wq, mp); 4791741Smasputra return (0); 4792741Smasputra } 4793741Smasputra 4794741Smasputra /* Fallback to strwrite() to do proper error handling */ 4795741Smasputra if (stp->sd_flag & (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY)) 4796741Smasputra return (strwrite(SOTOV(so), uiop, cr)); 4797741Smasputra 4798741Smasputra rmax = stp->sd_qn_maxpsz; 4799741Smasputra ASSERT(rmax >= 0 || rmax == INFPSZ); 4800741Smasputra if (rmax == 0 || uiop->uio_resid <= 0) 4801741Smasputra return (0); 4802741Smasputra 4803741Smasputra if (rmax == INFPSZ) 4804741Smasputra rmax = uiop->uio_resid; 4805741Smasputra 4806741Smasputra maxblk = stp->sd_maxblk; 4807741Smasputra 4808741Smasputra for (;;) { 4809741Smasputra iosize = MIN(uiop->uio_resid, rmax); 4810741Smasputra 4811741Smasputra mp = mcopyinuio(stp, uiop, iosize, maxblk, &error); 4812741Smasputra if (mp == NULL) { 4813741Smasputra /* 4814741Smasputra * Fallback to strwrite() for ENOMEM; if this 4815741Smasputra * is our first time in this routine and the uio 4816741Smasputra * vector has not been modified, we will end up 4817741Smasputra * calling strwrite() without any flag set. 4818741Smasputra */ 4819741Smasputra if (error == ENOMEM) 4820741Smasputra goto slow_send; 4821741Smasputra else 4822741Smasputra return (error); 4823741Smasputra } 4824741Smasputra ASSERT(uiop->uio_resid >= 0); 4825741Smasputra /* 4826741Smasputra * If mp is non-NULL and ENOMEM is set, it means that 4827741Smasputra * mcopyinuio() was able to break down some of the user 4828741Smasputra * data into one or more mblks. Send the partial data 4829741Smasputra * to tcp and let the rest be handled in strwrite(). 4830741Smasputra */ 4831741Smasputra ASSERT(error == 0 || error == ENOMEM); 4832898Skais if (stp->sd_wputdatafunc != NULL) { 4833898Skais newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4834898Skais NULL, NULL, NULL); 4835898Skais if (newmp == NULL) { 4836898Skais /* The caller will free mp */ 4837898Skais return (ECOMM); 4838898Skais } 4839898Skais mp = newmp; 4840898Skais } 4841741Smasputra tcp_wput(tcp_wq, mp); 4842741Smasputra 4843741Smasputra wflag |= NOINTR; 4844741Smasputra 4845741Smasputra if (uiop->uio_resid == 0) { /* No more data; we're done */ 4846741Smasputra ASSERT(error == 0); 4847741Smasputra break; 4848741Smasputra } else if (error == ENOMEM || !canput(tcp_wq) || (stp->sd_flag & 4849741Smasputra (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))) { 4850741Smasputra slow_send: 4851741Smasputra /* 4852741Smasputra * We were able to send down partial data using 4853741Smasputra * the direct call interface, but are now relying 4854741Smasputra * on strwrite() to handle the non-fastpath cases. 4855741Smasputra * If the socket is blocking we will sleep in 4856741Smasputra * strwaitq() until write is permitted, otherwise, 4857741Smasputra * we will need to return the amount of bytes 4858741Smasputra * written so far back to the app. This is the 4859741Smasputra * reason why we pass NOINTR flag to strwrite() 4860741Smasputra * for non-blocking socket, because we don't want 4861741Smasputra * to return EAGAIN when portion of the user data 4862741Smasputra * has actually been sent down. 4863741Smasputra */ 4864741Smasputra return (strwrite_common(SOTOV(so), uiop, cr, wflag)); 4865741Smasputra } 4866741Smasputra } 4867741Smasputra return (0); 4868741Smasputra } 4869741Smasputra 4870741Smasputra /* 48718348SEric.Yu@Sun.COM * Update sti_faddr by asking the transport (unless AF_UNIX). 48720Sstevel@tonic-gate */ 48738348SEric.Yu@Sun.COM /* ARGSUSED */ 48740Sstevel@tonic-gate int 48758348SEric.Yu@Sun.COM sotpi_getpeername(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 48768348SEric.Yu@Sun.COM boolean_t accept, struct cred *cr) 48770Sstevel@tonic-gate { 48780Sstevel@tonic-gate struct strbuf strbuf; 48790Sstevel@tonic-gate int error = 0, res; 48800Sstevel@tonic-gate void *addr; 48810Sstevel@tonic-gate t_uscalar_t addrlen; 48820Sstevel@tonic-gate k_sigset_t smask; 48838348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 48840Sstevel@tonic-gate 48850Sstevel@tonic-gate dprintso(so, 1, ("sotpi_getpeername(%p) %s\n", 48867240Srh87107 (void *)so, pr_state(so->so_state, so->so_mode))); 48870Sstevel@tonic-gate 48888348SEric.Yu@Sun.COM ASSERT(*namelen > 0); 48890Sstevel@tonic-gate mutex_enter(&so->so_lock); 48900Sstevel@tonic-gate so_lock_single(so); /* Set SOLOCKED */ 48918348SEric.Yu@Sun.COM 48928348SEric.Yu@Sun.COM if (accept) { 48938348SEric.Yu@Sun.COM bcopy(sti->sti_faddr_sa, name, 48948348SEric.Yu@Sun.COM MIN(*namelen, sti->sti_faddr_len)); 48958348SEric.Yu@Sun.COM *namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len; 48968348SEric.Yu@Sun.COM goto done; 48978348SEric.Yu@Sun.COM } 48988348SEric.Yu@Sun.COM 48990Sstevel@tonic-gate if (!(so->so_state & SS_ISCONNECTED)) { 49000Sstevel@tonic-gate error = ENOTCONN; 49010Sstevel@tonic-gate goto done; 49020Sstevel@tonic-gate } 49030Sstevel@tonic-gate /* Added this check for X/Open */ 49040Sstevel@tonic-gate if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 49050Sstevel@tonic-gate error = EINVAL; 49060Sstevel@tonic-gate if (xnet_check_print) { 49070Sstevel@tonic-gate printf("sockfs: X/Open getpeername check => EINVAL\n"); 49080Sstevel@tonic-gate } 49090Sstevel@tonic-gate goto done; 49100Sstevel@tonic-gate } 49118348SEric.Yu@Sun.COM 49128348SEric.Yu@Sun.COM if (sti->sti_faddr_valid) { 49138348SEric.Yu@Sun.COM bcopy(sti->sti_faddr_sa, name, 49148348SEric.Yu@Sun.COM MIN(*namelen, sti->sti_faddr_len)); 49158348SEric.Yu@Sun.COM *namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len; 49168348SEric.Yu@Sun.COM goto done; 49178348SEric.Yu@Sun.COM } 49188348SEric.Yu@Sun.COM 49190Sstevel@tonic-gate #ifdef DEBUG 49200Sstevel@tonic-gate dprintso(so, 1, ("sotpi_getpeername (local): %s\n", 49218348SEric.Yu@Sun.COM pr_addr(so->so_family, sti->sti_faddr_sa, 49228348SEric.Yu@Sun.COM (t_uscalar_t)sti->sti_faddr_len))); 49230Sstevel@tonic-gate #endif /* DEBUG */ 49240Sstevel@tonic-gate 49251548Srshoaib if (so->so_family == AF_UNIX) { 49260Sstevel@tonic-gate /* Transport has different name space - return local info */ 49278348SEric.Yu@Sun.COM if (sti->sti_faddr_noxlate) 49288348SEric.Yu@Sun.COM *namelen = 0; 49290Sstevel@tonic-gate error = 0; 49300Sstevel@tonic-gate goto done; 49310Sstevel@tonic-gate } 49320Sstevel@tonic-gate 49338348SEric.Yu@Sun.COM ASSERT(so->so_family != AF_UNIX && sti->sti_faddr_noxlate == 0); 49348348SEric.Yu@Sun.COM 49358348SEric.Yu@Sun.COM ASSERT(sti->sti_faddr_sa); 49360Sstevel@tonic-gate /* Allocate local buffer to use with ioctl */ 49378348SEric.Yu@Sun.COM addrlen = (t_uscalar_t)sti->sti_faddr_maxlen; 49380Sstevel@tonic-gate mutex_exit(&so->so_lock); 49390Sstevel@tonic-gate addr = kmem_alloc(addrlen, KM_SLEEP); 49400Sstevel@tonic-gate 49410Sstevel@tonic-gate /* 49420Sstevel@tonic-gate * Issue TI_GETPEERNAME with signals masked. 49438348SEric.Yu@Sun.COM * Put the result in sti_faddr_sa so that getpeername works after 49440Sstevel@tonic-gate * a shutdown(output). 49450Sstevel@tonic-gate * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 49460Sstevel@tonic-gate * back to the socket. 49470Sstevel@tonic-gate */ 49480Sstevel@tonic-gate strbuf.buf = addr; 49490Sstevel@tonic-gate strbuf.maxlen = addrlen; 49500Sstevel@tonic-gate strbuf.len = 0; 49510Sstevel@tonic-gate 49520Sstevel@tonic-gate sigintr(&smask, 0); 49530Sstevel@tonic-gate res = 0; 49548348SEric.Yu@Sun.COM ASSERT(cr); 49550Sstevel@tonic-gate error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf, 49568348SEric.Yu@Sun.COM 0, K_TO_K, cr, &res); 49570Sstevel@tonic-gate sigunintr(&smask); 49580Sstevel@tonic-gate 49590Sstevel@tonic-gate mutex_enter(&so->so_lock); 49600Sstevel@tonic-gate /* 49610Sstevel@tonic-gate * If there is an error record the error in so_error put don't fail 49620Sstevel@tonic-gate * the getpeername. Instead fallback on the recorded 49638348SEric.Yu@Sun.COM * sti->sti_faddr_sa. 49640Sstevel@tonic-gate */ 49650Sstevel@tonic-gate if (error) { 49660Sstevel@tonic-gate /* 49670Sstevel@tonic-gate * Various stream head errors can be returned to the ioctl. 49680Sstevel@tonic-gate * However, it is impossible to determine which ones of 49690Sstevel@tonic-gate * these are really socket level errors that were incorrectly 49700Sstevel@tonic-gate * consumed by the ioctl. Thus this code silently ignores the 49710Sstevel@tonic-gate * error - to code explicitly does not reinstate the error 49720Sstevel@tonic-gate * using soseterror(). 49730Sstevel@tonic-gate * Experiments have shows that at least this set of 49740Sstevel@tonic-gate * errors are reported and should not be reinstated on the 49750Sstevel@tonic-gate * socket: 49760Sstevel@tonic-gate * EINVAL E.g. if an I_LINK was in effect when 49770Sstevel@tonic-gate * getpeername was called. 49780Sstevel@tonic-gate * EPIPE The ioctl error semantics prefer the write 49790Sstevel@tonic-gate * side error over the read side error. 49800Sstevel@tonic-gate * ENOTCONN The transport just got disconnected but 49810Sstevel@tonic-gate * sockfs had not yet seen the T_DISCON_IND 49820Sstevel@tonic-gate * when issuing the ioctl. 49830Sstevel@tonic-gate */ 49840Sstevel@tonic-gate error = 0; 49850Sstevel@tonic-gate } else if (res == 0 && strbuf.len > 0 && 49860Sstevel@tonic-gate (so->so_state & SS_ISCONNECTED)) { 49878348SEric.Yu@Sun.COM ASSERT(strbuf.len <= (int)sti->sti_faddr_maxlen); 49888348SEric.Yu@Sun.COM sti->sti_faddr_len = (socklen_t)strbuf.len; 49898348SEric.Yu@Sun.COM bcopy(addr, sti->sti_faddr_sa, sti->sti_faddr_len); 49908348SEric.Yu@Sun.COM sti->sti_faddr_valid = 1; 49918348SEric.Yu@Sun.COM 49928348SEric.Yu@Sun.COM bcopy(addr, name, MIN(*namelen, sti->sti_faddr_len)); 49938348SEric.Yu@Sun.COM *namelen = sti->sti_faddr_len; 49940Sstevel@tonic-gate } 49950Sstevel@tonic-gate kmem_free(addr, addrlen); 49960Sstevel@tonic-gate #ifdef DEBUG 49970Sstevel@tonic-gate dprintso(so, 1, ("sotpi_getpeername (tp): %s\n", 49988348SEric.Yu@Sun.COM pr_addr(so->so_family, sti->sti_faddr_sa, 49998348SEric.Yu@Sun.COM (t_uscalar_t)sti->sti_faddr_len))); 50000Sstevel@tonic-gate #endif /* DEBUG */ 50010Sstevel@tonic-gate done: 50020Sstevel@tonic-gate so_unlock_single(so, SOLOCKED); 50030Sstevel@tonic-gate mutex_exit(&so->so_lock); 50040Sstevel@tonic-gate return (error); 50050Sstevel@tonic-gate } 50060Sstevel@tonic-gate 50070Sstevel@tonic-gate /* 50088348SEric.Yu@Sun.COM * Update sti_laddr by asking the transport (unless AF_UNIX). 50090Sstevel@tonic-gate */ 50100Sstevel@tonic-gate int 50118348SEric.Yu@Sun.COM sotpi_getsockname(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 50128348SEric.Yu@Sun.COM struct cred *cr) 50130Sstevel@tonic-gate { 50140Sstevel@tonic-gate struct strbuf strbuf; 50150Sstevel@tonic-gate int error = 0, res; 50160Sstevel@tonic-gate void *addr; 50170Sstevel@tonic-gate t_uscalar_t addrlen; 50180Sstevel@tonic-gate k_sigset_t smask; 50198348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 50200Sstevel@tonic-gate 50210Sstevel@tonic-gate dprintso(so, 1, ("sotpi_getsockname(%p) %s\n", 50227240Srh87107 (void *)so, pr_state(so->so_state, so->so_mode))); 50230Sstevel@tonic-gate 50248348SEric.Yu@Sun.COM ASSERT(*namelen > 0); 50250Sstevel@tonic-gate mutex_enter(&so->so_lock); 50260Sstevel@tonic-gate so_lock_single(so); /* Set SOLOCKED */ 50278348SEric.Yu@Sun.COM 50280Sstevel@tonic-gate #ifdef DEBUG 50298348SEric.Yu@Sun.COM 50300Sstevel@tonic-gate dprintso(so, 1, ("sotpi_getsockname (local): %s\n", 50318348SEric.Yu@Sun.COM pr_addr(so->so_family, sti->sti_laddr_sa, 50328348SEric.Yu@Sun.COM (t_uscalar_t)sti->sti_laddr_len))); 50330Sstevel@tonic-gate #endif /* DEBUG */ 50348348SEric.Yu@Sun.COM if (sti->sti_laddr_valid) { 50358348SEric.Yu@Sun.COM bcopy(sti->sti_laddr_sa, name, 50368348SEric.Yu@Sun.COM MIN(*namelen, sti->sti_laddr_len)); 50378348SEric.Yu@Sun.COM *namelen = sti->sti_laddr_len; 50388348SEric.Yu@Sun.COM goto done; 50398348SEric.Yu@Sun.COM } 50408348SEric.Yu@Sun.COM 50410Sstevel@tonic-gate if (so->so_family == AF_UNIX) { 50420Sstevel@tonic-gate /* Transport has different name space - return local info */ 50430Sstevel@tonic-gate error = 0; 50448465SEric.Yu@Sun.COM *namelen = 0; 50450Sstevel@tonic-gate goto done; 50460Sstevel@tonic-gate } 50471156Sblu if (!(so->so_state & SS_ISBOUND)) { 50481156Sblu /* If not bound, then nothing to return. */ 50491156Sblu error = 0; 50501156Sblu goto done; 50511156Sblu } 50528348SEric.Yu@Sun.COM 50530Sstevel@tonic-gate /* Allocate local buffer to use with ioctl */ 50548348SEric.Yu@Sun.COM addrlen = (t_uscalar_t)sti->sti_laddr_maxlen; 50550Sstevel@tonic-gate mutex_exit(&so->so_lock); 50560Sstevel@tonic-gate addr = kmem_alloc(addrlen, KM_SLEEP); 50570Sstevel@tonic-gate 50580Sstevel@tonic-gate /* 50590Sstevel@tonic-gate * Issue TI_GETMYNAME with signals masked. 50608348SEric.Yu@Sun.COM * Put the result in sti_laddr_sa so that getsockname works after 50610Sstevel@tonic-gate * a shutdown(output). 50620Sstevel@tonic-gate * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 50630Sstevel@tonic-gate * back to the socket. 50640Sstevel@tonic-gate */ 50650Sstevel@tonic-gate strbuf.buf = addr; 50660Sstevel@tonic-gate strbuf.maxlen = addrlen; 50670Sstevel@tonic-gate strbuf.len = 0; 50680Sstevel@tonic-gate 50690Sstevel@tonic-gate sigintr(&smask, 0); 50700Sstevel@tonic-gate res = 0; 50718348SEric.Yu@Sun.COM ASSERT(cr); 50720Sstevel@tonic-gate error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf, 50738348SEric.Yu@Sun.COM 0, K_TO_K, cr, &res); 50740Sstevel@tonic-gate sigunintr(&smask); 50750Sstevel@tonic-gate 50760Sstevel@tonic-gate mutex_enter(&so->so_lock); 50770Sstevel@tonic-gate /* 50780Sstevel@tonic-gate * If there is an error record the error in so_error put don't fail 50790Sstevel@tonic-gate * the getsockname. Instead fallback on the recorded 50808348SEric.Yu@Sun.COM * sti->sti_laddr_sa. 50810Sstevel@tonic-gate */ 50820Sstevel@tonic-gate if (error) { 50830Sstevel@tonic-gate /* 50840Sstevel@tonic-gate * Various stream head errors can be returned to the ioctl. 50850Sstevel@tonic-gate * However, it is impossible to determine which ones of 50860Sstevel@tonic-gate * these are really socket level errors that were incorrectly 50870Sstevel@tonic-gate * consumed by the ioctl. Thus this code silently ignores the 50880Sstevel@tonic-gate * error - to code explicitly does not reinstate the error 50890Sstevel@tonic-gate * using soseterror(). 50900Sstevel@tonic-gate * Experiments have shows that at least this set of 50910Sstevel@tonic-gate * errors are reported and should not be reinstated on the 50920Sstevel@tonic-gate * socket: 50930Sstevel@tonic-gate * EINVAL E.g. if an I_LINK was in effect when 50940Sstevel@tonic-gate * getsockname was called. 50950Sstevel@tonic-gate * EPIPE The ioctl error semantics prefer the write 50960Sstevel@tonic-gate * side error over the read side error. 50970Sstevel@tonic-gate */ 50980Sstevel@tonic-gate error = 0; 50990Sstevel@tonic-gate } else if (res == 0 && strbuf.len > 0 && 51000Sstevel@tonic-gate (so->so_state & SS_ISBOUND)) { 51018348SEric.Yu@Sun.COM ASSERT(strbuf.len <= (int)sti->sti_laddr_maxlen); 51028348SEric.Yu@Sun.COM sti->sti_laddr_len = (socklen_t)strbuf.len; 51038348SEric.Yu@Sun.COM bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 51048348SEric.Yu@Sun.COM sti->sti_laddr_valid = 1; 51058348SEric.Yu@Sun.COM 51068348SEric.Yu@Sun.COM bcopy(addr, name, MIN(sti->sti_laddr_len, *namelen)); 51078348SEric.Yu@Sun.COM *namelen = sti->sti_laddr_len; 51080Sstevel@tonic-gate } 51090Sstevel@tonic-gate kmem_free(addr, addrlen); 51100Sstevel@tonic-gate #ifdef DEBUG 51110Sstevel@tonic-gate dprintso(so, 1, ("sotpi_getsockname (tp): %s\n", 51128348SEric.Yu@Sun.COM pr_addr(so->so_family, sti->sti_laddr_sa, 51138348SEric.Yu@Sun.COM (t_uscalar_t)sti->sti_laddr_len))); 51140Sstevel@tonic-gate #endif /* DEBUG */ 51150Sstevel@tonic-gate done: 51160Sstevel@tonic-gate so_unlock_single(so, SOLOCKED); 51170Sstevel@tonic-gate mutex_exit(&so->so_lock); 51180Sstevel@tonic-gate return (error); 51190Sstevel@tonic-gate } 51200Sstevel@tonic-gate 51210Sstevel@tonic-gate /* 51220Sstevel@tonic-gate * Get socket options. For SOL_SOCKET options some options are handled 51230Sstevel@tonic-gate * by the sockfs while others use the value recorded in the sonode as a 51240Sstevel@tonic-gate * fallback should the T_SVR4_OPTMGMT_REQ fail. 51250Sstevel@tonic-gate * 51260Sstevel@tonic-gate * On the return most *optlenp bytes are copied to optval. 51270Sstevel@tonic-gate */ 51288348SEric.Yu@Sun.COM /* ARGSUSED */ 51290Sstevel@tonic-gate int 51300Sstevel@tonic-gate sotpi_getsockopt(struct sonode *so, int level, int option_name, 51318348SEric.Yu@Sun.COM void *optval, socklen_t *optlenp, int flags, struct cred *cr) 51320Sstevel@tonic-gate { 51330Sstevel@tonic-gate struct T_optmgmt_req optmgmt_req; 51340Sstevel@tonic-gate struct T_optmgmt_ack *optmgmt_ack; 51350Sstevel@tonic-gate struct opthdr oh; 51360Sstevel@tonic-gate struct opthdr *opt_res; 51370Sstevel@tonic-gate mblk_t *mp = NULL; 51380Sstevel@tonic-gate int error = 0; 51390Sstevel@tonic-gate void *option = NULL; /* Set if fallback value */ 51400Sstevel@tonic-gate t_uscalar_t maxlen = *optlenp; 51410Sstevel@tonic-gate t_uscalar_t len; 51420Sstevel@tonic-gate uint32_t value; 51438348SEric.Yu@Sun.COM struct timeval tmo_val; /* used for SO_RCVTIMEO, SO_SNDTIMEO */ 51448489Sshenjian struct timeval32 tmo_val32; 51458348SEric.Yu@Sun.COM struct so_snd_bufinfo snd_bufinfo; /* used for zero copy */ 51460Sstevel@tonic-gate 51470Sstevel@tonic-gate dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n", 51487240Srh87107 (void *)so, level, option_name, optval, (void *)optlenp, 51495240Snordmark pr_state(so->so_state, so->so_mode))); 51500Sstevel@tonic-gate 51510Sstevel@tonic-gate mutex_enter(&so->so_lock); 51520Sstevel@tonic-gate so_lock_single(so); /* Set SOLOCKED */ 51530Sstevel@tonic-gate 51540Sstevel@tonic-gate /* 51550Sstevel@tonic-gate * Check for SOL_SOCKET options. 51560Sstevel@tonic-gate * Certain SOL_SOCKET options are returned directly whereas 51570Sstevel@tonic-gate * others only provide a default (fallback) value should 51580Sstevel@tonic-gate * the T_SVR4_OPTMGMT_REQ fail. 51590Sstevel@tonic-gate */ 51600Sstevel@tonic-gate if (level == SOL_SOCKET) { 51610Sstevel@tonic-gate /* Check parameters */ 51620Sstevel@tonic-gate switch (option_name) { 51630Sstevel@tonic-gate case SO_TYPE: 51640Sstevel@tonic-gate case SO_ERROR: 51650Sstevel@tonic-gate case SO_DEBUG: 51660Sstevel@tonic-gate case SO_ACCEPTCONN: 51670Sstevel@tonic-gate case SO_REUSEADDR: 51680Sstevel@tonic-gate case SO_KEEPALIVE: 51690Sstevel@tonic-gate case SO_DONTROUTE: 51700Sstevel@tonic-gate case SO_BROADCAST: 51710Sstevel@tonic-gate case SO_USELOOPBACK: 51720Sstevel@tonic-gate case SO_OOBINLINE: 51730Sstevel@tonic-gate case SO_SNDBUF: 51740Sstevel@tonic-gate case SO_RCVBUF: 51750Sstevel@tonic-gate #ifdef notyet 51760Sstevel@tonic-gate case SO_SNDLOWAT: 51770Sstevel@tonic-gate case SO_RCVLOWAT: 51780Sstevel@tonic-gate #endif /* notyet */ 51793388Skcpoon case SO_DOMAIN: 51800Sstevel@tonic-gate case SO_DGRAM_ERRIND: 51810Sstevel@tonic-gate if (maxlen < (t_uscalar_t)sizeof (int32_t)) { 51820Sstevel@tonic-gate error = EINVAL; 51830Sstevel@tonic-gate eprintsoline(so, error); 51840Sstevel@tonic-gate goto done2; 51850Sstevel@tonic-gate } 51860Sstevel@tonic-gate break; 51878348SEric.Yu@Sun.COM case SO_RCVTIMEO: 51888348SEric.Yu@Sun.COM case SO_SNDTIMEO: 51898575Sshenjian if (get_udatamodel() == DATAMODEL_NONE || 51908575Sshenjian get_udatamodel() == DATAMODEL_NATIVE) { 51918489Sshenjian if (maxlen < sizeof (struct timeval)) { 51928489Sshenjian error = EINVAL; 51938489Sshenjian eprintsoline(so, error); 51948489Sshenjian goto done2; 51958489Sshenjian } 51968489Sshenjian } else { 51978489Sshenjian if (maxlen < sizeof (struct timeval32)) { 51988489Sshenjian error = EINVAL; 51998489Sshenjian eprintsoline(so, error); 52008489Sshenjian goto done2; 52018489Sshenjian } 52028489Sshenjian 52038348SEric.Yu@Sun.COM } 52048348SEric.Yu@Sun.COM break; 52050Sstevel@tonic-gate case SO_LINGER: 52060Sstevel@tonic-gate if (maxlen < (t_uscalar_t)sizeof (struct linger)) { 52070Sstevel@tonic-gate error = EINVAL; 52080Sstevel@tonic-gate eprintsoline(so, error); 52090Sstevel@tonic-gate goto done2; 52100Sstevel@tonic-gate } 52110Sstevel@tonic-gate break; 52128348SEric.Yu@Sun.COM case SO_SND_BUFINFO: 52138348SEric.Yu@Sun.COM if (maxlen < (t_uscalar_t) 52148348SEric.Yu@Sun.COM sizeof (struct so_snd_bufinfo)) { 52158348SEric.Yu@Sun.COM error = EINVAL; 52168348SEric.Yu@Sun.COM eprintsoline(so, error); 52178348SEric.Yu@Sun.COM goto done2; 52188348SEric.Yu@Sun.COM } 52198348SEric.Yu@Sun.COM break; 52200Sstevel@tonic-gate } 52210Sstevel@tonic-gate 52220Sstevel@tonic-gate len = (t_uscalar_t)sizeof (uint32_t); /* Default */ 52230Sstevel@tonic-gate 52240Sstevel@tonic-gate switch (option_name) { 52250Sstevel@tonic-gate case SO_TYPE: 52260Sstevel@tonic-gate value = so->so_type; 52270Sstevel@tonic-gate option = &value; 52280Sstevel@tonic-gate goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 52290Sstevel@tonic-gate 52300Sstevel@tonic-gate case SO_ERROR: 52318348SEric.Yu@Sun.COM value = sogeterr(so, B_TRUE); 52320Sstevel@tonic-gate option = &value; 52330Sstevel@tonic-gate goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 52340Sstevel@tonic-gate 52350Sstevel@tonic-gate case SO_ACCEPTCONN: 52360Sstevel@tonic-gate if (so->so_state & SS_ACCEPTCONN) 52370Sstevel@tonic-gate value = SO_ACCEPTCONN; 52380Sstevel@tonic-gate else 52390Sstevel@tonic-gate value = 0; 52400Sstevel@tonic-gate #ifdef DEBUG 52410Sstevel@tonic-gate if (value) { 52420Sstevel@tonic-gate dprintso(so, 1, 52430Sstevel@tonic-gate ("sotpi_getsockopt: 0x%x is set\n", 52440Sstevel@tonic-gate option_name)); 52450Sstevel@tonic-gate } else { 52460Sstevel@tonic-gate dprintso(so, 1, 52470Sstevel@tonic-gate ("sotpi_getsockopt: 0x%x not set\n", 52480Sstevel@tonic-gate option_name)); 52490Sstevel@tonic-gate } 52500Sstevel@tonic-gate #endif /* DEBUG */ 52510Sstevel@tonic-gate option = &value; 52520Sstevel@tonic-gate goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 52530Sstevel@tonic-gate 52540Sstevel@tonic-gate case SO_DEBUG: 52550Sstevel@tonic-gate case SO_REUSEADDR: 52560Sstevel@tonic-gate case SO_KEEPALIVE: 52570Sstevel@tonic-gate case SO_DONTROUTE: 52580Sstevel@tonic-gate case SO_BROADCAST: 52590Sstevel@tonic-gate case SO_USELOOPBACK: 52600Sstevel@tonic-gate case SO_OOBINLINE: 52610Sstevel@tonic-gate case SO_DGRAM_ERRIND: 52620Sstevel@tonic-gate value = (so->so_options & option_name); 52630Sstevel@tonic-gate #ifdef DEBUG 52640Sstevel@tonic-gate if (value) { 52650Sstevel@tonic-gate dprintso(so, 1, 52660Sstevel@tonic-gate ("sotpi_getsockopt: 0x%x is set\n", 52670Sstevel@tonic-gate option_name)); 52680Sstevel@tonic-gate } else { 52690Sstevel@tonic-gate dprintso(so, 1, 52700Sstevel@tonic-gate ("sotpi_getsockopt: 0x%x not set\n", 52710Sstevel@tonic-gate option_name)); 52720Sstevel@tonic-gate } 52730Sstevel@tonic-gate #endif /* DEBUG */ 52740Sstevel@tonic-gate option = &value; 52750Sstevel@tonic-gate goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 52760Sstevel@tonic-gate 52770Sstevel@tonic-gate /* 52780Sstevel@tonic-gate * The following options are only returned by sockfs when the 52790Sstevel@tonic-gate * T_SVR4_OPTMGMT_REQ fails. 52800Sstevel@tonic-gate */ 52810Sstevel@tonic-gate case SO_LINGER: 52820Sstevel@tonic-gate option = &so->so_linger; 52830Sstevel@tonic-gate len = (t_uscalar_t)sizeof (struct linger); 52840Sstevel@tonic-gate break; 52850Sstevel@tonic-gate case SO_SNDBUF: { 52860Sstevel@tonic-gate ssize_t lvalue; 52870Sstevel@tonic-gate 52880Sstevel@tonic-gate /* 52890Sstevel@tonic-gate * If the option has not been set then get a default 52900Sstevel@tonic-gate * value from the read queue. This value is 52910Sstevel@tonic-gate * returned if the transport fails 52920Sstevel@tonic-gate * the T_SVR4_OPTMGMT_REQ. 52930Sstevel@tonic-gate */ 52940Sstevel@tonic-gate lvalue = so->so_sndbuf; 52950Sstevel@tonic-gate if (lvalue == 0) { 52960Sstevel@tonic-gate mutex_exit(&so->so_lock); 52970Sstevel@tonic-gate (void) strqget(strvp2wq(SOTOV(so))->q_next, 52985240Snordmark QHIWAT, 0, &lvalue); 52990Sstevel@tonic-gate mutex_enter(&so->so_lock); 53000Sstevel@tonic-gate dprintso(so, 1, 53010Sstevel@tonic-gate ("got SO_SNDBUF %ld from q\n", lvalue)); 53020Sstevel@tonic-gate } 53030Sstevel@tonic-gate value = (int)lvalue; 53040Sstevel@tonic-gate option = &value; 53050Sstevel@tonic-gate len = (t_uscalar_t)sizeof (so->so_sndbuf); 53060Sstevel@tonic-gate break; 53070Sstevel@tonic-gate } 53080Sstevel@tonic-gate case SO_RCVBUF: { 53090Sstevel@tonic-gate ssize_t lvalue; 53100Sstevel@tonic-gate 53110Sstevel@tonic-gate /* 53120Sstevel@tonic-gate * If the option has not been set then get a default 53130Sstevel@tonic-gate * value from the read queue. This value is 53140Sstevel@tonic-gate * returned if the transport fails 53150Sstevel@tonic-gate * the T_SVR4_OPTMGMT_REQ. 53160Sstevel@tonic-gate * 53170Sstevel@tonic-gate * XXX If SO_RCVBUF has been set and this is an 53180Sstevel@tonic-gate * XPG 4.2 application then do not ask the transport 53190Sstevel@tonic-gate * since the transport might adjust the value and not 53200Sstevel@tonic-gate * return exactly what was set by the application. 53210Sstevel@tonic-gate * For non-XPG 4.2 application we return the value 53220Sstevel@tonic-gate * that the transport is actually using. 53230Sstevel@tonic-gate */ 53240Sstevel@tonic-gate lvalue = so->so_rcvbuf; 53250Sstevel@tonic-gate if (lvalue == 0) { 53260Sstevel@tonic-gate mutex_exit(&so->so_lock); 53270Sstevel@tonic-gate (void) strqget(RD(strvp2wq(SOTOV(so))), 53285240Snordmark QHIWAT, 0, &lvalue); 53290Sstevel@tonic-gate mutex_enter(&so->so_lock); 53300Sstevel@tonic-gate dprintso(so, 1, 53310Sstevel@tonic-gate ("got SO_RCVBUF %ld from q\n", lvalue)); 53320Sstevel@tonic-gate } else if (flags & _SOGETSOCKOPT_XPG4_2) { 53330Sstevel@tonic-gate value = (int)lvalue; 53340Sstevel@tonic-gate option = &value; 53350Sstevel@tonic-gate goto copyout; /* skip asking transport */ 53360Sstevel@tonic-gate } 53370Sstevel@tonic-gate value = (int)lvalue; 53380Sstevel@tonic-gate option = &value; 53390Sstevel@tonic-gate len = (t_uscalar_t)sizeof (so->so_rcvbuf); 53400Sstevel@tonic-gate break; 53410Sstevel@tonic-gate } 53423388Skcpoon case SO_DOMAIN: 53433388Skcpoon value = so->so_family; 53443388Skcpoon option = &value; 53453388Skcpoon goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 53463388Skcpoon 53470Sstevel@tonic-gate #ifdef notyet 53480Sstevel@tonic-gate /* 53490Sstevel@tonic-gate * We do not implement the semantics of these options 53500Sstevel@tonic-gate * thus we shouldn't implement the options either. 53510Sstevel@tonic-gate */ 53520Sstevel@tonic-gate case SO_SNDLOWAT: 53530Sstevel@tonic-gate value = so->so_sndlowat; 53540Sstevel@tonic-gate option = &value; 53550Sstevel@tonic-gate break; 53560Sstevel@tonic-gate case SO_RCVLOWAT: 53570Sstevel@tonic-gate value = so->so_rcvlowat; 53580Sstevel@tonic-gate option = &value; 53590Sstevel@tonic-gate break; 53608348SEric.Yu@Sun.COM #endif /* notyet */ 53610Sstevel@tonic-gate case SO_SNDTIMEO: 53628348SEric.Yu@Sun.COM case SO_RCVTIMEO: { 53638348SEric.Yu@Sun.COM clock_t val; 53648489Sshenjian 53658348SEric.Yu@Sun.COM if (option_name == SO_RCVTIMEO) 53668348SEric.Yu@Sun.COM val = drv_hztousec(so->so_rcvtimeo); 53678348SEric.Yu@Sun.COM else 53688348SEric.Yu@Sun.COM val = drv_hztousec(so->so_sndtimeo); 53698348SEric.Yu@Sun.COM tmo_val.tv_sec = val / (1000 * 1000); 53708348SEric.Yu@Sun.COM tmo_val.tv_usec = val % (1000 * 1000); 53718575Sshenjian if (get_udatamodel() == DATAMODEL_NONE || 53728575Sshenjian get_udatamodel() == DATAMODEL_NATIVE) { 53738489Sshenjian option = &tmo_val; 53748489Sshenjian len = sizeof (struct timeval); 53758489Sshenjian } else { 53768489Sshenjian TIMEVAL_TO_TIMEVAL32(&tmo_val32, &tmo_val); 53778489Sshenjian option = &tmo_val32; 53788489Sshenjian len = sizeof (struct timeval32); 53798489Sshenjian } 53800Sstevel@tonic-gate break; 53818348SEric.Yu@Sun.COM } 53828348SEric.Yu@Sun.COM case SO_SND_BUFINFO: { 53838348SEric.Yu@Sun.COM snd_bufinfo.sbi_wroff = 53848348SEric.Yu@Sun.COM (so->so_proto_props).sopp_wroff; 53858348SEric.Yu@Sun.COM snd_bufinfo.sbi_maxblk = 53868348SEric.Yu@Sun.COM (so->so_proto_props).sopp_maxblk; 53878348SEric.Yu@Sun.COM snd_bufinfo.sbi_maxpsz = 53888348SEric.Yu@Sun.COM (so->so_proto_props).sopp_maxpsz; 53898348SEric.Yu@Sun.COM snd_bufinfo.sbi_tail = 53908348SEric.Yu@Sun.COM (so->so_proto_props).sopp_tail; 53918348SEric.Yu@Sun.COM option = &snd_bufinfo; 53928348SEric.Yu@Sun.COM len = (t_uscalar_t)sizeof (struct so_snd_bufinfo); 53930Sstevel@tonic-gate break; 53948348SEric.Yu@Sun.COM } 53950Sstevel@tonic-gate } 53960Sstevel@tonic-gate } 53970Sstevel@tonic-gate 53980Sstevel@tonic-gate mutex_exit(&so->so_lock); 53990Sstevel@tonic-gate 54000Sstevel@tonic-gate /* Send request */ 54010Sstevel@tonic-gate optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 54020Sstevel@tonic-gate optmgmt_req.MGMT_flags = T_CHECK; 54030Sstevel@tonic-gate optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen); 54040Sstevel@tonic-gate optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 54050Sstevel@tonic-gate 54060Sstevel@tonic-gate oh.level = level; 54070Sstevel@tonic-gate oh.name = option_name; 54080Sstevel@tonic-gate oh.len = maxlen; 54090Sstevel@tonic-gate 54100Sstevel@tonic-gate mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 54118778SErik.Nordmark@Sun.COM &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP, cr); 54120Sstevel@tonic-gate /* Let option management work in the presence of data flow control */ 54130Sstevel@tonic-gate error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 54145240Snordmark MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 54150Sstevel@tonic-gate mp = NULL; 54160Sstevel@tonic-gate mutex_enter(&so->so_lock); 54170Sstevel@tonic-gate if (error) { 54180Sstevel@tonic-gate eprintsoline(so, error); 54190Sstevel@tonic-gate goto done2; 54200Sstevel@tonic-gate } 54210Sstevel@tonic-gate error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 54220Sstevel@tonic-gate (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0); 54230Sstevel@tonic-gate if (error) { 54240Sstevel@tonic-gate if (option != NULL) { 54250Sstevel@tonic-gate /* We have a fallback value */ 54260Sstevel@tonic-gate error = 0; 54270Sstevel@tonic-gate goto copyout; 54280Sstevel@tonic-gate } 54290Sstevel@tonic-gate eprintsoline(so, error); 54300Sstevel@tonic-gate goto done2; 54310Sstevel@tonic-gate } 54320Sstevel@tonic-gate ASSERT(mp); 54330Sstevel@tonic-gate optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr; 54340Sstevel@tonic-gate opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset, 54355240Snordmark optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE); 54360Sstevel@tonic-gate if (opt_res == NULL) { 54370Sstevel@tonic-gate if (option != NULL) { 54380Sstevel@tonic-gate /* We have a fallback value */ 54390Sstevel@tonic-gate error = 0; 54400Sstevel@tonic-gate goto copyout; 54410Sstevel@tonic-gate } 54420Sstevel@tonic-gate error = EPROTO; 54430Sstevel@tonic-gate eprintsoline(so, error); 54440Sstevel@tonic-gate goto done; 54450Sstevel@tonic-gate } 54460Sstevel@tonic-gate option = &opt_res[1]; 54470Sstevel@tonic-gate 54480Sstevel@tonic-gate /* check to ensure that the option is within bounds */ 54490Sstevel@tonic-gate if (((uintptr_t)option + opt_res->len < (uintptr_t)option) || 54505240Snordmark (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) { 54510Sstevel@tonic-gate if (option != NULL) { 54520Sstevel@tonic-gate /* We have a fallback value */ 54530Sstevel@tonic-gate error = 0; 54540Sstevel@tonic-gate goto copyout; 54550Sstevel@tonic-gate } 54560Sstevel@tonic-gate error = EPROTO; 54570Sstevel@tonic-gate eprintsoline(so, error); 54580Sstevel@tonic-gate goto done; 54590Sstevel@tonic-gate } 54600Sstevel@tonic-gate 54610Sstevel@tonic-gate len = opt_res->len; 54620Sstevel@tonic-gate 54630Sstevel@tonic-gate copyout: { 54640Sstevel@tonic-gate t_uscalar_t size = MIN(len, maxlen); 54650Sstevel@tonic-gate bcopy(option, optval, size); 54660Sstevel@tonic-gate bcopy(&size, optlenp, sizeof (size)); 54670Sstevel@tonic-gate } 54680Sstevel@tonic-gate done: 54690Sstevel@tonic-gate freemsg(mp); 54700Sstevel@tonic-gate done2: 54710Sstevel@tonic-gate so_unlock_single(so, SOLOCKED); 54720Sstevel@tonic-gate mutex_exit(&so->so_lock); 54738348SEric.Yu@Sun.COM 54740Sstevel@tonic-gate return (error); 54750Sstevel@tonic-gate } 54760Sstevel@tonic-gate 54770Sstevel@tonic-gate /* 54780Sstevel@tonic-gate * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ. 54790Sstevel@tonic-gate * SOL_SOCKET options are also recorded in the sonode. A setsockopt for 54800Sstevel@tonic-gate * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails - 54810Sstevel@tonic-gate * setsockopt has to work even if the transport does not support the option. 54820Sstevel@tonic-gate */ 54838348SEric.Yu@Sun.COM /* ARGSUSED */ 54840Sstevel@tonic-gate int 54850Sstevel@tonic-gate sotpi_setsockopt(struct sonode *so, int level, int option_name, 54868348SEric.Yu@Sun.COM const void *optval, t_uscalar_t optlen, struct cred *cr) 54870Sstevel@tonic-gate { 54880Sstevel@tonic-gate struct T_optmgmt_req optmgmt_req; 54890Sstevel@tonic-gate struct opthdr oh; 54900Sstevel@tonic-gate mblk_t *mp; 54910Sstevel@tonic-gate int error = 0; 54920Sstevel@tonic-gate boolean_t handled = B_FALSE; 54930Sstevel@tonic-gate 54940Sstevel@tonic-gate dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n", 54957240Srh87107 (void *)so, level, option_name, optval, optlen, 54965240Snordmark pr_state(so->so_state, so->so_mode))); 54970Sstevel@tonic-gate 54980Sstevel@tonic-gate /* X/Open requires this check */ 54990Sstevel@tonic-gate if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 55000Sstevel@tonic-gate if (xnet_check_print) 55010Sstevel@tonic-gate printf("sockfs: X/Open setsockopt check => EINVAL\n"); 55020Sstevel@tonic-gate return (EINVAL); 55030Sstevel@tonic-gate } 55040Sstevel@tonic-gate 55050Sstevel@tonic-gate mutex_enter(&so->so_lock); 55060Sstevel@tonic-gate so_lock_single(so); /* Set SOLOCKED */ 55070Sstevel@tonic-gate mutex_exit(&so->so_lock); 55080Sstevel@tonic-gate 55090Sstevel@tonic-gate optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 55100Sstevel@tonic-gate optmgmt_req.MGMT_flags = T_NEGOTIATE; 55110Sstevel@tonic-gate optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen; 55120Sstevel@tonic-gate optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 55130Sstevel@tonic-gate 55140Sstevel@tonic-gate oh.level = level; 55150Sstevel@tonic-gate oh.name = option_name; 55160Sstevel@tonic-gate oh.len = optlen; 55170Sstevel@tonic-gate 55180Sstevel@tonic-gate mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 55198778SErik.Nordmark@Sun.COM &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP, cr); 55200Sstevel@tonic-gate /* Let option management work in the presence of data flow control */ 55210Sstevel@tonic-gate error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 55225240Snordmark MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 55230Sstevel@tonic-gate mp = NULL; 55240Sstevel@tonic-gate mutex_enter(&so->so_lock); 55250Sstevel@tonic-gate if (error) { 55260Sstevel@tonic-gate eprintsoline(so, error); 55278348SEric.Yu@Sun.COM goto done2; 55280Sstevel@tonic-gate } 55290Sstevel@tonic-gate error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 55300Sstevel@tonic-gate (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0); 55310Sstevel@tonic-gate if (error) { 55320Sstevel@tonic-gate eprintsoline(so, error); 55330Sstevel@tonic-gate goto done; 55340Sstevel@tonic-gate } 55350Sstevel@tonic-gate ASSERT(mp); 55360Sstevel@tonic-gate /* No need to verify T_optmgmt_ack */ 55370Sstevel@tonic-gate freemsg(mp); 55380Sstevel@tonic-gate done: 55390Sstevel@tonic-gate /* 55400Sstevel@tonic-gate * Check for SOL_SOCKET options and record their values. 55410Sstevel@tonic-gate * If we know about a SOL_SOCKET parameter and the transport 55420Sstevel@tonic-gate * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or 55430Sstevel@tonic-gate * EPROTO) we let the setsockopt succeed. 55440Sstevel@tonic-gate */ 55450Sstevel@tonic-gate if (level == SOL_SOCKET) { 55460Sstevel@tonic-gate /* Check parameters */ 55470Sstevel@tonic-gate switch (option_name) { 55480Sstevel@tonic-gate case SO_DEBUG: 55490Sstevel@tonic-gate case SO_REUSEADDR: 55500Sstevel@tonic-gate case SO_KEEPALIVE: 55510Sstevel@tonic-gate case SO_DONTROUTE: 55520Sstevel@tonic-gate case SO_BROADCAST: 55530Sstevel@tonic-gate case SO_USELOOPBACK: 55540Sstevel@tonic-gate case SO_OOBINLINE: 55550Sstevel@tonic-gate case SO_SNDBUF: 55560Sstevel@tonic-gate case SO_RCVBUF: 55570Sstevel@tonic-gate #ifdef notyet 55580Sstevel@tonic-gate case SO_SNDLOWAT: 55590Sstevel@tonic-gate case SO_RCVLOWAT: 55600Sstevel@tonic-gate #endif /* notyet */ 55610Sstevel@tonic-gate case SO_DGRAM_ERRIND: 55620Sstevel@tonic-gate if (optlen != (t_uscalar_t)sizeof (int32_t)) { 55630Sstevel@tonic-gate error = EINVAL; 55640Sstevel@tonic-gate eprintsoline(so, error); 55650Sstevel@tonic-gate goto done2; 55660Sstevel@tonic-gate } 55670Sstevel@tonic-gate ASSERT(optval); 55680Sstevel@tonic-gate handled = B_TRUE; 55690Sstevel@tonic-gate break; 55708348SEric.Yu@Sun.COM case SO_SNDTIMEO: 55718348SEric.Yu@Sun.COM case SO_RCVTIMEO: 55728575Sshenjian if (get_udatamodel() == DATAMODEL_NONE || 55738575Sshenjian get_udatamodel() == DATAMODEL_NATIVE) { 55748489Sshenjian if (optlen != sizeof (struct timeval)) { 55758489Sshenjian error = EINVAL; 55768489Sshenjian eprintsoline(so, error); 55778489Sshenjian goto done2; 55788489Sshenjian } 55798489Sshenjian } else { 55808489Sshenjian if (optlen != sizeof (struct timeval32)) { 55818489Sshenjian error = EINVAL; 55828489Sshenjian eprintsoline(so, error); 55838489Sshenjian goto done2; 55848489Sshenjian } 55858348SEric.Yu@Sun.COM } 55868348SEric.Yu@Sun.COM ASSERT(optval); 55878348SEric.Yu@Sun.COM handled = B_TRUE; 55888348SEric.Yu@Sun.COM break; 55890Sstevel@tonic-gate case SO_LINGER: 55900Sstevel@tonic-gate if (optlen != (t_uscalar_t)sizeof (struct linger)) { 55910Sstevel@tonic-gate error = EINVAL; 55920Sstevel@tonic-gate eprintsoline(so, error); 55930Sstevel@tonic-gate goto done2; 55940Sstevel@tonic-gate } 55950Sstevel@tonic-gate ASSERT(optval); 55960Sstevel@tonic-gate handled = B_TRUE; 55970Sstevel@tonic-gate break; 55980Sstevel@tonic-gate } 55990Sstevel@tonic-gate 56000Sstevel@tonic-gate #define intvalue (*(int32_t *)optval) 56010Sstevel@tonic-gate 56020Sstevel@tonic-gate switch (option_name) { 56030Sstevel@tonic-gate case SO_TYPE: 56040Sstevel@tonic-gate case SO_ERROR: 56050Sstevel@tonic-gate case SO_ACCEPTCONN: 56060Sstevel@tonic-gate /* Can't be set */ 56070Sstevel@tonic-gate error = ENOPROTOOPT; 56080Sstevel@tonic-gate goto done2; 56090Sstevel@tonic-gate case SO_LINGER: { 56100Sstevel@tonic-gate struct linger *l = (struct linger *)optval; 56110Sstevel@tonic-gate 56120Sstevel@tonic-gate so->so_linger.l_linger = l->l_linger; 56130Sstevel@tonic-gate if (l->l_onoff) { 56140Sstevel@tonic-gate so->so_linger.l_onoff = SO_LINGER; 56150Sstevel@tonic-gate so->so_options |= SO_LINGER; 56160Sstevel@tonic-gate } else { 56170Sstevel@tonic-gate so->so_linger.l_onoff = 0; 56180Sstevel@tonic-gate so->so_options &= ~SO_LINGER; 56190Sstevel@tonic-gate } 56200Sstevel@tonic-gate break; 56210Sstevel@tonic-gate } 56220Sstevel@tonic-gate 56230Sstevel@tonic-gate case SO_DEBUG: 56240Sstevel@tonic-gate #ifdef SOCK_TEST 56250Sstevel@tonic-gate if (intvalue & 2) 56260Sstevel@tonic-gate sock_test_timelimit = 10 * hz; 56270Sstevel@tonic-gate else 56280Sstevel@tonic-gate sock_test_timelimit = 0; 56290Sstevel@tonic-gate 56300Sstevel@tonic-gate if (intvalue & 4) 56310Sstevel@tonic-gate do_useracc = 0; 56320Sstevel@tonic-gate else 56330Sstevel@tonic-gate do_useracc = 1; 56340Sstevel@tonic-gate #endif /* SOCK_TEST */ 56350Sstevel@tonic-gate /* FALLTHRU */ 56360Sstevel@tonic-gate case SO_REUSEADDR: 56370Sstevel@tonic-gate case SO_KEEPALIVE: 56380Sstevel@tonic-gate case SO_DONTROUTE: 56390Sstevel@tonic-gate case SO_BROADCAST: 56400Sstevel@tonic-gate case SO_USELOOPBACK: 56410Sstevel@tonic-gate case SO_OOBINLINE: 56420Sstevel@tonic-gate case SO_DGRAM_ERRIND: 56430Sstevel@tonic-gate if (intvalue != 0) { 56440Sstevel@tonic-gate dprintso(so, 1, 56458348SEric.Yu@Sun.COM ("socket_setsockopt: setting 0x%x\n", 56465240Snordmark option_name)); 56470Sstevel@tonic-gate so->so_options |= option_name; 56480Sstevel@tonic-gate } else { 56490Sstevel@tonic-gate dprintso(so, 1, 56508348SEric.Yu@Sun.COM ("socket_setsockopt: clearing 0x%x\n", 56515240Snordmark option_name)); 56520Sstevel@tonic-gate so->so_options &= ~option_name; 56530Sstevel@tonic-gate } 56540Sstevel@tonic-gate break; 56550Sstevel@tonic-gate /* 56560Sstevel@tonic-gate * The following options are only returned by us when the 56578348SEric.Yu@Sun.COM * transport layer fails. 56580Sstevel@tonic-gate * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs 56590Sstevel@tonic-gate * since the transport might adjust the value and not 56600Sstevel@tonic-gate * return exactly what was set by the application. 56610Sstevel@tonic-gate */ 56620Sstevel@tonic-gate case SO_SNDBUF: 56630Sstevel@tonic-gate so->so_sndbuf = intvalue; 56640Sstevel@tonic-gate break; 56650Sstevel@tonic-gate case SO_RCVBUF: 56660Sstevel@tonic-gate so->so_rcvbuf = intvalue; 56670Sstevel@tonic-gate break; 56688348SEric.Yu@Sun.COM case SO_RCVPSH: 56698348SEric.Yu@Sun.COM so->so_rcv_timer_interval = intvalue; 56708348SEric.Yu@Sun.COM break; 56710Sstevel@tonic-gate #ifdef notyet 56720Sstevel@tonic-gate /* 56730Sstevel@tonic-gate * We do not implement the semantics of these options 56740Sstevel@tonic-gate * thus we shouldn't implement the options either. 56750Sstevel@tonic-gate */ 56760Sstevel@tonic-gate case SO_SNDLOWAT: 56770Sstevel@tonic-gate so->so_sndlowat = intvalue; 56780Sstevel@tonic-gate break; 56790Sstevel@tonic-gate case SO_RCVLOWAT: 56800Sstevel@tonic-gate so->so_rcvlowat = intvalue; 56810Sstevel@tonic-gate break; 56828348SEric.Yu@Sun.COM #endif /* notyet */ 56830Sstevel@tonic-gate case SO_SNDTIMEO: 56848348SEric.Yu@Sun.COM case SO_RCVTIMEO: { 56858489Sshenjian struct timeval tl; 56868489Sshenjian clock_t val; 56878489Sshenjian 56888575Sshenjian if (get_udatamodel() == DATAMODEL_NONE || 56898575Sshenjian get_udatamodel() == DATAMODEL_NATIVE) 56908489Sshenjian bcopy(&tl, (struct timeval *)optval, 56918489Sshenjian sizeof (struct timeval)); 56928489Sshenjian else 56938489Sshenjian TIMEVAL32_TO_TIMEVAL(&tl, 56948489Sshenjian (struct timeval32 *)optval); 56958489Sshenjian val = tl.tv_sec * 1000 * 1000 + tl.tv_usec; 56968348SEric.Yu@Sun.COM if (option_name == SO_RCVTIMEO) 56978348SEric.Yu@Sun.COM so->so_rcvtimeo = drv_usectohz(val); 56988348SEric.Yu@Sun.COM else 56998348SEric.Yu@Sun.COM so->so_sndtimeo = drv_usectohz(val); 57000Sstevel@tonic-gate break; 57018348SEric.Yu@Sun.COM } 57020Sstevel@tonic-gate } 57030Sstevel@tonic-gate #undef intvalue 57040Sstevel@tonic-gate 57050Sstevel@tonic-gate if (error) { 57060Sstevel@tonic-gate if ((error == ENOPROTOOPT || error == EPROTO || 57070Sstevel@tonic-gate error == EINVAL) && handled) { 57080Sstevel@tonic-gate dprintso(so, 1, 57090Sstevel@tonic-gate ("setsockopt: ignoring error %d for 0x%x\n", 57100Sstevel@tonic-gate error, option_name)); 57110Sstevel@tonic-gate error = 0; 57120Sstevel@tonic-gate } 57130Sstevel@tonic-gate } 57140Sstevel@tonic-gate } 57150Sstevel@tonic-gate done2: 57160Sstevel@tonic-gate so_unlock_single(so, SOLOCKED); 57170Sstevel@tonic-gate mutex_exit(&so->so_lock); 57180Sstevel@tonic-gate return (error); 57190Sstevel@tonic-gate } 57208348SEric.Yu@Sun.COM 57218427SAnders.Persson@Sun.COM /* 57228427SAnders.Persson@Sun.COM * sotpi_close() is called when the last open reference goes away. 57238427SAnders.Persson@Sun.COM */ 57248348SEric.Yu@Sun.COM /* ARGSUSED */ 57258348SEric.Yu@Sun.COM int 57268348SEric.Yu@Sun.COM sotpi_close(struct sonode *so, int flag, struct cred *cr) 57278348SEric.Yu@Sun.COM { 57288348SEric.Yu@Sun.COM struct vnode *vp = SOTOV(so); 57298348SEric.Yu@Sun.COM dev_t dev; 57308348SEric.Yu@Sun.COM int error = 0; 57318348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 57328348SEric.Yu@Sun.COM 57338348SEric.Yu@Sun.COM dprintso(so, 1, ("sotpi_close(%p, %x) %s\n", 57348348SEric.Yu@Sun.COM (void *)vp, flag, pr_state(so->so_state, so->so_mode))); 57358348SEric.Yu@Sun.COM 57368348SEric.Yu@Sun.COM dev = sti->sti_dev; 57378348SEric.Yu@Sun.COM 57388348SEric.Yu@Sun.COM ASSERT(STREAMSTAB(getmajor(dev))); 57398348SEric.Yu@Sun.COM 57408348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 57418348SEric.Yu@Sun.COM so_lock_single(so); /* Set SOLOCKED */ 57428348SEric.Yu@Sun.COM 57438399SRao.Shoaib@Sun.COM ASSERT(so_verify_oobstate(so)); 57448399SRao.Shoaib@Sun.COM 57458348SEric.Yu@Sun.COM if (sti->sti_nl7c_flags & NL7C_ENABLED) { 57468348SEric.Yu@Sun.COM sti->sti_nl7c_flags = 0; 57478348SEric.Yu@Sun.COM nl7c_close(so); 57488348SEric.Yu@Sun.COM } 57498348SEric.Yu@Sun.COM 57508348SEric.Yu@Sun.COM if (vp->v_stream != NULL) { 57518348SEric.Yu@Sun.COM vnode_t *ux_vp; 57528348SEric.Yu@Sun.COM 57538348SEric.Yu@Sun.COM if (so->so_family == AF_UNIX) { 57548348SEric.Yu@Sun.COM /* Could avoid this when CANTSENDMORE for !dgram */ 57558348SEric.Yu@Sun.COM so_unix_close(so); 57568348SEric.Yu@Sun.COM } 57578348SEric.Yu@Sun.COM 57588348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 57598348SEric.Yu@Sun.COM /* 57608348SEric.Yu@Sun.COM * Disassemble the linkage from the AF_UNIX underlying file 57618348SEric.Yu@Sun.COM * system vnode to this socket (by atomically clearing 57628348SEric.Yu@Sun.COM * v_stream in vn_rele_stream) before strclose clears sd_vnode 57638348SEric.Yu@Sun.COM * and frees the stream head. 57648348SEric.Yu@Sun.COM */ 57658348SEric.Yu@Sun.COM if ((ux_vp = sti->sti_ux_bound_vp) != NULL) { 57668348SEric.Yu@Sun.COM ASSERT(ux_vp->v_stream); 57678348SEric.Yu@Sun.COM sti->sti_ux_bound_vp = NULL; 57688348SEric.Yu@Sun.COM vn_rele_stream(ux_vp); 57698348SEric.Yu@Sun.COM } 57708348SEric.Yu@Sun.COM if (so->so_family == AF_INET || so->so_family == AF_INET6) { 57718348SEric.Yu@Sun.COM strsetrwputdatahooks(SOTOV(so), NULL, NULL); 57728348SEric.Yu@Sun.COM if (sti->sti_kssl_ent != NULL) { 57738348SEric.Yu@Sun.COM kssl_release_ent(sti->sti_kssl_ent, so, 57748348SEric.Yu@Sun.COM sti->sti_kssl_type); 57758348SEric.Yu@Sun.COM sti->sti_kssl_ent = NULL; 57768348SEric.Yu@Sun.COM } 57778348SEric.Yu@Sun.COM if (sti->sti_kssl_ctx != NULL) { 57788348SEric.Yu@Sun.COM kssl_release_ctx(sti->sti_kssl_ctx); 57798348SEric.Yu@Sun.COM sti->sti_kssl_ctx = NULL; 57808348SEric.Yu@Sun.COM } 57818348SEric.Yu@Sun.COM sti->sti_kssl_type = KSSL_NO_PROXY; 57828348SEric.Yu@Sun.COM } 57838348SEric.Yu@Sun.COM error = strclose(vp, flag, cr); 57848348SEric.Yu@Sun.COM vp->v_stream = NULL; 57858348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 57868348SEric.Yu@Sun.COM } 57878348SEric.Yu@Sun.COM 57888348SEric.Yu@Sun.COM /* 57898348SEric.Yu@Sun.COM * Flush the T_DISCON_IND on sti_discon_ind_mp. 57908348SEric.Yu@Sun.COM */ 57918348SEric.Yu@Sun.COM so_flush_discon_ind(so); 57928348SEric.Yu@Sun.COM 57938348SEric.Yu@Sun.COM so_unlock_single(so, SOLOCKED); 57948348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 57958348SEric.Yu@Sun.COM 57968348SEric.Yu@Sun.COM /* 57978348SEric.Yu@Sun.COM * Needed for STREAMs. 57988348SEric.Yu@Sun.COM * Decrement the device driver's reference count for streams 57998348SEric.Yu@Sun.COM * opened via the clone dip. The driver was held in clone_open(). 58008348SEric.Yu@Sun.COM * The absence of clone_close() forces this asymmetry. 58018348SEric.Yu@Sun.COM */ 58028348SEric.Yu@Sun.COM if (so->so_flag & SOCLONE) 58038348SEric.Yu@Sun.COM ddi_rele_driver(getmajor(dev)); 58048348SEric.Yu@Sun.COM 58058348SEric.Yu@Sun.COM return (error); 58068348SEric.Yu@Sun.COM } 58078348SEric.Yu@Sun.COM 58088348SEric.Yu@Sun.COM static int 58098348SEric.Yu@Sun.COM sotpi_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode, 58108348SEric.Yu@Sun.COM struct cred *cr, int32_t *rvalp) 58118348SEric.Yu@Sun.COM { 58128348SEric.Yu@Sun.COM struct vnode *vp = SOTOV(so); 58138348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 58148348SEric.Yu@Sun.COM int error = 0; 58158348SEric.Yu@Sun.COM 58168348SEric.Yu@Sun.COM dprintso(so, 0, ("sotpi_ioctl: cmd 0x%x, arg 0x%lx, state %s\n", 58178348SEric.Yu@Sun.COM cmd, arg, pr_state(so->so_state, so->so_mode))); 58188348SEric.Yu@Sun.COM 58198348SEric.Yu@Sun.COM switch (cmd) { 58208477SRao.Shoaib@Sun.COM case SIOCSQPTR: 58218477SRao.Shoaib@Sun.COM /* 58228477SRao.Shoaib@Sun.COM * SIOCSQPTR is valid only when helper stream is created 58238477SRao.Shoaib@Sun.COM * by the protocol. 58248477SRao.Shoaib@Sun.COM */ 58258348SEric.Yu@Sun.COM case _I_INSERT: 58268348SEric.Yu@Sun.COM case _I_REMOVE: 58278348SEric.Yu@Sun.COM /* 58288348SEric.Yu@Sun.COM * Since there's no compelling reason to support these ioctls 58298348SEric.Yu@Sun.COM * on sockets, and doing so would increase the complexity 58308348SEric.Yu@Sun.COM * markedly, prevent it. 58318348SEric.Yu@Sun.COM */ 58328348SEric.Yu@Sun.COM return (EOPNOTSUPP); 58338348SEric.Yu@Sun.COM 58348348SEric.Yu@Sun.COM case I_FIND: 58358348SEric.Yu@Sun.COM case I_LIST: 58368348SEric.Yu@Sun.COM case I_LOOK: 58378348SEric.Yu@Sun.COM case I_POP: 58388348SEric.Yu@Sun.COM case I_PUSH: 58398348SEric.Yu@Sun.COM /* 58408348SEric.Yu@Sun.COM * To prevent races and inconsistencies between the actual 58418348SEric.Yu@Sun.COM * state of the stream and the state according to the sonode, 58428348SEric.Yu@Sun.COM * we serialize all operations which modify or operate on the 58438348SEric.Yu@Sun.COM * list of modules on the socket's stream. 58448348SEric.Yu@Sun.COM */ 58458348SEric.Yu@Sun.COM mutex_enter(&sti->sti_plumb_lock); 58468348SEric.Yu@Sun.COM error = socktpi_plumbioctl(vp, cmd, arg, mode, cr, rvalp); 58478348SEric.Yu@Sun.COM mutex_exit(&sti->sti_plumb_lock); 58488348SEric.Yu@Sun.COM return (error); 58498348SEric.Yu@Sun.COM 58508348SEric.Yu@Sun.COM default: 58518348SEric.Yu@Sun.COM if (so->so_version != SOV_STREAM) 58528348SEric.Yu@Sun.COM break; 58538348SEric.Yu@Sun.COM 58548348SEric.Yu@Sun.COM /* 58558348SEric.Yu@Sun.COM * The imaginary "sockmod" has been popped; act as a stream. 58568348SEric.Yu@Sun.COM */ 58578348SEric.Yu@Sun.COM return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 58588348SEric.Yu@Sun.COM } 58598348SEric.Yu@Sun.COM 58608348SEric.Yu@Sun.COM ASSERT(so->so_version != SOV_STREAM); 58618348SEric.Yu@Sun.COM 58628348SEric.Yu@Sun.COM /* 58638348SEric.Yu@Sun.COM * Process socket-specific ioctls. 58648348SEric.Yu@Sun.COM */ 58658348SEric.Yu@Sun.COM switch (cmd) { 58668348SEric.Yu@Sun.COM case FIONBIO: { 58678348SEric.Yu@Sun.COM int32_t value; 58688348SEric.Yu@Sun.COM 58698348SEric.Yu@Sun.COM if (so_copyin((void *)arg, &value, sizeof (int32_t), 58708348SEric.Yu@Sun.COM (mode & (int)FKIOCTL))) 58718348SEric.Yu@Sun.COM return (EFAULT); 58728348SEric.Yu@Sun.COM 58738348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 58748348SEric.Yu@Sun.COM if (value) { 58758348SEric.Yu@Sun.COM so->so_state |= SS_NDELAY; 58768348SEric.Yu@Sun.COM } else { 58778348SEric.Yu@Sun.COM so->so_state &= ~SS_NDELAY; 58788348SEric.Yu@Sun.COM } 58798348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 58808348SEric.Yu@Sun.COM return (0); 58818348SEric.Yu@Sun.COM } 58828348SEric.Yu@Sun.COM 58838348SEric.Yu@Sun.COM case FIOASYNC: { 58848348SEric.Yu@Sun.COM int32_t value; 58858348SEric.Yu@Sun.COM 58868348SEric.Yu@Sun.COM if (so_copyin((void *)arg, &value, sizeof (int32_t), 58878348SEric.Yu@Sun.COM (mode & (int)FKIOCTL))) 58888348SEric.Yu@Sun.COM return (EFAULT); 58898348SEric.Yu@Sun.COM 58908348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 58918348SEric.Yu@Sun.COM /* 58928348SEric.Yu@Sun.COM * SS_ASYNC flag not already set correctly? 58938348SEric.Yu@Sun.COM * (!value != !(so->so_state & SS_ASYNC)) 58948348SEric.Yu@Sun.COM * but some engineers find that too hard to read. 58958348SEric.Yu@Sun.COM */ 58968348SEric.Yu@Sun.COM if (value == 0 && (so->so_state & SS_ASYNC) != 0 || 58978348SEric.Yu@Sun.COM value != 0 && (so->so_state & SS_ASYNC) == 0) 58988348SEric.Yu@Sun.COM error = so_flip_async(so, vp, mode, cr); 58998348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 59008348SEric.Yu@Sun.COM return (error); 59018348SEric.Yu@Sun.COM } 59028348SEric.Yu@Sun.COM 59038348SEric.Yu@Sun.COM case SIOCSPGRP: 59048348SEric.Yu@Sun.COM case FIOSETOWN: { 59058348SEric.Yu@Sun.COM pid_t pgrp; 59068348SEric.Yu@Sun.COM 59078348SEric.Yu@Sun.COM if (so_copyin((void *)arg, &pgrp, sizeof (pid_t), 59088348SEric.Yu@Sun.COM (mode & (int)FKIOCTL))) 59098348SEric.Yu@Sun.COM return (EFAULT); 59108348SEric.Yu@Sun.COM 59118348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 59128348SEric.Yu@Sun.COM dprintso(so, 1, ("setown: new %d old %d\n", pgrp, so->so_pgrp)); 59138348SEric.Yu@Sun.COM /* Any change? */ 59148348SEric.Yu@Sun.COM if (pgrp != so->so_pgrp) 59158348SEric.Yu@Sun.COM error = so_set_siggrp(so, vp, pgrp, mode, cr); 59168348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 59178348SEric.Yu@Sun.COM return (error); 59188348SEric.Yu@Sun.COM } 59198348SEric.Yu@Sun.COM case SIOCGPGRP: 59208348SEric.Yu@Sun.COM case FIOGETOWN: 59218348SEric.Yu@Sun.COM if (so_copyout(&so->so_pgrp, (void *)arg, 59228348SEric.Yu@Sun.COM sizeof (pid_t), (mode & (int)FKIOCTL))) 59238348SEric.Yu@Sun.COM return (EFAULT); 59248348SEric.Yu@Sun.COM return (0); 59258348SEric.Yu@Sun.COM 59268348SEric.Yu@Sun.COM case SIOCATMARK: { 59278348SEric.Yu@Sun.COM int retval; 59288348SEric.Yu@Sun.COM uint_t so_state; 59298348SEric.Yu@Sun.COM 59308348SEric.Yu@Sun.COM /* 59318348SEric.Yu@Sun.COM * strwaitmark has a finite timeout after which it 59328348SEric.Yu@Sun.COM * returns -1 if the mark state is undetermined. 59338348SEric.Yu@Sun.COM * In order to avoid any race between the mark state 59348348SEric.Yu@Sun.COM * in sockfs and the mark state in the stream head this 59358348SEric.Yu@Sun.COM * routine loops until the mark state can be determined 59368348SEric.Yu@Sun.COM * (or the urgent data indication has been removed by some 59378348SEric.Yu@Sun.COM * other thread). 59388348SEric.Yu@Sun.COM */ 59398348SEric.Yu@Sun.COM do { 59408348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 59418348SEric.Yu@Sun.COM so_state = so->so_state; 59428348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 59438348SEric.Yu@Sun.COM if (so_state & SS_RCVATMARK) { 59448348SEric.Yu@Sun.COM retval = 1; 59458348SEric.Yu@Sun.COM } else if (!(so_state & SS_OOBPEND)) { 59468348SEric.Yu@Sun.COM /* 59478348SEric.Yu@Sun.COM * No SIGURG has been generated -- there is no 59488348SEric.Yu@Sun.COM * pending or present urgent data. Thus can't 59498348SEric.Yu@Sun.COM * possibly be at the mark. 59508348SEric.Yu@Sun.COM */ 59518348SEric.Yu@Sun.COM retval = 0; 59528348SEric.Yu@Sun.COM } else { 59538348SEric.Yu@Sun.COM /* 59548348SEric.Yu@Sun.COM * Have the stream head wait until there is 59558348SEric.Yu@Sun.COM * either some messages on the read queue, or 59568348SEric.Yu@Sun.COM * STRATMARK or STRNOTATMARK gets set. The 59578348SEric.Yu@Sun.COM * STRNOTATMARK flag is used so that the 59588348SEric.Yu@Sun.COM * transport can send up a MSGNOTMARKNEXT 59598348SEric.Yu@Sun.COM * M_DATA to indicate that it is not 59608348SEric.Yu@Sun.COM * at the mark and additional data is not about 59618348SEric.Yu@Sun.COM * to be send upstream. 59628348SEric.Yu@Sun.COM * 59638348SEric.Yu@Sun.COM * If the mark state is undetermined this will 59648348SEric.Yu@Sun.COM * return -1 and we will loop rechecking the 59658348SEric.Yu@Sun.COM * socket state. 59668348SEric.Yu@Sun.COM */ 59678348SEric.Yu@Sun.COM retval = strwaitmark(vp); 59688348SEric.Yu@Sun.COM } 59698348SEric.Yu@Sun.COM } while (retval == -1); 59708348SEric.Yu@Sun.COM 59718348SEric.Yu@Sun.COM if (so_copyout(&retval, (void *)arg, sizeof (int), 59728348SEric.Yu@Sun.COM (mode & (int)FKIOCTL))) 59738348SEric.Yu@Sun.COM return (EFAULT); 59748348SEric.Yu@Sun.COM return (0); 59758348SEric.Yu@Sun.COM } 59768348SEric.Yu@Sun.COM 59778348SEric.Yu@Sun.COM case I_FDINSERT: 59788348SEric.Yu@Sun.COM case I_SENDFD: 59798348SEric.Yu@Sun.COM case I_RECVFD: 59808348SEric.Yu@Sun.COM case I_ATMARK: 59818348SEric.Yu@Sun.COM case _SIOCSOCKFALLBACK: 59828348SEric.Yu@Sun.COM /* 59838348SEric.Yu@Sun.COM * These ioctls do not apply to sockets. I_FDINSERT can be 59848348SEric.Yu@Sun.COM * used to send M_PROTO messages without modifying the socket 59858348SEric.Yu@Sun.COM * state. I_SENDFD/RECVFD should not be used for socket file 59868348SEric.Yu@Sun.COM * descriptor passing since they assume a twisted stream. 59878348SEric.Yu@Sun.COM * SIOCATMARK must be used instead of I_ATMARK. 59888348SEric.Yu@Sun.COM * 59898348SEric.Yu@Sun.COM * _SIOCSOCKFALLBACK from an application should never be 59908348SEric.Yu@Sun.COM * processed. It is only generated by socktpi_open() or 59918348SEric.Yu@Sun.COM * in response to I_POP or I_PUSH. 59928348SEric.Yu@Sun.COM */ 59938348SEric.Yu@Sun.COM #ifdef DEBUG 59948348SEric.Yu@Sun.COM zcmn_err(getzoneid(), CE_WARN, 59958348SEric.Yu@Sun.COM "Unsupported STREAMS ioctl 0x%x on socket. " 59968348SEric.Yu@Sun.COM "Pid = %d\n", cmd, curproc->p_pid); 59978348SEric.Yu@Sun.COM #endif /* DEBUG */ 59988348SEric.Yu@Sun.COM return (EOPNOTSUPP); 59998348SEric.Yu@Sun.COM 60008348SEric.Yu@Sun.COM case _I_GETPEERCRED: 60018348SEric.Yu@Sun.COM if ((mode & FKIOCTL) == 0) 60028348SEric.Yu@Sun.COM return (EINVAL); 60038348SEric.Yu@Sun.COM 60048348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 60058348SEric.Yu@Sun.COM if ((so->so_mode & SM_CONNREQUIRED) == 0) { 60068348SEric.Yu@Sun.COM error = ENOTSUP; 60078348SEric.Yu@Sun.COM } else if ((so->so_state & SS_ISCONNECTED) == 0) { 60088348SEric.Yu@Sun.COM error = ENOTCONN; 60098348SEric.Yu@Sun.COM } else if (so->so_peercred != NULL) { 60108348SEric.Yu@Sun.COM k_peercred_t *kp = (k_peercred_t *)arg; 60118348SEric.Yu@Sun.COM kp->pc_cr = so->so_peercred; 60128348SEric.Yu@Sun.COM kp->pc_cpid = so->so_cpid; 60138348SEric.Yu@Sun.COM crhold(so->so_peercred); 60148348SEric.Yu@Sun.COM } else { 60158348SEric.Yu@Sun.COM error = EINVAL; 60168348SEric.Yu@Sun.COM } 60178348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 60188348SEric.Yu@Sun.COM return (error); 60198348SEric.Yu@Sun.COM 60208348SEric.Yu@Sun.COM default: 60218348SEric.Yu@Sun.COM /* 60228348SEric.Yu@Sun.COM * Do the higher-order bits of the ioctl cmd indicate 60238348SEric.Yu@Sun.COM * that it is an I_* streams ioctl? 60248348SEric.Yu@Sun.COM */ 60258348SEric.Yu@Sun.COM if ((cmd & 0xffffff00U) == STR && 60268348SEric.Yu@Sun.COM so->so_version == SOV_SOCKBSD) { 60278348SEric.Yu@Sun.COM #ifdef DEBUG 60288348SEric.Yu@Sun.COM zcmn_err(getzoneid(), CE_WARN, 60298348SEric.Yu@Sun.COM "Unsupported STREAMS ioctl 0x%x on socket. " 60308348SEric.Yu@Sun.COM "Pid = %d\n", cmd, curproc->p_pid); 60318348SEric.Yu@Sun.COM #endif /* DEBUG */ 60328348SEric.Yu@Sun.COM return (EOPNOTSUPP); 60338348SEric.Yu@Sun.COM } 60348348SEric.Yu@Sun.COM return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 60358348SEric.Yu@Sun.COM } 60368348SEric.Yu@Sun.COM } 60378348SEric.Yu@Sun.COM 60388348SEric.Yu@Sun.COM /* 60398348SEric.Yu@Sun.COM * Handle plumbing-related ioctls. 60408348SEric.Yu@Sun.COM */ 60418348SEric.Yu@Sun.COM static int 60428348SEric.Yu@Sun.COM socktpi_plumbioctl(struct vnode *vp, int cmd, intptr_t arg, int mode, 60438348SEric.Yu@Sun.COM struct cred *cr, int32_t *rvalp) 60448348SEric.Yu@Sun.COM { 60458348SEric.Yu@Sun.COM static const char sockmod_name[] = "sockmod"; 60468348SEric.Yu@Sun.COM struct sonode *so = VTOSO(vp); 60478348SEric.Yu@Sun.COM char mname[FMNAMESZ + 1]; 60488348SEric.Yu@Sun.COM int error; 60498348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 60508348SEric.Yu@Sun.COM 60518348SEric.Yu@Sun.COM ASSERT(MUTEX_HELD(&sti->sti_plumb_lock)); 60528348SEric.Yu@Sun.COM 60538348SEric.Yu@Sun.COM if (so->so_version == SOV_SOCKBSD) 60548348SEric.Yu@Sun.COM return (EOPNOTSUPP); 60558348SEric.Yu@Sun.COM 60568348SEric.Yu@Sun.COM if (so->so_version == SOV_STREAM) { 60578348SEric.Yu@Sun.COM /* 60588348SEric.Yu@Sun.COM * The imaginary "sockmod" has been popped - act as a stream. 60598348SEric.Yu@Sun.COM * If this is a push of sockmod then change back to a socket. 60608348SEric.Yu@Sun.COM */ 60618348SEric.Yu@Sun.COM if (cmd == I_PUSH) { 60628348SEric.Yu@Sun.COM error = ((mode & FKIOCTL) ? copystr : copyinstr)( 60638348SEric.Yu@Sun.COM (void *)arg, mname, sizeof (mname), NULL); 60648348SEric.Yu@Sun.COM 60658348SEric.Yu@Sun.COM if (error == 0 && strcmp(mname, sockmod_name) == 0) { 60668348SEric.Yu@Sun.COM dprintso(so, 0, ("socktpi_ioctl: going to " 60678348SEric.Yu@Sun.COM "socket version\n")); 60688348SEric.Yu@Sun.COM so_stream2sock(so); 60698348SEric.Yu@Sun.COM return (0); 60708348SEric.Yu@Sun.COM } 60718348SEric.Yu@Sun.COM } 60728348SEric.Yu@Sun.COM return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 60738348SEric.Yu@Sun.COM } 60748348SEric.Yu@Sun.COM 60758348SEric.Yu@Sun.COM switch (cmd) { 60768348SEric.Yu@Sun.COM case I_PUSH: 60778348SEric.Yu@Sun.COM if (sti->sti_direct) { 60788348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 60798348SEric.Yu@Sun.COM so_lock_single(so); 60808348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 60818348SEric.Yu@Sun.COM 60828348SEric.Yu@Sun.COM error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, 60838778SErik.Nordmark@Sun.COM cr, rvalp); 60848348SEric.Yu@Sun.COM 60858348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 60868348SEric.Yu@Sun.COM if (error == 0) 60878348SEric.Yu@Sun.COM sti->sti_direct = 0; 60888348SEric.Yu@Sun.COM so_unlock_single(so, SOLOCKED); 60898348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 60908348SEric.Yu@Sun.COM 60918348SEric.Yu@Sun.COM if (error != 0) 60928348SEric.Yu@Sun.COM return (error); 60938348SEric.Yu@Sun.COM } 60948348SEric.Yu@Sun.COM 60958348SEric.Yu@Sun.COM error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 60968348SEric.Yu@Sun.COM if (error == 0) 60978348SEric.Yu@Sun.COM sti->sti_pushcnt++; 60988348SEric.Yu@Sun.COM return (error); 60998348SEric.Yu@Sun.COM 61008348SEric.Yu@Sun.COM case I_POP: 61018348SEric.Yu@Sun.COM if (sti->sti_pushcnt == 0) { 61028348SEric.Yu@Sun.COM /* Emulate sockmod being popped */ 61038348SEric.Yu@Sun.COM dprintso(so, 0, 61048348SEric.Yu@Sun.COM ("socktpi_ioctl: going to STREAMS version\n")); 61058348SEric.Yu@Sun.COM return (so_sock2stream(so)); 61068348SEric.Yu@Sun.COM } 61078348SEric.Yu@Sun.COM 61088348SEric.Yu@Sun.COM error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 61098348SEric.Yu@Sun.COM if (error == 0) 61108348SEric.Yu@Sun.COM sti->sti_pushcnt--; 61118348SEric.Yu@Sun.COM return (error); 61128348SEric.Yu@Sun.COM 61138348SEric.Yu@Sun.COM case I_LIST: { 61148348SEric.Yu@Sun.COM struct str_mlist *kmlistp, *umlistp; 61158348SEric.Yu@Sun.COM struct str_list kstrlist; 61168348SEric.Yu@Sun.COM ssize_t kstrlistsize; 61178348SEric.Yu@Sun.COM int i, nmods; 61188348SEric.Yu@Sun.COM 61198348SEric.Yu@Sun.COM STRUCT_DECL(str_list, ustrlist); 61208348SEric.Yu@Sun.COM STRUCT_INIT(ustrlist, mode); 61218348SEric.Yu@Sun.COM 61228348SEric.Yu@Sun.COM if (arg == NULL) { 61238348SEric.Yu@Sun.COM error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 61248348SEric.Yu@Sun.COM if (error == 0) 61258348SEric.Yu@Sun.COM (*rvalp)++; /* Add one for sockmod */ 61268348SEric.Yu@Sun.COM return (error); 61278348SEric.Yu@Sun.COM } 61288348SEric.Yu@Sun.COM 61298348SEric.Yu@Sun.COM error = so_copyin((void *)arg, STRUCT_BUF(ustrlist), 61308348SEric.Yu@Sun.COM STRUCT_SIZE(ustrlist), mode & FKIOCTL); 61318348SEric.Yu@Sun.COM if (error != 0) 61328348SEric.Yu@Sun.COM return (error); 61338348SEric.Yu@Sun.COM 61348348SEric.Yu@Sun.COM nmods = STRUCT_FGET(ustrlist, sl_nmods); 61358348SEric.Yu@Sun.COM if (nmods <= 0) 61368348SEric.Yu@Sun.COM return (EINVAL); 61378348SEric.Yu@Sun.COM /* 61388348SEric.Yu@Sun.COM * Ceiling nmods at nstrpush to prevent someone from 61398348SEric.Yu@Sun.COM * maliciously consuming lots of kernel memory. 61408348SEric.Yu@Sun.COM */ 61418348SEric.Yu@Sun.COM nmods = MIN(nmods, nstrpush); 61428348SEric.Yu@Sun.COM 61438348SEric.Yu@Sun.COM kstrlistsize = (nmods + 1) * sizeof (struct str_mlist); 61448348SEric.Yu@Sun.COM kstrlist.sl_nmods = nmods; 61458348SEric.Yu@Sun.COM kstrlist.sl_modlist = kmem_zalloc(kstrlistsize, KM_SLEEP); 61468348SEric.Yu@Sun.COM 61478348SEric.Yu@Sun.COM error = strioctl(vp, cmd, (intptr_t)&kstrlist, mode, K_TO_K, 61488348SEric.Yu@Sun.COM cr, rvalp); 61498348SEric.Yu@Sun.COM if (error != 0) 61508348SEric.Yu@Sun.COM goto done; 61518348SEric.Yu@Sun.COM 61528348SEric.Yu@Sun.COM /* 61538348SEric.Yu@Sun.COM * Considering the module list as a 0-based array of sl_nmods 61548348SEric.Yu@Sun.COM * modules, sockmod should conceptually exist at slot 61558348SEric.Yu@Sun.COM * sti_pushcnt. Insert sockmod at this location by sliding all 61568348SEric.Yu@Sun.COM * of the module names after so_pushcnt over by one. We know 61578348SEric.Yu@Sun.COM * that there will be room to do this since we allocated 61588348SEric.Yu@Sun.COM * sl_modlist with an additional slot. 61598348SEric.Yu@Sun.COM */ 61608348SEric.Yu@Sun.COM for (i = kstrlist.sl_nmods; i > sti->sti_pushcnt; i--) 61618348SEric.Yu@Sun.COM kstrlist.sl_modlist[i] = kstrlist.sl_modlist[i - 1]; 61628348SEric.Yu@Sun.COM 61638348SEric.Yu@Sun.COM (void) strcpy(kstrlist.sl_modlist[i].l_name, sockmod_name); 61648348SEric.Yu@Sun.COM kstrlist.sl_nmods++; 61658348SEric.Yu@Sun.COM 61668348SEric.Yu@Sun.COM /* 61678348SEric.Yu@Sun.COM * Copy all of the entries out to ustrlist. 61688348SEric.Yu@Sun.COM */ 61698348SEric.Yu@Sun.COM kmlistp = kstrlist.sl_modlist; 61708348SEric.Yu@Sun.COM umlistp = STRUCT_FGETP(ustrlist, sl_modlist); 61718348SEric.Yu@Sun.COM for (i = 0; i < nmods && i < kstrlist.sl_nmods; i++) { 61728348SEric.Yu@Sun.COM error = so_copyout(kmlistp++, umlistp++, 61738348SEric.Yu@Sun.COM sizeof (struct str_mlist), mode & FKIOCTL); 61748348SEric.Yu@Sun.COM if (error != 0) 61758348SEric.Yu@Sun.COM goto done; 61768348SEric.Yu@Sun.COM } 61778348SEric.Yu@Sun.COM 61788348SEric.Yu@Sun.COM error = so_copyout(&i, (void *)arg, sizeof (int32_t), 61798348SEric.Yu@Sun.COM mode & FKIOCTL); 61808348SEric.Yu@Sun.COM if (error == 0) 61818348SEric.Yu@Sun.COM *rvalp = 0; 61828348SEric.Yu@Sun.COM done: 61838348SEric.Yu@Sun.COM kmem_free(kstrlist.sl_modlist, kstrlistsize); 61848348SEric.Yu@Sun.COM return (error); 61858348SEric.Yu@Sun.COM } 61868348SEric.Yu@Sun.COM case I_LOOK: 61878348SEric.Yu@Sun.COM if (sti->sti_pushcnt == 0) { 61888348SEric.Yu@Sun.COM return (so_copyout(sockmod_name, (void *)arg, 61898348SEric.Yu@Sun.COM sizeof (sockmod_name), mode & FKIOCTL)); 61908348SEric.Yu@Sun.COM } 61918348SEric.Yu@Sun.COM return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 61928348SEric.Yu@Sun.COM 61938348SEric.Yu@Sun.COM case I_FIND: 61948348SEric.Yu@Sun.COM error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 61958348SEric.Yu@Sun.COM if (error && error != EINVAL) 61968348SEric.Yu@Sun.COM return (error); 61978348SEric.Yu@Sun.COM 61988348SEric.Yu@Sun.COM /* if not found and string was sockmod return 1 */ 61998348SEric.Yu@Sun.COM if (*rvalp == 0 || error == EINVAL) { 62008348SEric.Yu@Sun.COM error = ((mode & FKIOCTL) ? copystr : copyinstr)( 62018348SEric.Yu@Sun.COM (void *)arg, mname, sizeof (mname), NULL); 62028348SEric.Yu@Sun.COM if (error == ENAMETOOLONG) 62038348SEric.Yu@Sun.COM error = EINVAL; 62048348SEric.Yu@Sun.COM 62058348SEric.Yu@Sun.COM if (error == 0 && strcmp(mname, sockmod_name) == 0) 62068348SEric.Yu@Sun.COM *rvalp = 1; 62078348SEric.Yu@Sun.COM } 62088348SEric.Yu@Sun.COM return (error); 62098348SEric.Yu@Sun.COM 62108348SEric.Yu@Sun.COM default: 62118348SEric.Yu@Sun.COM panic("socktpi_plumbioctl: unknown ioctl %d", cmd); 62128348SEric.Yu@Sun.COM break; 62138348SEric.Yu@Sun.COM } 62148348SEric.Yu@Sun.COM 62158348SEric.Yu@Sun.COM return (0); 62168348SEric.Yu@Sun.COM } 62178348SEric.Yu@Sun.COM 62188348SEric.Yu@Sun.COM /* 62198348SEric.Yu@Sun.COM * Wrapper around the streams poll routine that implements socket poll 62208348SEric.Yu@Sun.COM * semantics. 62218348SEric.Yu@Sun.COM * The sockfs never calls pollwakeup itself - the stream head take care 62228348SEric.Yu@Sun.COM * of all pollwakeups. Since sockfs never holds so_lock when calling the 62238348SEric.Yu@Sun.COM * stream head there can never be a deadlock due to holding so_lock across 62248348SEric.Yu@Sun.COM * pollwakeup and acquiring so_lock in this routine. 62258348SEric.Yu@Sun.COM * 62268348SEric.Yu@Sun.COM * However, since the performance of VOP_POLL is critical we avoid 62278348SEric.Yu@Sun.COM * acquiring so_lock here. This is based on two assumptions: 62288348SEric.Yu@Sun.COM * - The poll implementation holds locks to serialize the VOP_POLL call 62298348SEric.Yu@Sun.COM * and a pollwakeup for the same pollhead. This ensures that should 62308348SEric.Yu@Sun.COM * e.g. so_state change during a socktpi_poll call the pollwakeup 62318348SEric.Yu@Sun.COM * (which strsock_* and strrput conspire to issue) is issued after 62328348SEric.Yu@Sun.COM * the state change. Thus the pollwakeup will block until VOP_POLL has 62338348SEric.Yu@Sun.COM * returned and then wake up poll and have it call VOP_POLL again. 62348348SEric.Yu@Sun.COM * - The reading of so_state without holding so_lock does not result in 62358348SEric.Yu@Sun.COM * stale data that is older than the latest state change that has dropped 62368348SEric.Yu@Sun.COM * so_lock. This is ensured by the mutex_exit issuing the appropriate 62378348SEric.Yu@Sun.COM * memory barrier to force the data into the coherency domain. 62388348SEric.Yu@Sun.COM */ 62398348SEric.Yu@Sun.COM static int 62408348SEric.Yu@Sun.COM sotpi_poll( 62418348SEric.Yu@Sun.COM struct sonode *so, 62428348SEric.Yu@Sun.COM short events, 62438348SEric.Yu@Sun.COM int anyyet, 62448348SEric.Yu@Sun.COM short *reventsp, 62458348SEric.Yu@Sun.COM struct pollhead **phpp) 62468348SEric.Yu@Sun.COM { 62478348SEric.Yu@Sun.COM short origevents = events; 62488348SEric.Yu@Sun.COM struct vnode *vp = SOTOV(so); 62498348SEric.Yu@Sun.COM int error; 62508348SEric.Yu@Sun.COM int so_state = so->so_state; /* snapshot */ 62518348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 62528348SEric.Yu@Sun.COM 62538348SEric.Yu@Sun.COM dprintso(so, 0, ("socktpi_poll(%p): state %s err %d\n", 62548348SEric.Yu@Sun.COM (void *)vp, pr_state(so_state, so->so_mode), so->so_error)); 62558348SEric.Yu@Sun.COM 62568348SEric.Yu@Sun.COM ASSERT(vp->v_type == VSOCK); 62578348SEric.Yu@Sun.COM ASSERT(vp->v_stream != NULL); 62588348SEric.Yu@Sun.COM 62598348SEric.Yu@Sun.COM if (so->so_version == SOV_STREAM) { 62608348SEric.Yu@Sun.COM /* The imaginary "sockmod" has been popped - act as a stream */ 62618348SEric.Yu@Sun.COM return (strpoll(vp->v_stream, events, anyyet, 62628348SEric.Yu@Sun.COM reventsp, phpp)); 62638348SEric.Yu@Sun.COM } 62648348SEric.Yu@Sun.COM 62658348SEric.Yu@Sun.COM if (!(so_state & SS_ISCONNECTED) && 62668348SEric.Yu@Sun.COM (so->so_mode & SM_CONNREQUIRED)) { 62678348SEric.Yu@Sun.COM /* Not connected yet - turn off write side events */ 62688348SEric.Yu@Sun.COM events &= ~(POLLOUT|POLLWRBAND); 62698348SEric.Yu@Sun.COM } 62708348SEric.Yu@Sun.COM /* 62718348SEric.Yu@Sun.COM * Check for errors without calling strpoll if the caller wants them. 62728348SEric.Yu@Sun.COM * In sockets the errors are represented as input/output events 62738348SEric.Yu@Sun.COM * and there is no need to ask the stream head for this information. 62748348SEric.Yu@Sun.COM */ 62758348SEric.Yu@Sun.COM if (so->so_error != 0 && 62768348SEric.Yu@Sun.COM ((POLLIN|POLLRDNORM|POLLOUT) & origevents) != 0) { 62778348SEric.Yu@Sun.COM *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & origevents; 62788348SEric.Yu@Sun.COM return (0); 62798348SEric.Yu@Sun.COM } 62808348SEric.Yu@Sun.COM /* 62818348SEric.Yu@Sun.COM * Ignore M_PROTO only messages such as the T_EXDATA_IND messages. 62828348SEric.Yu@Sun.COM * These message with only an M_PROTO/M_PCPROTO part and no M_DATA 62838348SEric.Yu@Sun.COM * will not trigger a POLLIN event with POLLRDDATA set. 62848348SEric.Yu@Sun.COM * The handling of urgent data (causing POLLRDBAND) is done by 62858348SEric.Yu@Sun.COM * inspecting SS_OOBPEND below. 62868348SEric.Yu@Sun.COM */ 62878348SEric.Yu@Sun.COM events |= POLLRDDATA; 62888348SEric.Yu@Sun.COM 62898348SEric.Yu@Sun.COM /* 62908348SEric.Yu@Sun.COM * After shutdown(output) a stream head write error is set. 62918348SEric.Yu@Sun.COM * However, we should not return output events. 62928348SEric.Yu@Sun.COM */ 62938348SEric.Yu@Sun.COM events |= POLLNOERR; 62948348SEric.Yu@Sun.COM error = strpoll(vp->v_stream, events, anyyet, 62958348SEric.Yu@Sun.COM reventsp, phpp); 62968348SEric.Yu@Sun.COM if (error) 62978348SEric.Yu@Sun.COM return (error); 62988348SEric.Yu@Sun.COM 62998348SEric.Yu@Sun.COM ASSERT(!(*reventsp & POLLERR)); 63008348SEric.Yu@Sun.COM 63018348SEric.Yu@Sun.COM /* 63028348SEric.Yu@Sun.COM * Notes on T_CONN_IND handling for sockets. 63038348SEric.Yu@Sun.COM * 63048348SEric.Yu@Sun.COM * If strpoll() returned without events, SR_POLLIN is guaranteed 63058348SEric.Yu@Sun.COM * to be set, ensuring any subsequent strrput() runs pollwakeup(). 63068348SEric.Yu@Sun.COM * 63078348SEric.Yu@Sun.COM * Since the so_lock is not held, soqueueconnind() may have run 63088348SEric.Yu@Sun.COM * and a T_CONN_IND may be waiting. We now check for any queued 63098348SEric.Yu@Sun.COM * T_CONN_IND msgs on sti_conn_ind_head and set appropriate events 63108348SEric.Yu@Sun.COM * to ensure poll returns. 63118348SEric.Yu@Sun.COM * 63128348SEric.Yu@Sun.COM * However: 63138348SEric.Yu@Sun.COM * If the T_CONN_IND hasn't arrived by the time strpoll() returns, 63148348SEric.Yu@Sun.COM * when strrput() does run for an arriving M_PROTO with T_CONN_IND 63158348SEric.Yu@Sun.COM * the following actions will occur; taken together they ensure the 63168348SEric.Yu@Sun.COM * syscall will return. 63178348SEric.Yu@Sun.COM * 63188348SEric.Yu@Sun.COM * 1. If a socket, soqueueconnind() will queue the T_CONN_IND but if 63198348SEric.Yu@Sun.COM * the accept() was run on a non-blocking socket sowaitconnind() 63208348SEric.Yu@Sun.COM * may have already returned EWOULDBLOCK, so not be waiting to 63218348SEric.Yu@Sun.COM * process the message. Additionally socktpi_poll() has probably 63228348SEric.Yu@Sun.COM * proceeded past the sti_conn_ind_head check below. 63238348SEric.Yu@Sun.COM * 2. strrput() runs pollwakeup()->pollnotify()->cv_signal() to wake 63248348SEric.Yu@Sun.COM * this thread, however that could occur before poll_common() 63258348SEric.Yu@Sun.COM * has entered cv_wait. 63268348SEric.Yu@Sun.COM * 3. pollnotify() sets T_POLLWAKE, while holding the pc_lock. 63278348SEric.Yu@Sun.COM * 63288348SEric.Yu@Sun.COM * Before proceeding to cv_wait() in poll_common() for an event, 63298348SEric.Yu@Sun.COM * poll_common() atomically checks for T_POLLWAKE under the pc_lock, 63308348SEric.Yu@Sun.COM * and if set, re-calls strpoll() to ensure the late arriving 63318348SEric.Yu@Sun.COM * T_CONN_IND is recognized, and pollsys() returns. 63328348SEric.Yu@Sun.COM */ 63338348SEric.Yu@Sun.COM 63348348SEric.Yu@Sun.COM if (sti->sti_conn_ind_head != NULL) 63358348SEric.Yu@Sun.COM *reventsp |= (POLLIN|POLLRDNORM) & events; 63368348SEric.Yu@Sun.COM 63378348SEric.Yu@Sun.COM if (so->so_state & SS_OOBPEND) 63388348SEric.Yu@Sun.COM *reventsp |= POLLRDBAND & events; 63398348SEric.Yu@Sun.COM 63408348SEric.Yu@Sun.COM if (sti->sti_nl7c_rcv_mp != NULL) { 63418348SEric.Yu@Sun.COM *reventsp |= (POLLIN|POLLRDNORM) & events; 63428348SEric.Yu@Sun.COM } 63438348SEric.Yu@Sun.COM if ((sti->sti_nl7c_flags & NL7C_ENABLED) && 63448348SEric.Yu@Sun.COM ((POLLIN|POLLRDNORM) & *reventsp)) { 63458348SEric.Yu@Sun.COM sti->sti_nl7c_flags |= NL7C_POLLIN; 63468348SEric.Yu@Sun.COM } 63478348SEric.Yu@Sun.COM 63488348SEric.Yu@Sun.COM return (0); 63498348SEric.Yu@Sun.COM } 63508348SEric.Yu@Sun.COM 63518348SEric.Yu@Sun.COM /*ARGSUSED*/ 63528348SEric.Yu@Sun.COM static int 63538348SEric.Yu@Sun.COM socktpi_constructor(void *buf, void *cdrarg, int kmflags) 63548348SEric.Yu@Sun.COM { 63558348SEric.Yu@Sun.COM sotpi_sonode_t *st = (sotpi_sonode_t *)buf; 63568348SEric.Yu@Sun.COM int error = 0; 63578348SEric.Yu@Sun.COM 63588348SEric.Yu@Sun.COM error = sonode_constructor(buf, cdrarg, kmflags); 63598348SEric.Yu@Sun.COM if (error != 0) 63608348SEric.Yu@Sun.COM return (error); 63618348SEric.Yu@Sun.COM 63628348SEric.Yu@Sun.COM error = i_sotpi_info_constructor(&st->st_info); 63638348SEric.Yu@Sun.COM if (error != 0) 63648348SEric.Yu@Sun.COM sonode_destructor(buf, cdrarg); 63658348SEric.Yu@Sun.COM 63668348SEric.Yu@Sun.COM st->st_sonode.so_priv = &st->st_info; 63678348SEric.Yu@Sun.COM 63688348SEric.Yu@Sun.COM return (error); 63698348SEric.Yu@Sun.COM } 63708348SEric.Yu@Sun.COM 63718348SEric.Yu@Sun.COM /*ARGSUSED1*/ 63728348SEric.Yu@Sun.COM static void 63738348SEric.Yu@Sun.COM socktpi_destructor(void *buf, void *cdrarg) 63748348SEric.Yu@Sun.COM { 63758348SEric.Yu@Sun.COM sotpi_sonode_t *st = (sotpi_sonode_t *)buf; 63768348SEric.Yu@Sun.COM 63778348SEric.Yu@Sun.COM ASSERT(st->st_sonode.so_priv == &st->st_info); 63788348SEric.Yu@Sun.COM st->st_sonode.so_priv = NULL; 63798348SEric.Yu@Sun.COM 63808348SEric.Yu@Sun.COM i_sotpi_info_destructor(&st->st_info); 63818348SEric.Yu@Sun.COM sonode_destructor(buf, cdrarg); 63828348SEric.Yu@Sun.COM } 63838348SEric.Yu@Sun.COM 63848348SEric.Yu@Sun.COM static int 63858348SEric.Yu@Sun.COM socktpi_unix_constructor(void *buf, void *cdrarg, int kmflags) 63868348SEric.Yu@Sun.COM { 63878348SEric.Yu@Sun.COM int retval; 63888348SEric.Yu@Sun.COM 63898348SEric.Yu@Sun.COM if ((retval = socktpi_constructor(buf, cdrarg, kmflags)) == 0) { 63908348SEric.Yu@Sun.COM struct sonode *so = (struct sonode *)buf; 63918348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 63928348SEric.Yu@Sun.COM 63938348SEric.Yu@Sun.COM mutex_enter(&socklist.sl_lock); 63948348SEric.Yu@Sun.COM 63958348SEric.Yu@Sun.COM sti->sti_next_so = socklist.sl_list; 63968348SEric.Yu@Sun.COM sti->sti_prev_so = NULL; 63978348SEric.Yu@Sun.COM if (sti->sti_next_so != NULL) 63988348SEric.Yu@Sun.COM SOTOTPI(sti->sti_next_so)->sti_prev_so = so; 63998348SEric.Yu@Sun.COM socklist.sl_list = so; 64008348SEric.Yu@Sun.COM 64018348SEric.Yu@Sun.COM mutex_exit(&socklist.sl_lock); 64028348SEric.Yu@Sun.COM 64038348SEric.Yu@Sun.COM } 64048348SEric.Yu@Sun.COM return (retval); 64058348SEric.Yu@Sun.COM } 64068348SEric.Yu@Sun.COM 64078348SEric.Yu@Sun.COM static void 64088348SEric.Yu@Sun.COM socktpi_unix_destructor(void *buf, void *cdrarg) 64098348SEric.Yu@Sun.COM { 64108348SEric.Yu@Sun.COM struct sonode *so = (struct sonode *)buf; 64118348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 64128348SEric.Yu@Sun.COM 64138348SEric.Yu@Sun.COM mutex_enter(&socklist.sl_lock); 64148348SEric.Yu@Sun.COM 64158348SEric.Yu@Sun.COM if (sti->sti_next_so != NULL) 64168348SEric.Yu@Sun.COM SOTOTPI(sti->sti_next_so)->sti_prev_so = sti->sti_prev_so; 64178348SEric.Yu@Sun.COM if (sti->sti_prev_so != NULL) 64188348SEric.Yu@Sun.COM SOTOTPI(sti->sti_prev_so)->sti_next_so = sti->sti_next_so; 64198348SEric.Yu@Sun.COM else 64208348SEric.Yu@Sun.COM socklist.sl_list = sti->sti_next_so; 64218348SEric.Yu@Sun.COM 64228348SEric.Yu@Sun.COM mutex_exit(&socklist.sl_lock); 64238348SEric.Yu@Sun.COM 64248348SEric.Yu@Sun.COM socktpi_destructor(buf, cdrarg); 64258348SEric.Yu@Sun.COM } 64268348SEric.Yu@Sun.COM 64278348SEric.Yu@Sun.COM int 64288348SEric.Yu@Sun.COM socktpi_init(void) 64298348SEric.Yu@Sun.COM { 64308348SEric.Yu@Sun.COM /* 64318348SEric.Yu@Sun.COM * Create sonode caches. We create a special one for AF_UNIX so 64328348SEric.Yu@Sun.COM * that we can track them for netstat(1m). 64338348SEric.Yu@Sun.COM */ 64348348SEric.Yu@Sun.COM socktpi_cache = kmem_cache_create("socktpi_cache", 64358348SEric.Yu@Sun.COM sizeof (struct sotpi_sonode), 0, socktpi_constructor, 64368348SEric.Yu@Sun.COM socktpi_destructor, NULL, NULL, NULL, 0); 64378348SEric.Yu@Sun.COM 64388348SEric.Yu@Sun.COM socktpi_unix_cache = kmem_cache_create("socktpi_unix_cache", 64398348SEric.Yu@Sun.COM sizeof (struct sotpi_sonode), 0, socktpi_unix_constructor, 64408348SEric.Yu@Sun.COM socktpi_unix_destructor, NULL, NULL, NULL, 0); 64418348SEric.Yu@Sun.COM 64428348SEric.Yu@Sun.COM return (0); 64438348SEric.Yu@Sun.COM } 64448348SEric.Yu@Sun.COM 64458348SEric.Yu@Sun.COM /* 64468348SEric.Yu@Sun.COM * Given a non-TPI sonode, allocate and prep it to be ready for TPI. 64478348SEric.Yu@Sun.COM * 64488348SEric.Yu@Sun.COM * Caller must still update state and mode using sotpi_update_state(). 64498348SEric.Yu@Sun.COM */ 64508963SAnders.Persson@Sun.COM int 64518348SEric.Yu@Sun.COM sotpi_convert_sonode(struct sonode *so, struct sockparams *newsp, 64528963SAnders.Persson@Sun.COM boolean_t *direct, queue_t **qp, struct cred *cr) 64538348SEric.Yu@Sun.COM { 64548348SEric.Yu@Sun.COM sotpi_info_t *sti; 64558348SEric.Yu@Sun.COM struct sockparams *origsp = so->so_sockparams; 64568348SEric.Yu@Sun.COM sock_lower_handle_t handle = so->so_proto_handle; 64578348SEric.Yu@Sun.COM struct stdata *stp; 64588348SEric.Yu@Sun.COM struct vnode *vp; 64598348SEric.Yu@Sun.COM queue_t *q; 64608963SAnders.Persson@Sun.COM int error = 0; 64618963SAnders.Persson@Sun.COM 64628963SAnders.Persson@Sun.COM ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) == 64638963SAnders.Persson@Sun.COM SS_FALLBACK_PENDING); 64648963SAnders.Persson@Sun.COM ASSERT(SOCK_IS_NONSTR(so)); 64658963SAnders.Persson@Sun.COM 64668963SAnders.Persson@Sun.COM *qp = NULL; 64678348SEric.Yu@Sun.COM *direct = B_FALSE; 64688348SEric.Yu@Sun.COM so->so_sockparams = newsp; 64698348SEric.Yu@Sun.COM /* 64708348SEric.Yu@Sun.COM * Allocate and initalize fields required by TPI. 64718348SEric.Yu@Sun.COM */ 64728348SEric.Yu@Sun.COM (void) sotpi_info_create(so, KM_SLEEP); 64738348SEric.Yu@Sun.COM sotpi_info_init(so); 64748348SEric.Yu@Sun.COM 64758963SAnders.Persson@Sun.COM if ((error = sotpi_init(so, NULL, cr, SO_FALLBACK)) != 0) { 64768348SEric.Yu@Sun.COM sotpi_info_fini(so); 64778348SEric.Yu@Sun.COM sotpi_info_destroy(so); 64788963SAnders.Persson@Sun.COM return (error); 64798348SEric.Yu@Sun.COM } 64808348SEric.Yu@Sun.COM ASSERT(handle == so->so_proto_handle); 64818348SEric.Yu@Sun.COM sti = SOTOTPI(so); 64828348SEric.Yu@Sun.COM if (sti->sti_direct != 0) 64838348SEric.Yu@Sun.COM *direct = B_TRUE; 64848348SEric.Yu@Sun.COM 64858348SEric.Yu@Sun.COM /* 64868348SEric.Yu@Sun.COM * Keep the original sp around so we can properly dispose of the 64878348SEric.Yu@Sun.COM * sonode when the socket is being closed. 64888348SEric.Yu@Sun.COM */ 64898348SEric.Yu@Sun.COM sti->sti_orig_sp = origsp; 64908348SEric.Yu@Sun.COM 64918348SEric.Yu@Sun.COM so_basic_strinit(so); /* skips the T_CAPABILITY_REQ */ 64928348SEric.Yu@Sun.COM so_alloc_addr(so, so->so_max_addr_len); 64938348SEric.Yu@Sun.COM 64948348SEric.Yu@Sun.COM /* 64958348SEric.Yu@Sun.COM * If the application has done a SIOCSPGRP, make sure the 64968348SEric.Yu@Sun.COM * STREAM head is aware. This needs to take place before 64978348SEric.Yu@Sun.COM * the protocol start sending up messages. Otherwise we 64988348SEric.Yu@Sun.COM * might miss to generate SIGPOLL. 64998348SEric.Yu@Sun.COM * 65008348SEric.Yu@Sun.COM * It is possible that the application will receive duplicate 65018348SEric.Yu@Sun.COM * signals if some were already generated for either data or 65028348SEric.Yu@Sun.COM * connection indications. 65038348SEric.Yu@Sun.COM */ 65048348SEric.Yu@Sun.COM if (so->so_pgrp != 0) { 65058348SEric.Yu@Sun.COM if (so_set_events(so, so->so_vnode, cr) != 0) 65068348SEric.Yu@Sun.COM so->so_pgrp = 0; 65078348SEric.Yu@Sun.COM } 65088348SEric.Yu@Sun.COM 65098348SEric.Yu@Sun.COM /* 65108348SEric.Yu@Sun.COM * Determine which queue to use. 65118348SEric.Yu@Sun.COM */ 65128348SEric.Yu@Sun.COM vp = SOTOV(so); 65138348SEric.Yu@Sun.COM stp = vp->v_stream; 65148348SEric.Yu@Sun.COM ASSERT(stp != NULL); 65158348SEric.Yu@Sun.COM q = stp->sd_wrq->q_next; 65168348SEric.Yu@Sun.COM 65178348SEric.Yu@Sun.COM /* 65188348SEric.Yu@Sun.COM * Skip any modules that may have been auto pushed when the device 65198348SEric.Yu@Sun.COM * was opened 65208348SEric.Yu@Sun.COM */ 65218348SEric.Yu@Sun.COM while (q->q_next != NULL) 65228348SEric.Yu@Sun.COM q = q->q_next; 65238963SAnders.Persson@Sun.COM *qp = _RD(q); 65248963SAnders.Persson@Sun.COM 65258963SAnders.Persson@Sun.COM /* This is now a STREAMS sockets */ 65268963SAnders.Persson@Sun.COM so->so_not_str = B_FALSE; 65278963SAnders.Persson@Sun.COM 65288963SAnders.Persson@Sun.COM return (error); 65298963SAnders.Persson@Sun.COM } 65308963SAnders.Persson@Sun.COM 65318963SAnders.Persson@Sun.COM /* 65328963SAnders.Persson@Sun.COM * Revert a TPI sonode. It is only allowed to revert the sonode during 65338963SAnders.Persson@Sun.COM * the fallback process. 65348963SAnders.Persson@Sun.COM */ 65358963SAnders.Persson@Sun.COM void 65368963SAnders.Persson@Sun.COM sotpi_revert_sonode(struct sonode *so, struct cred *cr) 65378963SAnders.Persson@Sun.COM { 65388963SAnders.Persson@Sun.COM vnode_t *vp = SOTOV(so); 65398963SAnders.Persson@Sun.COM 65408963SAnders.Persson@Sun.COM ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) == 65418963SAnders.Persson@Sun.COM SS_FALLBACK_PENDING); 65428963SAnders.Persson@Sun.COM ASSERT(!SOCK_IS_NONSTR(so)); 65438963SAnders.Persson@Sun.COM ASSERT(vp->v_stream != NULL); 65448963SAnders.Persson@Sun.COM 65458963SAnders.Persson@Sun.COM strclean(vp); 65468963SAnders.Persson@Sun.COM (void) strclose(vp, FREAD|FWRITE|SO_FALLBACK, cr); 65478963SAnders.Persson@Sun.COM 65488963SAnders.Persson@Sun.COM /* 65498963SAnders.Persson@Sun.COM * Restore the original sockparams. The caller is responsible for 65508963SAnders.Persson@Sun.COM * dropping the ref to the new sp. 65518963SAnders.Persson@Sun.COM */ 65528963SAnders.Persson@Sun.COM so->so_sockparams = SOTOTPI(so)->sti_orig_sp; 65538963SAnders.Persson@Sun.COM 65548963SAnders.Persson@Sun.COM sotpi_info_fini(so); 65558963SAnders.Persson@Sun.COM sotpi_info_destroy(so); 65568963SAnders.Persson@Sun.COM 65578963SAnders.Persson@Sun.COM /* This is no longer a STREAMS sockets */ 65588963SAnders.Persson@Sun.COM so->so_not_str = B_TRUE; 65598348SEric.Yu@Sun.COM } 65608348SEric.Yu@Sun.COM 65618348SEric.Yu@Sun.COM void 65628348SEric.Yu@Sun.COM sotpi_update_state(struct sonode *so, struct T_capability_ack *tcap, 65638348SEric.Yu@Sun.COM struct sockaddr *laddr, socklen_t laddrlen, struct sockaddr *faddr, 65648348SEric.Yu@Sun.COM socklen_t faddrlen, short opts) 65658348SEric.Yu@Sun.COM { 65668348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 65678348SEric.Yu@Sun.COM 65688348SEric.Yu@Sun.COM so_proc_tcapability_ack(so, tcap); 65698348SEric.Yu@Sun.COM 65708348SEric.Yu@Sun.COM so->so_options |= opts; 65718348SEric.Yu@Sun.COM 65728348SEric.Yu@Sun.COM /* 65738348SEric.Yu@Sun.COM * Determine whether the foreign and local address are valid 65748348SEric.Yu@Sun.COM */ 65758348SEric.Yu@Sun.COM if (laddrlen != 0) { 65768348SEric.Yu@Sun.COM ASSERT(laddrlen <= sti->sti_laddr_maxlen); 65778348SEric.Yu@Sun.COM sti->sti_laddr_len = laddrlen; 65788348SEric.Yu@Sun.COM bcopy(laddr, sti->sti_laddr_sa, laddrlen); 65798348SEric.Yu@Sun.COM sti->sti_laddr_valid = (so->so_state & SS_ISBOUND); 65808348SEric.Yu@Sun.COM } 65818348SEric.Yu@Sun.COM 65828348SEric.Yu@Sun.COM if (faddrlen != 0) { 65838348SEric.Yu@Sun.COM ASSERT(faddrlen <= sti->sti_faddr_maxlen); 65848348SEric.Yu@Sun.COM sti->sti_faddr_len = faddrlen; 65858348SEric.Yu@Sun.COM bcopy(faddr, sti->sti_faddr_sa, faddrlen); 65868348SEric.Yu@Sun.COM sti->sti_faddr_valid = (so->so_state & SS_ISCONNECTED); 65878348SEric.Yu@Sun.COM } 65888348SEric.Yu@Sun.COM 65898348SEric.Yu@Sun.COM } 65908348SEric.Yu@Sun.COM 65918348SEric.Yu@Sun.COM /* 65928348SEric.Yu@Sun.COM * Allocate enough space to cache the local and foreign addresses. 65938348SEric.Yu@Sun.COM */ 65948348SEric.Yu@Sun.COM void 65958348SEric.Yu@Sun.COM so_alloc_addr(struct sonode *so, t_uscalar_t maxlen) 65968348SEric.Yu@Sun.COM { 65978348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 65988348SEric.Yu@Sun.COM 65998348SEric.Yu@Sun.COM ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL); 66008348SEric.Yu@Sun.COM ASSERT(sti->sti_laddr_len == 0 && sti->sti_faddr_len == 0); 66018348SEric.Yu@Sun.COM sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 66028348SEric.Yu@Sun.COM P2ROUNDUP(maxlen, KMEM_ALIGN); 66038348SEric.Yu@Sun.COM so->so_max_addr_len = sti->sti_laddr_maxlen; 66048348SEric.Yu@Sun.COM sti->sti_laddr_sa = kmem_alloc(sti->sti_laddr_maxlen * 2, KM_SLEEP); 66058348SEric.Yu@Sun.COM sti->sti_faddr_sa = (struct sockaddr *)((caddr_t)sti->sti_laddr_sa 66068348SEric.Yu@Sun.COM + sti->sti_laddr_maxlen); 66078348SEric.Yu@Sun.COM 66088348SEric.Yu@Sun.COM if (so->so_family == AF_UNIX) { 66098348SEric.Yu@Sun.COM /* 66108348SEric.Yu@Sun.COM * Initialize AF_UNIX related fields. 66118348SEric.Yu@Sun.COM */ 66128348SEric.Yu@Sun.COM bzero(&sti->sti_ux_laddr, sizeof (sti->sti_ux_laddr)); 66138348SEric.Yu@Sun.COM bzero(&sti->sti_ux_faddr, sizeof (sti->sti_ux_faddr)); 66148348SEric.Yu@Sun.COM } 66158348SEric.Yu@Sun.COM } 66168348SEric.Yu@Sun.COM 66178348SEric.Yu@Sun.COM 66188348SEric.Yu@Sun.COM sotpi_info_t * 66198348SEric.Yu@Sun.COM sotpi_sototpi(struct sonode *so) 66208348SEric.Yu@Sun.COM { 66218348SEric.Yu@Sun.COM sotpi_info_t *sti; 66228348SEric.Yu@Sun.COM 66238963SAnders.Persson@Sun.COM ASSERT(so != NULL); 66248348SEric.Yu@Sun.COM 66258348SEric.Yu@Sun.COM sti = (sotpi_info_t *)so->so_priv; 66268348SEric.Yu@Sun.COM 66278348SEric.Yu@Sun.COM ASSERT(sti != NULL); 66288348SEric.Yu@Sun.COM ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC); 66298348SEric.Yu@Sun.COM 66308348SEric.Yu@Sun.COM return (sti); 66318348SEric.Yu@Sun.COM } 66328348SEric.Yu@Sun.COM 66338348SEric.Yu@Sun.COM static int 66348348SEric.Yu@Sun.COM i_sotpi_info_constructor(sotpi_info_t *sti) 66358348SEric.Yu@Sun.COM { 66368348SEric.Yu@Sun.COM sti->sti_magic = SOTPI_INFO_MAGIC; 66378348SEric.Yu@Sun.COM sti->sti_ack_mp = NULL; 66388348SEric.Yu@Sun.COM sti->sti_discon_ind_mp = NULL; 66398348SEric.Yu@Sun.COM sti->sti_ux_bound_vp = NULL; 66408348SEric.Yu@Sun.COM sti->sti_unbind_mp = NULL; 66418348SEric.Yu@Sun.COM 66428348SEric.Yu@Sun.COM sti->sti_conn_ind_head = NULL; 66438348SEric.Yu@Sun.COM sti->sti_conn_ind_tail = NULL; 66448348SEric.Yu@Sun.COM 66458348SEric.Yu@Sun.COM sti->sti_laddr_sa = NULL; 66468348SEric.Yu@Sun.COM sti->sti_faddr_sa = NULL; 66478348SEric.Yu@Sun.COM 66488348SEric.Yu@Sun.COM sti->sti_nl7c_flags = 0; 66498348SEric.Yu@Sun.COM sti->sti_nl7c_uri = NULL; 66508348SEric.Yu@Sun.COM sti->sti_nl7c_rcv_mp = NULL; 66518348SEric.Yu@Sun.COM 66528348SEric.Yu@Sun.COM mutex_init(&sti->sti_plumb_lock, NULL, MUTEX_DEFAULT, NULL); 66538348SEric.Yu@Sun.COM cv_init(&sti->sti_ack_cv, NULL, CV_DEFAULT, NULL); 66548348SEric.Yu@Sun.COM 66558348SEric.Yu@Sun.COM return (0); 66568348SEric.Yu@Sun.COM } 66578348SEric.Yu@Sun.COM 66588348SEric.Yu@Sun.COM static void 66598348SEric.Yu@Sun.COM i_sotpi_info_destructor(sotpi_info_t *sti) 66608348SEric.Yu@Sun.COM { 66618348SEric.Yu@Sun.COM ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC); 66628348SEric.Yu@Sun.COM ASSERT(sti->sti_ack_mp == NULL); 66638348SEric.Yu@Sun.COM ASSERT(sti->sti_discon_ind_mp == NULL); 66648348SEric.Yu@Sun.COM ASSERT(sti->sti_ux_bound_vp == NULL); 66658348SEric.Yu@Sun.COM ASSERT(sti->sti_unbind_mp == NULL); 66668348SEric.Yu@Sun.COM 66678348SEric.Yu@Sun.COM ASSERT(sti->sti_conn_ind_head == NULL); 66688348SEric.Yu@Sun.COM ASSERT(sti->sti_conn_ind_tail == NULL); 66698348SEric.Yu@Sun.COM 66708348SEric.Yu@Sun.COM ASSERT(sti->sti_laddr_sa == NULL); 66718348SEric.Yu@Sun.COM ASSERT(sti->sti_faddr_sa == NULL); 66728348SEric.Yu@Sun.COM 66738348SEric.Yu@Sun.COM ASSERT(sti->sti_nl7c_flags == 0); 66748348SEric.Yu@Sun.COM ASSERT(sti->sti_nl7c_uri == NULL); 66758348SEric.Yu@Sun.COM ASSERT(sti->sti_nl7c_rcv_mp == NULL); 66768348SEric.Yu@Sun.COM 66778348SEric.Yu@Sun.COM mutex_destroy(&sti->sti_plumb_lock); 66788348SEric.Yu@Sun.COM cv_destroy(&sti->sti_ack_cv); 66798348SEric.Yu@Sun.COM } 66808348SEric.Yu@Sun.COM 66818348SEric.Yu@Sun.COM /* 66828348SEric.Yu@Sun.COM * Creates and attaches TPI information to the given sonode 66838348SEric.Yu@Sun.COM */ 66848348SEric.Yu@Sun.COM static boolean_t 66858348SEric.Yu@Sun.COM sotpi_info_create(struct sonode *so, int kmflags) 66868348SEric.Yu@Sun.COM { 66878348SEric.Yu@Sun.COM sotpi_info_t *sti; 66888348SEric.Yu@Sun.COM 66898348SEric.Yu@Sun.COM ASSERT(so->so_priv == NULL); 66908348SEric.Yu@Sun.COM 66918348SEric.Yu@Sun.COM if ((sti = kmem_zalloc(sizeof (*sti), kmflags)) == NULL) 66928348SEric.Yu@Sun.COM return (B_FALSE); 66938348SEric.Yu@Sun.COM 66948348SEric.Yu@Sun.COM if (i_sotpi_info_constructor(sti) != 0) { 66958348SEric.Yu@Sun.COM kmem_free(sti, sizeof (*sti)); 66968348SEric.Yu@Sun.COM return (B_FALSE); 66978348SEric.Yu@Sun.COM } 66988348SEric.Yu@Sun.COM 66998348SEric.Yu@Sun.COM so->so_priv = (void *)sti; 67008348SEric.Yu@Sun.COM return (B_TRUE); 67018348SEric.Yu@Sun.COM } 67028348SEric.Yu@Sun.COM 67038348SEric.Yu@Sun.COM /* 67048348SEric.Yu@Sun.COM * Initializes the TPI information. 67058348SEric.Yu@Sun.COM */ 67068348SEric.Yu@Sun.COM static void 67078348SEric.Yu@Sun.COM sotpi_info_init(struct sonode *so) 67088348SEric.Yu@Sun.COM { 67098348SEric.Yu@Sun.COM struct vnode *vp = SOTOV(so); 67108348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 67118348SEric.Yu@Sun.COM time_t now; 67128348SEric.Yu@Sun.COM 67138348SEric.Yu@Sun.COM sti->sti_dev = so->so_sockparams->sp_sdev_info.sd_vnode->v_rdev; 67148348SEric.Yu@Sun.COM vp->v_rdev = sti->sti_dev; 67158348SEric.Yu@Sun.COM 67168348SEric.Yu@Sun.COM sti->sti_orig_sp = NULL; 67178348SEric.Yu@Sun.COM 67188348SEric.Yu@Sun.COM sti->sti_pushcnt = 0; 67198348SEric.Yu@Sun.COM 67208348SEric.Yu@Sun.COM now = gethrestime_sec(); 67218348SEric.Yu@Sun.COM sti->sti_atime = now; 67228348SEric.Yu@Sun.COM sti->sti_mtime = now; 67238348SEric.Yu@Sun.COM sti->sti_ctime = now; 67248348SEric.Yu@Sun.COM 67258348SEric.Yu@Sun.COM sti->sti_eaddr_mp = NULL; 67268348SEric.Yu@Sun.COM sti->sti_delayed_error = 0; 67278348SEric.Yu@Sun.COM 67288348SEric.Yu@Sun.COM sti->sti_provinfo = NULL; 67298348SEric.Yu@Sun.COM 67308348SEric.Yu@Sun.COM sti->sti_oobcnt = 0; 67318348SEric.Yu@Sun.COM sti->sti_oobsigcnt = 0; 67328348SEric.Yu@Sun.COM 67338348SEric.Yu@Sun.COM ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL); 67348348SEric.Yu@Sun.COM 67358348SEric.Yu@Sun.COM sti->sti_laddr_sa = 0; 67368348SEric.Yu@Sun.COM sti->sti_faddr_sa = 0; 67378348SEric.Yu@Sun.COM sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 0; 67388348SEric.Yu@Sun.COM sti->sti_laddr_len = sti->sti_faddr_len = 0; 67398348SEric.Yu@Sun.COM 67408348SEric.Yu@Sun.COM sti->sti_laddr_valid = 0; 67418348SEric.Yu@Sun.COM sti->sti_faddr_valid = 0; 67428348SEric.Yu@Sun.COM sti->sti_faddr_noxlate = 0; 67438348SEric.Yu@Sun.COM 67448348SEric.Yu@Sun.COM sti->sti_direct = 0; 67458348SEric.Yu@Sun.COM 67468348SEric.Yu@Sun.COM ASSERT(sti->sti_ack_mp == NULL); 67478348SEric.Yu@Sun.COM ASSERT(sti->sti_ux_bound_vp == NULL); 67488348SEric.Yu@Sun.COM ASSERT(sti->sti_unbind_mp == NULL); 67498348SEric.Yu@Sun.COM 67508348SEric.Yu@Sun.COM ASSERT(sti->sti_conn_ind_head == NULL); 67518348SEric.Yu@Sun.COM ASSERT(sti->sti_conn_ind_tail == NULL); 67528348SEric.Yu@Sun.COM 67538348SEric.Yu@Sun.COM /* Initialize the kernel SSL proxy fields */ 67548348SEric.Yu@Sun.COM sti->sti_kssl_type = KSSL_NO_PROXY; 67558348SEric.Yu@Sun.COM sti->sti_kssl_ent = NULL; 67568348SEric.Yu@Sun.COM sti->sti_kssl_ctx = NULL; 67578348SEric.Yu@Sun.COM } 67588348SEric.Yu@Sun.COM 67598348SEric.Yu@Sun.COM /* 67608348SEric.Yu@Sun.COM * Given a sonode, grab the TPI info and free any data. 67618348SEric.Yu@Sun.COM */ 67628348SEric.Yu@Sun.COM static void 67638348SEric.Yu@Sun.COM sotpi_info_fini(struct sonode *so) 67648348SEric.Yu@Sun.COM { 67658348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 67668348SEric.Yu@Sun.COM mblk_t *mp; 67678348SEric.Yu@Sun.COM 67688348SEric.Yu@Sun.COM ASSERT(sti->sti_discon_ind_mp == NULL); 67698348SEric.Yu@Sun.COM 67708348SEric.Yu@Sun.COM if ((mp = sti->sti_conn_ind_head) != NULL) { 67718348SEric.Yu@Sun.COM mblk_t *mp1; 67728348SEric.Yu@Sun.COM 67738348SEric.Yu@Sun.COM while (mp) { 67748348SEric.Yu@Sun.COM mp1 = mp->b_next; 67758348SEric.Yu@Sun.COM mp->b_next = NULL; 67768348SEric.Yu@Sun.COM freemsg(mp); 67778348SEric.Yu@Sun.COM mp = mp1; 67788348SEric.Yu@Sun.COM } 67798348SEric.Yu@Sun.COM sti->sti_conn_ind_head = sti->sti_conn_ind_tail = NULL; 67808348SEric.Yu@Sun.COM } 67818348SEric.Yu@Sun.COM 67828348SEric.Yu@Sun.COM /* 67838348SEric.Yu@Sun.COM * Protect so->so_[lf]addr_sa so that sockfs_snapshot() can safely 67848348SEric.Yu@Sun.COM * indirect them. It also uses so_count as a validity test. 67858348SEric.Yu@Sun.COM */ 67868348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 67878348SEric.Yu@Sun.COM 67888348SEric.Yu@Sun.COM if (sti->sti_laddr_sa) { 67898348SEric.Yu@Sun.COM ASSERT((caddr_t)sti->sti_faddr_sa == 67908348SEric.Yu@Sun.COM (caddr_t)sti->sti_laddr_sa + sti->sti_laddr_maxlen); 67918348SEric.Yu@Sun.COM ASSERT(sti->sti_faddr_maxlen == sti->sti_laddr_maxlen); 67928348SEric.Yu@Sun.COM sti->sti_laddr_valid = 0; 67938348SEric.Yu@Sun.COM sti->sti_faddr_valid = 0; 67948348SEric.Yu@Sun.COM kmem_free(sti->sti_laddr_sa, sti->sti_laddr_maxlen * 2); 67958348SEric.Yu@Sun.COM sti->sti_laddr_sa = NULL; 67968348SEric.Yu@Sun.COM sti->sti_laddr_len = sti->sti_laddr_maxlen = 0; 67978348SEric.Yu@Sun.COM sti->sti_faddr_sa = NULL; 67988348SEric.Yu@Sun.COM sti->sti_faddr_len = sti->sti_faddr_maxlen = 0; 67998348SEric.Yu@Sun.COM } 68008348SEric.Yu@Sun.COM 68018348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 68028348SEric.Yu@Sun.COM 68038348SEric.Yu@Sun.COM if ((mp = sti->sti_eaddr_mp) != NULL) { 68048348SEric.Yu@Sun.COM freemsg(mp); 68058348SEric.Yu@Sun.COM sti->sti_eaddr_mp = NULL; 68068348SEric.Yu@Sun.COM sti->sti_delayed_error = 0; 68078348SEric.Yu@Sun.COM } 68088348SEric.Yu@Sun.COM 68098348SEric.Yu@Sun.COM if ((mp = sti->sti_ack_mp) != NULL) { 68108348SEric.Yu@Sun.COM freemsg(mp); 68118348SEric.Yu@Sun.COM sti->sti_ack_mp = NULL; 68128348SEric.Yu@Sun.COM } 68138348SEric.Yu@Sun.COM 68148348SEric.Yu@Sun.COM if ((mp = sti->sti_nl7c_rcv_mp) != NULL) { 68158348SEric.Yu@Sun.COM sti->sti_nl7c_rcv_mp = NULL; 68168348SEric.Yu@Sun.COM freemsg(mp); 68178348SEric.Yu@Sun.COM } 68188348SEric.Yu@Sun.COM sti->sti_nl7c_rcv_rval = 0; 68198348SEric.Yu@Sun.COM if (sti->sti_nl7c_uri != NULL) { 68208348SEric.Yu@Sun.COM nl7c_urifree(so); 68218348SEric.Yu@Sun.COM /* urifree() cleared nl7c_uri */ 68228348SEric.Yu@Sun.COM } 68238348SEric.Yu@Sun.COM if (sti->sti_nl7c_flags) { 68248348SEric.Yu@Sun.COM sti->sti_nl7c_flags = 0; 68258348SEric.Yu@Sun.COM } 68268348SEric.Yu@Sun.COM 68278348SEric.Yu@Sun.COM ASSERT(sti->sti_ux_bound_vp == NULL); 68288348SEric.Yu@Sun.COM if ((mp = sti->sti_unbind_mp) != NULL) { 68298348SEric.Yu@Sun.COM freemsg(mp); 68308348SEric.Yu@Sun.COM sti->sti_unbind_mp = NULL; 68318348SEric.Yu@Sun.COM } 68328348SEric.Yu@Sun.COM } 68338348SEric.Yu@Sun.COM 68348348SEric.Yu@Sun.COM /* 68358348SEric.Yu@Sun.COM * Destroys the TPI information attached to a sonode. 68368348SEric.Yu@Sun.COM */ 68378348SEric.Yu@Sun.COM static void 68388348SEric.Yu@Sun.COM sotpi_info_destroy(struct sonode *so) 68398348SEric.Yu@Sun.COM { 68408348SEric.Yu@Sun.COM sotpi_info_t *sti = SOTOTPI(so); 68418348SEric.Yu@Sun.COM 68428348SEric.Yu@Sun.COM i_sotpi_info_destructor(sti); 68438348SEric.Yu@Sun.COM kmem_free(sti, sizeof (*sti)); 68448348SEric.Yu@Sun.COM 68458348SEric.Yu@Sun.COM so->so_priv = NULL; 68468348SEric.Yu@Sun.COM } 68478348SEric.Yu@Sun.COM 68488348SEric.Yu@Sun.COM /* 68498429SEric.Yu@Sun.COM * Create the global sotpi socket module entry. It will never be freed. 68508348SEric.Yu@Sun.COM */ 68518348SEric.Yu@Sun.COM smod_info_t * 68528348SEric.Yu@Sun.COM sotpi_smod_create(void) 68538348SEric.Yu@Sun.COM { 68548348SEric.Yu@Sun.COM smod_info_t *smodp; 68558348SEric.Yu@Sun.COM 68568348SEric.Yu@Sun.COM smodp = kmem_zalloc(sizeof (*smodp), KM_SLEEP); 68578429SEric.Yu@Sun.COM smodp->smod_name = kmem_alloc(sizeof (SOTPI_SMOD_NAME), KM_SLEEP); 68588429SEric.Yu@Sun.COM (void) strcpy(smodp->smod_name, SOTPI_SMOD_NAME); 68598348SEric.Yu@Sun.COM /* 68608429SEric.Yu@Sun.COM * Initialize the smod_refcnt to 1 so it will never be freed. 68618348SEric.Yu@Sun.COM */ 68628348SEric.Yu@Sun.COM smodp->smod_refcnt = 1; 68638348SEric.Yu@Sun.COM smodp->smod_uc_version = SOCK_UC_VERSION; 68648348SEric.Yu@Sun.COM smodp->smod_dc_version = SOCK_DC_VERSION; 68658348SEric.Yu@Sun.COM smodp->smod_sock_create_func = &sotpi_create; 68668348SEric.Yu@Sun.COM smodp->smod_sock_destroy_func = &sotpi_destroy; 68678348SEric.Yu@Sun.COM return (smodp); 68688348SEric.Yu@Sun.COM } 6869