1*0Sstevel@tonic-gate /* 2*0Sstevel@tonic-gate * CDDL HEADER START 3*0Sstevel@tonic-gate * 4*0Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*0Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*0Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*0Sstevel@tonic-gate * with the License. 8*0Sstevel@tonic-gate * 9*0Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*0Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*0Sstevel@tonic-gate * See the License for the specific language governing permissions 12*0Sstevel@tonic-gate * and limitations under the License. 13*0Sstevel@tonic-gate * 14*0Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*0Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*0Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*0Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*0Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*0Sstevel@tonic-gate * 20*0Sstevel@tonic-gate * CDDL HEADER END 21*0Sstevel@tonic-gate */ 22*0Sstevel@tonic-gate /* 23*0Sstevel@tonic-gate * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24*0Sstevel@tonic-gate * Use is subject to license terms. 
25*0Sstevel@tonic-gate */ 26*0Sstevel@tonic-gate 27*0Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 28*0Sstevel@tonic-gate 29*0Sstevel@tonic-gate #include <sys/types.h> 30*0Sstevel@tonic-gate #include <sys/t_lock.h> 31*0Sstevel@tonic-gate #include <sys/param.h> 32*0Sstevel@tonic-gate #include <sys/systm.h> 33*0Sstevel@tonic-gate #include <sys/buf.h> 34*0Sstevel@tonic-gate #include <sys/conf.h> 35*0Sstevel@tonic-gate #include <sys/cred.h> 36*0Sstevel@tonic-gate #include <sys/kmem.h> 37*0Sstevel@tonic-gate #include <sys/sysmacros.h> 38*0Sstevel@tonic-gate #include <sys/vfs.h> 39*0Sstevel@tonic-gate #include <sys/vnode.h> 40*0Sstevel@tonic-gate #include <sys/debug.h> 41*0Sstevel@tonic-gate #include <sys/errno.h> 42*0Sstevel@tonic-gate #include <sys/time.h> 43*0Sstevel@tonic-gate #include <sys/file.h> 44*0Sstevel@tonic-gate #include <sys/open.h> 45*0Sstevel@tonic-gate #include <sys/user.h> 46*0Sstevel@tonic-gate #include <sys/termios.h> 47*0Sstevel@tonic-gate #include <sys/stream.h> 48*0Sstevel@tonic-gate #include <sys/strsubr.h> 49*0Sstevel@tonic-gate #include <sys/strsun.h> 50*0Sstevel@tonic-gate #include <sys/esunddi.h> 51*0Sstevel@tonic-gate #include <sys/flock.h> 52*0Sstevel@tonic-gate #include <sys/modctl.h> 53*0Sstevel@tonic-gate #include <sys/cmn_err.h> 54*0Sstevel@tonic-gate #include <sys/vmsystm.h> 55*0Sstevel@tonic-gate #include <sys/policy.h> 56*0Sstevel@tonic-gate 57*0Sstevel@tonic-gate #include <sys/socket.h> 58*0Sstevel@tonic-gate #include <sys/socketvar.h> 59*0Sstevel@tonic-gate #include <netinet/in.h> 60*0Sstevel@tonic-gate #include <sys/un.h> 61*0Sstevel@tonic-gate #include <inet/nca/ncadoorhdr.h> 62*0Sstevel@tonic-gate 63*0Sstevel@tonic-gate #include <sys/isa_defs.h> 64*0Sstevel@tonic-gate #include <sys/inttypes.h> 65*0Sstevel@tonic-gate #include <sys/systm.h> 66*0Sstevel@tonic-gate #include <sys/cpuvar.h> 67*0Sstevel@tonic-gate #include <sys/atomic.h> 68*0Sstevel@tonic-gate #include <sys/filio.h> 69*0Sstevel@tonic-gate #include 
<sys/sendfile.h> 70*0Sstevel@tonic-gate #include <sys/ddi.h> 71*0Sstevel@tonic-gate #include <vm/seg.h> 72*0Sstevel@tonic-gate #include <vm/seg_map.h> 73*0Sstevel@tonic-gate #include <vm/seg_kpm.h> 74*0Sstevel@tonic-gate #include <fs/sockfs/nl7c.h> 75*0Sstevel@tonic-gate 76*0Sstevel@tonic-gate #ifdef SOCK_TEST 77*0Sstevel@tonic-gate int do_useracc = 1; /* Controlled by setting SO_DEBUG to 4 */ 78*0Sstevel@tonic-gate #else 79*0Sstevel@tonic-gate #define do_useracc 1 80*0Sstevel@tonic-gate #endif /* SOCK_TEST */ 81*0Sstevel@tonic-gate 82*0Sstevel@tonic-gate extern int xnet_truncate_print; 83*0Sstevel@tonic-gate 84*0Sstevel@tonic-gate /* 85*0Sstevel@tonic-gate * Note: DEF_IOV_MAX is defined and used as it is in "fs/vncalls.c" 86*0Sstevel@tonic-gate * as there isn't a formal definition of IOV_MAX ??? 87*0Sstevel@tonic-gate */ 88*0Sstevel@tonic-gate #define MSG_MAXIOVLEN 16 89*0Sstevel@tonic-gate 90*0Sstevel@tonic-gate /* 91*0Sstevel@tonic-gate * Kernel component of socket creation. 92*0Sstevel@tonic-gate * 93*0Sstevel@tonic-gate * The socket library determines which version number to use. 94*0Sstevel@tonic-gate * First the library calls this with a NULL devpath. If this fails 95*0Sstevel@tonic-gate * to find a transport (using solookup) the library will look in /etc/netconfig 96*0Sstevel@tonic-gate * for the appropriate transport. If one is found it will pass in the 97*0Sstevel@tonic-gate * devpath for the kernel to use. 
98*0Sstevel@tonic-gate */ 99*0Sstevel@tonic-gate int 100*0Sstevel@tonic-gate so_socket(int domain, int type, int protocol, char *devpath, int version) 101*0Sstevel@tonic-gate { 102*0Sstevel@tonic-gate vnode_t *accessvp; 103*0Sstevel@tonic-gate struct sonode *so; 104*0Sstevel@tonic-gate vnode_t *vp; 105*0Sstevel@tonic-gate struct file *fp; 106*0Sstevel@tonic-gate int fd; 107*0Sstevel@tonic-gate int error; 108*0Sstevel@tonic-gate boolean_t wildcard = B_FALSE; 109*0Sstevel@tonic-gate int saved_error = 0; 110*0Sstevel@tonic-gate int sdomain = domain; 111*0Sstevel@tonic-gate 112*0Sstevel@tonic-gate dprint(1, ("so_socket(%d,%d,%d,%p,%d)\n", 113*0Sstevel@tonic-gate domain, type, protocol, devpath, version)); 114*0Sstevel@tonic-gate 115*0Sstevel@tonic-gate if (domain == AF_NCA) { 116*0Sstevel@tonic-gate /* 117*0Sstevel@tonic-gate * The request is for an NCA socket so for NL7C use the 118*0Sstevel@tonic-gate * INET domain instead and mark NL7C_AF_NCA below. 119*0Sstevel@tonic-gate */ 120*0Sstevel@tonic-gate domain = AF_INET; 121*0Sstevel@tonic-gate /* 122*0Sstevel@tonic-gate * NL7C is not supported in non-global zones, 123*0Sstevel@tonic-gate * we enforce this restriction here. 124*0Sstevel@tonic-gate */ 125*0Sstevel@tonic-gate if (getzoneid() != GLOBAL_ZONEID) { 126*0Sstevel@tonic-gate return (set_errno(ENOTSUP)); 127*0Sstevel@tonic-gate } 128*0Sstevel@tonic-gate } 129*0Sstevel@tonic-gate 130*0Sstevel@tonic-gate accessvp = solookup(domain, type, protocol, devpath, &error); 131*0Sstevel@tonic-gate if (accessvp == NULL) { 132*0Sstevel@tonic-gate /* 133*0Sstevel@tonic-gate * If there is either an EPROTONOSUPPORT or EPROTOTYPE error 134*0Sstevel@tonic-gate * it makes sense doing the wildcard lookup since the 135*0Sstevel@tonic-gate * protocol might not be in the table. 
136*0Sstevel@tonic-gate */ 137*0Sstevel@tonic-gate if (devpath != NULL || protocol == 0 || 138*0Sstevel@tonic-gate !(error == EPROTONOSUPPORT || error == EPROTOTYPE)) 139*0Sstevel@tonic-gate return (set_errno(error)); 140*0Sstevel@tonic-gate 141*0Sstevel@tonic-gate saved_error = error; 142*0Sstevel@tonic-gate 143*0Sstevel@tonic-gate /* 144*0Sstevel@tonic-gate * Try wildcard lookup. Never use devpath for wildcards. 145*0Sstevel@tonic-gate */ 146*0Sstevel@tonic-gate accessvp = solookup(domain, type, 0, NULL, &error); 147*0Sstevel@tonic-gate if (accessvp == NULL) { 148*0Sstevel@tonic-gate /* 149*0Sstevel@tonic-gate * Can't find in kernel table - have library 150*0Sstevel@tonic-gate * fall back to /etc/netconfig and tell us 151*0Sstevel@tonic-gate * the devpath (The library will do this if it didn't 152*0Sstevel@tonic-gate * already pass in a devpath). 153*0Sstevel@tonic-gate */ 154*0Sstevel@tonic-gate if (saved_error != 0) 155*0Sstevel@tonic-gate error = saved_error; 156*0Sstevel@tonic-gate return (set_errno(error)); 157*0Sstevel@tonic-gate } 158*0Sstevel@tonic-gate wildcard = B_TRUE; 159*0Sstevel@tonic-gate } 160*0Sstevel@tonic-gate 161*0Sstevel@tonic-gate /* Check the device policy */ 162*0Sstevel@tonic-gate if ((error = secpolicy_spec_open(CRED(), 163*0Sstevel@tonic-gate accessvp, FREAD|FWRITE)) != 0) { 164*0Sstevel@tonic-gate return (set_errno(error)); 165*0Sstevel@tonic-gate } 166*0Sstevel@tonic-gate 167*0Sstevel@tonic-gate if (domain == AF_NCA) { 168*0Sstevel@tonic-gate so = sonca_create(accessvp, domain, type, protocol, version, 169*0Sstevel@tonic-gate NULL, &error); 170*0Sstevel@tonic-gate } else if (protocol == IPPROTO_SCTP) { 171*0Sstevel@tonic-gate so = sosctp_create(accessvp, domain, type, protocol, version, 172*0Sstevel@tonic-gate NULL, &error); 173*0Sstevel@tonic-gate } else { 174*0Sstevel@tonic-gate so = sotpi_create(accessvp, domain, type, protocol, version, 175*0Sstevel@tonic-gate NULL, &error); 176*0Sstevel@tonic-gate } 177*0Sstevel@tonic-gate if (so 
== NULL) { 178*0Sstevel@tonic-gate return (set_errno(error)); 179*0Sstevel@tonic-gate } 180*0Sstevel@tonic-gate if (sdomain == AF_NCA && domain == AF_INET) { 181*0Sstevel@tonic-gate so->so_nl7c_flags = NL7C_AF_NCA; 182*0Sstevel@tonic-gate } 183*0Sstevel@tonic-gate vp = SOTOV(so); 184*0Sstevel@tonic-gate 185*0Sstevel@tonic-gate if (wildcard) { 186*0Sstevel@tonic-gate /* 187*0Sstevel@tonic-gate * Issue SO_PROTOTYPE setsockopt. 188*0Sstevel@tonic-gate */ 189*0Sstevel@tonic-gate error = SOP_SETSOCKOPT(so, SOL_SOCKET, SO_PROTOTYPE, 190*0Sstevel@tonic-gate &protocol, 191*0Sstevel@tonic-gate (t_uscalar_t)sizeof (protocol)); 192*0Sstevel@tonic-gate if (error) { 193*0Sstevel@tonic-gate (void) VOP_CLOSE(vp, 0, 1, 0, CRED()); 194*0Sstevel@tonic-gate VN_RELE(vp); 195*0Sstevel@tonic-gate /* 196*0Sstevel@tonic-gate * Setsockopt often fails with ENOPROTOOPT but socket() 197*0Sstevel@tonic-gate * should fail with EPROTONOSUPPORT/EPROTOTYPE. 198*0Sstevel@tonic-gate */ 199*0Sstevel@tonic-gate if (saved_error != 0 && error == ENOPROTOOPT) 200*0Sstevel@tonic-gate error = saved_error; 201*0Sstevel@tonic-gate else 202*0Sstevel@tonic-gate error = EPROTONOSUPPORT; 203*0Sstevel@tonic-gate return (set_errno(error)); 204*0Sstevel@tonic-gate } 205*0Sstevel@tonic-gate } 206*0Sstevel@tonic-gate if (error = falloc(vp, FWRITE|FREAD, &fp, &fd)) { 207*0Sstevel@tonic-gate (void) VOP_CLOSE(vp, 0, 1, 0, CRED()); 208*0Sstevel@tonic-gate VN_RELE(vp); 209*0Sstevel@tonic-gate return (set_errno(error)); 210*0Sstevel@tonic-gate } 211*0Sstevel@tonic-gate 212*0Sstevel@tonic-gate /* 213*0Sstevel@tonic-gate * Now fill in the entries that falloc reserved 214*0Sstevel@tonic-gate */ 215*0Sstevel@tonic-gate mutex_exit(&fp->f_tlock); 216*0Sstevel@tonic-gate setf(fd, fp); 217*0Sstevel@tonic-gate 218*0Sstevel@tonic-gate return (fd); 219*0Sstevel@tonic-gate } 220*0Sstevel@tonic-gate 221*0Sstevel@tonic-gate /* 222*0Sstevel@tonic-gate * Map from a file descriptor to a socket node. 
223*0Sstevel@tonic-gate * Returns with the file descriptor held i.e. the caller has to 224*0Sstevel@tonic-gate * use releasef when done with the file descriptor. 225*0Sstevel@tonic-gate */ 226*0Sstevel@tonic-gate static struct sonode * 227*0Sstevel@tonic-gate getsonode(int sock, int *errorp, file_t **fpp) 228*0Sstevel@tonic-gate { 229*0Sstevel@tonic-gate file_t *fp; 230*0Sstevel@tonic-gate vnode_t *vp; 231*0Sstevel@tonic-gate struct sonode *so; 232*0Sstevel@tonic-gate 233*0Sstevel@tonic-gate if ((fp = getf(sock)) == NULL) { 234*0Sstevel@tonic-gate *errorp = EBADF; 235*0Sstevel@tonic-gate eprintline(*errorp); 236*0Sstevel@tonic-gate return (NULL); 237*0Sstevel@tonic-gate } 238*0Sstevel@tonic-gate vp = fp->f_vnode; 239*0Sstevel@tonic-gate /* Check if it is a socket */ 240*0Sstevel@tonic-gate if (vp->v_type != VSOCK) { 241*0Sstevel@tonic-gate releasef(sock); 242*0Sstevel@tonic-gate *errorp = ENOTSOCK; 243*0Sstevel@tonic-gate eprintline(*errorp); 244*0Sstevel@tonic-gate return (NULL); 245*0Sstevel@tonic-gate } 246*0Sstevel@tonic-gate /* 247*0Sstevel@tonic-gate * Use the stream head to find the real socket vnode. 248*0Sstevel@tonic-gate * This is needed when namefs sits above sockfs. 
249*0Sstevel@tonic-gate */ 250*0Sstevel@tonic-gate if (vp->v_stream) { 251*0Sstevel@tonic-gate ASSERT(vp->v_stream->sd_vnode); 252*0Sstevel@tonic-gate vp = vp->v_stream->sd_vnode; 253*0Sstevel@tonic-gate 254*0Sstevel@tonic-gate so = VTOSO(vp); 255*0Sstevel@tonic-gate if (so->so_version == SOV_STREAM) { 256*0Sstevel@tonic-gate releasef(sock); 257*0Sstevel@tonic-gate *errorp = ENOTSOCK; 258*0Sstevel@tonic-gate eprintsoline(so, *errorp); 259*0Sstevel@tonic-gate return (NULL); 260*0Sstevel@tonic-gate } 261*0Sstevel@tonic-gate } else { 262*0Sstevel@tonic-gate so = VTOSO(vp); 263*0Sstevel@tonic-gate } 264*0Sstevel@tonic-gate if (fpp) 265*0Sstevel@tonic-gate *fpp = fp; 266*0Sstevel@tonic-gate return (so); 267*0Sstevel@tonic-gate } 268*0Sstevel@tonic-gate 269*0Sstevel@tonic-gate /* 270*0Sstevel@tonic-gate * Allocate and copyin a sockaddr. 271*0Sstevel@tonic-gate * Ensures NULL termination for AF_UNIX addresses by extending them 272*0Sstevel@tonic-gate * with one NULL byte if need be. Verifies that the length is not 273*0Sstevel@tonic-gate * excessive to prevent an application from consuming all of kernel 274*0Sstevel@tonic-gate * memory. Returns NULL when an error occurred. 
275*0Sstevel@tonic-gate */ 276*0Sstevel@tonic-gate static struct sockaddr * 277*0Sstevel@tonic-gate copyin_name(struct sonode *so, struct sockaddr *name, socklen_t *namelenp, 278*0Sstevel@tonic-gate int *errorp) 279*0Sstevel@tonic-gate { 280*0Sstevel@tonic-gate char *faddr; 281*0Sstevel@tonic-gate size_t namelen = (size_t)*namelenp; 282*0Sstevel@tonic-gate 283*0Sstevel@tonic-gate ASSERT(namelen != 0); 284*0Sstevel@tonic-gate if (namelen > SO_MAXARGSIZE) { 285*0Sstevel@tonic-gate *errorp = EINVAL; 286*0Sstevel@tonic-gate eprintsoline(so, *errorp); 287*0Sstevel@tonic-gate return (NULL); 288*0Sstevel@tonic-gate } 289*0Sstevel@tonic-gate 290*0Sstevel@tonic-gate faddr = (char *)kmem_alloc(namelen, KM_SLEEP); 291*0Sstevel@tonic-gate if (copyin(name, faddr, namelen)) { 292*0Sstevel@tonic-gate kmem_free(faddr, namelen); 293*0Sstevel@tonic-gate *errorp = EFAULT; 294*0Sstevel@tonic-gate eprintsoline(so, *errorp); 295*0Sstevel@tonic-gate return (NULL); 296*0Sstevel@tonic-gate } 297*0Sstevel@tonic-gate 298*0Sstevel@tonic-gate /* 299*0Sstevel@tonic-gate * Add space for NULL termination if needed. 300*0Sstevel@tonic-gate * Do a quick check if the last byte is NUL. 
301*0Sstevel@tonic-gate */ 302*0Sstevel@tonic-gate if (so->so_family == AF_UNIX && faddr[namelen - 1] != '\0') { 303*0Sstevel@tonic-gate /* Check if there is any NULL termination */ 304*0Sstevel@tonic-gate size_t i; 305*0Sstevel@tonic-gate int foundnull = 0; 306*0Sstevel@tonic-gate 307*0Sstevel@tonic-gate for (i = sizeof (name->sa_family); i < namelen; i++) { 308*0Sstevel@tonic-gate if (faddr[i] == '\0') { 309*0Sstevel@tonic-gate foundnull = 1; 310*0Sstevel@tonic-gate break; 311*0Sstevel@tonic-gate } 312*0Sstevel@tonic-gate } 313*0Sstevel@tonic-gate if (!foundnull) { 314*0Sstevel@tonic-gate /* Add extra byte for NUL padding */ 315*0Sstevel@tonic-gate char *nfaddr; 316*0Sstevel@tonic-gate 317*0Sstevel@tonic-gate nfaddr = (char *)kmem_alloc(namelen + 1, KM_SLEEP); 318*0Sstevel@tonic-gate bcopy(faddr, nfaddr, namelen); 319*0Sstevel@tonic-gate kmem_free(faddr, namelen); 320*0Sstevel@tonic-gate 321*0Sstevel@tonic-gate /* NUL terminate */ 322*0Sstevel@tonic-gate nfaddr[namelen] = '\0'; 323*0Sstevel@tonic-gate namelen++; 324*0Sstevel@tonic-gate ASSERT((socklen_t)namelen == namelen); 325*0Sstevel@tonic-gate *namelenp = (socklen_t)namelen; 326*0Sstevel@tonic-gate faddr = nfaddr; 327*0Sstevel@tonic-gate } 328*0Sstevel@tonic-gate } 329*0Sstevel@tonic-gate return ((struct sockaddr *)faddr); 330*0Sstevel@tonic-gate } 331*0Sstevel@tonic-gate 332*0Sstevel@tonic-gate /* 333*0Sstevel@tonic-gate * Copy from kaddr/klen to uaddr/ulen. Updates ulenp if non-NULL. 
334*0Sstevel@tonic-gate */ 335*0Sstevel@tonic-gate static int 336*0Sstevel@tonic-gate copyout_arg(void *uaddr, socklen_t ulen, void *ulenp, 337*0Sstevel@tonic-gate void *kaddr, socklen_t klen) 338*0Sstevel@tonic-gate { 339*0Sstevel@tonic-gate if (uaddr != NULL) { 340*0Sstevel@tonic-gate if (ulen > klen) 341*0Sstevel@tonic-gate ulen = klen; 342*0Sstevel@tonic-gate 343*0Sstevel@tonic-gate if (ulen != 0) { 344*0Sstevel@tonic-gate if (copyout(kaddr, uaddr, ulen)) 345*0Sstevel@tonic-gate return (EFAULT); 346*0Sstevel@tonic-gate } 347*0Sstevel@tonic-gate } else 348*0Sstevel@tonic-gate ulen = 0; 349*0Sstevel@tonic-gate 350*0Sstevel@tonic-gate if (ulenp != NULL) { 351*0Sstevel@tonic-gate if (copyout(&ulen, ulenp, sizeof (ulen))) 352*0Sstevel@tonic-gate return (EFAULT); 353*0Sstevel@tonic-gate } 354*0Sstevel@tonic-gate return (0); 355*0Sstevel@tonic-gate } 356*0Sstevel@tonic-gate 357*0Sstevel@tonic-gate /* 358*0Sstevel@tonic-gate * Copy from kaddr/klen to uaddr/ulen. Updates ulenp if non-NULL. 359*0Sstevel@tonic-gate * If klen is greater than ulen it still uses the non-truncated 360*0Sstevel@tonic-gate * klen to update ulenp. 361*0Sstevel@tonic-gate */ 362*0Sstevel@tonic-gate static int 363*0Sstevel@tonic-gate copyout_name(void *uaddr, socklen_t ulen, void *ulenp, 364*0Sstevel@tonic-gate void *kaddr, socklen_t klen) 365*0Sstevel@tonic-gate { 366*0Sstevel@tonic-gate if (uaddr != NULL) { 367*0Sstevel@tonic-gate if (ulen >= klen) 368*0Sstevel@tonic-gate ulen = klen; 369*0Sstevel@tonic-gate else if (ulen != 0 && xnet_truncate_print) { 370*0Sstevel@tonic-gate printf("sockfs: truncating copyout of address using " 371*0Sstevel@tonic-gate "XNET semantics for pid = %d. 
Lengths %d, %d\n", 372*0Sstevel@tonic-gate curproc->p_pid, klen, ulen); 373*0Sstevel@tonic-gate } 374*0Sstevel@tonic-gate 375*0Sstevel@tonic-gate if (ulen != 0) { 376*0Sstevel@tonic-gate if (copyout(kaddr, uaddr, ulen)) 377*0Sstevel@tonic-gate return (EFAULT); 378*0Sstevel@tonic-gate } else 379*0Sstevel@tonic-gate klen = 0; 380*0Sstevel@tonic-gate } else 381*0Sstevel@tonic-gate klen = 0; 382*0Sstevel@tonic-gate 383*0Sstevel@tonic-gate if (ulenp != NULL) { 384*0Sstevel@tonic-gate if (copyout(&klen, ulenp, sizeof (klen))) 385*0Sstevel@tonic-gate return (EFAULT); 386*0Sstevel@tonic-gate } 387*0Sstevel@tonic-gate return (0); 388*0Sstevel@tonic-gate } 389*0Sstevel@tonic-gate 390*0Sstevel@tonic-gate /* 391*0Sstevel@tonic-gate * The socketpair() code in libsocket creates two sockets (using 392*0Sstevel@tonic-gate * the /etc/netconfig fallback if needed) before calling this routine 393*0Sstevel@tonic-gate * to connect the two sockets together. 394*0Sstevel@tonic-gate * 395*0Sstevel@tonic-gate * For a SOCK_STREAM socketpair a listener is needed - in that case this 396*0Sstevel@tonic-gate * routine will create a new file descriptor as part of accepting the 397*0Sstevel@tonic-gate * connection. The library socketpair() will check if svs[2] has changed 398*0Sstevel@tonic-gate * in which case it will close the changed fd. 399*0Sstevel@tonic-gate * 400*0Sstevel@tonic-gate * Note that this code could use the TPI feature of accepting the connection 401*0Sstevel@tonic-gate * on the listening endpoint. However, that would require significant changes 402*0Sstevel@tonic-gate * to soaccept. 
403*0Sstevel@tonic-gate */ 404*0Sstevel@tonic-gate int 405*0Sstevel@tonic-gate so_socketpair(int sv[2]) 406*0Sstevel@tonic-gate { 407*0Sstevel@tonic-gate int svs[2]; 408*0Sstevel@tonic-gate struct sonode *so1, *so2; 409*0Sstevel@tonic-gate int error; 410*0Sstevel@tonic-gate struct sockaddr_ux *name; 411*0Sstevel@tonic-gate size_t namelen; 412*0Sstevel@tonic-gate 413*0Sstevel@tonic-gate dprint(1, ("so_socketpair(%p)\n", sv)); 414*0Sstevel@tonic-gate 415*0Sstevel@tonic-gate error = useracc(sv, sizeof (svs), B_WRITE); 416*0Sstevel@tonic-gate if (error && do_useracc) 417*0Sstevel@tonic-gate return (set_errno(EFAULT)); 418*0Sstevel@tonic-gate 419*0Sstevel@tonic-gate if (copyin(sv, svs, sizeof (svs))) 420*0Sstevel@tonic-gate return (set_errno(EFAULT)); 421*0Sstevel@tonic-gate 422*0Sstevel@tonic-gate if ((so1 = getsonode(svs[0], &error, NULL)) == NULL) 423*0Sstevel@tonic-gate return (set_errno(error)); 424*0Sstevel@tonic-gate 425*0Sstevel@tonic-gate if ((so2 = getsonode(svs[1], &error, NULL)) == NULL) { 426*0Sstevel@tonic-gate releasef(svs[0]); 427*0Sstevel@tonic-gate return (set_errno(error)); 428*0Sstevel@tonic-gate } 429*0Sstevel@tonic-gate 430*0Sstevel@tonic-gate if (so1->so_family != AF_UNIX || so2->so_family != AF_UNIX) { 431*0Sstevel@tonic-gate error = EOPNOTSUPP; 432*0Sstevel@tonic-gate goto done; 433*0Sstevel@tonic-gate } 434*0Sstevel@tonic-gate 435*0Sstevel@tonic-gate /* 436*0Sstevel@tonic-gate * The code below makes assumptions about the "sockfs" implementation. 437*0Sstevel@tonic-gate * So make sure that the correct implementation is really used. 438*0Sstevel@tonic-gate */ 439*0Sstevel@tonic-gate ASSERT(so1->so_ops == &sotpi_sonodeops); 440*0Sstevel@tonic-gate ASSERT(so2->so_ops == &sotpi_sonodeops); 441*0Sstevel@tonic-gate 442*0Sstevel@tonic-gate if (so1->so_type == SOCK_DGRAM) { 443*0Sstevel@tonic-gate /* 444*0Sstevel@tonic-gate * Bind both sockets and connect them with each other. 445*0Sstevel@tonic-gate * Need to allocate name/namelen for soconnect. 
446*0Sstevel@tonic-gate */ 447*0Sstevel@tonic-gate error = SOP_BIND(so1, NULL, 0, _SOBIND_UNSPEC); 448*0Sstevel@tonic-gate if (error) { 449*0Sstevel@tonic-gate eprintsoline(so1, error); 450*0Sstevel@tonic-gate goto done; 451*0Sstevel@tonic-gate } 452*0Sstevel@tonic-gate error = SOP_BIND(so2, NULL, 0, _SOBIND_UNSPEC); 453*0Sstevel@tonic-gate if (error) { 454*0Sstevel@tonic-gate eprintsoline(so2, error); 455*0Sstevel@tonic-gate goto done; 456*0Sstevel@tonic-gate } 457*0Sstevel@tonic-gate namelen = sizeof (struct sockaddr_ux); 458*0Sstevel@tonic-gate name = kmem_alloc(namelen, KM_SLEEP); 459*0Sstevel@tonic-gate name->sou_family = AF_UNIX; 460*0Sstevel@tonic-gate name->sou_addr = so2->so_ux_laddr; 461*0Sstevel@tonic-gate error = SOP_CONNECT(so1, 462*0Sstevel@tonic-gate (struct sockaddr *)name, 463*0Sstevel@tonic-gate (socklen_t)namelen, 464*0Sstevel@tonic-gate 0, _SOCONNECT_NOXLATE); 465*0Sstevel@tonic-gate if (error) { 466*0Sstevel@tonic-gate kmem_free(name, namelen); 467*0Sstevel@tonic-gate eprintsoline(so1, error); 468*0Sstevel@tonic-gate goto done; 469*0Sstevel@tonic-gate } 470*0Sstevel@tonic-gate name->sou_addr = so1->so_ux_laddr; 471*0Sstevel@tonic-gate error = SOP_CONNECT(so2, 472*0Sstevel@tonic-gate (struct sockaddr *)name, 473*0Sstevel@tonic-gate (socklen_t)namelen, 474*0Sstevel@tonic-gate 0, _SOCONNECT_NOXLATE); 475*0Sstevel@tonic-gate kmem_free(name, namelen); 476*0Sstevel@tonic-gate if (error) { 477*0Sstevel@tonic-gate eprintsoline(so2, error); 478*0Sstevel@tonic-gate goto done; 479*0Sstevel@tonic-gate } 480*0Sstevel@tonic-gate releasef(svs[0]); 481*0Sstevel@tonic-gate releasef(svs[1]); 482*0Sstevel@tonic-gate } else { 483*0Sstevel@tonic-gate /* 484*0Sstevel@tonic-gate * Bind both sockets, with so1 being a listener. 485*0Sstevel@tonic-gate * Connect so2 to so1 - nonblocking to avoid waiting for 486*0Sstevel@tonic-gate * soaccept to complete. 487*0Sstevel@tonic-gate * Accept a connection on so1. Pass out the new fd as sv[0]. 
488*0Sstevel@tonic-gate * The library will detect the changed fd and close 489*0Sstevel@tonic-gate * the original one. 490*0Sstevel@tonic-gate */ 491*0Sstevel@tonic-gate struct sonode *nso; 492*0Sstevel@tonic-gate struct vnode *nvp; 493*0Sstevel@tonic-gate struct file *nfp; 494*0Sstevel@tonic-gate int nfd; 495*0Sstevel@tonic-gate 496*0Sstevel@tonic-gate /* 497*0Sstevel@tonic-gate * We could simply call SOP_LISTEN() here (which would do the 498*0Sstevel@tonic-gate * binding automatically) if the code didn't rely on passing 499*0Sstevel@tonic-gate * _SOBIND_NOXLATE to the TPI implementation of SOP_BIND(). 500*0Sstevel@tonic-gate */ 501*0Sstevel@tonic-gate error = SOP_BIND(so1, NULL, 0, _SOBIND_UNSPEC|_SOBIND_NOXLATE| 502*0Sstevel@tonic-gate _SOBIND_LISTEN|_SOBIND_SOCKETPAIR); 503*0Sstevel@tonic-gate if (error) { 504*0Sstevel@tonic-gate eprintsoline(so1, error); 505*0Sstevel@tonic-gate goto done; 506*0Sstevel@tonic-gate } 507*0Sstevel@tonic-gate error = SOP_BIND(so2, NULL, 0, _SOBIND_UNSPEC); 508*0Sstevel@tonic-gate if (error) { 509*0Sstevel@tonic-gate eprintsoline(so2, error); 510*0Sstevel@tonic-gate goto done; 511*0Sstevel@tonic-gate } 512*0Sstevel@tonic-gate 513*0Sstevel@tonic-gate namelen = sizeof (struct sockaddr_ux); 514*0Sstevel@tonic-gate name = kmem_alloc(namelen, KM_SLEEP); 515*0Sstevel@tonic-gate name->sou_family = AF_UNIX; 516*0Sstevel@tonic-gate name->sou_addr = so1->so_ux_laddr; 517*0Sstevel@tonic-gate error = SOP_CONNECT(so2, 518*0Sstevel@tonic-gate (struct sockaddr *)name, 519*0Sstevel@tonic-gate (socklen_t)namelen, 520*0Sstevel@tonic-gate FNONBLOCK, _SOCONNECT_NOXLATE); 521*0Sstevel@tonic-gate kmem_free(name, namelen); 522*0Sstevel@tonic-gate if (error) { 523*0Sstevel@tonic-gate if (error != EINPROGRESS) { 524*0Sstevel@tonic-gate eprintsoline(so2, error); 525*0Sstevel@tonic-gate goto done; 526*0Sstevel@tonic-gate } 527*0Sstevel@tonic-gate } 528*0Sstevel@tonic-gate 529*0Sstevel@tonic-gate error = SOP_ACCEPT(so1, 0, &nso); 530*0Sstevel@tonic-gate if 
(error) { 531*0Sstevel@tonic-gate eprintsoline(so1, error); 532*0Sstevel@tonic-gate goto done; 533*0Sstevel@tonic-gate } 534*0Sstevel@tonic-gate 535*0Sstevel@tonic-gate /* wait for so2 being SS_CONNECTED ignoring signals */ 536*0Sstevel@tonic-gate mutex_enter(&so2->so_lock); 537*0Sstevel@tonic-gate error = sowaitconnected(so2, 0, 1); 538*0Sstevel@tonic-gate mutex_exit(&so2->so_lock); 539*0Sstevel@tonic-gate nvp = SOTOV(nso); 540*0Sstevel@tonic-gate if (error != 0) { 541*0Sstevel@tonic-gate (void) VOP_CLOSE(nvp, 0, 1, 0, CRED()); 542*0Sstevel@tonic-gate VN_RELE(nvp); 543*0Sstevel@tonic-gate eprintsoline(so2, error); 544*0Sstevel@tonic-gate goto done; 545*0Sstevel@tonic-gate } 546*0Sstevel@tonic-gate 547*0Sstevel@tonic-gate if (error = falloc(nvp, FWRITE|FREAD, &nfp, &nfd)) { 548*0Sstevel@tonic-gate (void) VOP_CLOSE(nvp, 0, 1, 0, CRED()); 549*0Sstevel@tonic-gate VN_RELE(nvp); 550*0Sstevel@tonic-gate eprintsoline(nso, error); 551*0Sstevel@tonic-gate goto done; 552*0Sstevel@tonic-gate } 553*0Sstevel@tonic-gate /* 554*0Sstevel@tonic-gate * fill in the entries that falloc reserved 555*0Sstevel@tonic-gate */ 556*0Sstevel@tonic-gate mutex_exit(&nfp->f_tlock); 557*0Sstevel@tonic-gate setf(nfd, nfp); 558*0Sstevel@tonic-gate 559*0Sstevel@tonic-gate releasef(svs[0]); 560*0Sstevel@tonic-gate releasef(svs[1]); 561*0Sstevel@tonic-gate svs[0] = nfd; 562*0Sstevel@tonic-gate 563*0Sstevel@tonic-gate /* 564*0Sstevel@tonic-gate * The socketpair library routine will close the original 565*0Sstevel@tonic-gate * svs[0] when this code passes out a different file 566*0Sstevel@tonic-gate * descriptor. 
567*0Sstevel@tonic-gate */ 568*0Sstevel@tonic-gate if (copyout(svs, sv, sizeof (svs))) { 569*0Sstevel@tonic-gate (void) closeandsetf(nfd, NULL); 570*0Sstevel@tonic-gate eprintline(EFAULT); 571*0Sstevel@tonic-gate return (set_errno(EFAULT)); 572*0Sstevel@tonic-gate } 573*0Sstevel@tonic-gate } 574*0Sstevel@tonic-gate return (0); 575*0Sstevel@tonic-gate 576*0Sstevel@tonic-gate done: 577*0Sstevel@tonic-gate releasef(svs[0]); 578*0Sstevel@tonic-gate releasef(svs[1]); 579*0Sstevel@tonic-gate return (set_errno(error)); 580*0Sstevel@tonic-gate } 581*0Sstevel@tonic-gate 582*0Sstevel@tonic-gate int 583*0Sstevel@tonic-gate bind(int sock, struct sockaddr *name, socklen_t namelen, int version) 584*0Sstevel@tonic-gate { 585*0Sstevel@tonic-gate struct sonode *so; 586*0Sstevel@tonic-gate int error; 587*0Sstevel@tonic-gate 588*0Sstevel@tonic-gate dprint(1, ("bind(%d, %p, %d)\n", 589*0Sstevel@tonic-gate sock, name, namelen)); 590*0Sstevel@tonic-gate 591*0Sstevel@tonic-gate if ((so = getsonode(sock, &error, NULL)) == NULL) 592*0Sstevel@tonic-gate return (set_errno(error)); 593*0Sstevel@tonic-gate 594*0Sstevel@tonic-gate /* Allocate and copyin name */ 595*0Sstevel@tonic-gate /* 596*0Sstevel@tonic-gate * X/Open test does not expect EFAULT with NULL name and non-zero 597*0Sstevel@tonic-gate * namelen. 
598*0Sstevel@tonic-gate */ 599*0Sstevel@tonic-gate if (name != NULL && namelen != 0) { 600*0Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 601*0Sstevel@tonic-gate name = copyin_name(so, name, &namelen, &error); 602*0Sstevel@tonic-gate if (name == NULL) { 603*0Sstevel@tonic-gate releasef(sock); 604*0Sstevel@tonic-gate return (set_errno(error)); 605*0Sstevel@tonic-gate } 606*0Sstevel@tonic-gate } else { 607*0Sstevel@tonic-gate name = NULL; 608*0Sstevel@tonic-gate namelen = 0; 609*0Sstevel@tonic-gate } 610*0Sstevel@tonic-gate 611*0Sstevel@tonic-gate switch (version) { 612*0Sstevel@tonic-gate default: 613*0Sstevel@tonic-gate error = SOP_BIND(so, name, namelen, 0); 614*0Sstevel@tonic-gate break; 615*0Sstevel@tonic-gate case SOV_XPG4_2: 616*0Sstevel@tonic-gate error = SOP_BIND(so, name, namelen, _SOBIND_XPG4_2); 617*0Sstevel@tonic-gate break; 618*0Sstevel@tonic-gate case SOV_SOCKBSD: 619*0Sstevel@tonic-gate error = SOP_BIND(so, name, namelen, _SOBIND_SOCKBSD); 620*0Sstevel@tonic-gate break; 621*0Sstevel@tonic-gate } 622*0Sstevel@tonic-gate done: 623*0Sstevel@tonic-gate releasef(sock); 624*0Sstevel@tonic-gate if (name != NULL) 625*0Sstevel@tonic-gate kmem_free(name, (size_t)namelen); 626*0Sstevel@tonic-gate 627*0Sstevel@tonic-gate if (error) 628*0Sstevel@tonic-gate return (set_errno(error)); 629*0Sstevel@tonic-gate return (0); 630*0Sstevel@tonic-gate } 631*0Sstevel@tonic-gate 632*0Sstevel@tonic-gate /* ARGSUSED2 */ 633*0Sstevel@tonic-gate int 634*0Sstevel@tonic-gate listen(int sock, int backlog, int version) 635*0Sstevel@tonic-gate { 636*0Sstevel@tonic-gate struct sonode *so; 637*0Sstevel@tonic-gate int error; 638*0Sstevel@tonic-gate 639*0Sstevel@tonic-gate dprint(1, ("listen(%d, %d)\n", 640*0Sstevel@tonic-gate sock, backlog)); 641*0Sstevel@tonic-gate 642*0Sstevel@tonic-gate if ((so = getsonode(sock, &error, NULL)) == NULL) 643*0Sstevel@tonic-gate return (set_errno(error)); 644*0Sstevel@tonic-gate 645*0Sstevel@tonic-gate error = SOP_LISTEN(so, backlog); 
646*0Sstevel@tonic-gate 647*0Sstevel@tonic-gate releasef(sock); 648*0Sstevel@tonic-gate if (error) 649*0Sstevel@tonic-gate return (set_errno(error)); 650*0Sstevel@tonic-gate return (0); 651*0Sstevel@tonic-gate } 652*0Sstevel@tonic-gate 653*0Sstevel@tonic-gate /*ARGSUSED3*/ 654*0Sstevel@tonic-gate int 655*0Sstevel@tonic-gate accept(int sock, struct sockaddr *name, socklen_t *namelenp, int version) 656*0Sstevel@tonic-gate { 657*0Sstevel@tonic-gate struct sonode *so; 658*0Sstevel@tonic-gate file_t *fp; 659*0Sstevel@tonic-gate int error; 660*0Sstevel@tonic-gate socklen_t namelen; 661*0Sstevel@tonic-gate struct sonode *nso; 662*0Sstevel@tonic-gate struct vnode *nvp; 663*0Sstevel@tonic-gate struct file *nfp; 664*0Sstevel@tonic-gate int nfd; 665*0Sstevel@tonic-gate 666*0Sstevel@tonic-gate dprint(1, ("accept(%d, %p, %p)\n", 667*0Sstevel@tonic-gate sock, name, namelenp)); 668*0Sstevel@tonic-gate 669*0Sstevel@tonic-gate if ((so = getsonode(sock, &error, &fp)) == NULL) 670*0Sstevel@tonic-gate return (set_errno(error)); 671*0Sstevel@tonic-gate 672*0Sstevel@tonic-gate if (name != NULL) { 673*0Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 674*0Sstevel@tonic-gate if (copyin(namelenp, &namelen, sizeof (namelen))) { 675*0Sstevel@tonic-gate releasef(sock); 676*0Sstevel@tonic-gate return (set_errno(EFAULT)); 677*0Sstevel@tonic-gate } 678*0Sstevel@tonic-gate if (namelen != 0) { 679*0Sstevel@tonic-gate error = useracc(name, (size_t)namelen, B_WRITE); 680*0Sstevel@tonic-gate if (error && do_useracc) { 681*0Sstevel@tonic-gate releasef(sock); 682*0Sstevel@tonic-gate return (set_errno(EFAULT)); 683*0Sstevel@tonic-gate } 684*0Sstevel@tonic-gate } else 685*0Sstevel@tonic-gate name = NULL; 686*0Sstevel@tonic-gate } else { 687*0Sstevel@tonic-gate namelen = 0; 688*0Sstevel@tonic-gate } 689*0Sstevel@tonic-gate 690*0Sstevel@tonic-gate /* 691*0Sstevel@tonic-gate * Allocate the user fd before SOP_ACCEPT() in order to 692*0Sstevel@tonic-gate * catch EMFILE errors before calling 
SOP_ACCEPT(). 693*0Sstevel@tonic-gate */ 694*0Sstevel@tonic-gate if ((nfd = ufalloc(0)) == -1) { 695*0Sstevel@tonic-gate eprintsoline(so, EMFILE); 696*0Sstevel@tonic-gate releasef(sock); 697*0Sstevel@tonic-gate return (set_errno(EMFILE)); 698*0Sstevel@tonic-gate } 699*0Sstevel@tonic-gate error = SOP_ACCEPT(so, fp->f_flag, &nso); 700*0Sstevel@tonic-gate releasef(sock); 701*0Sstevel@tonic-gate if (error) { 702*0Sstevel@tonic-gate setf(nfd, NULL); 703*0Sstevel@tonic-gate return (set_errno(error)); 704*0Sstevel@tonic-gate } 705*0Sstevel@tonic-gate 706*0Sstevel@tonic-gate nvp = SOTOV(nso); 707*0Sstevel@tonic-gate 708*0Sstevel@tonic-gate /* 709*0Sstevel@tonic-gate * so_faddr_sa can not go away even though we are not holding so_lock. 710*0Sstevel@tonic-gate * However, in theory its content could change from underneath us. 711*0Sstevel@tonic-gate * But this is not possible in practice since it can only 712*0Sstevel@tonic-gate * change due to either some socket system call 713*0Sstevel@tonic-gate * or due to a T_CONN_CON being received from the stream head. 714*0Sstevel@tonic-gate * Since the falloc/setf have not yet been done no thread 715*0Sstevel@tonic-gate * can do any system call on nso and T_CONN_CON can not arrive 716*0Sstevel@tonic-gate * on a socket that is already connected. 717*0Sstevel@tonic-gate * Thus there is no reason to hold so_lock here. 718*0Sstevel@tonic-gate * 719*0Sstevel@tonic-gate * SOP_ACCEPT() is required to have set the valid bit for the faddr, 720*0Sstevel@tonic-gate * but it could be instantly cleared by a disconnect from the transport. 721*0Sstevel@tonic-gate * For that reason we ignore it here. 
722*0Sstevel@tonic-gate */ 723*0Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(&nso->so_lock)); 724*0Sstevel@tonic-gate error = copyout_name(name, namelen, namelenp, 725*0Sstevel@tonic-gate nso->so_faddr_sa, (socklen_t)nso->so_faddr_len); 726*0Sstevel@tonic-gate if (error) { 727*0Sstevel@tonic-gate setf(nfd, NULL); 728*0Sstevel@tonic-gate (void) VOP_CLOSE(nvp, 0, 1, 0, CRED()); 729*0Sstevel@tonic-gate VN_RELE(nvp); 730*0Sstevel@tonic-gate return (set_errno(error)); 731*0Sstevel@tonic-gate } 732*0Sstevel@tonic-gate if (error = falloc(NULL, FWRITE|FREAD, &nfp, NULL)) { 733*0Sstevel@tonic-gate setf(nfd, NULL); 734*0Sstevel@tonic-gate (void) VOP_CLOSE(nvp, 0, 1, 0, CRED()); 735*0Sstevel@tonic-gate VN_RELE(nvp); 736*0Sstevel@tonic-gate eprintsoline(so, error); 737*0Sstevel@tonic-gate return (set_errno(error)); 738*0Sstevel@tonic-gate } 739*0Sstevel@tonic-gate /* 740*0Sstevel@tonic-gate * fill in the entries that falloc reserved 741*0Sstevel@tonic-gate */ 742*0Sstevel@tonic-gate nfp->f_vnode = nvp; 743*0Sstevel@tonic-gate mutex_exit(&nfp->f_tlock); 744*0Sstevel@tonic-gate setf(nfd, nfp); 745*0Sstevel@tonic-gate 746*0Sstevel@tonic-gate /* 747*0Sstevel@tonic-gate * Copy FNDELAY and FNONBLOCK from listener to acceptor 748*0Sstevel@tonic-gate */ 749*0Sstevel@tonic-gate if (so->so_state & (SS_NDELAY|SS_NONBLOCK)) { 750*0Sstevel@tonic-gate uint_t oflag = nfp->f_flag; 751*0Sstevel@tonic-gate int arg = 0; 752*0Sstevel@tonic-gate 753*0Sstevel@tonic-gate if (so->so_state & SS_NONBLOCK) 754*0Sstevel@tonic-gate arg |= FNONBLOCK; 755*0Sstevel@tonic-gate else if (so->so_state & SS_NDELAY) 756*0Sstevel@tonic-gate arg |= FNDELAY; 757*0Sstevel@tonic-gate 758*0Sstevel@tonic-gate /* 759*0Sstevel@tonic-gate * This code is a simplification of the F_SETFL code in fcntl() 760*0Sstevel@tonic-gate * Ignore any errors from VOP_SETFL. 
761*0Sstevel@tonic-gate */ 762*0Sstevel@tonic-gate if ((error = VOP_SETFL(nvp, oflag, arg, nfp->f_cred)) != 0) { 763*0Sstevel@tonic-gate eprintsoline(so, error); 764*0Sstevel@tonic-gate error = 0; 765*0Sstevel@tonic-gate } else { 766*0Sstevel@tonic-gate mutex_enter(&nfp->f_tlock); 767*0Sstevel@tonic-gate nfp->f_flag &= ~FMASK | (FREAD|FWRITE); 768*0Sstevel@tonic-gate nfp->f_flag |= arg; 769*0Sstevel@tonic-gate mutex_exit(&nfp->f_tlock); 770*0Sstevel@tonic-gate } 771*0Sstevel@tonic-gate } 772*0Sstevel@tonic-gate return (nfd); 773*0Sstevel@tonic-gate } 774*0Sstevel@tonic-gate 775*0Sstevel@tonic-gate int 776*0Sstevel@tonic-gate connect(int sock, struct sockaddr *name, socklen_t namelen, int version) 777*0Sstevel@tonic-gate { 778*0Sstevel@tonic-gate struct sonode *so; 779*0Sstevel@tonic-gate file_t *fp; 780*0Sstevel@tonic-gate int error; 781*0Sstevel@tonic-gate 782*0Sstevel@tonic-gate dprint(1, ("connect(%d, %p, %d)\n", 783*0Sstevel@tonic-gate sock, name, namelen)); 784*0Sstevel@tonic-gate 785*0Sstevel@tonic-gate if ((so = getsonode(sock, &error, &fp)) == NULL) 786*0Sstevel@tonic-gate return (set_errno(error)); 787*0Sstevel@tonic-gate 788*0Sstevel@tonic-gate /* Allocate and copyin name */ 789*0Sstevel@tonic-gate if (namelen != 0) { 790*0Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 791*0Sstevel@tonic-gate name = copyin_name(so, name, &namelen, &error); 792*0Sstevel@tonic-gate if (name == NULL) { 793*0Sstevel@tonic-gate releasef(sock); 794*0Sstevel@tonic-gate return (set_errno(error)); 795*0Sstevel@tonic-gate } 796*0Sstevel@tonic-gate } else 797*0Sstevel@tonic-gate name = NULL; 798*0Sstevel@tonic-gate 799*0Sstevel@tonic-gate error = SOP_CONNECT(so, name, namelen, fp->f_flag, 800*0Sstevel@tonic-gate (version != SOV_XPG4_2) ? 
0 : _SOCONNECT_XPG4_2); 801*0Sstevel@tonic-gate releasef(sock); 802*0Sstevel@tonic-gate if (name) 803*0Sstevel@tonic-gate kmem_free(name, (size_t)namelen); 804*0Sstevel@tonic-gate if (error) 805*0Sstevel@tonic-gate return (set_errno(error)); 806*0Sstevel@tonic-gate return (0); 807*0Sstevel@tonic-gate } 808*0Sstevel@tonic-gate 809*0Sstevel@tonic-gate /*ARGSUSED2*/ 810*0Sstevel@tonic-gate int 811*0Sstevel@tonic-gate shutdown(int sock, int how, int version) 812*0Sstevel@tonic-gate { 813*0Sstevel@tonic-gate struct sonode *so; 814*0Sstevel@tonic-gate int error; 815*0Sstevel@tonic-gate 816*0Sstevel@tonic-gate dprint(1, ("shutdown(%d, %d)\n", 817*0Sstevel@tonic-gate sock, how)); 818*0Sstevel@tonic-gate 819*0Sstevel@tonic-gate if ((so = getsonode(sock, &error, NULL)) == NULL) 820*0Sstevel@tonic-gate return (set_errno(error)); 821*0Sstevel@tonic-gate 822*0Sstevel@tonic-gate error = SOP_SHUTDOWN(so, how); 823*0Sstevel@tonic-gate 824*0Sstevel@tonic-gate releasef(sock); 825*0Sstevel@tonic-gate if (error) 826*0Sstevel@tonic-gate return (set_errno(error)); 827*0Sstevel@tonic-gate return (0); 828*0Sstevel@tonic-gate } 829*0Sstevel@tonic-gate 830*0Sstevel@tonic-gate /* 831*0Sstevel@tonic-gate * Common receive routine. 
832*0Sstevel@tonic-gate */ 833*0Sstevel@tonic-gate static ssize_t 834*0Sstevel@tonic-gate recvit(int sock, 835*0Sstevel@tonic-gate struct nmsghdr *msg, 836*0Sstevel@tonic-gate struct uio *uiop, 837*0Sstevel@tonic-gate int flags, 838*0Sstevel@tonic-gate socklen_t *namelenp, 839*0Sstevel@tonic-gate socklen_t *controllenp, 840*0Sstevel@tonic-gate int *flagsp) 841*0Sstevel@tonic-gate { 842*0Sstevel@tonic-gate struct sonode *so; 843*0Sstevel@tonic-gate file_t *fp; 844*0Sstevel@tonic-gate void *name; 845*0Sstevel@tonic-gate socklen_t namelen; 846*0Sstevel@tonic-gate void *control; 847*0Sstevel@tonic-gate socklen_t controllen; 848*0Sstevel@tonic-gate ssize_t len; 849*0Sstevel@tonic-gate int error; 850*0Sstevel@tonic-gate 851*0Sstevel@tonic-gate if ((so = getsonode(sock, &error, &fp)) == NULL) 852*0Sstevel@tonic-gate return (set_errno(error)); 853*0Sstevel@tonic-gate 854*0Sstevel@tonic-gate len = uiop->uio_resid; 855*0Sstevel@tonic-gate uiop->uio_fmode = fp->f_flag; 856*0Sstevel@tonic-gate uiop->uio_extflg = UIO_COPY_CACHED; 857*0Sstevel@tonic-gate 858*0Sstevel@tonic-gate name = msg->msg_name; 859*0Sstevel@tonic-gate namelen = msg->msg_namelen; 860*0Sstevel@tonic-gate control = msg->msg_control; 861*0Sstevel@tonic-gate controllen = msg->msg_controllen; 862*0Sstevel@tonic-gate 863*0Sstevel@tonic-gate msg->msg_flags = flags & (MSG_OOB | MSG_PEEK | MSG_WAITALL | 864*0Sstevel@tonic-gate MSG_DONTWAIT | MSG_XPG4_2); 865*0Sstevel@tonic-gate 866*0Sstevel@tonic-gate error = SOP_RECVMSG(so, msg, uiop); 867*0Sstevel@tonic-gate if (error) { 868*0Sstevel@tonic-gate releasef(sock); 869*0Sstevel@tonic-gate return (set_errno(error)); 870*0Sstevel@tonic-gate } 871*0Sstevel@tonic-gate lwp_stat_update(LWP_STAT_MSGRCV, 1); 872*0Sstevel@tonic-gate so_update_attrs(so, SOACC); 873*0Sstevel@tonic-gate releasef(sock); 874*0Sstevel@tonic-gate 875*0Sstevel@tonic-gate error = copyout_name(name, namelen, namelenp, 876*0Sstevel@tonic-gate msg->msg_name, msg->msg_namelen); 877*0Sstevel@tonic-gate if 
(error) 878*0Sstevel@tonic-gate goto err; 879*0Sstevel@tonic-gate 880*0Sstevel@tonic-gate if (flagsp != NULL) { 881*0Sstevel@tonic-gate /* 882*0Sstevel@tonic-gate * Clear internal flag. 883*0Sstevel@tonic-gate */ 884*0Sstevel@tonic-gate msg->msg_flags &= ~MSG_XPG4_2; 885*0Sstevel@tonic-gate 886*0Sstevel@tonic-gate /* 887*0Sstevel@tonic-gate * Determine MSG_CTRUNC. sorecvmsg sets MSG_CTRUNC only 888*0Sstevel@tonic-gate * when controllen is zero and there is control data to 889*0Sstevel@tonic-gate * copy out. 890*0Sstevel@tonic-gate */ 891*0Sstevel@tonic-gate if (controllen != 0 && 892*0Sstevel@tonic-gate (msg->msg_controllen > controllen || control == NULL)) { 893*0Sstevel@tonic-gate dprint(1, ("recvit: CTRUNC %d %d %p\n", 894*0Sstevel@tonic-gate msg->msg_controllen, controllen, control)); 895*0Sstevel@tonic-gate 896*0Sstevel@tonic-gate msg->msg_flags |= MSG_CTRUNC; 897*0Sstevel@tonic-gate } 898*0Sstevel@tonic-gate if (copyout(&msg->msg_flags, flagsp, 899*0Sstevel@tonic-gate sizeof (msg->msg_flags))) { 900*0Sstevel@tonic-gate error = EFAULT; 901*0Sstevel@tonic-gate goto err; 902*0Sstevel@tonic-gate } 903*0Sstevel@tonic-gate } 904*0Sstevel@tonic-gate /* 905*0Sstevel@tonic-gate * Note: This MUST be done last. There can be no "goto err" after this 906*0Sstevel@tonic-gate * point since it could make so_closefds run twice on some part 907*0Sstevel@tonic-gate * of the file descriptor array. 908*0Sstevel@tonic-gate */ 909*0Sstevel@tonic-gate if (controllen != 0) { 910*0Sstevel@tonic-gate if (!(flags & MSG_XPG4_2)) { 911*0Sstevel@tonic-gate /* 912*0Sstevel@tonic-gate * Good old msg_accrights can only return a multiple 913*0Sstevel@tonic-gate * of 4 bytes. 
914*0Sstevel@tonic-gate */ 915*0Sstevel@tonic-gate controllen &= ~((int)sizeof (uint32_t) - 1); 916*0Sstevel@tonic-gate } 917*0Sstevel@tonic-gate error = copyout_arg(control, controllen, controllenp, 918*0Sstevel@tonic-gate msg->msg_control, msg->msg_controllen); 919*0Sstevel@tonic-gate if (error) 920*0Sstevel@tonic-gate goto err; 921*0Sstevel@tonic-gate 922*0Sstevel@tonic-gate if (msg->msg_controllen > controllen || control == NULL) { 923*0Sstevel@tonic-gate if (control == NULL) 924*0Sstevel@tonic-gate controllen = 0; 925*0Sstevel@tonic-gate so_closefds(msg->msg_control, msg->msg_controllen, 926*0Sstevel@tonic-gate !(flags & MSG_XPG4_2), controllen); 927*0Sstevel@tonic-gate } 928*0Sstevel@tonic-gate } 929*0Sstevel@tonic-gate if (msg->msg_namelen != 0) 930*0Sstevel@tonic-gate kmem_free(msg->msg_name, (size_t)msg->msg_namelen); 931*0Sstevel@tonic-gate if (msg->msg_controllen != 0) 932*0Sstevel@tonic-gate kmem_free(msg->msg_control, (size_t)msg->msg_controllen); 933*0Sstevel@tonic-gate return (len - uiop->uio_resid); 934*0Sstevel@tonic-gate 935*0Sstevel@tonic-gate err: 936*0Sstevel@tonic-gate /* 937*0Sstevel@tonic-gate * If we fail and the control part contains file descriptors 938*0Sstevel@tonic-gate * we have to close the fd's. 
939*0Sstevel@tonic-gate */ 940*0Sstevel@tonic-gate if (msg->msg_controllen != 0) 941*0Sstevel@tonic-gate so_closefds(msg->msg_control, msg->msg_controllen, 942*0Sstevel@tonic-gate !(flags & MSG_XPG4_2), 0); 943*0Sstevel@tonic-gate if (msg->msg_namelen != 0) 944*0Sstevel@tonic-gate kmem_free(msg->msg_name, (size_t)msg->msg_namelen); 945*0Sstevel@tonic-gate if (msg->msg_controllen != 0) 946*0Sstevel@tonic-gate kmem_free(msg->msg_control, (size_t)msg->msg_controllen); 947*0Sstevel@tonic-gate return (set_errno(error)); 948*0Sstevel@tonic-gate } 949*0Sstevel@tonic-gate 950*0Sstevel@tonic-gate /* 951*0Sstevel@tonic-gate * Native system call 952*0Sstevel@tonic-gate */ 953*0Sstevel@tonic-gate ssize_t 954*0Sstevel@tonic-gate recv(int sock, void *buffer, size_t len, int flags) 955*0Sstevel@tonic-gate { 956*0Sstevel@tonic-gate struct nmsghdr lmsg; 957*0Sstevel@tonic-gate struct uio auio; 958*0Sstevel@tonic-gate struct iovec aiov[1]; 959*0Sstevel@tonic-gate 960*0Sstevel@tonic-gate dprint(1, ("recv(%d, %p, %ld, %d)\n", 961*0Sstevel@tonic-gate sock, buffer, len, flags)); 962*0Sstevel@tonic-gate 963*0Sstevel@tonic-gate if ((ssize_t)len < 0) { 964*0Sstevel@tonic-gate return (set_errno(EINVAL)); 965*0Sstevel@tonic-gate } 966*0Sstevel@tonic-gate 967*0Sstevel@tonic-gate aiov[0].iov_base = buffer; 968*0Sstevel@tonic-gate aiov[0].iov_len = len; 969*0Sstevel@tonic-gate auio.uio_loffset = 0; 970*0Sstevel@tonic-gate auio.uio_iov = aiov; 971*0Sstevel@tonic-gate auio.uio_iovcnt = 1; 972*0Sstevel@tonic-gate auio.uio_resid = len; 973*0Sstevel@tonic-gate auio.uio_segflg = UIO_USERSPACE; 974*0Sstevel@tonic-gate auio.uio_limit = 0; 975*0Sstevel@tonic-gate 976*0Sstevel@tonic-gate lmsg.msg_namelen = 0; 977*0Sstevel@tonic-gate lmsg.msg_controllen = 0; 978*0Sstevel@tonic-gate lmsg.msg_flags = 0; 979*0Sstevel@tonic-gate return (recvit(sock, &lmsg, &auio, flags, NULL, NULL, NULL)); 980*0Sstevel@tonic-gate } 981*0Sstevel@tonic-gate 982*0Sstevel@tonic-gate ssize_t 983*0Sstevel@tonic-gate recvfrom(int 
sock, void *buffer, size_t len, int flags, 984*0Sstevel@tonic-gate struct sockaddr *name, socklen_t *namelenp) 985*0Sstevel@tonic-gate { 986*0Sstevel@tonic-gate struct nmsghdr lmsg; 987*0Sstevel@tonic-gate struct uio auio; 988*0Sstevel@tonic-gate struct iovec aiov[1]; 989*0Sstevel@tonic-gate 990*0Sstevel@tonic-gate dprint(1, ("recvfrom(%d, %p, %ld, %d, %p, %p)\n", 991*0Sstevel@tonic-gate sock, buffer, len, flags, name, namelenp)); 992*0Sstevel@tonic-gate 993*0Sstevel@tonic-gate if ((ssize_t)len < 0) { 994*0Sstevel@tonic-gate return (set_errno(EINVAL)); 995*0Sstevel@tonic-gate } 996*0Sstevel@tonic-gate 997*0Sstevel@tonic-gate aiov[0].iov_base = buffer; 998*0Sstevel@tonic-gate aiov[0].iov_len = len; 999*0Sstevel@tonic-gate auio.uio_loffset = 0; 1000*0Sstevel@tonic-gate auio.uio_iov = aiov; 1001*0Sstevel@tonic-gate auio.uio_iovcnt = 1; 1002*0Sstevel@tonic-gate auio.uio_resid = len; 1003*0Sstevel@tonic-gate auio.uio_segflg = UIO_USERSPACE; 1004*0Sstevel@tonic-gate auio.uio_limit = 0; 1005*0Sstevel@tonic-gate 1006*0Sstevel@tonic-gate lmsg.msg_name = (char *)name; 1007*0Sstevel@tonic-gate if (namelenp != NULL) { 1008*0Sstevel@tonic-gate if (copyin(namelenp, &lmsg.msg_namelen, 1009*0Sstevel@tonic-gate sizeof (lmsg.msg_namelen))) 1010*0Sstevel@tonic-gate return (set_errno(EFAULT)); 1011*0Sstevel@tonic-gate } else { 1012*0Sstevel@tonic-gate lmsg.msg_namelen = 0; 1013*0Sstevel@tonic-gate } 1014*0Sstevel@tonic-gate lmsg.msg_controllen = 0; 1015*0Sstevel@tonic-gate lmsg.msg_flags = 0; 1016*0Sstevel@tonic-gate 1017*0Sstevel@tonic-gate return (recvit(sock, &lmsg, &auio, flags, namelenp, NULL, NULL)); 1018*0Sstevel@tonic-gate } 1019*0Sstevel@tonic-gate 1020*0Sstevel@tonic-gate /* 1021*0Sstevel@tonic-gate * Uses the MSG_XPG4_2 flag to determine if the caller is using 1022*0Sstevel@tonic-gate * struct omsghdr or struct nmsghdr. 
1023*0Sstevel@tonic-gate */ 1024*0Sstevel@tonic-gate ssize_t 1025*0Sstevel@tonic-gate recvmsg(int sock, struct nmsghdr *msg, int flags) 1026*0Sstevel@tonic-gate { 1027*0Sstevel@tonic-gate STRUCT_DECL(nmsghdr, u_lmsg); 1028*0Sstevel@tonic-gate STRUCT_HANDLE(nmsghdr, umsgptr); 1029*0Sstevel@tonic-gate struct nmsghdr lmsg; 1030*0Sstevel@tonic-gate struct uio auio; 1031*0Sstevel@tonic-gate struct iovec aiov[MSG_MAXIOVLEN]; 1032*0Sstevel@tonic-gate int iovcnt; 1033*0Sstevel@tonic-gate ssize_t len; 1034*0Sstevel@tonic-gate int i; 1035*0Sstevel@tonic-gate int *flagsp; 1036*0Sstevel@tonic-gate model_t model; 1037*0Sstevel@tonic-gate 1038*0Sstevel@tonic-gate dprint(1, ("recvmsg(%d, %p, %d)\n", 1039*0Sstevel@tonic-gate sock, msg, flags)); 1040*0Sstevel@tonic-gate 1041*0Sstevel@tonic-gate model = get_udatamodel(); 1042*0Sstevel@tonic-gate STRUCT_INIT(u_lmsg, model); 1043*0Sstevel@tonic-gate STRUCT_SET_HANDLE(umsgptr, model, msg); 1044*0Sstevel@tonic-gate 1045*0Sstevel@tonic-gate if (flags & MSG_XPG4_2) { 1046*0Sstevel@tonic-gate if (copyin(msg, STRUCT_BUF(u_lmsg), STRUCT_SIZE(u_lmsg))) 1047*0Sstevel@tonic-gate return (set_errno(EFAULT)); 1048*0Sstevel@tonic-gate flagsp = STRUCT_FADDR(umsgptr, msg_flags); 1049*0Sstevel@tonic-gate } else { 1050*0Sstevel@tonic-gate /* 1051*0Sstevel@tonic-gate * Assumes that nmsghdr and omsghdr are identically shaped 1052*0Sstevel@tonic-gate * except for the added msg_flags field. 1053*0Sstevel@tonic-gate */ 1054*0Sstevel@tonic-gate if (copyin(msg, STRUCT_BUF(u_lmsg), 1055*0Sstevel@tonic-gate SIZEOF_STRUCT(omsghdr, model))) 1056*0Sstevel@tonic-gate return (set_errno(EFAULT)); 1057*0Sstevel@tonic-gate STRUCT_FSET(u_lmsg, msg_flags, 0); 1058*0Sstevel@tonic-gate flagsp = NULL; 1059*0Sstevel@tonic-gate } 1060*0Sstevel@tonic-gate 1061*0Sstevel@tonic-gate /* 1062*0Sstevel@tonic-gate * Code below us will kmem_alloc memory and hang it 1063*0Sstevel@tonic-gate * off msg_control and msg_name fields. 
This forces 1064*0Sstevel@tonic-gate * us to copy the structure to its native form. 1065*0Sstevel@tonic-gate */ 1066*0Sstevel@tonic-gate lmsg.msg_name = STRUCT_FGETP(u_lmsg, msg_name); 1067*0Sstevel@tonic-gate lmsg.msg_namelen = STRUCT_FGET(u_lmsg, msg_namelen); 1068*0Sstevel@tonic-gate lmsg.msg_iov = STRUCT_FGETP(u_lmsg, msg_iov); 1069*0Sstevel@tonic-gate lmsg.msg_iovlen = STRUCT_FGET(u_lmsg, msg_iovlen); 1070*0Sstevel@tonic-gate lmsg.msg_control = STRUCT_FGETP(u_lmsg, msg_control); 1071*0Sstevel@tonic-gate lmsg.msg_controllen = STRUCT_FGET(u_lmsg, msg_controllen); 1072*0Sstevel@tonic-gate lmsg.msg_flags = STRUCT_FGET(u_lmsg, msg_flags); 1073*0Sstevel@tonic-gate 1074*0Sstevel@tonic-gate iovcnt = lmsg.msg_iovlen; 1075*0Sstevel@tonic-gate 1076*0Sstevel@tonic-gate if (iovcnt <= 0 || iovcnt > MSG_MAXIOVLEN) { 1077*0Sstevel@tonic-gate return (set_errno(EMSGSIZE)); 1078*0Sstevel@tonic-gate } 1079*0Sstevel@tonic-gate 1080*0Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 1081*0Sstevel@tonic-gate /* 1082*0Sstevel@tonic-gate * 32-bit callers need to have their iovec expanded, while ensuring 1083*0Sstevel@tonic-gate * that they can't move more than 2Gbytes of data in a single call. 
1084*0Sstevel@tonic-gate */ 1085*0Sstevel@tonic-gate if (model == DATAMODEL_ILP32) { 1086*0Sstevel@tonic-gate struct iovec32 aiov32[MSG_MAXIOVLEN]; 1087*0Sstevel@tonic-gate ssize32_t count32; 1088*0Sstevel@tonic-gate 1089*0Sstevel@tonic-gate if (copyin((struct iovec32 *)lmsg.msg_iov, aiov32, 1090*0Sstevel@tonic-gate iovcnt * sizeof (struct iovec32))) 1091*0Sstevel@tonic-gate return (set_errno(EFAULT)); 1092*0Sstevel@tonic-gate 1093*0Sstevel@tonic-gate count32 = 0; 1094*0Sstevel@tonic-gate for (i = 0; i < iovcnt; i++) { 1095*0Sstevel@tonic-gate ssize32_t iovlen32; 1096*0Sstevel@tonic-gate 1097*0Sstevel@tonic-gate iovlen32 = aiov32[i].iov_len; 1098*0Sstevel@tonic-gate count32 += iovlen32; 1099*0Sstevel@tonic-gate if (iovlen32 < 0 || count32 < 0) 1100*0Sstevel@tonic-gate return (set_errno(EINVAL)); 1101*0Sstevel@tonic-gate aiov[i].iov_len = iovlen32; 1102*0Sstevel@tonic-gate aiov[i].iov_base = 1103*0Sstevel@tonic-gate (caddr_t)(uintptr_t)aiov32[i].iov_base; 1104*0Sstevel@tonic-gate } 1105*0Sstevel@tonic-gate } else 1106*0Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */ 1107*0Sstevel@tonic-gate if (copyin(lmsg.msg_iov, aiov, iovcnt * sizeof (struct iovec))) { 1108*0Sstevel@tonic-gate return (set_errno(EFAULT)); 1109*0Sstevel@tonic-gate } 1110*0Sstevel@tonic-gate len = 0; 1111*0Sstevel@tonic-gate for (i = 0; i < iovcnt; i++) { 1112*0Sstevel@tonic-gate ssize_t iovlen = aiov[i].iov_len; 1113*0Sstevel@tonic-gate len += iovlen; 1114*0Sstevel@tonic-gate if (iovlen < 0 || len < 0) { 1115*0Sstevel@tonic-gate return (set_errno(EINVAL)); 1116*0Sstevel@tonic-gate } 1117*0Sstevel@tonic-gate } 1118*0Sstevel@tonic-gate auio.uio_loffset = 0; 1119*0Sstevel@tonic-gate auio.uio_iov = aiov; 1120*0Sstevel@tonic-gate auio.uio_iovcnt = iovcnt; 1121*0Sstevel@tonic-gate auio.uio_resid = len; 1122*0Sstevel@tonic-gate auio.uio_segflg = UIO_USERSPACE; 1123*0Sstevel@tonic-gate auio.uio_limit = 0; 1124*0Sstevel@tonic-gate 1125*0Sstevel@tonic-gate if (lmsg.msg_control != NULL && 
1126*0Sstevel@tonic-gate (do_useracc == 0 || 1127*0Sstevel@tonic-gate useracc(lmsg.msg_control, lmsg.msg_controllen, 1128*0Sstevel@tonic-gate B_WRITE) != 0)) { 1129*0Sstevel@tonic-gate return (set_errno(EFAULT)); 1130*0Sstevel@tonic-gate } 1131*0Sstevel@tonic-gate 1132*0Sstevel@tonic-gate return (recvit(sock, &lmsg, &auio, flags, 1133*0Sstevel@tonic-gate STRUCT_FADDR(umsgptr, msg_namelen), 1134*0Sstevel@tonic-gate STRUCT_FADDR(umsgptr, msg_controllen), flagsp)); 1135*0Sstevel@tonic-gate } 1136*0Sstevel@tonic-gate 1137*0Sstevel@tonic-gate /* 1138*0Sstevel@tonic-gate * Common send function. 1139*0Sstevel@tonic-gate */ 1140*0Sstevel@tonic-gate static ssize_t 1141*0Sstevel@tonic-gate sendit(int sock, struct nmsghdr *msg, struct uio *uiop, int flags) 1142*0Sstevel@tonic-gate { 1143*0Sstevel@tonic-gate struct sonode *so; 1144*0Sstevel@tonic-gate file_t *fp; 1145*0Sstevel@tonic-gate void *name; 1146*0Sstevel@tonic-gate socklen_t namelen; 1147*0Sstevel@tonic-gate void *control; 1148*0Sstevel@tonic-gate socklen_t controllen; 1149*0Sstevel@tonic-gate ssize_t len; 1150*0Sstevel@tonic-gate int error; 1151*0Sstevel@tonic-gate 1152*0Sstevel@tonic-gate if ((so = getsonode(sock, &error, &fp)) == NULL) 1153*0Sstevel@tonic-gate return (set_errno(error)); 1154*0Sstevel@tonic-gate 1155*0Sstevel@tonic-gate uiop->uio_fmode = fp->f_flag; 1156*0Sstevel@tonic-gate 1157*0Sstevel@tonic-gate if (so->so_family == AF_UNIX) 1158*0Sstevel@tonic-gate uiop->uio_extflg = UIO_COPY_CACHED; 1159*0Sstevel@tonic-gate else 1160*0Sstevel@tonic-gate uiop->uio_extflg = UIO_COPY_DEFAULT; 1161*0Sstevel@tonic-gate 1162*0Sstevel@tonic-gate /* Allocate and copyin name and control */ 1163*0Sstevel@tonic-gate name = msg->msg_name; 1164*0Sstevel@tonic-gate namelen = msg->msg_namelen; 1165*0Sstevel@tonic-gate if (name != NULL && namelen != 0) { 1166*0Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 1167*0Sstevel@tonic-gate name = copyin_name(so, 1168*0Sstevel@tonic-gate (struct sockaddr *)name, 
1169*0Sstevel@tonic-gate &namelen, &error); 1170*0Sstevel@tonic-gate if (name == NULL) 1171*0Sstevel@tonic-gate goto done3; 1172*0Sstevel@tonic-gate /* copyin_name null terminates addresses for AF_UNIX */ 1173*0Sstevel@tonic-gate msg->msg_namelen = namelen; 1174*0Sstevel@tonic-gate msg->msg_name = name; 1175*0Sstevel@tonic-gate } else { 1176*0Sstevel@tonic-gate msg->msg_name = name = NULL; 1177*0Sstevel@tonic-gate msg->msg_namelen = namelen = 0; 1178*0Sstevel@tonic-gate } 1179*0Sstevel@tonic-gate 1180*0Sstevel@tonic-gate control = msg->msg_control; 1181*0Sstevel@tonic-gate controllen = msg->msg_controllen; 1182*0Sstevel@tonic-gate if ((control != NULL) && (controllen != 0)) { 1183*0Sstevel@tonic-gate /* 1184*0Sstevel@tonic-gate * Verify that the length is not excessive to prevent 1185*0Sstevel@tonic-gate * an application from consuming all of kernel memory. 1186*0Sstevel@tonic-gate */ 1187*0Sstevel@tonic-gate if (controllen > SO_MAXARGSIZE) { 1188*0Sstevel@tonic-gate error = EINVAL; 1189*0Sstevel@tonic-gate goto done2; 1190*0Sstevel@tonic-gate } 1191*0Sstevel@tonic-gate control = kmem_alloc(controllen, KM_SLEEP); 1192*0Sstevel@tonic-gate 1193*0Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 1194*0Sstevel@tonic-gate if (copyin(msg->msg_control, control, controllen)) { 1195*0Sstevel@tonic-gate error = EFAULT; 1196*0Sstevel@tonic-gate goto done1; 1197*0Sstevel@tonic-gate } 1198*0Sstevel@tonic-gate msg->msg_control = control; 1199*0Sstevel@tonic-gate } else { 1200*0Sstevel@tonic-gate msg->msg_control = control = NULL; 1201*0Sstevel@tonic-gate msg->msg_controllen = controllen = 0; 1202*0Sstevel@tonic-gate } 1203*0Sstevel@tonic-gate 1204*0Sstevel@tonic-gate len = uiop->uio_resid; 1205*0Sstevel@tonic-gate msg->msg_flags = flags; 1206*0Sstevel@tonic-gate 1207*0Sstevel@tonic-gate error = SOP_SENDMSG(so, msg, uiop); 1208*0Sstevel@tonic-gate done1: 1209*0Sstevel@tonic-gate if (control != NULL) 1210*0Sstevel@tonic-gate kmem_free(control, controllen); 
1211*0Sstevel@tonic-gate done2: 1212*0Sstevel@tonic-gate if (name != NULL) 1213*0Sstevel@tonic-gate kmem_free(name, namelen); 1214*0Sstevel@tonic-gate done3: 1215*0Sstevel@tonic-gate if (error != 0) { 1216*0Sstevel@tonic-gate releasef(sock); 1217*0Sstevel@tonic-gate return (set_errno(error)); 1218*0Sstevel@tonic-gate } 1219*0Sstevel@tonic-gate lwp_stat_update(LWP_STAT_MSGSND, 1); 1220*0Sstevel@tonic-gate so_update_attrs(so, SOMOD); 1221*0Sstevel@tonic-gate releasef(sock); 1222*0Sstevel@tonic-gate return (len - uiop->uio_resid); 1223*0Sstevel@tonic-gate } 1224*0Sstevel@tonic-gate 1225*0Sstevel@tonic-gate /* 1226*0Sstevel@tonic-gate * Native system call 1227*0Sstevel@tonic-gate */ 1228*0Sstevel@tonic-gate ssize_t 1229*0Sstevel@tonic-gate send(int sock, void *buffer, size_t len, int flags) 1230*0Sstevel@tonic-gate { 1231*0Sstevel@tonic-gate struct nmsghdr lmsg; 1232*0Sstevel@tonic-gate struct uio auio; 1233*0Sstevel@tonic-gate struct iovec aiov[1]; 1234*0Sstevel@tonic-gate 1235*0Sstevel@tonic-gate dprint(1, ("send(%d, %p, %ld, %d)\n", 1236*0Sstevel@tonic-gate sock, buffer, len, flags)); 1237*0Sstevel@tonic-gate 1238*0Sstevel@tonic-gate if ((ssize_t)len < 0) { 1239*0Sstevel@tonic-gate return (set_errno(EINVAL)); 1240*0Sstevel@tonic-gate } 1241*0Sstevel@tonic-gate 1242*0Sstevel@tonic-gate aiov[0].iov_base = buffer; 1243*0Sstevel@tonic-gate aiov[0].iov_len = len; 1244*0Sstevel@tonic-gate auio.uio_loffset = 0; 1245*0Sstevel@tonic-gate auio.uio_iov = aiov; 1246*0Sstevel@tonic-gate auio.uio_iovcnt = 1; 1247*0Sstevel@tonic-gate auio.uio_resid = len; 1248*0Sstevel@tonic-gate auio.uio_segflg = UIO_USERSPACE; 1249*0Sstevel@tonic-gate auio.uio_limit = 0; 1250*0Sstevel@tonic-gate 1251*0Sstevel@tonic-gate lmsg.msg_name = NULL; 1252*0Sstevel@tonic-gate lmsg.msg_control = NULL; 1253*0Sstevel@tonic-gate if (!(flags & MSG_XPG4_2)) { 1254*0Sstevel@tonic-gate /* 1255*0Sstevel@tonic-gate * In order to be compatible with the libsocket/sockmod 1256*0Sstevel@tonic-gate * implementation we 
set EOR for all send* calls. 1257*0Sstevel@tonic-gate */ 1258*0Sstevel@tonic-gate flags |= MSG_EOR; 1259*0Sstevel@tonic-gate } 1260*0Sstevel@tonic-gate return (sendit(sock, &lmsg, &auio, flags)); 1261*0Sstevel@tonic-gate } 1262*0Sstevel@tonic-gate 1263*0Sstevel@tonic-gate /* 1264*0Sstevel@tonic-gate * Uses the MSG_XPG4_2 flag to determine if the caller is using 1265*0Sstevel@tonic-gate * struct omsghdr or struct nmsghdr. 1266*0Sstevel@tonic-gate */ 1267*0Sstevel@tonic-gate ssize_t 1268*0Sstevel@tonic-gate sendmsg(int sock, struct nmsghdr *msg, int flags) 1269*0Sstevel@tonic-gate { 1270*0Sstevel@tonic-gate struct nmsghdr lmsg; 1271*0Sstevel@tonic-gate STRUCT_DECL(nmsghdr, u_lmsg); 1272*0Sstevel@tonic-gate struct uio auio; 1273*0Sstevel@tonic-gate struct iovec aiov[MSG_MAXIOVLEN]; 1274*0Sstevel@tonic-gate int iovcnt; 1275*0Sstevel@tonic-gate ssize_t len; 1276*0Sstevel@tonic-gate int i; 1277*0Sstevel@tonic-gate model_t model; 1278*0Sstevel@tonic-gate 1279*0Sstevel@tonic-gate dprint(1, ("sendmsg(%d, %p, %d)\n", sock, msg, flags)); 1280*0Sstevel@tonic-gate 1281*0Sstevel@tonic-gate model = get_udatamodel(); 1282*0Sstevel@tonic-gate STRUCT_INIT(u_lmsg, model); 1283*0Sstevel@tonic-gate 1284*0Sstevel@tonic-gate if (flags & MSG_XPG4_2) { 1285*0Sstevel@tonic-gate if (copyin(msg, (char *)STRUCT_BUF(u_lmsg), 1286*0Sstevel@tonic-gate STRUCT_SIZE(u_lmsg))) 1287*0Sstevel@tonic-gate return (set_errno(EFAULT)); 1288*0Sstevel@tonic-gate } else { 1289*0Sstevel@tonic-gate /* 1290*0Sstevel@tonic-gate * Assumes that nmsghdr and omsghdr are identically shaped 1291*0Sstevel@tonic-gate * except for the added msg_flags field. 
1292*0Sstevel@tonic-gate */ 1293*0Sstevel@tonic-gate if (copyin(msg, (char *)STRUCT_BUF(u_lmsg), 1294*0Sstevel@tonic-gate SIZEOF_STRUCT(omsghdr, model))) 1295*0Sstevel@tonic-gate return (set_errno(EFAULT)); 1296*0Sstevel@tonic-gate /* 1297*0Sstevel@tonic-gate * In order to be compatible with the libsocket/sockmod 1298*0Sstevel@tonic-gate * implementation we set EOR for all send* calls. 1299*0Sstevel@tonic-gate */ 1300*0Sstevel@tonic-gate flags |= MSG_EOR; 1301*0Sstevel@tonic-gate } 1302*0Sstevel@tonic-gate 1303*0Sstevel@tonic-gate /* 1304*0Sstevel@tonic-gate * Code below us will kmem_alloc memory and hang it 1305*0Sstevel@tonic-gate * off msg_control and msg_name fields. This forces 1306*0Sstevel@tonic-gate * us to copy the structure to its native form. 1307*0Sstevel@tonic-gate */ 1308*0Sstevel@tonic-gate lmsg.msg_name = STRUCT_FGETP(u_lmsg, msg_name); 1309*0Sstevel@tonic-gate lmsg.msg_namelen = STRUCT_FGET(u_lmsg, msg_namelen); 1310*0Sstevel@tonic-gate lmsg.msg_iov = STRUCT_FGETP(u_lmsg, msg_iov); 1311*0Sstevel@tonic-gate lmsg.msg_iovlen = STRUCT_FGET(u_lmsg, msg_iovlen); 1312*0Sstevel@tonic-gate lmsg.msg_control = STRUCT_FGETP(u_lmsg, msg_control); 1313*0Sstevel@tonic-gate lmsg.msg_controllen = STRUCT_FGET(u_lmsg, msg_controllen); 1314*0Sstevel@tonic-gate lmsg.msg_flags = STRUCT_FGET(u_lmsg, msg_flags); 1315*0Sstevel@tonic-gate 1316*0Sstevel@tonic-gate iovcnt = lmsg.msg_iovlen; 1317*0Sstevel@tonic-gate 1318*0Sstevel@tonic-gate if (iovcnt <= 0 || iovcnt > MSG_MAXIOVLEN) { 1319*0Sstevel@tonic-gate /* 1320*0Sstevel@tonic-gate * Unless this is XPG 4.2 we allow iovcnt == 0 to 1321*0Sstevel@tonic-gate * be compatible with SunOS 4.X and 4.4BSD. 
1322*0Sstevel@tonic-gate */ 1323*0Sstevel@tonic-gate if (iovcnt != 0 || (flags & MSG_XPG4_2)) 1324*0Sstevel@tonic-gate return (set_errno(EMSGSIZE)); 1325*0Sstevel@tonic-gate } 1326*0Sstevel@tonic-gate 1327*0Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 1328*0Sstevel@tonic-gate /* 1329*0Sstevel@tonic-gate * 32-bit callers need to have their iovec expanded, while ensuring 1330*0Sstevel@tonic-gate * that they can't move more than 2Gbytes of data in a single call. 1331*0Sstevel@tonic-gate */ 1332*0Sstevel@tonic-gate if (model == DATAMODEL_ILP32) { 1333*0Sstevel@tonic-gate struct iovec32 aiov32[MSG_MAXIOVLEN]; 1334*0Sstevel@tonic-gate ssize32_t count32; 1335*0Sstevel@tonic-gate 1336*0Sstevel@tonic-gate if (iovcnt != 0 && 1337*0Sstevel@tonic-gate copyin((struct iovec32 *)lmsg.msg_iov, aiov32, 1338*0Sstevel@tonic-gate iovcnt * sizeof (struct iovec32))) 1339*0Sstevel@tonic-gate return (set_errno(EFAULT)); 1340*0Sstevel@tonic-gate 1341*0Sstevel@tonic-gate count32 = 0; 1342*0Sstevel@tonic-gate for (i = 0; i < iovcnt; i++) { 1343*0Sstevel@tonic-gate ssize32_t iovlen32; 1344*0Sstevel@tonic-gate 1345*0Sstevel@tonic-gate iovlen32 = aiov32[i].iov_len; 1346*0Sstevel@tonic-gate count32 += iovlen32; 1347*0Sstevel@tonic-gate if (iovlen32 < 0 || count32 < 0) 1348*0Sstevel@tonic-gate return (set_errno(EINVAL)); 1349*0Sstevel@tonic-gate aiov[i].iov_len = iovlen32; 1350*0Sstevel@tonic-gate aiov[i].iov_base = 1351*0Sstevel@tonic-gate (caddr_t)(uintptr_t)aiov32[i].iov_base; 1352*0Sstevel@tonic-gate } 1353*0Sstevel@tonic-gate } else 1354*0Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */ 1355*0Sstevel@tonic-gate if (iovcnt != 0 && 1356*0Sstevel@tonic-gate copyin(lmsg.msg_iov, aiov, 1357*0Sstevel@tonic-gate (unsigned)iovcnt * sizeof (struct iovec))) { 1358*0Sstevel@tonic-gate return (set_errno(EFAULT)); 1359*0Sstevel@tonic-gate } 1360*0Sstevel@tonic-gate len = 0; 1361*0Sstevel@tonic-gate for (i = 0; i < iovcnt; i++) { 1362*0Sstevel@tonic-gate ssize_t iovlen = aiov[i].iov_len; 
1363*0Sstevel@tonic-gate len += iovlen; 1364*0Sstevel@tonic-gate if (iovlen < 0 || len < 0) { 1365*0Sstevel@tonic-gate return (set_errno(EINVAL)); 1366*0Sstevel@tonic-gate } 1367*0Sstevel@tonic-gate } 1368*0Sstevel@tonic-gate auio.uio_loffset = 0; 1369*0Sstevel@tonic-gate auio.uio_iov = aiov; 1370*0Sstevel@tonic-gate auio.uio_iovcnt = iovcnt; 1371*0Sstevel@tonic-gate auio.uio_resid = len; 1372*0Sstevel@tonic-gate auio.uio_segflg = UIO_USERSPACE; 1373*0Sstevel@tonic-gate auio.uio_limit = 0; 1374*0Sstevel@tonic-gate 1375*0Sstevel@tonic-gate return (sendit(sock, &lmsg, &auio, flags)); 1376*0Sstevel@tonic-gate } 1377*0Sstevel@tonic-gate 1378*0Sstevel@tonic-gate ssize_t 1379*0Sstevel@tonic-gate sendto(int sock, void *buffer, size_t len, int flags, 1380*0Sstevel@tonic-gate struct sockaddr *name, socklen_t namelen) 1381*0Sstevel@tonic-gate { 1382*0Sstevel@tonic-gate struct nmsghdr lmsg; 1383*0Sstevel@tonic-gate struct uio auio; 1384*0Sstevel@tonic-gate struct iovec aiov[1]; 1385*0Sstevel@tonic-gate 1386*0Sstevel@tonic-gate dprint(1, ("sendto(%d, %p, %ld, %d, %p, %d)\n", 1387*0Sstevel@tonic-gate sock, buffer, len, flags, name, namelen)); 1388*0Sstevel@tonic-gate 1389*0Sstevel@tonic-gate if ((ssize_t)len < 0) { 1390*0Sstevel@tonic-gate return (set_errno(EINVAL)); 1391*0Sstevel@tonic-gate } 1392*0Sstevel@tonic-gate 1393*0Sstevel@tonic-gate aiov[0].iov_base = buffer; 1394*0Sstevel@tonic-gate aiov[0].iov_len = len; 1395*0Sstevel@tonic-gate auio.uio_loffset = 0; 1396*0Sstevel@tonic-gate auio.uio_iov = aiov; 1397*0Sstevel@tonic-gate auio.uio_iovcnt = 1; 1398*0Sstevel@tonic-gate auio.uio_resid = len; 1399*0Sstevel@tonic-gate auio.uio_segflg = UIO_USERSPACE; 1400*0Sstevel@tonic-gate auio.uio_limit = 0; 1401*0Sstevel@tonic-gate 1402*0Sstevel@tonic-gate lmsg.msg_name = (char *)name; 1403*0Sstevel@tonic-gate lmsg.msg_namelen = namelen; 1404*0Sstevel@tonic-gate lmsg.msg_control = NULL; 1405*0Sstevel@tonic-gate if (!(flags & MSG_XPG4_2)) { 1406*0Sstevel@tonic-gate /* 
1407*0Sstevel@tonic-gate * In order to be compatible with the libsocket/sockmod 1408*0Sstevel@tonic-gate * implementation we set EOR for all send* calls. 1409*0Sstevel@tonic-gate */ 1410*0Sstevel@tonic-gate flags |= MSG_EOR; 1411*0Sstevel@tonic-gate } 1412*0Sstevel@tonic-gate return (sendit(sock, &lmsg, &auio, flags)); 1413*0Sstevel@tonic-gate } 1414*0Sstevel@tonic-gate 1415*0Sstevel@tonic-gate /*ARGSUSED3*/ 1416*0Sstevel@tonic-gate int 1417*0Sstevel@tonic-gate getpeername(int sock, struct sockaddr *name, socklen_t *namelenp, int version) 1418*0Sstevel@tonic-gate { 1419*0Sstevel@tonic-gate struct sonode *so; 1420*0Sstevel@tonic-gate int error; 1421*0Sstevel@tonic-gate socklen_t namelen; 1422*0Sstevel@tonic-gate union { 1423*0Sstevel@tonic-gate struct sockaddr_in sin; 1424*0Sstevel@tonic-gate struct sockaddr_in6 sin6; 1425*0Sstevel@tonic-gate } sin; /* Temporary buffer, common case */ 1426*0Sstevel@tonic-gate void *addr; /* Temporary buffer, uncommon case */ 1427*0Sstevel@tonic-gate socklen_t addrlen, size; 1428*0Sstevel@tonic-gate 1429*0Sstevel@tonic-gate dprint(1, ("getpeername(%d, %p, %p)\n", 1430*0Sstevel@tonic-gate sock, name, namelenp)); 1431*0Sstevel@tonic-gate 1432*0Sstevel@tonic-gate if ((so = getsonode(sock, &error, NULL)) == NULL) 1433*0Sstevel@tonic-gate goto bad; 1434*0Sstevel@tonic-gate 1435*0Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 1436*0Sstevel@tonic-gate if (copyin(namelenp, &namelen, sizeof (namelen)) || 1437*0Sstevel@tonic-gate (name == NULL && namelen != 0)) { 1438*0Sstevel@tonic-gate error = EFAULT; 1439*0Sstevel@tonic-gate goto rel_out; 1440*0Sstevel@tonic-gate } 1441*0Sstevel@tonic-gate /* 1442*0Sstevel@tonic-gate * If a connect or accept has been done, unless we're an Xnet socket, 1443*0Sstevel@tonic-gate * the remote address has already been updated in so_faddr_sa. 
1444*0Sstevel@tonic-gate */ 1445*0Sstevel@tonic-gate if (so->so_version != SOV_SOCKSTREAM && so->so_version != SOV_SOCKBSD || 1446*0Sstevel@tonic-gate !(so->so_state & SS_FADDR_VALID)) { 1447*0Sstevel@tonic-gate if ((error = SOP_GETPEERNAME(so)) != 0) 1448*0Sstevel@tonic-gate goto rel_out; 1449*0Sstevel@tonic-gate } 1450*0Sstevel@tonic-gate 1451*0Sstevel@tonic-gate if (so->so_faddr_maxlen <= sizeof (sin)) { 1452*0Sstevel@tonic-gate size = 0; 1453*0Sstevel@tonic-gate addr = &sin; 1454*0Sstevel@tonic-gate } else { 1455*0Sstevel@tonic-gate /* 1456*0Sstevel@tonic-gate * Allocate temporary to avoid holding so_lock across 1457*0Sstevel@tonic-gate * copyout 1458*0Sstevel@tonic-gate */ 1459*0Sstevel@tonic-gate size = so->so_faddr_maxlen; 1460*0Sstevel@tonic-gate addr = kmem_alloc(size, KM_SLEEP); 1461*0Sstevel@tonic-gate } 1462*0Sstevel@tonic-gate /* Prevent so_faddr_sa/len from changing while accessed */ 1463*0Sstevel@tonic-gate mutex_enter(&so->so_lock); 1464*0Sstevel@tonic-gate if (!(so->so_state & SS_ISCONNECTED)) { 1465*0Sstevel@tonic-gate mutex_exit(&so->so_lock); 1466*0Sstevel@tonic-gate error = ENOTCONN; 1467*0Sstevel@tonic-gate goto free_out; 1468*0Sstevel@tonic-gate } 1469*0Sstevel@tonic-gate addrlen = so->so_faddr_len; 1470*0Sstevel@tonic-gate bcopy(so->so_faddr_sa, addr, addrlen); 1471*0Sstevel@tonic-gate mutex_exit(&so->so_lock); 1472*0Sstevel@tonic-gate 1473*0Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 1474*0Sstevel@tonic-gate error = copyout_name(name, namelen, namelenp, addr, 1475*0Sstevel@tonic-gate (so->so_state & SS_FADDR_NOXLATE) ? 0 : addrlen); 1476*0Sstevel@tonic-gate free_out: 1477*0Sstevel@tonic-gate if (size != 0) 1478*0Sstevel@tonic-gate kmem_free(addr, size); 1479*0Sstevel@tonic-gate rel_out: 1480*0Sstevel@tonic-gate releasef(sock); 1481*0Sstevel@tonic-gate bad: return (error != 0 ? 
set_errno(error) : 0); 1482*0Sstevel@tonic-gate } 1483*0Sstevel@tonic-gate 1484*0Sstevel@tonic-gate /*ARGSUSED3*/ 1485*0Sstevel@tonic-gate int 1486*0Sstevel@tonic-gate getsockname(int sock, struct sockaddr *name, 1487*0Sstevel@tonic-gate socklen_t *namelenp, int version) 1488*0Sstevel@tonic-gate { 1489*0Sstevel@tonic-gate struct sonode *so; 1490*0Sstevel@tonic-gate int error; 1491*0Sstevel@tonic-gate socklen_t namelen; 1492*0Sstevel@tonic-gate union { 1493*0Sstevel@tonic-gate struct sockaddr_in sin; 1494*0Sstevel@tonic-gate struct sockaddr_in6 sin6; 1495*0Sstevel@tonic-gate } sin; /* Temporary buffer, common case */ 1496*0Sstevel@tonic-gate void *addr; /* Temporary buffer, uncommon case */ 1497*0Sstevel@tonic-gate socklen_t addrlen, size; 1498*0Sstevel@tonic-gate 1499*0Sstevel@tonic-gate dprint(1, ("getsockname(%d, %p, %p)\n", 1500*0Sstevel@tonic-gate sock, name, namelenp)); 1501*0Sstevel@tonic-gate 1502*0Sstevel@tonic-gate if ((so = getsonode(sock, &error, NULL)) == NULL) 1503*0Sstevel@tonic-gate goto bad; 1504*0Sstevel@tonic-gate 1505*0Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 1506*0Sstevel@tonic-gate if (copyin(namelenp, &namelen, sizeof (namelen)) || 1507*0Sstevel@tonic-gate (name == NULL && namelen != 0)) { 1508*0Sstevel@tonic-gate error = EFAULT; 1509*0Sstevel@tonic-gate goto rel_out; 1510*0Sstevel@tonic-gate } 1511*0Sstevel@tonic-gate 1512*0Sstevel@tonic-gate /* 1513*0Sstevel@tonic-gate * If a bind or accept has been done, unless we're an Xnet endpoint, 1514*0Sstevel@tonic-gate * the local address has already been updated in so_laddr_sa. 
1515*0Sstevel@tonic-gate */ 1516*0Sstevel@tonic-gate if ((so->so_version != SOV_SOCKSTREAM && 1517*0Sstevel@tonic-gate so->so_version != SOV_SOCKBSD) || 1518*0Sstevel@tonic-gate !(so->so_state & SS_LADDR_VALID)) { 1519*0Sstevel@tonic-gate if ((error = SOP_GETSOCKNAME(so)) != 0) 1520*0Sstevel@tonic-gate goto rel_out; 1521*0Sstevel@tonic-gate } 1522*0Sstevel@tonic-gate 1523*0Sstevel@tonic-gate if (so->so_laddr_maxlen <= sizeof (sin)) { 1524*0Sstevel@tonic-gate size = 0; 1525*0Sstevel@tonic-gate addr = &sin; 1526*0Sstevel@tonic-gate } else { 1527*0Sstevel@tonic-gate /* 1528*0Sstevel@tonic-gate * Allocate temporary to avoid holding so_lock across 1529*0Sstevel@tonic-gate * copyout 1530*0Sstevel@tonic-gate */ 1531*0Sstevel@tonic-gate size = so->so_laddr_maxlen; 1532*0Sstevel@tonic-gate addr = kmem_alloc(size, KM_SLEEP); 1533*0Sstevel@tonic-gate } 1534*0Sstevel@tonic-gate /* Prevent so_laddr_sa/len from changing while accessed */ 1535*0Sstevel@tonic-gate mutex_enter(&so->so_lock); 1536*0Sstevel@tonic-gate addrlen = so->so_laddr_len; 1537*0Sstevel@tonic-gate bcopy(so->so_laddr_sa, addr, addrlen); 1538*0Sstevel@tonic-gate mutex_exit(&so->so_lock); 1539*0Sstevel@tonic-gate 1540*0Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 1541*0Sstevel@tonic-gate error = copyout_name(name, namelen, namelenp, 1542*0Sstevel@tonic-gate addr, addrlen); 1543*0Sstevel@tonic-gate if (size != 0) 1544*0Sstevel@tonic-gate kmem_free(addr, size); 1545*0Sstevel@tonic-gate rel_out: 1546*0Sstevel@tonic-gate releasef(sock); 1547*0Sstevel@tonic-gate bad: return (error != 0 ? 
set_errno(error) : 0); 1548*0Sstevel@tonic-gate } 1549*0Sstevel@tonic-gate 1550*0Sstevel@tonic-gate /*ARGSUSED5*/ 1551*0Sstevel@tonic-gate int 1552*0Sstevel@tonic-gate getsockopt(int sock, 1553*0Sstevel@tonic-gate int level, 1554*0Sstevel@tonic-gate int option_name, 1555*0Sstevel@tonic-gate void *option_value, 1556*0Sstevel@tonic-gate socklen_t *option_lenp, 1557*0Sstevel@tonic-gate int version) 1558*0Sstevel@tonic-gate { 1559*0Sstevel@tonic-gate struct sonode *so; 1560*0Sstevel@tonic-gate socklen_t optlen, optlen_res; 1561*0Sstevel@tonic-gate void *optval; 1562*0Sstevel@tonic-gate int error; 1563*0Sstevel@tonic-gate 1564*0Sstevel@tonic-gate dprint(1, ("getsockopt(%d, %d, %d, %p, %p)\n", 1565*0Sstevel@tonic-gate sock, level, option_name, option_value, option_lenp)); 1566*0Sstevel@tonic-gate 1567*0Sstevel@tonic-gate if ((so = getsonode(sock, &error, NULL)) == NULL) 1568*0Sstevel@tonic-gate return (set_errno(error)); 1569*0Sstevel@tonic-gate 1570*0Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 1571*0Sstevel@tonic-gate if (copyin(option_lenp, &optlen, sizeof (optlen))) { 1572*0Sstevel@tonic-gate releasef(sock); 1573*0Sstevel@tonic-gate return (set_errno(EFAULT)); 1574*0Sstevel@tonic-gate } 1575*0Sstevel@tonic-gate /* 1576*0Sstevel@tonic-gate * Verify that the length is not excessive to prevent 1577*0Sstevel@tonic-gate * an application from consuming all of kernel memory. 1578*0Sstevel@tonic-gate */ 1579*0Sstevel@tonic-gate if (optlen > SO_MAXARGSIZE) { 1580*0Sstevel@tonic-gate error = EINVAL; 1581*0Sstevel@tonic-gate releasef(sock); 1582*0Sstevel@tonic-gate return (set_errno(error)); 1583*0Sstevel@tonic-gate } 1584*0Sstevel@tonic-gate optval = kmem_alloc(optlen, KM_SLEEP); 1585*0Sstevel@tonic-gate optlen_res = optlen; 1586*0Sstevel@tonic-gate error = SOP_GETSOCKOPT(so, level, option_name, optval, 1587*0Sstevel@tonic-gate &optlen_res, (version != SOV_XPG4_2) ? 
0 : _SOGETSOCKOPT_XPG4_2); 1588*0Sstevel@tonic-gate releasef(sock); 1589*0Sstevel@tonic-gate if (error) { 1590*0Sstevel@tonic-gate kmem_free(optval, optlen); 1591*0Sstevel@tonic-gate return (set_errno(error)); 1592*0Sstevel@tonic-gate } 1593*0Sstevel@tonic-gate error = copyout_arg(option_value, optlen, option_lenp, 1594*0Sstevel@tonic-gate optval, optlen_res); 1595*0Sstevel@tonic-gate kmem_free(optval, optlen); 1596*0Sstevel@tonic-gate if (error) 1597*0Sstevel@tonic-gate return (set_errno(error)); 1598*0Sstevel@tonic-gate return (0); 1599*0Sstevel@tonic-gate } 1600*0Sstevel@tonic-gate 1601*0Sstevel@tonic-gate /*ARGSUSED5*/ 1602*0Sstevel@tonic-gate int 1603*0Sstevel@tonic-gate setsockopt(int sock, 1604*0Sstevel@tonic-gate int level, 1605*0Sstevel@tonic-gate int option_name, 1606*0Sstevel@tonic-gate void *option_value, 1607*0Sstevel@tonic-gate socklen_t option_len, 1608*0Sstevel@tonic-gate int version) 1609*0Sstevel@tonic-gate { 1610*0Sstevel@tonic-gate struct sonode *so; 1611*0Sstevel@tonic-gate intptr_t buffer[2]; 1612*0Sstevel@tonic-gate void *optval = NULL; 1613*0Sstevel@tonic-gate int error; 1614*0Sstevel@tonic-gate 1615*0Sstevel@tonic-gate dprint(1, ("setsockopt(%d, %d, %d, %p, %d)\n", 1616*0Sstevel@tonic-gate sock, level, option_name, option_value, option_len)); 1617*0Sstevel@tonic-gate 1618*0Sstevel@tonic-gate if ((so = getsonode(sock, &error, NULL)) == NULL) 1619*0Sstevel@tonic-gate return (set_errno(error)); 1620*0Sstevel@tonic-gate 1621*0Sstevel@tonic-gate if (option_value != NULL) { 1622*0Sstevel@tonic-gate if (option_len != 0) { 1623*0Sstevel@tonic-gate /* 1624*0Sstevel@tonic-gate * Verify that the length is not excessive to prevent 1625*0Sstevel@tonic-gate * an application from consuming all of kernel memory. 
1626*0Sstevel@tonic-gate */ 1627*0Sstevel@tonic-gate if (option_len > SO_MAXARGSIZE) { 1628*0Sstevel@tonic-gate error = EINVAL; 1629*0Sstevel@tonic-gate goto done2; 1630*0Sstevel@tonic-gate } 1631*0Sstevel@tonic-gate optval = option_len <= sizeof (buffer) ? 1632*0Sstevel@tonic-gate &buffer : kmem_alloc((size_t)option_len, KM_SLEEP); 1633*0Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 1634*0Sstevel@tonic-gate if (copyin(option_value, optval, (size_t)option_len)) { 1635*0Sstevel@tonic-gate error = EFAULT; 1636*0Sstevel@tonic-gate goto done1; 1637*0Sstevel@tonic-gate } 1638*0Sstevel@tonic-gate } 1639*0Sstevel@tonic-gate } else 1640*0Sstevel@tonic-gate option_len = 0; 1641*0Sstevel@tonic-gate 1642*0Sstevel@tonic-gate error = SOP_SETSOCKOPT(so, level, option_name, optval, 1643*0Sstevel@tonic-gate (t_uscalar_t)option_len); 1644*0Sstevel@tonic-gate done1: 1645*0Sstevel@tonic-gate if (optval != buffer) 1646*0Sstevel@tonic-gate kmem_free(optval, (size_t)option_len); 1647*0Sstevel@tonic-gate done2: 1648*0Sstevel@tonic-gate releasef(sock); 1649*0Sstevel@tonic-gate if (error) 1650*0Sstevel@tonic-gate return (set_errno(error)); 1651*0Sstevel@tonic-gate return (0); 1652*0Sstevel@tonic-gate } 1653*0Sstevel@tonic-gate 1654*0Sstevel@tonic-gate /* 1655*0Sstevel@tonic-gate * Add config info when devpath is non-NULL; delete info when devpath is NULL. 1656*0Sstevel@tonic-gate * devpath is a user address. 
1657*0Sstevel@tonic-gate */ 1658*0Sstevel@tonic-gate int 1659*0Sstevel@tonic-gate sockconfig(int domain, int type, int protocol, char *devpath) 1660*0Sstevel@tonic-gate { 1661*0Sstevel@tonic-gate char *kdevpath; /* Copied in devpath string */ 1662*0Sstevel@tonic-gate size_t kdevpathlen; 1663*0Sstevel@tonic-gate int error = 0; 1664*0Sstevel@tonic-gate 1665*0Sstevel@tonic-gate dprint(1, ("sockconfig(%d, %d, %d, %p)\n", 1666*0Sstevel@tonic-gate domain, type, protocol, devpath)); 1667*0Sstevel@tonic-gate 1668*0Sstevel@tonic-gate if (secpolicy_net_config(CRED(), B_FALSE) != 0) 1669*0Sstevel@tonic-gate return (set_errno(EPERM)); 1670*0Sstevel@tonic-gate 1671*0Sstevel@tonic-gate if (devpath == NULL) { 1672*0Sstevel@tonic-gate /* Deleting an entry */ 1673*0Sstevel@tonic-gate kdevpath = NULL; 1674*0Sstevel@tonic-gate kdevpathlen = 0; 1675*0Sstevel@tonic-gate } else { 1676*0Sstevel@tonic-gate /* 1677*0Sstevel@tonic-gate * Adding an entry. 1678*0Sstevel@tonic-gate * Copyin the devpath. 1679*0Sstevel@tonic-gate * This also makes it possible to check for too long pathnames. 1680*0Sstevel@tonic-gate * Compress the space needed for the devpath before passing it 1681*0Sstevel@tonic-gate * to soconfig - soconfig will store the string until 1682*0Sstevel@tonic-gate * the configuration is removed. 
1683*0Sstevel@tonic-gate */ 1684*0Sstevel@tonic-gate char *buf; 1685*0Sstevel@tonic-gate 1686*0Sstevel@tonic-gate buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1687*0Sstevel@tonic-gate if ((error = copyinstr(devpath, buf, MAXPATHLEN, 1688*0Sstevel@tonic-gate &kdevpathlen)) != 0) { 1689*0Sstevel@tonic-gate kmem_free(buf, MAXPATHLEN); 1690*0Sstevel@tonic-gate goto done; 1691*0Sstevel@tonic-gate } 1692*0Sstevel@tonic-gate 1693*0Sstevel@tonic-gate kdevpath = kmem_alloc(kdevpathlen, KM_SLEEP); 1694*0Sstevel@tonic-gate bcopy(buf, kdevpath, kdevpathlen); 1695*0Sstevel@tonic-gate kdevpath[kdevpathlen - 1] = '\0'; 1696*0Sstevel@tonic-gate 1697*0Sstevel@tonic-gate kmem_free(buf, MAXPATHLEN); 1698*0Sstevel@tonic-gate } 1699*0Sstevel@tonic-gate error = soconfig(domain, type, protocol, kdevpath, (int)kdevpathlen); 1700*0Sstevel@tonic-gate done: 1701*0Sstevel@tonic-gate if (error) { 1702*0Sstevel@tonic-gate eprintline(error); 1703*0Sstevel@tonic-gate return (set_errno(error)); 1704*0Sstevel@tonic-gate } 1705*0Sstevel@tonic-gate return (0); 1706*0Sstevel@tonic-gate } 1707*0Sstevel@tonic-gate 1708*0Sstevel@tonic-gate 1709*0Sstevel@tonic-gate /* 1710*0Sstevel@tonic-gate * Sendfile is implemented through two schemes, direct I/O or by 1711*0Sstevel@tonic-gate * caching in the filesystem page cache. We cache the input file by 1712*0Sstevel@tonic-gate * default and use direct I/O only if sendfile_max_size is set 1713*0Sstevel@tonic-gate * appropriately as explained below. Note that this logic is consistent 1714*0Sstevel@tonic-gate * with other filesystems where caching is turned on by default 1715*0Sstevel@tonic-gate * unless explicitly turned off by using the DIRECTIO ioctl. 1716*0Sstevel@tonic-gate * 1717*0Sstevel@tonic-gate * We choose a slightly different scheme here. One can turn off 1718*0Sstevel@tonic-gate * caching by setting sendfile_max_size to 0. 
One can also enable 1719*0Sstevel@tonic-gate * caching of files <= sendfile_max_size by setting sendfile_max_size 1720*0Sstevel@tonic-gate * to an appropriate value. By default sendfile_max_size is set to the 1721*0Sstevel@tonic-gate * maximum value so that all files are cached. In future, we may provide 1722*0Sstevel@tonic-gate * better interfaces for caching the file. 1723*0Sstevel@tonic-gate * 1724*0Sstevel@tonic-gate * Sendfile through Direct I/O (Zero copy) 1725*0Sstevel@tonic-gate * -------------------------------------- 1726*0Sstevel@tonic-gate * 1727*0Sstevel@tonic-gate * As disks are normally slower than the network, we can't have a 1728*0Sstevel@tonic-gate * single thread that reads the disk and writes to the network. We 1729*0Sstevel@tonic-gate * need to have parallelism. This is done by having the sendfile 1730*0Sstevel@tonic-gate * thread create another thread that reads from the filesystem 1731*0Sstevel@tonic-gate * and queues it for network processing. In this scheme, the data 1732*0Sstevel@tonic-gate * is never copied anywhere, i.e., it is zero copy unlike the other 1733*0Sstevel@tonic-gate * scheme. 1734*0Sstevel@tonic-gate * 1735*0Sstevel@tonic-gate * We have a sendfile queue (snfq) where each sendfile 1736*0Sstevel@tonic-gate * request (snf_req_t) is queued for processing by a thread. Number 1737*0Sstevel@tonic-gate * of threads is dynamically allocated and they exit if they are idling 1738*0Sstevel@tonic-gate * beyond a specified amount of time. When each request (snf_req_t) is 1739*0Sstevel@tonic-gate * processed by a thread, it produces a number of mblk_t structures to 1740*0Sstevel@tonic-gate * be consumed by the sendfile thread. snf_deque and snf_enque are 1741*0Sstevel@tonic-gate * used for consuming and producing mblks. Size of the filesystem 1742*0Sstevel@tonic-gate * read is determined by the tuneable (sendfile_read_size).
A single 1743*0Sstevel@tonic-gate * mblk holds sendfile_read_size worth of data (except the last 1744*0Sstevel@tonic-gate * read of the file) which is sent down as a whole to the network. 1745*0Sstevel@tonic-gate * sendfile_read_size is set to 1 MB as this seems to be the optimal 1746*0Sstevel@tonic-gate * value for the UFS filesystem backed by a striped storage array. 1747*0Sstevel@tonic-gate * 1748*0Sstevel@tonic-gate * Synchronisation between read (producer) and write (consumer) threads. 1749*0Sstevel@tonic-gate * -------------------------------------------------------------------- 1750*0Sstevel@tonic-gate * 1751*0Sstevel@tonic-gate * sr_lock protects sr_mp_head and sr_mp_tail. The lock is held while 1752*0Sstevel@tonic-gate * adding and deleting items in this list. An error can happen anytime 1753*0Sstevel@tonic-gate * during read or write. There could be unprocessed mblks in the 1754*0Sstevel@tonic-gate * sr_mp_XXX list when a read or write error occurs. Whenever an error 1755*0Sstevel@tonic-gate * is encountered, we need two things to happen : 1756*0Sstevel@tonic-gate * 1757*0Sstevel@tonic-gate * a) One of the threads needs to clean up the mblks. 1758*0Sstevel@tonic-gate * b) When one thread encounters an error, the other should stop. 1759*0Sstevel@tonic-gate * 1760*0Sstevel@tonic-gate * For (a), we don't want to penalise the reader thread as it could do 1761*0Sstevel@tonic-gate * some useful work processing other requests. For (b), the error can 1762*0Sstevel@tonic-gate * be detected by examining sr_read_error or sr_write_error. 1763*0Sstevel@tonic-gate * sr_lock protects sr_read_error and sr_write_error. If both reader and 1764*0Sstevel@tonic-gate * writer encounter an error, we need to report the write error back to 1765*0Sstevel@tonic-gate * the application as that's what would have happened if the operations 1766*0Sstevel@tonic-gate * were done sequentially.
With this in mind, following should work : 1767*0Sstevel@tonic-gate * 1768*0Sstevel@tonic-gate * - Check for errors before read or write. 1769*0Sstevel@tonic-gate * - If the reader encounters error, set the error in sr_read_error. 1770*0Sstevel@tonic-gate * Check sr_write_error, if it is set, send cv_signal as it is 1771*0Sstevel@tonic-gate * waiting for reader to complete. If it is not set, the writer 1772*0Sstevel@tonic-gate * is either running sinking data to the network or blocked 1773*0Sstevel@tonic-gate * because of flow control. For handling the latter case, we 1774*0Sstevel@tonic-gate * always send a signal. In any case, it will examine sr_read_error 1775*0Sstevel@tonic-gate * and return. sr_read_error is marked with SR_READ_DONE to tell 1776*0Sstevel@tonic-gate * the writer that the reader is done in all the cases. 1777*0Sstevel@tonic-gate * - If the writer encounters error, set the error in sr_write_error. 1778*0Sstevel@tonic-gate * The reader thread is either blocked because of flow control or 1779*0Sstevel@tonic-gate * running reading data from the disk. For the former, we need to 1780*0Sstevel@tonic-gate * wakeup the thread. Again to keep it simple, we always wake up 1781*0Sstevel@tonic-gate * the reader thread. Then, wait for the read thread to complete 1782*0Sstevel@tonic-gate * if it is not done yet. Cleanup and return. 1783*0Sstevel@tonic-gate * 1784*0Sstevel@tonic-gate * High and low water marks for the read thread. 1785*0Sstevel@tonic-gate * -------------------------------------------- 1786*0Sstevel@tonic-gate * 1787*0Sstevel@tonic-gate * If sendfile() is used to send data over a slow network, we need to 1788*0Sstevel@tonic-gate * make sure that the read thread does not produce data at a faster 1789*0Sstevel@tonic-gate * rate than the network. This can happen if the disk is faster than 1790*0Sstevel@tonic-gate * the network. In such a case, we don't want to build a very large queue. 
1791*0Sstevel@tonic-gate * But we would still like to get all of the network throughput possible. 1792*0Sstevel@tonic-gate * This implies that network should never block waiting for data. 1793*0Sstevel@tonic-gate * As there are a lot of disk throughput/network throughput combinations 1794*0Sstevel@tonic-gate * possible, it is difficult to come up with an accurate number. 1795*0Sstevel@tonic-gate * A typical 10K RPM disk has a max seek latency of 17ms and rotational 1796*0Sstevel@tonic-gate * latency of 3ms for reading a disk block. Thus, the total latency to 1797*0Sstevel@tonic-gate * initiate a new read, transfer data from the disk and queue for 1798*0Sstevel@tonic-gate * transmission would take about a max of 25ms. Today's max transfer rate 1799*0Sstevel@tonic-gate * for network is 100MB/sec. If the thread is blocked because of flow 1800*0Sstevel@tonic-gate * control, it would take 25ms to get new data ready for transmission. 1801*0Sstevel@tonic-gate * We have to make sure that network is not idling, while we are initiating 1802*0Sstevel@tonic-gate * new transfers. So, at 100MB/sec, to keep network busy we would need 1803*0Sstevel@tonic-gate * 2.5MB of data. Rounding off, we keep the low water mark to be 3MB of data. 1804*0Sstevel@tonic-gate * We need to pick a high water mark so that the woken up thread would 1805*0Sstevel@tonic-gate * do considerable work before blocking again to prevent thrashing. Currently, 1806*0Sstevel@tonic-gate * we pick this to be 10 times that of the low water mark. 1807*0Sstevel@tonic-gate * 1808*0Sstevel@tonic-gate * Sendfile with segmap caching (One copy from page cache to mblks). 1809*0Sstevel@tonic-gate * ---------------------------------------------------------------- 1810*0Sstevel@tonic-gate * 1811*0Sstevel@tonic-gate * We use the segmap cache for caching the file, if the size of file 1812*0Sstevel@tonic-gate * is <= sendfile_max_size.
In this case we don't use threads as VM 1813*0Sstevel@tonic-gate * is reasonably fast enough to keep up with the network. If the underlying 1814*0Sstevel@tonic-gate * transport allows, we call segmap_getmapflt() to map MAXBSIZE (8K) worth 1815*0Sstevel@tonic-gate * of data into segmap space, and use the virtual address from segmap 1816*0Sstevel@tonic-gate * directly through desballoc() to avoid copy. Once the transport is done 1817*0Sstevel@tonic-gate * with the data, the mapping will be released through segmap_release() 1818*0Sstevel@tonic-gate * called by the call-back routine. 1819*0Sstevel@tonic-gate * 1820*0Sstevel@tonic-gate * If zero-copy is not allowed by the transport, we simply call VOP_READ() 1821*0Sstevel@tonic-gate * to copy the data from the filesystem into our temporary network buffer. 1822*0Sstevel@tonic-gate * 1823*0Sstevel@tonic-gate * To disable caching, set sendfile_max_size to 0. 1824*0Sstevel@tonic-gate */ 1825*0Sstevel@tonic-gate 1826*0Sstevel@tonic-gate uint_t sendfile_read_size = 1024 * 1024; 1827*0Sstevel@tonic-gate #define SENDFILE_REQ_LOWAT 3 * 1024 * 1024 1828*0Sstevel@tonic-gate uint_t sendfile_req_lowat = SENDFILE_REQ_LOWAT; 1829*0Sstevel@tonic-gate uint_t sendfile_req_hiwat = 10 * SENDFILE_REQ_LOWAT; 1830*0Sstevel@tonic-gate struct sendfile_stats sf_stats; 1831*0Sstevel@tonic-gate struct sendfile_queue *snfq; 1832*0Sstevel@tonic-gate clock_t snfq_timeout; 1833*0Sstevel@tonic-gate off64_t sendfile_max_size; 1834*0Sstevel@tonic-gate 1835*0Sstevel@tonic-gate static void snf_enque(snf_req_t *, mblk_t *); 1836*0Sstevel@tonic-gate static mblk_t *snf_deque(snf_req_t *); 1837*0Sstevel@tonic-gate 1838*0Sstevel@tonic-gate void 1839*0Sstevel@tonic-gate sendfile_init(void) 1840*0Sstevel@tonic-gate { 1841*0Sstevel@tonic-gate snfq = kmem_zalloc(sizeof (struct sendfile_queue), KM_SLEEP); 1842*0Sstevel@tonic-gate 1843*0Sstevel@tonic-gate mutex_init(&snfq->snfq_lock, NULL, MUTEX_DEFAULT, NULL); 1844*0Sstevel@tonic-gate cv_init(&snfq->snfq_cv, NULL, 
CV_DEFAULT, NULL); 1845*0Sstevel@tonic-gate snfq->snfq_max_threads = max_ncpus; 1846*0Sstevel@tonic-gate snfq_timeout = SNFQ_TIMEOUT; 1847*0Sstevel@tonic-gate /* Cache all files by default. */ 1848*0Sstevel@tonic-gate sendfile_max_size = MAXOFFSET_T; 1849*0Sstevel@tonic-gate } 1850*0Sstevel@tonic-gate 1851*0Sstevel@tonic-gate /* 1852*0Sstevel@tonic-gate * Queues a mblk_t for network processing. 1853*0Sstevel@tonic-gate */ 1854*0Sstevel@tonic-gate static void 1855*0Sstevel@tonic-gate snf_enque(snf_req_t *sr, mblk_t *mp) 1856*0Sstevel@tonic-gate { 1857*0Sstevel@tonic-gate mp->b_next = NULL; 1858*0Sstevel@tonic-gate mutex_enter(&sr->sr_lock); 1859*0Sstevel@tonic-gate if (sr->sr_mp_head == NULL) { 1860*0Sstevel@tonic-gate sr->sr_mp_head = sr->sr_mp_tail = mp; 1861*0Sstevel@tonic-gate cv_signal(&sr->sr_cv); 1862*0Sstevel@tonic-gate } else { 1863*0Sstevel@tonic-gate sr->sr_mp_tail->b_next = mp; 1864*0Sstevel@tonic-gate sr->sr_mp_tail = mp; 1865*0Sstevel@tonic-gate } 1866*0Sstevel@tonic-gate sr->sr_qlen += MBLKL(mp); 1867*0Sstevel@tonic-gate while ((sr->sr_qlen > sr->sr_hiwat) && 1868*0Sstevel@tonic-gate (sr->sr_write_error == 0)) { 1869*0Sstevel@tonic-gate sf_stats.ss_full_waits++; 1870*0Sstevel@tonic-gate cv_wait(&sr->sr_cv, &sr->sr_lock); 1871*0Sstevel@tonic-gate } 1872*0Sstevel@tonic-gate mutex_exit(&sr->sr_lock); 1873*0Sstevel@tonic-gate } 1874*0Sstevel@tonic-gate 1875*0Sstevel@tonic-gate /* 1876*0Sstevel@tonic-gate * De-queues a mblk_t for network processing. 1877*0Sstevel@tonic-gate */ 1878*0Sstevel@tonic-gate static mblk_t * 1879*0Sstevel@tonic-gate snf_deque(snf_req_t *sr) 1880*0Sstevel@tonic-gate { 1881*0Sstevel@tonic-gate mblk_t *mp; 1882*0Sstevel@tonic-gate 1883*0Sstevel@tonic-gate mutex_enter(&sr->sr_lock); 1884*0Sstevel@tonic-gate /* 1885*0Sstevel@tonic-gate * If we have encountered an error on read or read is 1886*0Sstevel@tonic-gate * completed and no more mblks, return NULL. 
1887*0Sstevel@tonic-gate * We need to check for NULL sr_mp_head also as 1888*0Sstevel@tonic-gate * the reads could have completed and there is 1889*0Sstevel@tonic-gate * nothing more to come. 1890*0Sstevel@tonic-gate */ 1891*0Sstevel@tonic-gate if (((sr->sr_read_error & ~SR_READ_DONE) != 0) || 1892*0Sstevel@tonic-gate ((sr->sr_read_error & SR_READ_DONE) && 1893*0Sstevel@tonic-gate sr->sr_mp_head == NULL)) { 1894*0Sstevel@tonic-gate mutex_exit(&sr->sr_lock); 1895*0Sstevel@tonic-gate return (NULL); 1896*0Sstevel@tonic-gate } 1897*0Sstevel@tonic-gate /* 1898*0Sstevel@tonic-gate * To start with neither SR_READ_DONE is marked nor 1899*0Sstevel@tonic-gate * the error is set. When we wake up from cv_wait, 1900*0Sstevel@tonic-gate * following are the possibilities : 1901*0Sstevel@tonic-gate * 1902*0Sstevel@tonic-gate * a) sr_read_error is zero and mblks are queued. 1903*0Sstevel@tonic-gate * b) sr_read_error is set to SR_READ_DONE 1904*0Sstevel@tonic-gate * and mblks are queued. 1905*0Sstevel@tonic-gate * c) sr_read_error is set to SR_READ_DONE 1906*0Sstevel@tonic-gate * and no mblks. 1907*0Sstevel@tonic-gate * d) sr_read_error is set to some error other 1908*0Sstevel@tonic-gate * than SR_READ_DONE. 1909*0Sstevel@tonic-gate */ 1910*0Sstevel@tonic-gate 1911*0Sstevel@tonic-gate while ((sr->sr_read_error == 0) && (sr->sr_mp_head == NULL)) { 1912*0Sstevel@tonic-gate sf_stats.ss_empty_waits++; 1913*0Sstevel@tonic-gate cv_wait(&sr->sr_cv, &sr->sr_lock); 1914*0Sstevel@tonic-gate } 1915*0Sstevel@tonic-gate /* Handle (a) and (b) first - the normal case. 
*/ 1916*0Sstevel@tonic-gate if (((sr->sr_read_error & ~SR_READ_DONE) == 0) && 1917*0Sstevel@tonic-gate (sr->sr_mp_head != NULL)) { 1918*0Sstevel@tonic-gate mp = sr->sr_mp_head; 1919*0Sstevel@tonic-gate sr->sr_mp_head = mp->b_next; 1920*0Sstevel@tonic-gate sr->sr_qlen -= MBLKL(mp); 1921*0Sstevel@tonic-gate if (sr->sr_qlen < sr->sr_lowat) 1922*0Sstevel@tonic-gate cv_signal(&sr->sr_cv); 1923*0Sstevel@tonic-gate mutex_exit(&sr->sr_lock); 1924*0Sstevel@tonic-gate mp->b_next = NULL; 1925*0Sstevel@tonic-gate return (mp); 1926*0Sstevel@tonic-gate } 1927*0Sstevel@tonic-gate /* Handle (c) and (d). */ 1928*0Sstevel@tonic-gate mutex_exit(&sr->sr_lock); 1929*0Sstevel@tonic-gate return (NULL); 1930*0Sstevel@tonic-gate } 1931*0Sstevel@tonic-gate 1932*0Sstevel@tonic-gate /* 1933*0Sstevel@tonic-gate * Reads data from the filesystem and queues it for network processing. 1934*0Sstevel@tonic-gate */ 1935*0Sstevel@tonic-gate void 1936*0Sstevel@tonic-gate snf_async_read(snf_req_t *sr) 1937*0Sstevel@tonic-gate { 1938*0Sstevel@tonic-gate size_t iosize; 1939*0Sstevel@tonic-gate u_offset_t fileoff; 1940*0Sstevel@tonic-gate u_offset_t size; 1941*0Sstevel@tonic-gate int ret_size; 1942*0Sstevel@tonic-gate int error; 1943*0Sstevel@tonic-gate file_t *fp; 1944*0Sstevel@tonic-gate mblk_t *mp; 1945*0Sstevel@tonic-gate 1946*0Sstevel@tonic-gate fp = sr->sr_fp; 1947*0Sstevel@tonic-gate size = sr->sr_file_size; 1948*0Sstevel@tonic-gate fileoff = sr->sr_file_off; 1949*0Sstevel@tonic-gate 1950*0Sstevel@tonic-gate /* 1951*0Sstevel@tonic-gate * Ignore the error for filesystems that doesn't support DIRECTIO. 
1952*0Sstevel@tonic-gate */ 1953*0Sstevel@tonic-gate (void) VOP_IOCTL(fp->f_vnode, _FIODIRECTIO, DIRECTIO_ON, 0, 1954*0Sstevel@tonic-gate kcred, NULL); 1955*0Sstevel@tonic-gate 1956*0Sstevel@tonic-gate while ((size != 0) && (sr->sr_write_error == 0)) { 1957*0Sstevel@tonic-gate 1958*0Sstevel@tonic-gate iosize = (int)MIN(sr->sr_maxpsz, size); 1959*0Sstevel@tonic-gate 1960*0Sstevel@tonic-gate if ((mp = allocb(iosize, BPRI_MED)) == NULL) { 1961*0Sstevel@tonic-gate error = EAGAIN; 1962*0Sstevel@tonic-gate break; 1963*0Sstevel@tonic-gate } 1964*0Sstevel@tonic-gate ret_size = soreadfile(fp, mp->b_rptr, fileoff, &error, iosize); 1965*0Sstevel@tonic-gate 1966*0Sstevel@tonic-gate /* Error or Reached EOF ? */ 1967*0Sstevel@tonic-gate if ((error != 0) || (ret_size == 0)) { 1968*0Sstevel@tonic-gate freeb(mp); 1969*0Sstevel@tonic-gate break; 1970*0Sstevel@tonic-gate } 1971*0Sstevel@tonic-gate mp->b_wptr = mp->b_rptr + ret_size; 1972*0Sstevel@tonic-gate 1973*0Sstevel@tonic-gate snf_enque(sr, mp); 1974*0Sstevel@tonic-gate size -= ret_size; 1975*0Sstevel@tonic-gate fileoff += ret_size; 1976*0Sstevel@tonic-gate } 1977*0Sstevel@tonic-gate (void) VOP_IOCTL(fp->f_vnode, _FIODIRECTIO, DIRECTIO_OFF, 0, 1978*0Sstevel@tonic-gate kcred, NULL); 1979*0Sstevel@tonic-gate mutex_enter(&sr->sr_lock); 1980*0Sstevel@tonic-gate sr->sr_read_error = error; 1981*0Sstevel@tonic-gate sr->sr_read_error |= SR_READ_DONE; 1982*0Sstevel@tonic-gate cv_signal(&sr->sr_cv); 1983*0Sstevel@tonic-gate mutex_exit(&sr->sr_lock); 1984*0Sstevel@tonic-gate } 1985*0Sstevel@tonic-gate 1986*0Sstevel@tonic-gate void 1987*0Sstevel@tonic-gate snf_async_thread(void) 1988*0Sstevel@tonic-gate { 1989*0Sstevel@tonic-gate snf_req_t *sr; 1990*0Sstevel@tonic-gate callb_cpr_t cprinfo; 1991*0Sstevel@tonic-gate clock_t time_left = 1; 1992*0Sstevel@tonic-gate clock_t now; 1993*0Sstevel@tonic-gate 1994*0Sstevel@tonic-gate CALLB_CPR_INIT(&cprinfo, &snfq->snfq_lock, callb_generic_cpr, "snfq"); 1995*0Sstevel@tonic-gate 
1996*0Sstevel@tonic-gate mutex_enter(&snfq->snfq_lock); 1997*0Sstevel@tonic-gate for (;;) { 1998*0Sstevel@tonic-gate /* 1999*0Sstevel@tonic-gate * If we didn't find a entry, then block until woken up 2000*0Sstevel@tonic-gate * again and then look through the queues again. 2001*0Sstevel@tonic-gate */ 2002*0Sstevel@tonic-gate while ((sr = snfq->snfq_req_head) == NULL) { 2003*0Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo); 2004*0Sstevel@tonic-gate if (time_left <= 0) { 2005*0Sstevel@tonic-gate snfq->snfq_svc_threads--; 2006*0Sstevel@tonic-gate CALLB_CPR_EXIT(&cprinfo); 2007*0Sstevel@tonic-gate thread_exit(); 2008*0Sstevel@tonic-gate /* NOTREACHED */ 2009*0Sstevel@tonic-gate } 2010*0Sstevel@tonic-gate snfq->snfq_idle_cnt++; 2011*0Sstevel@tonic-gate 2012*0Sstevel@tonic-gate time_to_wait(&now, snfq_timeout); 2013*0Sstevel@tonic-gate time_left = cv_timedwait(&snfq->snfq_cv, 2014*0Sstevel@tonic-gate &snfq->snfq_lock, now); 2015*0Sstevel@tonic-gate snfq->snfq_idle_cnt--; 2016*0Sstevel@tonic-gate 2017*0Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &snfq->snfq_lock); 2018*0Sstevel@tonic-gate } 2019*0Sstevel@tonic-gate snfq->snfq_req_head = sr->sr_next; 2020*0Sstevel@tonic-gate snfq->snfq_req_cnt--; 2021*0Sstevel@tonic-gate mutex_exit(&snfq->snfq_lock); 2022*0Sstevel@tonic-gate snf_async_read(sr); 2023*0Sstevel@tonic-gate mutex_enter(&snfq->snfq_lock); 2024*0Sstevel@tonic-gate } 2025*0Sstevel@tonic-gate } 2026*0Sstevel@tonic-gate 2027*0Sstevel@tonic-gate 2028*0Sstevel@tonic-gate snf_req_t * 2029*0Sstevel@tonic-gate create_thread(int operation, struct vnode *vp, file_t *fp, 2030*0Sstevel@tonic-gate u_offset_t fileoff, u_offset_t size) 2031*0Sstevel@tonic-gate { 2032*0Sstevel@tonic-gate snf_req_t *sr; 2033*0Sstevel@tonic-gate stdata_t *stp; 2034*0Sstevel@tonic-gate 2035*0Sstevel@tonic-gate sr = (snf_req_t *)kmem_zalloc(sizeof (snf_req_t), KM_SLEEP); 2036*0Sstevel@tonic-gate 2037*0Sstevel@tonic-gate sr->sr_vp = vp; 2038*0Sstevel@tonic-gate sr->sr_fp = fp; 
2039*0Sstevel@tonic-gate stp = vp->v_stream; 2040*0Sstevel@tonic-gate 2041*0Sstevel@tonic-gate /* 2042*0Sstevel@tonic-gate * store sd_qn_maxpsz into sr_maxpsz while we have stream head. 2043*0Sstevel@tonic-gate * stream might be closed before thread returns from snf_async_read. 2044*0Sstevel@tonic-gate */ 2045*0Sstevel@tonic-gate if (stp->sd_qn_maxpsz > 0) { 2046*0Sstevel@tonic-gate sr->sr_maxpsz = MIN(MAXBSIZE, stp->sd_qn_maxpsz); 2047*0Sstevel@tonic-gate } else { 2048*0Sstevel@tonic-gate sr->sr_maxpsz = MAXBSIZE; 2049*0Sstevel@tonic-gate } 2050*0Sstevel@tonic-gate 2051*0Sstevel@tonic-gate sr->sr_operation = operation; 2052*0Sstevel@tonic-gate sr->sr_file_off = fileoff; 2053*0Sstevel@tonic-gate sr->sr_file_size = size; 2054*0Sstevel@tonic-gate sr->sr_hiwat = sendfile_req_hiwat; 2055*0Sstevel@tonic-gate sr->sr_lowat = sendfile_req_lowat; 2056*0Sstevel@tonic-gate mutex_init(&sr->sr_lock, NULL, MUTEX_DEFAULT, NULL); 2057*0Sstevel@tonic-gate cv_init(&sr->sr_cv, NULL, CV_DEFAULT, NULL); 2058*0Sstevel@tonic-gate /* 2059*0Sstevel@tonic-gate * See whether we need another thread for servicing this 2060*0Sstevel@tonic-gate * request. If there are already enough requests queued 2061*0Sstevel@tonic-gate * for the threads, create one if not exceeding 2062*0Sstevel@tonic-gate * snfq_max_threads. 
2063*0Sstevel@tonic-gate */ 2064*0Sstevel@tonic-gate mutex_enter(&snfq->snfq_lock); 2065*0Sstevel@tonic-gate if (snfq->snfq_req_cnt >= snfq->snfq_idle_cnt && 2066*0Sstevel@tonic-gate snfq->snfq_svc_threads < snfq->snfq_max_threads) { 2067*0Sstevel@tonic-gate (void) thread_create(NULL, 0, &snf_async_thread, 0, 0, &p0, 2068*0Sstevel@tonic-gate TS_RUN, minclsyspri); 2069*0Sstevel@tonic-gate snfq->snfq_svc_threads++; 2070*0Sstevel@tonic-gate } 2071*0Sstevel@tonic-gate if (snfq->snfq_req_head == NULL) { 2072*0Sstevel@tonic-gate snfq->snfq_req_head = snfq->snfq_req_tail = sr; 2073*0Sstevel@tonic-gate cv_signal(&snfq->snfq_cv); 2074*0Sstevel@tonic-gate } else { 2075*0Sstevel@tonic-gate snfq->snfq_req_tail->sr_next = sr; 2076*0Sstevel@tonic-gate snfq->snfq_req_tail = sr; 2077*0Sstevel@tonic-gate } 2078*0Sstevel@tonic-gate snfq->snfq_req_cnt++; 2079*0Sstevel@tonic-gate mutex_exit(&snfq->snfq_lock); 2080*0Sstevel@tonic-gate return (sr); 2081*0Sstevel@tonic-gate } 2082*0Sstevel@tonic-gate 2083*0Sstevel@tonic-gate int 2084*0Sstevel@tonic-gate snf_direct_io(file_t *fp, file_t *rfp, u_offset_t fileoff, u_offset_t size, 2085*0Sstevel@tonic-gate ssize_t *count) 2086*0Sstevel@tonic-gate { 2087*0Sstevel@tonic-gate snf_req_t *sr; 2088*0Sstevel@tonic-gate mblk_t *mp; 2089*0Sstevel@tonic-gate int iosize; 2090*0Sstevel@tonic-gate int error = 0; 2091*0Sstevel@tonic-gate short fflag; 2092*0Sstevel@tonic-gate struct vnode *vp; 2093*0Sstevel@tonic-gate int ksize; 2094*0Sstevel@tonic-gate 2095*0Sstevel@tonic-gate ksize = 0; 2096*0Sstevel@tonic-gate *count = 0; 2097*0Sstevel@tonic-gate 2098*0Sstevel@tonic-gate vp = fp->f_vnode; 2099*0Sstevel@tonic-gate fflag = fp->f_flag; 2100*0Sstevel@tonic-gate if ((sr = create_thread(READ_OP, vp, rfp, fileoff, size)) == NULL) 2101*0Sstevel@tonic-gate return (EAGAIN); 2102*0Sstevel@tonic-gate 2103*0Sstevel@tonic-gate /* 2104*0Sstevel@tonic-gate * We check for read error in snf_deque. 
It has to check 2105*0Sstevel@tonic-gate * for successful READ_DONE and return NULL, and we might 2106*0Sstevel@tonic-gate * as well make an additional check there. 2107*0Sstevel@tonic-gate */ 2108*0Sstevel@tonic-gate while ((mp = snf_deque(sr)) != NULL) { 2109*0Sstevel@tonic-gate 2110*0Sstevel@tonic-gate if (ISSIG(curthread, JUSTLOOKING)) { 2111*0Sstevel@tonic-gate freeb(mp); 2112*0Sstevel@tonic-gate error = EINTR; 2113*0Sstevel@tonic-gate break; 2114*0Sstevel@tonic-gate } 2115*0Sstevel@tonic-gate iosize = MBLKL(mp); 2116*0Sstevel@tonic-gate 2117*0Sstevel@tonic-gate if ((error = kstrwritemp(vp, mp, fflag)) != 0) { 2118*0Sstevel@tonic-gate freeb(mp); 2119*0Sstevel@tonic-gate break; 2120*0Sstevel@tonic-gate } 2121*0Sstevel@tonic-gate ksize += iosize; 2122*0Sstevel@tonic-gate } 2123*0Sstevel@tonic-gate *count = ksize; 2124*0Sstevel@tonic-gate 2125*0Sstevel@tonic-gate mutex_enter(&sr->sr_lock); 2126*0Sstevel@tonic-gate sr->sr_write_error = error; 2127*0Sstevel@tonic-gate /* Look at the big comments on why we cv_signal here. */ 2128*0Sstevel@tonic-gate cv_signal(&sr->sr_cv); 2129*0Sstevel@tonic-gate 2130*0Sstevel@tonic-gate /* Wait for the reader to complete always. */ 2131*0Sstevel@tonic-gate while (!(sr->sr_read_error & SR_READ_DONE)) { 2132*0Sstevel@tonic-gate cv_wait(&sr->sr_cv, &sr->sr_lock); 2133*0Sstevel@tonic-gate } 2134*0Sstevel@tonic-gate /* If there is no write error, check for read error. 
*/ 2135*0Sstevel@tonic-gate if (error == 0) 2136*0Sstevel@tonic-gate error = (sr->sr_read_error & ~SR_READ_DONE); 2137*0Sstevel@tonic-gate 2138*0Sstevel@tonic-gate if (error != 0) { 2139*0Sstevel@tonic-gate mblk_t *next_mp; 2140*0Sstevel@tonic-gate 2141*0Sstevel@tonic-gate mp = sr->sr_mp_head; 2142*0Sstevel@tonic-gate while (mp != NULL) { 2143*0Sstevel@tonic-gate next_mp = mp->b_next; 2144*0Sstevel@tonic-gate mp->b_next = NULL; 2145*0Sstevel@tonic-gate freeb(mp); 2146*0Sstevel@tonic-gate mp = next_mp; 2147*0Sstevel@tonic-gate } 2148*0Sstevel@tonic-gate } 2149*0Sstevel@tonic-gate mutex_exit(&sr->sr_lock); 2150*0Sstevel@tonic-gate kmem_free(sr, sizeof (snf_req_t)); 2151*0Sstevel@tonic-gate return (error); 2152*0Sstevel@tonic-gate } 2153*0Sstevel@tonic-gate 2154*0Sstevel@tonic-gate typedef struct { 2155*0Sstevel@tonic-gate frtn_t snfi_frtn; 2156*0Sstevel@tonic-gate caddr_t snfi_base; 2157*0Sstevel@tonic-gate uint_t snfi_mapoff; 2158*0Sstevel@tonic-gate size_t snfi_len; 2159*0Sstevel@tonic-gate vnode_t *snfi_vp; 2160*0Sstevel@tonic-gate } snf_smap_desbinfo; 2161*0Sstevel@tonic-gate 2162*0Sstevel@tonic-gate /* 2163*0Sstevel@tonic-gate * The callback function when the last ref of the mblk is dropped, 2164*0Sstevel@tonic-gate * normally occurs when TCP receives the ack. But it can be the driver 2165*0Sstevel@tonic-gate * too due to lazy reclaim. 2166*0Sstevel@tonic-gate */ 2167*0Sstevel@tonic-gate void 2168*0Sstevel@tonic-gate snf_smap_desbfree(snf_smap_desbinfo *snfi) 2169*0Sstevel@tonic-gate { 2170*0Sstevel@tonic-gate if (!segmap_kpm) { 2171*0Sstevel@tonic-gate /* 2172*0Sstevel@tonic-gate * We don't need to call segmap_fault(F_SOFTUNLOCK) for 2173*0Sstevel@tonic-gate * segmap_kpm as long as the latter never falls back to 2174*0Sstevel@tonic-gate * "use_segmap_range". (See segmap_getmapflt().) 
2175*0Sstevel@tonic-gate * 2176*0Sstevel@tonic-gate * Using S_OTHER saves an redundant hat_setref() in 2177*0Sstevel@tonic-gate * segmap_unlock() 2178*0Sstevel@tonic-gate */ 2179*0Sstevel@tonic-gate (void) segmap_fault(kas.a_hat, segkmap, 2180*0Sstevel@tonic-gate (caddr_t)(((uintptr_t)snfi->snfi_base + snfi->snfi_mapoff) 2181*0Sstevel@tonic-gate & PAGEMASK), snfi->snfi_len, F_SOFTUNLOCK, S_OTHER); 2182*0Sstevel@tonic-gate } 2183*0Sstevel@tonic-gate (void) segmap_release(segkmap, snfi->snfi_base, SM_DONTNEED); 2184*0Sstevel@tonic-gate VN_RELE(snfi->snfi_vp); 2185*0Sstevel@tonic-gate kmem_free(snfi, sizeof (*snfi)); 2186*0Sstevel@tonic-gate } 2187*0Sstevel@tonic-gate 2188*0Sstevel@tonic-gate /* 2189*0Sstevel@tonic-gate * Use segmap instead of bcopy to send down a chain of desballoca'ed, mblks. 2190*0Sstevel@tonic-gate * Each mblk contains a segmap slot of no more than MAXBSIZE. The total 2191*0Sstevel@tonic-gate * length of a chain is no more than sd_qn_maxpsz. 2192*0Sstevel@tonic-gate * 2193*0Sstevel@tonic-gate * At the end of the whole sendfile() operation, we wait till the data from 2194*0Sstevel@tonic-gate * the last mblk is ack'ed by the transport before returning so that the 2195*0Sstevel@tonic-gate * caller of sendfile() can safely modify the file content. 
2196*0Sstevel@tonic-gate */ 2197*0Sstevel@tonic-gate int 2198*0Sstevel@tonic-gate snf_segmap(file_t *fp, vnode_t *fvp, u_offset_t fileoff, u_offset_t size, 2199*0Sstevel@tonic-gate uint_t maxpsz, ssize_t *count, boolean_t nowait) 2200*0Sstevel@tonic-gate { 2201*0Sstevel@tonic-gate caddr_t base; 2202*0Sstevel@tonic-gate int mapoff; 2203*0Sstevel@tonic-gate vnode_t *vp; 2204*0Sstevel@tonic-gate mblk_t *mp, *mp1; 2205*0Sstevel@tonic-gate int iosize, iosize1; 2206*0Sstevel@tonic-gate int error; 2207*0Sstevel@tonic-gate short fflag; 2208*0Sstevel@tonic-gate int ksize; 2209*0Sstevel@tonic-gate snf_smap_desbinfo *snfi; 2210*0Sstevel@tonic-gate struct vattr va; 2211*0Sstevel@tonic-gate boolean_t dowait = B_FALSE; 2212*0Sstevel@tonic-gate 2213*0Sstevel@tonic-gate vp = fp->f_vnode; 2214*0Sstevel@tonic-gate fflag = fp->f_flag; 2215*0Sstevel@tonic-gate ksize = 0; 2216*0Sstevel@tonic-gate for (;;) { 2217*0Sstevel@tonic-gate if (ISSIG(curthread, JUSTLOOKING)) { 2218*0Sstevel@tonic-gate error = EINTR; 2219*0Sstevel@tonic-gate break; 2220*0Sstevel@tonic-gate } 2221*0Sstevel@tonic-gate iosize = 0; 2222*0Sstevel@tonic-gate mp = NULL; 2223*0Sstevel@tonic-gate do { 2224*0Sstevel@tonic-gate mapoff = fileoff & MAXBOFFSET; 2225*0Sstevel@tonic-gate iosize1 = MAXBSIZE - mapoff; 2226*0Sstevel@tonic-gate if (iosize1 > size) 2227*0Sstevel@tonic-gate iosize1 = size; 2228*0Sstevel@tonic-gate /* 2229*0Sstevel@tonic-gate * we don't forcefault because we'll call 2230*0Sstevel@tonic-gate * segmap_fault(F_SOFTLOCK) next. 2231*0Sstevel@tonic-gate * 2232*0Sstevel@tonic-gate * S_READ will get the ref bit set (by either 2233*0Sstevel@tonic-gate * segmap_getmapflt() or segmap_fault()) and page 2234*0Sstevel@tonic-gate * shared locked. 2235*0Sstevel@tonic-gate */ 2236*0Sstevel@tonic-gate base = segmap_getmapflt(segkmap, fvp, fileoff, iosize1, 2237*0Sstevel@tonic-gate segmap_kpm ? 
SM_FAULT : 0, S_READ); 2238*0Sstevel@tonic-gate 2239*0Sstevel@tonic-gate snfi = kmem_alloc(sizeof (*snfi), KM_SLEEP); 2240*0Sstevel@tonic-gate snfi->snfi_len = (size_t)roundup(mapoff+iosize1, 2241*0Sstevel@tonic-gate PAGESIZE)- (mapoff & PAGEMASK); 2242*0Sstevel@tonic-gate /* 2243*0Sstevel@tonic-gate * We must call segmap_fault() even for segmap_kpm 2244*0Sstevel@tonic-gate * because that's how error gets returned. 2245*0Sstevel@tonic-gate * (segmap_getmapflt() never fails but segmap_fault() 2246*0Sstevel@tonic-gate * does.) 2247*0Sstevel@tonic-gate */ 2248*0Sstevel@tonic-gate if (segmap_fault(kas.a_hat, segkmap, 2249*0Sstevel@tonic-gate (caddr_t)(((uintptr_t)base + mapoff) & PAGEMASK), 2250*0Sstevel@tonic-gate snfi->snfi_len, F_SOFTLOCK, S_READ) != 0) { 2251*0Sstevel@tonic-gate (void) segmap_release(segkmap, base, 0); 2252*0Sstevel@tonic-gate kmem_free(snfi, sizeof (*snfi)); 2253*0Sstevel@tonic-gate freemsg(mp); 2254*0Sstevel@tonic-gate error = EIO; 2255*0Sstevel@tonic-gate goto out; 2256*0Sstevel@tonic-gate } 2257*0Sstevel@tonic-gate snfi->snfi_frtn.free_func = snf_smap_desbfree; 2258*0Sstevel@tonic-gate snfi->snfi_frtn.free_arg = (caddr_t)snfi; 2259*0Sstevel@tonic-gate snfi->snfi_base = base; 2260*0Sstevel@tonic-gate snfi->snfi_mapoff = mapoff; 2261*0Sstevel@tonic-gate mp1 = desballoca((uchar_t *)base + mapoff, 2262*0Sstevel@tonic-gate iosize1, BPRI_HI, &snfi->snfi_frtn); 2263*0Sstevel@tonic-gate 2264*0Sstevel@tonic-gate if (mp1 == NULL) { 2265*0Sstevel@tonic-gate (void) segmap_fault(kas.a_hat, segkmap, 2266*0Sstevel@tonic-gate (caddr_t)(((uintptr_t)base + mapoff) & 2267*0Sstevel@tonic-gate PAGEMASK), snfi->snfi_len, 2268*0Sstevel@tonic-gate F_SOFTUNLOCK, S_OTHER); 2269*0Sstevel@tonic-gate (void) segmap_release(segkmap, base, 0); 2270*0Sstevel@tonic-gate kmem_free(snfi, sizeof (*snfi)); 2271*0Sstevel@tonic-gate freemsg(mp); 2272*0Sstevel@tonic-gate error = EAGAIN; 2273*0Sstevel@tonic-gate goto out; 2274*0Sstevel@tonic-gate } 2275*0Sstevel@tonic-gate 
VN_HOLD(fvp); 2276*0Sstevel@tonic-gate snfi->snfi_vp = fvp; 2277*0Sstevel@tonic-gate mp1->b_wptr += iosize1; 2278*0Sstevel@tonic-gate 2279*0Sstevel@tonic-gate /* Mark this dblk with the zero-copy flag */ 2280*0Sstevel@tonic-gate mp1->b_datap->db_struioflag |= STRUIO_ZC; 2281*0Sstevel@tonic-gate if (mp == NULL) 2282*0Sstevel@tonic-gate mp = mp1; 2283*0Sstevel@tonic-gate else 2284*0Sstevel@tonic-gate linkb(mp, mp1); 2285*0Sstevel@tonic-gate iosize += iosize1; 2286*0Sstevel@tonic-gate fileoff += iosize1; 2287*0Sstevel@tonic-gate size -= iosize1; 2288*0Sstevel@tonic-gate } while (iosize < maxpsz && size != 0); 2289*0Sstevel@tonic-gate 2290*0Sstevel@tonic-gate if (size == 0 && !nowait) { 2291*0Sstevel@tonic-gate ASSERT(!dowait); 2292*0Sstevel@tonic-gate dowait = B_TRUE; 2293*0Sstevel@tonic-gate mp1->b_datap->db_struioflag |= STRUIO_ZCNOTIFY; 2294*0Sstevel@tonic-gate } 2295*0Sstevel@tonic-gate VOP_RWUNLOCK(fvp, V_WRITELOCK_FALSE, NULL); 2296*0Sstevel@tonic-gate if ((error = kstrwritemp(vp, mp, fflag)) != 0) { 2297*0Sstevel@tonic-gate *count = ksize; 2298*0Sstevel@tonic-gate freemsg(mp); 2299*0Sstevel@tonic-gate return (error); 2300*0Sstevel@tonic-gate } 2301*0Sstevel@tonic-gate ksize += iosize; 2302*0Sstevel@tonic-gate if (size == 0) 2303*0Sstevel@tonic-gate goto done; 2304*0Sstevel@tonic-gate 2305*0Sstevel@tonic-gate (void) VOP_RWLOCK(fvp, V_WRITELOCK_FALSE, NULL); 2306*0Sstevel@tonic-gate va.va_mask = AT_SIZE; 2307*0Sstevel@tonic-gate error = VOP_GETATTR(fvp, &va, 0, kcred); 2308*0Sstevel@tonic-gate if (error) 2309*0Sstevel@tonic-gate break; 2310*0Sstevel@tonic-gate /* Read as much as possible. 
*/ 2311*0Sstevel@tonic-gate if (fileoff >= va.va_size) 2312*0Sstevel@tonic-gate break; 2313*0Sstevel@tonic-gate if (size + fileoff > va.va_size) 2314*0Sstevel@tonic-gate size = va.va_size - fileoff; 2315*0Sstevel@tonic-gate } 2316*0Sstevel@tonic-gate out: 2317*0Sstevel@tonic-gate VOP_RWUNLOCK(fvp, V_WRITELOCK_FALSE, NULL); 2318*0Sstevel@tonic-gate done: 2319*0Sstevel@tonic-gate *count = ksize; 2320*0Sstevel@tonic-gate if (dowait) { 2321*0Sstevel@tonic-gate stdata_t *stp; 2322*0Sstevel@tonic-gate 2323*0Sstevel@tonic-gate stp = vp->v_stream; 2324*0Sstevel@tonic-gate mutex_enter(&stp->sd_lock); 2325*0Sstevel@tonic-gate while (!(stp->sd_flag & STZCNOTIFY)) { 2326*0Sstevel@tonic-gate (void) cv_wait_sig(&stp->sd_zcopy_wait, 2327*0Sstevel@tonic-gate &stp->sd_lock); 2328*0Sstevel@tonic-gate } 2329*0Sstevel@tonic-gate stp->sd_flag &= ~STZCNOTIFY; 2330*0Sstevel@tonic-gate mutex_exit(&stp->sd_lock); 2331*0Sstevel@tonic-gate } 2332*0Sstevel@tonic-gate return (error); 2333*0Sstevel@tonic-gate } 2334*0Sstevel@tonic-gate 2335*0Sstevel@tonic-gate int 2336*0Sstevel@tonic-gate snf_cache(file_t *fp, vnode_t *fvp, u_offset_t fileoff, u_offset_t size, 2337*0Sstevel@tonic-gate uint_t maxpsz, ssize_t *count) 2338*0Sstevel@tonic-gate { 2339*0Sstevel@tonic-gate struct vnode *vp; 2340*0Sstevel@tonic-gate mblk_t *mp; 2341*0Sstevel@tonic-gate int iosize; 2342*0Sstevel@tonic-gate int error; 2343*0Sstevel@tonic-gate short fflag; 2344*0Sstevel@tonic-gate int ksize; 2345*0Sstevel@tonic-gate int ioflag; 2346*0Sstevel@tonic-gate struct uio auio; 2347*0Sstevel@tonic-gate struct iovec aiov; 2348*0Sstevel@tonic-gate struct vattr va; 2349*0Sstevel@tonic-gate 2350*0Sstevel@tonic-gate vp = fp->f_vnode; 2351*0Sstevel@tonic-gate fflag = fp->f_flag; 2352*0Sstevel@tonic-gate ksize = 0; 2353*0Sstevel@tonic-gate auio.uio_iov = &aiov; 2354*0Sstevel@tonic-gate auio.uio_iovcnt = 1; 2355*0Sstevel@tonic-gate auio.uio_segflg = UIO_SYSSPACE; 2356*0Sstevel@tonic-gate auio.uio_llimit = MAXOFFSET_T; 
2357*0Sstevel@tonic-gate auio.uio_fmode = fflag; 2358*0Sstevel@tonic-gate auio.uio_extflg = UIO_COPY_CACHED; 2359*0Sstevel@tonic-gate ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC); 2360*0Sstevel@tonic-gate /* If read sync is not asked for, filter sync flags */ 2361*0Sstevel@tonic-gate if ((ioflag & FRSYNC) == 0) 2362*0Sstevel@tonic-gate ioflag &= ~(FSYNC|FDSYNC); 2363*0Sstevel@tonic-gate for (;;) { 2364*0Sstevel@tonic-gate if (ISSIG(curthread, JUSTLOOKING)) { 2365*0Sstevel@tonic-gate error = EINTR; 2366*0Sstevel@tonic-gate break; 2367*0Sstevel@tonic-gate } 2368*0Sstevel@tonic-gate iosize = (int)MIN(maxpsz, size); 2369*0Sstevel@tonic-gate if ((mp = allocb(iosize, BPRI_MED)) == NULL) { 2370*0Sstevel@tonic-gate error = EAGAIN; 2371*0Sstevel@tonic-gate break; 2372*0Sstevel@tonic-gate } 2373*0Sstevel@tonic-gate aiov.iov_base = (caddr_t)mp->b_rptr; 2374*0Sstevel@tonic-gate aiov.iov_len = iosize; 2375*0Sstevel@tonic-gate auio.uio_loffset = fileoff; 2376*0Sstevel@tonic-gate auio.uio_resid = iosize; 2377*0Sstevel@tonic-gate 2378*0Sstevel@tonic-gate error = VOP_READ(fvp, &auio, ioflag, fp->f_cred, NULL); 2379*0Sstevel@tonic-gate iosize -= auio.uio_resid; 2380*0Sstevel@tonic-gate 2381*0Sstevel@tonic-gate if (error == EINTR && iosize != 0) 2382*0Sstevel@tonic-gate error = 0; 2383*0Sstevel@tonic-gate 2384*0Sstevel@tonic-gate if (error != 0 || iosize == 0) { 2385*0Sstevel@tonic-gate freeb(mp); 2386*0Sstevel@tonic-gate break; 2387*0Sstevel@tonic-gate } 2388*0Sstevel@tonic-gate mp->b_wptr = mp->b_rptr + iosize; 2389*0Sstevel@tonic-gate 2390*0Sstevel@tonic-gate VOP_RWUNLOCK(fvp, V_WRITELOCK_FALSE, NULL); 2391*0Sstevel@tonic-gate if ((error = kstrwritemp(vp, mp, fflag)) != 0) { 2392*0Sstevel@tonic-gate *count = ksize; 2393*0Sstevel@tonic-gate freeb(mp); 2394*0Sstevel@tonic-gate return (error); 2395*0Sstevel@tonic-gate } 2396*0Sstevel@tonic-gate ksize += iosize; 2397*0Sstevel@tonic-gate size -= iosize; 2398*0Sstevel@tonic-gate if (size == 0) 2399*0Sstevel@tonic-gate goto done; 
2400*0Sstevel@tonic-gate 2401*0Sstevel@tonic-gate fileoff += iosize; 2402*0Sstevel@tonic-gate (void) VOP_RWLOCK(fvp, V_WRITELOCK_FALSE, NULL); 2403*0Sstevel@tonic-gate va.va_mask = AT_SIZE; 2404*0Sstevel@tonic-gate error = VOP_GETATTR(fvp, &va, 0, kcred); 2405*0Sstevel@tonic-gate if (error) 2406*0Sstevel@tonic-gate break; 2407*0Sstevel@tonic-gate /* Read as much as possible. */ 2408*0Sstevel@tonic-gate if (fileoff >= va.va_size) 2409*0Sstevel@tonic-gate size = 0; 2410*0Sstevel@tonic-gate else if (size + fileoff > va.va_size) 2411*0Sstevel@tonic-gate size = va.va_size - fileoff; 2412*0Sstevel@tonic-gate } 2413*0Sstevel@tonic-gate VOP_RWUNLOCK(fvp, V_WRITELOCK_FALSE, NULL); 2414*0Sstevel@tonic-gate done: 2415*0Sstevel@tonic-gate *count = ksize; 2416*0Sstevel@tonic-gate return (error); 2417*0Sstevel@tonic-gate } 2418*0Sstevel@tonic-gate 2419*0Sstevel@tonic-gate #if defined(_SYSCALL32_IMPL) || defined(_ILP32) 2420*0Sstevel@tonic-gate /* 2421*0Sstevel@tonic-gate * Largefile support for 32 bit applications only. 
2422*0Sstevel@tonic-gate */ 2423*0Sstevel@tonic-gate int 2424*0Sstevel@tonic-gate sosendfile64(file_t *fp, file_t *rfp, const struct ksendfilevec64 *sfv, 2425*0Sstevel@tonic-gate ssize32_t *count32) 2426*0Sstevel@tonic-gate { 2427*0Sstevel@tonic-gate ssize32_t sfv_len; 2428*0Sstevel@tonic-gate u_offset_t sfv_off, va_size; 2429*0Sstevel@tonic-gate struct vnode *vp, *fvp, *realvp; 2430*0Sstevel@tonic-gate struct vattr va; 2431*0Sstevel@tonic-gate stdata_t *stp; 2432*0Sstevel@tonic-gate ssize_t count = 0; 2433*0Sstevel@tonic-gate int error = 0; 2434*0Sstevel@tonic-gate boolean_t dozcopy = B_FALSE; 2435*0Sstevel@tonic-gate uint_t maxpsz; 2436*0Sstevel@tonic-gate 2437*0Sstevel@tonic-gate sfv_len = (ssize32_t)sfv->sfv_len; 2438*0Sstevel@tonic-gate if (sfv_len < 0) { 2439*0Sstevel@tonic-gate error = EINVAL; 2440*0Sstevel@tonic-gate goto out; 2441*0Sstevel@tonic-gate } 2442*0Sstevel@tonic-gate 2443*0Sstevel@tonic-gate if (sfv_len == 0) goto out; 2444*0Sstevel@tonic-gate 2445*0Sstevel@tonic-gate sfv_off = (u_offset_t)sfv->sfv_off; 2446*0Sstevel@tonic-gate 2447*0Sstevel@tonic-gate /* Same checks as in pread */ 2448*0Sstevel@tonic-gate if (sfv_off > MAXOFFSET_T) { 2449*0Sstevel@tonic-gate error = EINVAL; 2450*0Sstevel@tonic-gate goto out; 2451*0Sstevel@tonic-gate } 2452*0Sstevel@tonic-gate if (sfv_off + sfv_len > MAXOFFSET_T) 2453*0Sstevel@tonic-gate sfv_len = (ssize32_t)(MAXOFFSET_T - sfv_off); 2454*0Sstevel@tonic-gate 2455*0Sstevel@tonic-gate /* 2456*0Sstevel@tonic-gate * There are no more checks on sfv_len. So, we cast it to 2457*0Sstevel@tonic-gate * u_offset_t and share the snf_direct_io/snf_cache code between 2458*0Sstevel@tonic-gate * 32 bit and 64 bit. 2459*0Sstevel@tonic-gate * 2460*0Sstevel@tonic-gate * TODO: should do nbl_need_check() like read()? 
2461*0Sstevel@tonic-gate */ 2462*0Sstevel@tonic-gate if (sfv_len > sendfile_max_size) { 2463*0Sstevel@tonic-gate sf_stats.ss_file_not_cached++; 2464*0Sstevel@tonic-gate error = snf_direct_io(fp, rfp, sfv_off, (u_offset_t)sfv_len, 2465*0Sstevel@tonic-gate &count); 2466*0Sstevel@tonic-gate goto out; 2467*0Sstevel@tonic-gate } 2468*0Sstevel@tonic-gate fvp = rfp->f_vnode; 2469*0Sstevel@tonic-gate if (VOP_REALVP(fvp, &realvp) == 0) 2470*0Sstevel@tonic-gate fvp = realvp; 2471*0Sstevel@tonic-gate /* 2472*0Sstevel@tonic-gate * Grab the lock as a reader to prevent the file size 2473*0Sstevel@tonic-gate * from changing underneath. 2474*0Sstevel@tonic-gate */ 2475*0Sstevel@tonic-gate (void) VOP_RWLOCK(fvp, V_WRITELOCK_FALSE, NULL); 2476*0Sstevel@tonic-gate va.va_mask = AT_SIZE; 2477*0Sstevel@tonic-gate error = VOP_GETATTR(fvp, &va, 0, kcred); 2478*0Sstevel@tonic-gate va_size = va.va_size; 2479*0Sstevel@tonic-gate if ((error != 0) || (va_size == 0) || (sfv_off >= va_size)) { 2480*0Sstevel@tonic-gate VOP_RWUNLOCK(fvp, V_WRITELOCK_FALSE, NULL); 2481*0Sstevel@tonic-gate goto out; 2482*0Sstevel@tonic-gate } 2483*0Sstevel@tonic-gate /* Read as much as possible. */ 2484*0Sstevel@tonic-gate if (sfv_off + sfv_len > va_size) 2485*0Sstevel@tonic-gate sfv_len = va_size - sfv_off; 2486*0Sstevel@tonic-gate 2487*0Sstevel@tonic-gate vp = fp->f_vnode; 2488*0Sstevel@tonic-gate stp = vp->v_stream; 2489*0Sstevel@tonic-gate if (stp->sd_qn_maxpsz == INFPSZ) 2490*0Sstevel@tonic-gate maxpsz = MAXOFF32_T; 2491*0Sstevel@tonic-gate else 2492*0Sstevel@tonic-gate maxpsz = roundup(stp->sd_qn_maxpsz, MAXBSIZE); 2493*0Sstevel@tonic-gate /* 2494*0Sstevel@tonic-gate * When the NOWAIT flag is not set, we enable zero-copy only if the 2495*0Sstevel@tonic-gate * transfer size is large enough. This prevents performance loss 2496*0Sstevel@tonic-gate * when the caller sends the file piece by piece. 
2497*0Sstevel@tonic-gate */ 2498*0Sstevel@tonic-gate if (sfv_len >= MAXBSIZE && (sfv_len >= (va_size >> 1) || 2499*0Sstevel@tonic-gate (sfv->sfv_flag & SFV_NOWAIT) || sfv_len >= 0x1000000) && 2500*0Sstevel@tonic-gate !vn_has_flocks(fvp)) { 2501*0Sstevel@tonic-gate if ((stp->sd_copyflag & (STZCVMSAFE|STZCVMUNSAFE)) == 0) { 2502*0Sstevel@tonic-gate int on = 1; 2503*0Sstevel@tonic-gate 2504*0Sstevel@tonic-gate if (SOP_SETSOCKOPT(VTOSO(vp), SOL_SOCKET, 2505*0Sstevel@tonic-gate SO_SND_COPYAVOID, &on, sizeof (on)) == 0) 2506*0Sstevel@tonic-gate dozcopy = B_TRUE; 2507*0Sstevel@tonic-gate } else { 2508*0Sstevel@tonic-gate dozcopy = (stp->sd_copyflag & STZCVMSAFE); 2509*0Sstevel@tonic-gate } 2510*0Sstevel@tonic-gate } 2511*0Sstevel@tonic-gate if (dozcopy) { 2512*0Sstevel@tonic-gate sf_stats.ss_file_segmap++; 2513*0Sstevel@tonic-gate error = snf_segmap(fp, fvp, sfv_off, (u_offset_t)sfv_len, 2514*0Sstevel@tonic-gate maxpsz, &count, ((sfv->sfv_flag & SFV_NOWAIT) != 0)); 2515*0Sstevel@tonic-gate } else { 2516*0Sstevel@tonic-gate sf_stats.ss_file_cached++; 2517*0Sstevel@tonic-gate error = snf_cache(fp, fvp, sfv_off, (u_offset_t)sfv_len, 2518*0Sstevel@tonic-gate maxpsz, &count); 2519*0Sstevel@tonic-gate } 2520*0Sstevel@tonic-gate out: 2521*0Sstevel@tonic-gate releasef(sfv->sfv_fd); 2522*0Sstevel@tonic-gate *count32 = (ssize32_t)count; 2523*0Sstevel@tonic-gate return (error); 2524*0Sstevel@tonic-gate } 2525*0Sstevel@tonic-gate #endif 2526*0Sstevel@tonic-gate 2527*0Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 2528*0Sstevel@tonic-gate /* 2529*0Sstevel@tonic-gate * recv32(), recvfrom32(), send32(), sendto32(): intentionally return a 2530*0Sstevel@tonic-gate * ssize_t rather than ssize32_t; see the comments above read32 for details. 
2531*0Sstevel@tonic-gate */ 2532*0Sstevel@tonic-gate 2533*0Sstevel@tonic-gate ssize_t 2534*0Sstevel@tonic-gate recv32(int32_t sock, caddr32_t buffer, size32_t len, int32_t flags) 2535*0Sstevel@tonic-gate { 2536*0Sstevel@tonic-gate return (recv(sock, (void *)(uintptr_t)buffer, (ssize32_t)len, flags)); 2537*0Sstevel@tonic-gate } 2538*0Sstevel@tonic-gate 2539*0Sstevel@tonic-gate ssize_t 2540*0Sstevel@tonic-gate recvfrom32(int32_t sock, caddr32_t buffer, size32_t len, int32_t flags, 2541*0Sstevel@tonic-gate caddr32_t name, caddr32_t namelenp) 2542*0Sstevel@tonic-gate { 2543*0Sstevel@tonic-gate return (recvfrom(sock, (void *)(uintptr_t)buffer, (ssize32_t)len, flags, 2544*0Sstevel@tonic-gate (void *)(uintptr_t)name, (void *)(uintptr_t)namelenp)); 2545*0Sstevel@tonic-gate } 2546*0Sstevel@tonic-gate 2547*0Sstevel@tonic-gate ssize_t 2548*0Sstevel@tonic-gate send32(int32_t sock, caddr32_t buffer, size32_t len, int32_t flags) 2549*0Sstevel@tonic-gate { 2550*0Sstevel@tonic-gate return (send(sock, (void *)(uintptr_t)buffer, (ssize32_t)len, flags)); 2551*0Sstevel@tonic-gate } 2552*0Sstevel@tonic-gate 2553*0Sstevel@tonic-gate ssize_t 2554*0Sstevel@tonic-gate sendto32(int32_t sock, caddr32_t buffer, size32_t len, int32_t flags, 2555*0Sstevel@tonic-gate caddr32_t name, socklen_t namelen) 2556*0Sstevel@tonic-gate { 2557*0Sstevel@tonic-gate return (sendto(sock, (void *)(uintptr_t)buffer, (ssize32_t)len, flags, 2558*0Sstevel@tonic-gate (void *)(uintptr_t)name, namelen)); 2559*0Sstevel@tonic-gate } 2560*0Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */ 2561*0Sstevel@tonic-gate 2562*0Sstevel@tonic-gate /* 2563*0Sstevel@tonic-gate * Function wrappers (mostly arround the sonode switch) for 2564*0Sstevel@tonic-gate * backward compatibility. 
2565*0Sstevel@tonic-gate */ 2566*0Sstevel@tonic-gate 2567*0Sstevel@tonic-gate int 2568*0Sstevel@tonic-gate soaccept(struct sonode *so, int fflag, struct sonode **nsop) 2569*0Sstevel@tonic-gate { 2570*0Sstevel@tonic-gate return (SOP_ACCEPT(so, fflag, nsop)); 2571*0Sstevel@tonic-gate } 2572*0Sstevel@tonic-gate 2573*0Sstevel@tonic-gate int 2574*0Sstevel@tonic-gate sobind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 2575*0Sstevel@tonic-gate int backlog, int flags) 2576*0Sstevel@tonic-gate { 2577*0Sstevel@tonic-gate int error; 2578*0Sstevel@tonic-gate 2579*0Sstevel@tonic-gate error = SOP_BIND(so, name, namelen, flags); 2580*0Sstevel@tonic-gate if (error == 0 && backlog != 0) 2581*0Sstevel@tonic-gate return (SOP_LISTEN(so, backlog)); 2582*0Sstevel@tonic-gate 2583*0Sstevel@tonic-gate return (error); 2584*0Sstevel@tonic-gate } 2585*0Sstevel@tonic-gate 2586*0Sstevel@tonic-gate int 2587*0Sstevel@tonic-gate solisten(struct sonode *so, int backlog) 2588*0Sstevel@tonic-gate { 2589*0Sstevel@tonic-gate return (SOP_LISTEN(so, backlog)); 2590*0Sstevel@tonic-gate } 2591*0Sstevel@tonic-gate 2592*0Sstevel@tonic-gate int 2593*0Sstevel@tonic-gate soconnect(struct sonode *so, const struct sockaddr *name, socklen_t namelen, 2594*0Sstevel@tonic-gate int fflag, int flags) 2595*0Sstevel@tonic-gate { 2596*0Sstevel@tonic-gate return (SOP_CONNECT(so, name, namelen, fflag, flags)); 2597*0Sstevel@tonic-gate } 2598*0Sstevel@tonic-gate 2599*0Sstevel@tonic-gate int 2600*0Sstevel@tonic-gate sorecvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop) 2601*0Sstevel@tonic-gate { 2602*0Sstevel@tonic-gate return (SOP_RECVMSG(so, msg, uiop)); 2603*0Sstevel@tonic-gate } 2604*0Sstevel@tonic-gate 2605*0Sstevel@tonic-gate int 2606*0Sstevel@tonic-gate sosendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop) 2607*0Sstevel@tonic-gate { 2608*0Sstevel@tonic-gate return (SOP_SENDMSG(so, msg, uiop)); 2609*0Sstevel@tonic-gate } 2610*0Sstevel@tonic-gate 2611*0Sstevel@tonic-gate int 
2612*0Sstevel@tonic-gate sogetpeername(struct sonode *so) 2613*0Sstevel@tonic-gate { 2614*0Sstevel@tonic-gate return (SOP_GETPEERNAME(so)); 2615*0Sstevel@tonic-gate } 2616*0Sstevel@tonic-gate 2617*0Sstevel@tonic-gate int 2618*0Sstevel@tonic-gate sogetsockname(struct sonode *so) 2619*0Sstevel@tonic-gate { 2620*0Sstevel@tonic-gate return (SOP_GETSOCKNAME(so)); 2621*0Sstevel@tonic-gate } 2622*0Sstevel@tonic-gate 2623*0Sstevel@tonic-gate int 2624*0Sstevel@tonic-gate soshutdown(struct sonode *so, int how) 2625*0Sstevel@tonic-gate { 2626*0Sstevel@tonic-gate return (SOP_SHUTDOWN(so, how)); 2627*0Sstevel@tonic-gate } 2628*0Sstevel@tonic-gate 2629*0Sstevel@tonic-gate int 2630*0Sstevel@tonic-gate sogetsockopt(struct sonode *so, int level, int option_name, void *optval, 2631*0Sstevel@tonic-gate socklen_t *optlenp, int flags) 2632*0Sstevel@tonic-gate { 2633*0Sstevel@tonic-gate return (SOP_GETSOCKOPT(so, level, option_name, optval, optlenp, 2634*0Sstevel@tonic-gate flags)); 2635*0Sstevel@tonic-gate } 2636*0Sstevel@tonic-gate 2637*0Sstevel@tonic-gate int 2638*0Sstevel@tonic-gate sosetsockopt(struct sonode *so, int level, int option_name, const void *optval, 2639*0Sstevel@tonic-gate t_uscalar_t optlen) 2640*0Sstevel@tonic-gate { 2641*0Sstevel@tonic-gate return (SOP_SETSOCKOPT(so, level, option_name, optval, optlen)); 2642*0Sstevel@tonic-gate } 2643*0Sstevel@tonic-gate 2644*0Sstevel@tonic-gate /* 2645*0Sstevel@tonic-gate * Because this is backward compatibility interface it only needs to be 2646*0Sstevel@tonic-gate * able to handle the creation of TPI sockfs sockets. 2647*0Sstevel@tonic-gate */ 2648*0Sstevel@tonic-gate struct sonode * 2649*0Sstevel@tonic-gate socreate(vnode_t *accessvp, int domain, int type, int protocol, int version, 2650*0Sstevel@tonic-gate struct sonode *tso, int *errorp) 2651*0Sstevel@tonic-gate { 2652*0Sstevel@tonic-gate return (sotpi_create(accessvp, domain, type, protocol, version, tso, 2653*0Sstevel@tonic-gate errorp)); 2654*0Sstevel@tonic-gate } 2655