/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
25*8348SEric.Yu@Sun.COM */ 26*8348SEric.Yu@Sun.COM 27*8348SEric.Yu@Sun.COM #include <sys/types.h> 28*8348SEric.Yu@Sun.COM #include <sys/param.h> 29*8348SEric.Yu@Sun.COM #include <sys/systm.h> 30*8348SEric.Yu@Sun.COM #include <sys/sysmacros.h> 31*8348SEric.Yu@Sun.COM #include <sys/debug.h> 32*8348SEric.Yu@Sun.COM #include <sys/cmn_err.h> 33*8348SEric.Yu@Sun.COM #include <sys/vfs.h> 34*8348SEric.Yu@Sun.COM #include <sys/policy.h> 35*8348SEric.Yu@Sun.COM #include <sys/modctl.h> 36*8348SEric.Yu@Sun.COM 37*8348SEric.Yu@Sun.COM #include <sys/sunddi.h> 38*8348SEric.Yu@Sun.COM 39*8348SEric.Yu@Sun.COM #include <sys/strsun.h> 40*8348SEric.Yu@Sun.COM #include <sys/stropts.h> 41*8348SEric.Yu@Sun.COM #include <sys/strsubr.h> 42*8348SEric.Yu@Sun.COM #include <sys/socket.h> 43*8348SEric.Yu@Sun.COM #include <sys/socketvar.h> 44*8348SEric.Yu@Sun.COM #include <sys/sodirect.h> 45*8348SEric.Yu@Sun.COM #include <sys/uio.h> 46*8348SEric.Yu@Sun.COM 47*8348SEric.Yu@Sun.COM #include <inet/ipclassifier.h> 48*8348SEric.Yu@Sun.COM #include <fs/sockfs/sockcommon.h> 49*8348SEric.Yu@Sun.COM #include <fs/sockfs/nl7c.h> 50*8348SEric.Yu@Sun.COM #include <inet/ip.h> 51*8348SEric.Yu@Sun.COM 52*8348SEric.Yu@Sun.COM extern int xnet_skip_checks, xnet_check_print, xnet_truncate_print; 53*8348SEric.Yu@Sun.COM 54*8348SEric.Yu@Sun.COM static struct kmem_cache *sock_sod_cache; 55*8348SEric.Yu@Sun.COM 56*8348SEric.Yu@Sun.COM /* 57*8348SEric.Yu@Sun.COM * Common socket access functions. 58*8348SEric.Yu@Sun.COM * 59*8348SEric.Yu@Sun.COM * Instead of accessing the sonode switch directly (i.e., SOP_xxx()), 60*8348SEric.Yu@Sun.COM * the socket_xxx() function should be used. 61*8348SEric.Yu@Sun.COM */ 62*8348SEric.Yu@Sun.COM 63*8348SEric.Yu@Sun.COM /* 64*8348SEric.Yu@Sun.COM * Try to create a new sonode of the requested <family, type, protocol>. 
65*8348SEric.Yu@Sun.COM */ 66*8348SEric.Yu@Sun.COM /* ARGSUSED */ 67*8348SEric.Yu@Sun.COM struct sonode * 68*8348SEric.Yu@Sun.COM socket_create(int family, int type, int protocol, char *devpath, char *mod, 69*8348SEric.Yu@Sun.COM int flags, int version, struct cred *cr, int *errorp) 70*8348SEric.Yu@Sun.COM { 71*8348SEric.Yu@Sun.COM struct sonode *so; 72*8348SEric.Yu@Sun.COM struct sockparams *sp = NULL; 73*8348SEric.Yu@Sun.COM 74*8348SEric.Yu@Sun.COM /* 75*8348SEric.Yu@Sun.COM * Look for a sockparams entry that match the given criteria. 76*8348SEric.Yu@Sun.COM * solookup() returns with the entry held. 77*8348SEric.Yu@Sun.COM */ 78*8348SEric.Yu@Sun.COM *errorp = solookup(family, type, protocol, &sp); 79*8348SEric.Yu@Sun.COM if (sp == NULL) { 80*8348SEric.Yu@Sun.COM int kmflags = (flags == SOCKET_SLEEP) ? KM_SLEEP : KM_NOSLEEP; 81*8348SEric.Yu@Sun.COM /* 82*8348SEric.Yu@Sun.COM * There is no matching sockparams entry. An ephemeral entry is 83*8348SEric.Yu@Sun.COM * created if the caller specifies a device or a socket module. 
84*8348SEric.Yu@Sun.COM */ 85*8348SEric.Yu@Sun.COM if (devpath != NULL) { 86*8348SEric.Yu@Sun.COM sp = sockparams_hold_ephemeral_bydev(family, type, 87*8348SEric.Yu@Sun.COM protocol, devpath, kmflags, errorp); 88*8348SEric.Yu@Sun.COM } else if (mod != NULL) { 89*8348SEric.Yu@Sun.COM sp = sockparams_hold_ephemeral_bymod(family, type, 90*8348SEric.Yu@Sun.COM protocol, mod, kmflags, errorp); 91*8348SEric.Yu@Sun.COM } else { 92*8348SEric.Yu@Sun.COM return (NULL); 93*8348SEric.Yu@Sun.COM } 94*8348SEric.Yu@Sun.COM 95*8348SEric.Yu@Sun.COM if (sp == NULL) 96*8348SEric.Yu@Sun.COM return (NULL); 97*8348SEric.Yu@Sun.COM } 98*8348SEric.Yu@Sun.COM 99*8348SEric.Yu@Sun.COM ASSERT(sp->sp_smod_info != NULL); 100*8348SEric.Yu@Sun.COM ASSERT(flags == SOCKET_SLEEP || flags == SOCKET_NOSLEEP); 101*8348SEric.Yu@Sun.COM so = sp->sp_smod_info->smod_sock_create_func(sp, family, type, 102*8348SEric.Yu@Sun.COM protocol, version, flags, errorp, cr); 103*8348SEric.Yu@Sun.COM if (so == NULL) { 104*8348SEric.Yu@Sun.COM SOCKPARAMS_DEC_REF(sp); 105*8348SEric.Yu@Sun.COM } else { 106*8348SEric.Yu@Sun.COM if ((*errorp = SOP_INIT(so, NULL, cr, flags)) == 0) { 107*8348SEric.Yu@Sun.COM /* Cannot fail, only bumps so_count */ 108*8348SEric.Yu@Sun.COM (void) VOP_OPEN(&SOTOV(so), FREAD|FWRITE, cr, NULL); 109*8348SEric.Yu@Sun.COM } else { 110*8348SEric.Yu@Sun.COM socket_destroy(so); 111*8348SEric.Yu@Sun.COM so = NULL; 112*8348SEric.Yu@Sun.COM } 113*8348SEric.Yu@Sun.COM } 114*8348SEric.Yu@Sun.COM return (so); 115*8348SEric.Yu@Sun.COM } 116*8348SEric.Yu@Sun.COM 117*8348SEric.Yu@Sun.COM struct sonode * 118*8348SEric.Yu@Sun.COM socket_newconn(struct sonode *parent, sock_lower_handle_t lh, 119*8348SEric.Yu@Sun.COM sock_downcalls_t *dc, int flags, int *errorp) 120*8348SEric.Yu@Sun.COM { 121*8348SEric.Yu@Sun.COM struct sonode *so; 122*8348SEric.Yu@Sun.COM struct sockparams *sp; 123*8348SEric.Yu@Sun.COM struct cred *cr; 124*8348SEric.Yu@Sun.COM 125*8348SEric.Yu@Sun.COM if ((cr = CRED()) == NULL) 
126*8348SEric.Yu@Sun.COM cr = kcred; 127*8348SEric.Yu@Sun.COM 128*8348SEric.Yu@Sun.COM sp = parent->so_sockparams; 129*8348SEric.Yu@Sun.COM ASSERT(sp != NULL); 130*8348SEric.Yu@Sun.COM 131*8348SEric.Yu@Sun.COM so = sp->sp_smod_info->smod_sock_create_func(sp, parent->so_family, 132*8348SEric.Yu@Sun.COM parent->so_type, parent->so_protocol, parent->so_version, flags, 133*8348SEric.Yu@Sun.COM errorp, cr); 134*8348SEric.Yu@Sun.COM if (so != NULL) { 135*8348SEric.Yu@Sun.COM SOCKPARAMS_INC_REF(sp); 136*8348SEric.Yu@Sun.COM 137*8348SEric.Yu@Sun.COM so->so_proto_handle = lh; 138*8348SEric.Yu@Sun.COM so->so_downcalls = dc; 139*8348SEric.Yu@Sun.COM /* 140*8348SEric.Yu@Sun.COM * This function may be called in interrupt context, and CRED() 141*8348SEric.Yu@Sun.COM * will be NULL. In this case, pass in kcred. 142*8348SEric.Yu@Sun.COM */ 143*8348SEric.Yu@Sun.COM if ((*errorp = SOP_INIT(so, parent, cr, flags)) == 0) { 144*8348SEric.Yu@Sun.COM /* Cannot fail, only bumps so_count */ 145*8348SEric.Yu@Sun.COM (void) VOP_OPEN(&SOTOV(so), FREAD|FWRITE, cr, NULL); 146*8348SEric.Yu@Sun.COM } else { 147*8348SEric.Yu@Sun.COM socket_destroy(so); 148*8348SEric.Yu@Sun.COM so = NULL; 149*8348SEric.Yu@Sun.COM } 150*8348SEric.Yu@Sun.COM } 151*8348SEric.Yu@Sun.COM 152*8348SEric.Yu@Sun.COM return (so); 153*8348SEric.Yu@Sun.COM } 154*8348SEric.Yu@Sun.COM 155*8348SEric.Yu@Sun.COM /* 156*8348SEric.Yu@Sun.COM * Bind local endpoint. 157*8348SEric.Yu@Sun.COM */ 158*8348SEric.Yu@Sun.COM int 159*8348SEric.Yu@Sun.COM socket_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 160*8348SEric.Yu@Sun.COM int flags, cred_t *cr) 161*8348SEric.Yu@Sun.COM { 162*8348SEric.Yu@Sun.COM return (SOP_BIND(so, name, namelen, flags, cr)); 163*8348SEric.Yu@Sun.COM } 164*8348SEric.Yu@Sun.COM 165*8348SEric.Yu@Sun.COM /* 166*8348SEric.Yu@Sun.COM * Turn socket into a listen socket. 
167*8348SEric.Yu@Sun.COM */ 168*8348SEric.Yu@Sun.COM int 169*8348SEric.Yu@Sun.COM socket_listen(struct sonode *so, int backlog, cred_t *cr) 170*8348SEric.Yu@Sun.COM { 171*8348SEric.Yu@Sun.COM if (backlog < 0) { 172*8348SEric.Yu@Sun.COM backlog = 0; 173*8348SEric.Yu@Sun.COM } 174*8348SEric.Yu@Sun.COM 175*8348SEric.Yu@Sun.COM /* 176*8348SEric.Yu@Sun.COM * Use the same qlimit as in BSD. BSD checks the qlimit 177*8348SEric.Yu@Sun.COM * before queuing the next connection implying that a 178*8348SEric.Yu@Sun.COM * listen(sock, 0) allows one connection to be queued. 179*8348SEric.Yu@Sun.COM * BSD also uses 1.5 times the requested backlog. 180*8348SEric.Yu@Sun.COM * 181*8348SEric.Yu@Sun.COM * XNS Issue 4 required a strict interpretation of the backlog. 182*8348SEric.Yu@Sun.COM * This has been waived subsequently for Issue 4 and the change 183*8348SEric.Yu@Sun.COM * incorporated in XNS Issue 5. So we aren't required to do 184*8348SEric.Yu@Sun.COM * anything special for XPG apps. 185*8348SEric.Yu@Sun.COM */ 186*8348SEric.Yu@Sun.COM if (backlog >= (INT_MAX - 1) / 3) 187*8348SEric.Yu@Sun.COM backlog = INT_MAX; 188*8348SEric.Yu@Sun.COM else 189*8348SEric.Yu@Sun.COM backlog = backlog * 3 / 2 + 1; 190*8348SEric.Yu@Sun.COM 191*8348SEric.Yu@Sun.COM return (SOP_LISTEN(so, backlog, cr)); 192*8348SEric.Yu@Sun.COM } 193*8348SEric.Yu@Sun.COM 194*8348SEric.Yu@Sun.COM /* 195*8348SEric.Yu@Sun.COM * Accept incoming connection. 196*8348SEric.Yu@Sun.COM */ 197*8348SEric.Yu@Sun.COM int 198*8348SEric.Yu@Sun.COM socket_accept(struct sonode *lso, int fflag, cred_t *cr, struct sonode **nsop) 199*8348SEric.Yu@Sun.COM { 200*8348SEric.Yu@Sun.COM return (SOP_ACCEPT(lso, fflag, cr, nsop)); 201*8348SEric.Yu@Sun.COM } 202*8348SEric.Yu@Sun.COM 203*8348SEric.Yu@Sun.COM /* 204*8348SEric.Yu@Sun.COM * Active open. 
205*8348SEric.Yu@Sun.COM */ 206*8348SEric.Yu@Sun.COM int 207*8348SEric.Yu@Sun.COM socket_connect(struct sonode *so, const struct sockaddr *name, 208*8348SEric.Yu@Sun.COM socklen_t namelen, int fflag, int flags, cred_t *cr) 209*8348SEric.Yu@Sun.COM { 210*8348SEric.Yu@Sun.COM int error; 211*8348SEric.Yu@Sun.COM 212*8348SEric.Yu@Sun.COM /* 213*8348SEric.Yu@Sun.COM * Handle a connect to a name parameter of type AF_UNSPEC like a 214*8348SEric.Yu@Sun.COM * connect to a null address. This is the portable method to 215*8348SEric.Yu@Sun.COM * unconnect a socket. 216*8348SEric.Yu@Sun.COM */ 217*8348SEric.Yu@Sun.COM if ((namelen >= sizeof (sa_family_t)) && 218*8348SEric.Yu@Sun.COM (name->sa_family == AF_UNSPEC)) { 219*8348SEric.Yu@Sun.COM name = NULL; 220*8348SEric.Yu@Sun.COM namelen = 0; 221*8348SEric.Yu@Sun.COM } 222*8348SEric.Yu@Sun.COM 223*8348SEric.Yu@Sun.COM error = SOP_CONNECT(so, name, namelen, fflag, flags, cr); 224*8348SEric.Yu@Sun.COM 225*8348SEric.Yu@Sun.COM if (error == EHOSTUNREACH && flags & _SOCONNECT_XPG4_2) { 226*8348SEric.Yu@Sun.COM /* 227*8348SEric.Yu@Sun.COM * X/Open specification contains a requirement that 228*8348SEric.Yu@Sun.COM * ENETUNREACH be returned but does not require 229*8348SEric.Yu@Sun.COM * EHOSTUNREACH. In order to keep the test suite 230*8348SEric.Yu@Sun.COM * happy we mess with the errno here. 231*8348SEric.Yu@Sun.COM */ 232*8348SEric.Yu@Sun.COM error = ENETUNREACH; 233*8348SEric.Yu@Sun.COM } 234*8348SEric.Yu@Sun.COM 235*8348SEric.Yu@Sun.COM return (error); 236*8348SEric.Yu@Sun.COM } 237*8348SEric.Yu@Sun.COM 238*8348SEric.Yu@Sun.COM /* 239*8348SEric.Yu@Sun.COM * Get address of remote node. 
240*8348SEric.Yu@Sun.COM */ 241*8348SEric.Yu@Sun.COM int 242*8348SEric.Yu@Sun.COM socket_getpeername(struct sonode *so, struct sockaddr *addr, 243*8348SEric.Yu@Sun.COM socklen_t *addrlen, boolean_t accept, cred_t *cr) 244*8348SEric.Yu@Sun.COM { 245*8348SEric.Yu@Sun.COM ASSERT(*addrlen > 0); 246*8348SEric.Yu@Sun.COM return (SOP_GETPEERNAME(so, addr, addrlen, accept, cr)); 247*8348SEric.Yu@Sun.COM 248*8348SEric.Yu@Sun.COM } 249*8348SEric.Yu@Sun.COM 250*8348SEric.Yu@Sun.COM /* 251*8348SEric.Yu@Sun.COM * Get local address. 252*8348SEric.Yu@Sun.COM */ 253*8348SEric.Yu@Sun.COM int 254*8348SEric.Yu@Sun.COM socket_getsockname(struct sonode *so, struct sockaddr *addr, 255*8348SEric.Yu@Sun.COM socklen_t *addrlen, cred_t *cr) 256*8348SEric.Yu@Sun.COM { 257*8348SEric.Yu@Sun.COM return (SOP_GETSOCKNAME(so, addr, addrlen, cr)); 258*8348SEric.Yu@Sun.COM 259*8348SEric.Yu@Sun.COM } 260*8348SEric.Yu@Sun.COM 261*8348SEric.Yu@Sun.COM /* 262*8348SEric.Yu@Sun.COM * Called from shutdown(). 263*8348SEric.Yu@Sun.COM */ 264*8348SEric.Yu@Sun.COM int 265*8348SEric.Yu@Sun.COM socket_shutdown(struct sonode *so, int how, cred_t *cr) 266*8348SEric.Yu@Sun.COM { 267*8348SEric.Yu@Sun.COM return (SOP_SHUTDOWN(so, how, cr)); 268*8348SEric.Yu@Sun.COM } 269*8348SEric.Yu@Sun.COM 270*8348SEric.Yu@Sun.COM /* 271*8348SEric.Yu@Sun.COM * Get socket options. 
272*8348SEric.Yu@Sun.COM */ 273*8348SEric.Yu@Sun.COM /*ARGSUSED*/ 274*8348SEric.Yu@Sun.COM int 275*8348SEric.Yu@Sun.COM socket_getsockopt(struct sonode *so, int level, int option_name, 276*8348SEric.Yu@Sun.COM void *optval, socklen_t *optlenp, int flags, cred_t *cr) 277*8348SEric.Yu@Sun.COM { 278*8348SEric.Yu@Sun.COM return (SOP_GETSOCKOPT(so, level, option_name, optval, 279*8348SEric.Yu@Sun.COM optlenp, flags, cr)); 280*8348SEric.Yu@Sun.COM } 281*8348SEric.Yu@Sun.COM 282*8348SEric.Yu@Sun.COM /* 283*8348SEric.Yu@Sun.COM * Set socket options 284*8348SEric.Yu@Sun.COM */ 285*8348SEric.Yu@Sun.COM int 286*8348SEric.Yu@Sun.COM socket_setsockopt(struct sonode *so, int level, int option_name, 287*8348SEric.Yu@Sun.COM const void *optval, t_uscalar_t optlen, cred_t *cr) 288*8348SEric.Yu@Sun.COM { 289*8348SEric.Yu@Sun.COM /* Caller allocates aligned optval, or passes null */ 290*8348SEric.Yu@Sun.COM ASSERT(((uintptr_t)optval & (sizeof (t_scalar_t) - 1)) == 0); 291*8348SEric.Yu@Sun.COM /* If optval is null optlen is 0, and vice-versa */ 292*8348SEric.Yu@Sun.COM ASSERT(optval != NULL || optlen == 0); 293*8348SEric.Yu@Sun.COM ASSERT(optlen != 0 || optval == NULL); 294*8348SEric.Yu@Sun.COM 295*8348SEric.Yu@Sun.COM /* No options should be zero-length */ 296*8348SEric.Yu@Sun.COM if (optlen == 0) 297*8348SEric.Yu@Sun.COM return (EINVAL); 298*8348SEric.Yu@Sun.COM 299*8348SEric.Yu@Sun.COM return (SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr)); 300*8348SEric.Yu@Sun.COM } 301*8348SEric.Yu@Sun.COM 302*8348SEric.Yu@Sun.COM int 303*8348SEric.Yu@Sun.COM socket_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 304*8348SEric.Yu@Sun.COM cred_t *cr) 305*8348SEric.Yu@Sun.COM { 306*8348SEric.Yu@Sun.COM int error = 0; 307*8348SEric.Yu@Sun.COM ssize_t orig_resid = uiop->uio_resid; 308*8348SEric.Yu@Sun.COM 309*8348SEric.Yu@Sun.COM /* 310*8348SEric.Yu@Sun.COM * Do not bypass the cache if we are doing a local (AF_UNIX) write. 
311*8348SEric.Yu@Sun.COM */ 312*8348SEric.Yu@Sun.COM if (so->so_family == AF_UNIX) 313*8348SEric.Yu@Sun.COM uiop->uio_extflg |= UIO_COPY_CACHED; 314*8348SEric.Yu@Sun.COM else 315*8348SEric.Yu@Sun.COM uiop->uio_extflg &= ~UIO_COPY_CACHED; 316*8348SEric.Yu@Sun.COM 317*8348SEric.Yu@Sun.COM error = SOP_SENDMSG(so, msg, uiop, cr); 318*8348SEric.Yu@Sun.COM switch (error) { 319*8348SEric.Yu@Sun.COM default: 320*8348SEric.Yu@Sun.COM break; 321*8348SEric.Yu@Sun.COM case EINTR: 322*8348SEric.Yu@Sun.COM case ETIME: 323*8348SEric.Yu@Sun.COM case EWOULDBLOCK: 324*8348SEric.Yu@Sun.COM /* We did a partial send */ 325*8348SEric.Yu@Sun.COM if (uiop->uio_resid != orig_resid) 326*8348SEric.Yu@Sun.COM error = 0; 327*8348SEric.Yu@Sun.COM break; 328*8348SEric.Yu@Sun.COM case EPIPE: 329*8348SEric.Yu@Sun.COM if ((so->so_mode & SM_KERNEL) == 0) 330*8348SEric.Yu@Sun.COM tsignal(curthread, SIGPIPE); 331*8348SEric.Yu@Sun.COM break; 332*8348SEric.Yu@Sun.COM } 333*8348SEric.Yu@Sun.COM 334*8348SEric.Yu@Sun.COM return (error); 335*8348SEric.Yu@Sun.COM } 336*8348SEric.Yu@Sun.COM 337*8348SEric.Yu@Sun.COM int 338*8348SEric.Yu@Sun.COM socket_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag, 339*8348SEric.Yu@Sun.COM struct cred *cr, mblk_t **mpp) 340*8348SEric.Yu@Sun.COM { 341*8348SEric.Yu@Sun.COM int error = 0; 342*8348SEric.Yu@Sun.COM 343*8348SEric.Yu@Sun.COM error = SOP_SENDMBLK(so, msg, fflag, cr, mpp); 344*8348SEric.Yu@Sun.COM if (error == EPIPE) { 345*8348SEric.Yu@Sun.COM tsignal(curthread, SIGPIPE); 346*8348SEric.Yu@Sun.COM } 347*8348SEric.Yu@Sun.COM return (error); 348*8348SEric.Yu@Sun.COM } 349*8348SEric.Yu@Sun.COM 350*8348SEric.Yu@Sun.COM int 351*8348SEric.Yu@Sun.COM socket_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 352*8348SEric.Yu@Sun.COM cred_t *cr) 353*8348SEric.Yu@Sun.COM { 354*8348SEric.Yu@Sun.COM int error; 355*8348SEric.Yu@Sun.COM ssize_t orig_resid = uiop->uio_resid; 356*8348SEric.Yu@Sun.COM 357*8348SEric.Yu@Sun.COM /* 358*8348SEric.Yu@Sun.COM * Do 
not bypass the cache when reading data, as the application 359*8348SEric.Yu@Sun.COM * is likely to access the data shortly. 360*8348SEric.Yu@Sun.COM */ 361*8348SEric.Yu@Sun.COM uiop->uio_extflg |= UIO_COPY_CACHED; 362*8348SEric.Yu@Sun.COM 363*8348SEric.Yu@Sun.COM error = SOP_RECVMSG(so, msg, uiop, cr); 364*8348SEric.Yu@Sun.COM 365*8348SEric.Yu@Sun.COM switch (error) { 366*8348SEric.Yu@Sun.COM case EINTR: 367*8348SEric.Yu@Sun.COM case ETIME: 368*8348SEric.Yu@Sun.COM case EWOULDBLOCK: 369*8348SEric.Yu@Sun.COM /* We did a partial read */ 370*8348SEric.Yu@Sun.COM if (uiop->uio_resid != orig_resid) 371*8348SEric.Yu@Sun.COM error = 0; 372*8348SEric.Yu@Sun.COM break; 373*8348SEric.Yu@Sun.COM default: 374*8348SEric.Yu@Sun.COM break; 375*8348SEric.Yu@Sun.COM } 376*8348SEric.Yu@Sun.COM return (error); 377*8348SEric.Yu@Sun.COM } 378*8348SEric.Yu@Sun.COM 379*8348SEric.Yu@Sun.COM int 380*8348SEric.Yu@Sun.COM socket_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode, 381*8348SEric.Yu@Sun.COM struct cred *cr, int32_t *rvalp) 382*8348SEric.Yu@Sun.COM { 383*8348SEric.Yu@Sun.COM return (SOP_IOCTL(so, cmd, arg, mode, cr, rvalp)); 384*8348SEric.Yu@Sun.COM } 385*8348SEric.Yu@Sun.COM 386*8348SEric.Yu@Sun.COM int 387*8348SEric.Yu@Sun.COM socket_poll(struct sonode *so, short events, int anyyet, short *reventsp, 388*8348SEric.Yu@Sun.COM struct pollhead **phpp) 389*8348SEric.Yu@Sun.COM { 390*8348SEric.Yu@Sun.COM return (SOP_POLL(so, events, anyyet, reventsp, phpp)); 391*8348SEric.Yu@Sun.COM } 392*8348SEric.Yu@Sun.COM 393*8348SEric.Yu@Sun.COM int 394*8348SEric.Yu@Sun.COM socket_close(struct sonode *so, int flag, struct cred *cr) 395*8348SEric.Yu@Sun.COM { 396*8348SEric.Yu@Sun.COM return (VOP_CLOSE(SOTOV(so), flag, 1, 0, cr, NULL)); 397*8348SEric.Yu@Sun.COM } 398*8348SEric.Yu@Sun.COM 399*8348SEric.Yu@Sun.COM int 400*8348SEric.Yu@Sun.COM socket_close_internal(struct sonode *so, int flag, cred_t *cr) 401*8348SEric.Yu@Sun.COM { 402*8348SEric.Yu@Sun.COM ASSERT(so->so_count == 0); 
403*8348SEric.Yu@Sun.COM 404*8348SEric.Yu@Sun.COM return (SOP_CLOSE(so, flag, cr)); 405*8348SEric.Yu@Sun.COM } 406*8348SEric.Yu@Sun.COM 407*8348SEric.Yu@Sun.COM void 408*8348SEric.Yu@Sun.COM socket_destroy(struct sonode *so) 409*8348SEric.Yu@Sun.COM { 410*8348SEric.Yu@Sun.COM vn_invalid(SOTOV(so)); 411*8348SEric.Yu@Sun.COM VN_RELE(SOTOV(so)); 412*8348SEric.Yu@Sun.COM } 413*8348SEric.Yu@Sun.COM 414*8348SEric.Yu@Sun.COM /* ARGSUSED */ 415*8348SEric.Yu@Sun.COM void 416*8348SEric.Yu@Sun.COM socket_destroy_internal(struct sonode *so, cred_t *cr) 417*8348SEric.Yu@Sun.COM { 418*8348SEric.Yu@Sun.COM struct sockparams *sp = so->so_sockparams; 419*8348SEric.Yu@Sun.COM ASSERT(so->so_count == 0 && sp != NULL); 420*8348SEric.Yu@Sun.COM 421*8348SEric.Yu@Sun.COM sp->sp_smod_info->smod_sock_destroy_func(so); 422*8348SEric.Yu@Sun.COM 423*8348SEric.Yu@Sun.COM SOCKPARAMS_DEC_REF(sp); 424*8348SEric.Yu@Sun.COM } 425*8348SEric.Yu@Sun.COM 426*8348SEric.Yu@Sun.COM /* 427*8348SEric.Yu@Sun.COM * TODO Once the common vnode ops is available, then the vnops argument 428*8348SEric.Yu@Sun.COM * should be removed. 
429*8348SEric.Yu@Sun.COM */ 430*8348SEric.Yu@Sun.COM /*ARGSUSED*/ 431*8348SEric.Yu@Sun.COM int 432*8348SEric.Yu@Sun.COM sonode_constructor(void *buf, void *cdrarg, int kmflags) 433*8348SEric.Yu@Sun.COM { 434*8348SEric.Yu@Sun.COM struct sonode *so = buf; 435*8348SEric.Yu@Sun.COM struct vnode *vp; 436*8348SEric.Yu@Sun.COM 437*8348SEric.Yu@Sun.COM vp = so->so_vnode = vn_alloc(kmflags); 438*8348SEric.Yu@Sun.COM if (vp == NULL) { 439*8348SEric.Yu@Sun.COM return (-1); 440*8348SEric.Yu@Sun.COM } 441*8348SEric.Yu@Sun.COM vp->v_data = so; 442*8348SEric.Yu@Sun.COM vn_setops(vp, socket_vnodeops); 443*8348SEric.Yu@Sun.COM 444*8348SEric.Yu@Sun.COM so->so_priv = NULL; 445*8348SEric.Yu@Sun.COM so->so_oobmsg = NULL; 446*8348SEric.Yu@Sun.COM 447*8348SEric.Yu@Sun.COM so->so_proto_handle = NULL; 448*8348SEric.Yu@Sun.COM 449*8348SEric.Yu@Sun.COM so->so_peercred = NULL; 450*8348SEric.Yu@Sun.COM 451*8348SEric.Yu@Sun.COM so->so_rcv_queued = 0; 452*8348SEric.Yu@Sun.COM so->so_rcv_q_head = NULL; 453*8348SEric.Yu@Sun.COM so->so_rcv_q_last_head = NULL; 454*8348SEric.Yu@Sun.COM so->so_rcv_head = NULL; 455*8348SEric.Yu@Sun.COM so->so_rcv_last_head = NULL; 456*8348SEric.Yu@Sun.COM so->so_rcv_wanted = 0; 457*8348SEric.Yu@Sun.COM so->so_rcv_timer_interval = SOCKET_NO_RCVTIMER; 458*8348SEric.Yu@Sun.COM so->so_rcv_timer_tid = 0; 459*8348SEric.Yu@Sun.COM so->so_rcv_thresh = 0; 460*8348SEric.Yu@Sun.COM 461*8348SEric.Yu@Sun.COM so->so_acceptq_head = NULL; 462*8348SEric.Yu@Sun.COM so->so_acceptq_tail = &so->so_acceptq_head; 463*8348SEric.Yu@Sun.COM so->so_acceptq_next = NULL; 464*8348SEric.Yu@Sun.COM so->so_acceptq_len = 0; 465*8348SEric.Yu@Sun.COM so->so_backlog = 0; 466*8348SEric.Yu@Sun.COM 467*8348SEric.Yu@Sun.COM so->so_snd_qfull = B_FALSE; 468*8348SEric.Yu@Sun.COM 469*8348SEric.Yu@Sun.COM mutex_init(&so->so_lock, NULL, MUTEX_DEFAULT, NULL); 470*8348SEric.Yu@Sun.COM mutex_init(&so->so_acceptq_lock, NULL, MUTEX_DEFAULT, NULL); 471*8348SEric.Yu@Sun.COM rw_init(&so->so_fallback_rwlock, NULL, 
RW_DEFAULT, NULL); 472*8348SEric.Yu@Sun.COM cv_init(&so->so_state_cv, NULL, CV_DEFAULT, NULL); 473*8348SEric.Yu@Sun.COM cv_init(&so->so_want_cv, NULL, CV_DEFAULT, NULL); 474*8348SEric.Yu@Sun.COM 475*8348SEric.Yu@Sun.COM cv_init(&so->so_acceptq_cv, NULL, CV_DEFAULT, NULL); 476*8348SEric.Yu@Sun.COM cv_init(&so->so_snd_cv, NULL, CV_DEFAULT, NULL); 477*8348SEric.Yu@Sun.COM cv_init(&so->so_rcv_cv, NULL, CV_DEFAULT, NULL); 478*8348SEric.Yu@Sun.COM cv_init(&so->so_copy_cv, NULL, CV_DEFAULT, NULL); 479*8348SEric.Yu@Sun.COM cv_init(&so->so_closing_cv, NULL, CV_DEFAULT, NULL); 480*8348SEric.Yu@Sun.COM 481*8348SEric.Yu@Sun.COM return (0); 482*8348SEric.Yu@Sun.COM } 483*8348SEric.Yu@Sun.COM 484*8348SEric.Yu@Sun.COM /*ARGSUSED*/ 485*8348SEric.Yu@Sun.COM void 486*8348SEric.Yu@Sun.COM sonode_destructor(void *buf, void *cdrarg) 487*8348SEric.Yu@Sun.COM { 488*8348SEric.Yu@Sun.COM struct sonode *so = buf; 489*8348SEric.Yu@Sun.COM struct vnode *vp = SOTOV(so); 490*8348SEric.Yu@Sun.COM 491*8348SEric.Yu@Sun.COM ASSERT(so->so_priv == NULL); 492*8348SEric.Yu@Sun.COM ASSERT(so->so_peercred == NULL); 493*8348SEric.Yu@Sun.COM 494*8348SEric.Yu@Sun.COM ASSERT(so->so_oobmsg == NULL); 495*8348SEric.Yu@Sun.COM 496*8348SEric.Yu@Sun.COM ASSERT(so->so_rcv_q_head == NULL); 497*8348SEric.Yu@Sun.COM 498*8348SEric.Yu@Sun.COM ASSERT(so->so_acceptq_head == NULL); 499*8348SEric.Yu@Sun.COM ASSERT(so->so_acceptq_tail == &so->so_acceptq_head); 500*8348SEric.Yu@Sun.COM ASSERT(so->so_acceptq_next == NULL); 501*8348SEric.Yu@Sun.COM 502*8348SEric.Yu@Sun.COM ASSERT(vp->v_data == so); 503*8348SEric.Yu@Sun.COM ASSERT(vn_matchops(vp, socket_vnodeops)); 504*8348SEric.Yu@Sun.COM 505*8348SEric.Yu@Sun.COM vn_free(vp); 506*8348SEric.Yu@Sun.COM 507*8348SEric.Yu@Sun.COM mutex_destroy(&so->so_lock); 508*8348SEric.Yu@Sun.COM mutex_destroy(&so->so_acceptq_lock); 509*8348SEric.Yu@Sun.COM rw_destroy(&so->so_fallback_rwlock); 510*8348SEric.Yu@Sun.COM 511*8348SEric.Yu@Sun.COM cv_destroy(&so->so_state_cv); 512*8348SEric.Yu@Sun.COM 
cv_destroy(&so->so_want_cv); 513*8348SEric.Yu@Sun.COM cv_destroy(&so->so_acceptq_cv); 514*8348SEric.Yu@Sun.COM cv_destroy(&so->so_snd_cv); 515*8348SEric.Yu@Sun.COM cv_destroy(&so->so_rcv_cv); 516*8348SEric.Yu@Sun.COM cv_destroy(&so->so_closing_cv); 517*8348SEric.Yu@Sun.COM } 518*8348SEric.Yu@Sun.COM 519*8348SEric.Yu@Sun.COM void 520*8348SEric.Yu@Sun.COM sonode_init(struct sonode *so, struct sockparams *sp, int family, 521*8348SEric.Yu@Sun.COM int type, int protocol, sonodeops_t *sops) 522*8348SEric.Yu@Sun.COM { 523*8348SEric.Yu@Sun.COM vnode_t *vp; 524*8348SEric.Yu@Sun.COM 525*8348SEric.Yu@Sun.COM vp = SOTOV(so); 526*8348SEric.Yu@Sun.COM 527*8348SEric.Yu@Sun.COM so->so_flag = 0; 528*8348SEric.Yu@Sun.COM 529*8348SEric.Yu@Sun.COM so->so_state = 0; 530*8348SEric.Yu@Sun.COM so->so_mode = 0; 531*8348SEric.Yu@Sun.COM 532*8348SEric.Yu@Sun.COM so->so_count = 0; 533*8348SEric.Yu@Sun.COM 534*8348SEric.Yu@Sun.COM so->so_family = family; 535*8348SEric.Yu@Sun.COM so->so_type = type; 536*8348SEric.Yu@Sun.COM so->so_protocol = protocol; 537*8348SEric.Yu@Sun.COM 538*8348SEric.Yu@Sun.COM SOCK_CONNID_INIT(so->so_proto_connid); 539*8348SEric.Yu@Sun.COM 540*8348SEric.Yu@Sun.COM so->so_options = 0; 541*8348SEric.Yu@Sun.COM so->so_linger.l_onoff = 0; 542*8348SEric.Yu@Sun.COM so->so_linger.l_linger = 0; 543*8348SEric.Yu@Sun.COM so->so_sndbuf = 0; 544*8348SEric.Yu@Sun.COM so->so_error = 0; 545*8348SEric.Yu@Sun.COM so->so_rcvtimeo = 0; 546*8348SEric.Yu@Sun.COM so->so_sndtimeo = 0; 547*8348SEric.Yu@Sun.COM 548*8348SEric.Yu@Sun.COM ASSERT(so->so_oobmsg == NULL); 549*8348SEric.Yu@Sun.COM so->so_oobmark = 0; 550*8348SEric.Yu@Sun.COM so->so_pgrp = 0; 551*8348SEric.Yu@Sun.COM 552*8348SEric.Yu@Sun.COM ASSERT(so->so_peercred == NULL); 553*8348SEric.Yu@Sun.COM 554*8348SEric.Yu@Sun.COM so->so_zoneid = getzoneid(); 555*8348SEric.Yu@Sun.COM 556*8348SEric.Yu@Sun.COM so->so_sockparams = sp; 557*8348SEric.Yu@Sun.COM 558*8348SEric.Yu@Sun.COM so->so_ops = sops; 559*8348SEric.Yu@Sun.COM 
560*8348SEric.Yu@Sun.COM so->so_proto_handle = NULL; 561*8348SEric.Yu@Sun.COM 562*8348SEric.Yu@Sun.COM so->so_downcalls = NULL; 563*8348SEric.Yu@Sun.COM 564*8348SEric.Yu@Sun.COM so->so_copyflag = 0; 565*8348SEric.Yu@Sun.COM 566*8348SEric.Yu@Sun.COM ASSERT(so->so_acceptq_head == NULL); 567*8348SEric.Yu@Sun.COM ASSERT(so->so_acceptq_tail == &so->so_acceptq_head); 568*8348SEric.Yu@Sun.COM ASSERT(so->so_acceptq_next == NULL); 569*8348SEric.Yu@Sun.COM 570*8348SEric.Yu@Sun.COM vn_reinit(vp); 571*8348SEric.Yu@Sun.COM vp->v_vfsp = rootvfs; 572*8348SEric.Yu@Sun.COM vp->v_type = VSOCK; 573*8348SEric.Yu@Sun.COM vp->v_rdev = sockdev; 574*8348SEric.Yu@Sun.COM 575*8348SEric.Yu@Sun.COM so->so_rcv_queued = 0; 576*8348SEric.Yu@Sun.COM so->so_rcv_q_head = NULL; 577*8348SEric.Yu@Sun.COM so->so_rcv_q_last_head = NULL; 578*8348SEric.Yu@Sun.COM so->so_rcv_head = NULL; 579*8348SEric.Yu@Sun.COM so->so_rcv_last_head = NULL; 580*8348SEric.Yu@Sun.COM 581*8348SEric.Yu@Sun.COM so->so_snd_qfull = B_FALSE; 582*8348SEric.Yu@Sun.COM so->so_minpsz = 0; 583*8348SEric.Yu@Sun.COM 584*8348SEric.Yu@Sun.COM so->so_rcv_wakeup = B_FALSE; 585*8348SEric.Yu@Sun.COM so->so_snd_wakeup = B_FALSE; 586*8348SEric.Yu@Sun.COM so->so_flowctrld = B_FALSE; 587*8348SEric.Yu@Sun.COM 588*8348SEric.Yu@Sun.COM so->so_pollev = 0; 589*8348SEric.Yu@Sun.COM bzero(&so->so_poll_list, sizeof (so->so_poll_list)); 590*8348SEric.Yu@Sun.COM bzero(&so->so_proto_props, sizeof (struct sock_proto_props)); 591*8348SEric.Yu@Sun.COM 592*8348SEric.Yu@Sun.COM bzero(&(so->so_ksock_callbacks), sizeof (ksocket_callbacks_t)); 593*8348SEric.Yu@Sun.COM so->so_ksock_cb_arg = NULL; 594*8348SEric.Yu@Sun.COM 595*8348SEric.Yu@Sun.COM so->so_max_addr_len = sizeof (struct sockaddr_storage); 596*8348SEric.Yu@Sun.COM 597*8348SEric.Yu@Sun.COM so->so_direct = NULL; 598*8348SEric.Yu@Sun.COM 599*8348SEric.Yu@Sun.COM vn_exists(vp); 600*8348SEric.Yu@Sun.COM } 601*8348SEric.Yu@Sun.COM 602*8348SEric.Yu@Sun.COM void 603*8348SEric.Yu@Sun.COM sonode_fini(struct sonode 
*so) 604*8348SEric.Yu@Sun.COM { 605*8348SEric.Yu@Sun.COM mblk_t *mp; 606*8348SEric.Yu@Sun.COM vnode_t *vp; 607*8348SEric.Yu@Sun.COM 608*8348SEric.Yu@Sun.COM ASSERT(so->so_count == 0); 609*8348SEric.Yu@Sun.COM 610*8348SEric.Yu@Sun.COM if (so->so_rcv_timer_tid) { 611*8348SEric.Yu@Sun.COM ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 612*8348SEric.Yu@Sun.COM (void) untimeout(so->so_rcv_timer_tid); 613*8348SEric.Yu@Sun.COM so->so_rcv_timer_tid = 0; 614*8348SEric.Yu@Sun.COM } 615*8348SEric.Yu@Sun.COM 616*8348SEric.Yu@Sun.COM so_acceptq_flush(so); 617*8348SEric.Yu@Sun.COM 618*8348SEric.Yu@Sun.COM #ifdef DEBUG 619*8348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 620*8348SEric.Yu@Sun.COM ASSERT(so_verify_oobstate(so)); 621*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 622*8348SEric.Yu@Sun.COM #endif /* DEBUG */ 623*8348SEric.Yu@Sun.COM if ((mp = so->so_oobmsg) != NULL) { 624*8348SEric.Yu@Sun.COM freemsg(mp); 625*8348SEric.Yu@Sun.COM so->so_oobmsg = NULL; 626*8348SEric.Yu@Sun.COM so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA| 627*8348SEric.Yu@Sun.COM SS_RCVATMARK); 628*8348SEric.Yu@Sun.COM } 629*8348SEric.Yu@Sun.COM 630*8348SEric.Yu@Sun.COM if (so->so_poll_list.ph_list != NULL) { 631*8348SEric.Yu@Sun.COM pollwakeup(&so->so_poll_list, POLLERR); 632*8348SEric.Yu@Sun.COM pollhead_clean(&so->so_poll_list); 633*8348SEric.Yu@Sun.COM } 634*8348SEric.Yu@Sun.COM 635*8348SEric.Yu@Sun.COM if (so->so_direct != NULL) { 636*8348SEric.Yu@Sun.COM sodirect_t *sodp = so->so_direct; 637*8348SEric.Yu@Sun.COM 638*8348SEric.Yu@Sun.COM ASSERT(sodp->sod_uioafh == NULL); 639*8348SEric.Yu@Sun.COM 640*8348SEric.Yu@Sun.COM so->so_direct = NULL; 641*8348SEric.Yu@Sun.COM kmem_cache_free(sock_sod_cache, sodp); 642*8348SEric.Yu@Sun.COM } 643*8348SEric.Yu@Sun.COM 644*8348SEric.Yu@Sun.COM vp = SOTOV(so); 645*8348SEric.Yu@Sun.COM vn_invalid(vp); 646*8348SEric.Yu@Sun.COM 647*8348SEric.Yu@Sun.COM if (so->so_peercred != NULL) { 648*8348SEric.Yu@Sun.COM crfree(so->so_peercred); 649*8348SEric.Yu@Sun.COM 
so->so_peercred = NULL; 650*8348SEric.Yu@Sun.COM } 651*8348SEric.Yu@Sun.COM } 652*8348SEric.Yu@Sun.COM 653*8348SEric.Yu@Sun.COM /* 654*8348SEric.Yu@Sun.COM * This function is called at the beginning of recvmsg(). 655*8348SEric.Yu@Sun.COM * 656*8348SEric.Yu@Sun.COM * If I/OAT is enabled on this sonode, initialize the uioa state machine 657*8348SEric.Yu@Sun.COM * with state UIOA_ALLOC. 658*8348SEric.Yu@Sun.COM */ 659*8348SEric.Yu@Sun.COM uio_t * 660*8348SEric.Yu@Sun.COM sod_rcv_init(struct sonode *so, int flags, struct uio **uiopp) 661*8348SEric.Yu@Sun.COM { 662*8348SEric.Yu@Sun.COM struct uio *suiop; 663*8348SEric.Yu@Sun.COM struct uio *uiop; 664*8348SEric.Yu@Sun.COM sodirect_t *sodp = so->so_direct; 665*8348SEric.Yu@Sun.COM 666*8348SEric.Yu@Sun.COM if (sodp == NULL) 667*8348SEric.Yu@Sun.COM return (NULL); 668*8348SEric.Yu@Sun.COM 669*8348SEric.Yu@Sun.COM suiop = NULL; 670*8348SEric.Yu@Sun.COM uiop = *uiopp; 671*8348SEric.Yu@Sun.COM 672*8348SEric.Yu@Sun.COM mutex_enter(sodp->sod_lockp); 673*8348SEric.Yu@Sun.COM if (uiop->uio_resid >= uioasync.mincnt && 674*8348SEric.Yu@Sun.COM sodp != NULL && (sodp->sod_state & SOD_ENABLED) && 675*8348SEric.Yu@Sun.COM uioasync.enabled && !(flags & MSG_PEEK) && 676*8348SEric.Yu@Sun.COM !(so->so_state & SS_CANTRCVMORE)) { 677*8348SEric.Yu@Sun.COM /* 678*8348SEric.Yu@Sun.COM * Big enough I/O for uioa min setup and an sodirect socket 679*8348SEric.Yu@Sun.COM * and sodirect enabled and uioa enabled and I/O will be done 680*8348SEric.Yu@Sun.COM * and not EOF so initialize the sodirect_t uioa_t with "uiop". 681*8348SEric.Yu@Sun.COM */ 682*8348SEric.Yu@Sun.COM if (!uioainit(uiop, &sodp->sod_uioa)) { 683*8348SEric.Yu@Sun.COM /* 684*8348SEric.Yu@Sun.COM * Successful uioainit() so the uio_t part of the 685*8348SEric.Yu@Sun.COM * uioa_t will be used for all uio_t work to follow, 686*8348SEric.Yu@Sun.COM * we return the original "uiop" in "suiop". 
687*8348SEric.Yu@Sun.COM */ 688*8348SEric.Yu@Sun.COM suiop = uiop; 689*8348SEric.Yu@Sun.COM *uiopp = (uio_t *)&sodp->sod_uioa; 690*8348SEric.Yu@Sun.COM /* 691*8348SEric.Yu@Sun.COM * Before returning to the caller the passed in uio_t 692*8348SEric.Yu@Sun.COM * "uiop" will be updated via a call to uioafini() 693*8348SEric.Yu@Sun.COM * below. 694*8348SEric.Yu@Sun.COM * 695*8348SEric.Yu@Sun.COM * Note, the uioa.uioa_state isn't set to UIOA_ENABLED 696*8348SEric.Yu@Sun.COM * here as first we have to uioamove() any currently 697*8348SEric.Yu@Sun.COM * queued M_DATA mblk_t(s) so it will be done later. 698*8348SEric.Yu@Sun.COM */ 699*8348SEric.Yu@Sun.COM } 700*8348SEric.Yu@Sun.COM /* 701*8348SEric.Yu@Sun.COM * In either uioainit() success or not case note the number 702*8348SEric.Yu@Sun.COM * of uio bytes the caller wants for sod framework and/or 703*8348SEric.Yu@Sun.COM * transport (e.g. TCP) strategy. 704*8348SEric.Yu@Sun.COM */ 705*8348SEric.Yu@Sun.COM sodp->sod_want = uiop->uio_resid; 706*8348SEric.Yu@Sun.COM } else if (sodp != NULL && (sodp->sod_state & SOD_ENABLED)) { 707*8348SEric.Yu@Sun.COM /* 708*8348SEric.Yu@Sun.COM * No uioa but still using sodirect so note the number of 709*8348SEric.Yu@Sun.COM * uio bytes the caller wants for sodirect framework and/or 710*8348SEric.Yu@Sun.COM * transport (e.g. TCP) strategy. 711*8348SEric.Yu@Sun.COM */ 712*8348SEric.Yu@Sun.COM sodp->sod_want = uiop->uio_resid; 713*8348SEric.Yu@Sun.COM } 714*8348SEric.Yu@Sun.COM mutex_exit(sodp->sod_lockp); 715*8348SEric.Yu@Sun.COM 716*8348SEric.Yu@Sun.COM return (suiop); 717*8348SEric.Yu@Sun.COM } 718*8348SEric.Yu@Sun.COM 719*8348SEric.Yu@Sun.COM /* 720*8348SEric.Yu@Sun.COM * This function is called at the end of recvmsg(), it finializes all the I/OAT 721*8348SEric.Yu@Sun.COM * operations, and reset the uioa state to UIOA_ALLOC. 
722*8348SEric.Yu@Sun.COM */ 723*8348SEric.Yu@Sun.COM int 724*8348SEric.Yu@Sun.COM sod_rcv_done(struct sonode *so, struct uio *suiop, struct uio *uiop) 725*8348SEric.Yu@Sun.COM { 726*8348SEric.Yu@Sun.COM int error = 0; 727*8348SEric.Yu@Sun.COM sodirect_t *sodp = so->so_direct; 728*8348SEric.Yu@Sun.COM mblk_t *mp; 729*8348SEric.Yu@Sun.COM 730*8348SEric.Yu@Sun.COM if (sodp == NULL) { 731*8348SEric.Yu@Sun.COM return (0); 732*8348SEric.Yu@Sun.COM } 733*8348SEric.Yu@Sun.COM 734*8348SEric.Yu@Sun.COM ASSERT(MUTEX_HELD(sodp->sod_lockp)); 735*8348SEric.Yu@Sun.COM /* Finish any sodirect and uioa processing */ 736*8348SEric.Yu@Sun.COM if (suiop != NULL) { 737*8348SEric.Yu@Sun.COM /* Finish any uioa_t processing */ 738*8348SEric.Yu@Sun.COM 739*8348SEric.Yu@Sun.COM ASSERT(uiop == (uio_t *)&sodp->sod_uioa); 740*8348SEric.Yu@Sun.COM error = uioafini(suiop, (uioa_t *)uiop); 741*8348SEric.Yu@Sun.COM if ((mp = sodp->sod_uioafh) != NULL) { 742*8348SEric.Yu@Sun.COM sodp->sod_uioafh = NULL; 743*8348SEric.Yu@Sun.COM sodp->sod_uioaft = NULL; 744*8348SEric.Yu@Sun.COM freemsg(mp); 745*8348SEric.Yu@Sun.COM } 746*8348SEric.Yu@Sun.COM } 747*8348SEric.Yu@Sun.COM ASSERT(sodp->sod_uioafh == NULL); 748*8348SEric.Yu@Sun.COM if (!(sodp->sod_state & SOD_WAKE_NOT)) { 749*8348SEric.Yu@Sun.COM /* Awoke */ 750*8348SEric.Yu@Sun.COM sodp->sod_state &= SOD_WAKE_CLR; 751*8348SEric.Yu@Sun.COM sodp->sod_state |= SOD_WAKE_NOT; 752*8348SEric.Yu@Sun.COM } 753*8348SEric.Yu@Sun.COM /* Last, clear sod_want value */ 754*8348SEric.Yu@Sun.COM sodp->sod_want = 0; 755*8348SEric.Yu@Sun.COM 756*8348SEric.Yu@Sun.COM return (error); 757*8348SEric.Yu@Sun.COM } 758*8348SEric.Yu@Sun.COM 759*8348SEric.Yu@Sun.COM /* 760*8348SEric.Yu@Sun.COM * Schedule a uioamove() on a mblk. This is ususally called from 761*8348SEric.Yu@Sun.COM * protocols (e.g. TCP) on a I/OAT enabled sonode. 
762*8348SEric.Yu@Sun.COM */ 763*8348SEric.Yu@Sun.COM mblk_t * 764*8348SEric.Yu@Sun.COM sod_uioa_mblk_init(struct sodirect_s *sodp, mblk_t *mp, size_t msg_size) 765*8348SEric.Yu@Sun.COM { 766*8348SEric.Yu@Sun.COM uioa_t *uioap = &sodp->sod_uioa; 767*8348SEric.Yu@Sun.COM mblk_t *mp1 = mp; 768*8348SEric.Yu@Sun.COM mblk_t *lmp = NULL; 769*8348SEric.Yu@Sun.COM 770*8348SEric.Yu@Sun.COM ASSERT(DB_TYPE(mp) == M_DATA); 771*8348SEric.Yu@Sun.COM ASSERT(msg_size == msgdsize(mp)); 772*8348SEric.Yu@Sun.COM 773*8348SEric.Yu@Sun.COM /* Caller must have lock held */ 774*8348SEric.Yu@Sun.COM ASSERT(MUTEX_HELD(sodp->sod_lockp)); 775*8348SEric.Yu@Sun.COM 776*8348SEric.Yu@Sun.COM if (uioap->uioa_state & UIOA_ENABLED) { 777*8348SEric.Yu@Sun.COM /* Uioa is enabled */ 778*8348SEric.Yu@Sun.COM 779*8348SEric.Yu@Sun.COM if (msg_size > uioap->uio_resid) { 780*8348SEric.Yu@Sun.COM /* 781*8348SEric.Yu@Sun.COM * There isn't enough uio space for the mblk_t chain 782*8348SEric.Yu@Sun.COM * so disable uioa such that this and any additional 783*8348SEric.Yu@Sun.COM * mblk_t data is handled by the socket and schedule 784*8348SEric.Yu@Sun.COM * the socket for wakeup to finish this uioa. 
785*8348SEric.Yu@Sun.COM */ 786*8348SEric.Yu@Sun.COM uioap->uioa_state &= UIOA_CLR; 787*8348SEric.Yu@Sun.COM uioap->uioa_state |= UIOA_FINI; 788*8348SEric.Yu@Sun.COM if (sodp->sod_state & SOD_WAKE_NOT) { 789*8348SEric.Yu@Sun.COM sodp->sod_state &= SOD_WAKE_CLR; 790*8348SEric.Yu@Sun.COM sodp->sod_state |= SOD_WAKE_NEED; 791*8348SEric.Yu@Sun.COM } 792*8348SEric.Yu@Sun.COM return (mp); 793*8348SEric.Yu@Sun.COM } 794*8348SEric.Yu@Sun.COM do { 795*8348SEric.Yu@Sun.COM uint32_t len = MBLKL(mp1); 796*8348SEric.Yu@Sun.COM 797*8348SEric.Yu@Sun.COM if (!uioamove(mp1->b_rptr, len, UIO_READ, uioap)) { 798*8348SEric.Yu@Sun.COM /* Scheduled, mark dblk_t as such */ 799*8348SEric.Yu@Sun.COM DB_FLAGS(mp1) |= DBLK_UIOA; 800*8348SEric.Yu@Sun.COM } else { 801*8348SEric.Yu@Sun.COM /* Error, turn off async processing */ 802*8348SEric.Yu@Sun.COM uioap->uioa_state &= UIOA_CLR; 803*8348SEric.Yu@Sun.COM uioap->uioa_state |= UIOA_FINI; 804*8348SEric.Yu@Sun.COM break; 805*8348SEric.Yu@Sun.COM } 806*8348SEric.Yu@Sun.COM lmp = mp1; 807*8348SEric.Yu@Sun.COM } while ((mp1 = mp1->b_cont) != NULL); 808*8348SEric.Yu@Sun.COM 809*8348SEric.Yu@Sun.COM if (mp1 != NULL || uioap->uio_resid == 0) { 810*8348SEric.Yu@Sun.COM /* 811*8348SEric.Yu@Sun.COM * Not all mblk_t(s) uioamoved (error) or all uio 812*8348SEric.Yu@Sun.COM * space has been consumed so schedule the socket 813*8348SEric.Yu@Sun.COM * for wakeup to finish this uio. 814*8348SEric.Yu@Sun.COM */ 815*8348SEric.Yu@Sun.COM sodp->sod_state &= SOD_WAKE_CLR; 816*8348SEric.Yu@Sun.COM sodp->sod_state |= SOD_WAKE_NEED; 817*8348SEric.Yu@Sun.COM 818*8348SEric.Yu@Sun.COM /* Break the mblk chain if neccessary. 
*/ 819*8348SEric.Yu@Sun.COM if (mp1 != NULL && lmp != NULL) { 820*8348SEric.Yu@Sun.COM mp->b_next = mp1; 821*8348SEric.Yu@Sun.COM lmp->b_cont = NULL; 822*8348SEric.Yu@Sun.COM } 823*8348SEric.Yu@Sun.COM } 824*8348SEric.Yu@Sun.COM } 825*8348SEric.Yu@Sun.COM return (mp1); 826*8348SEric.Yu@Sun.COM } 827*8348SEric.Yu@Sun.COM 828*8348SEric.Yu@Sun.COM /* 829*8348SEric.Yu@Sun.COM * This function is called on a mblk that thas been successfully uioamoved(). 830*8348SEric.Yu@Sun.COM */ 831*8348SEric.Yu@Sun.COM void 832*8348SEric.Yu@Sun.COM sod_uioa_mblk_done(sodirect_t *sodp, mblk_t *bp) 833*8348SEric.Yu@Sun.COM { 834*8348SEric.Yu@Sun.COM if (bp != NULL && (bp->b_datap->db_flags & DBLK_UIOA)) { 835*8348SEric.Yu@Sun.COM /* 836*8348SEric.Yu@Sun.COM * A uioa flaged mblk_t chain, already uio processed, 837*8348SEric.Yu@Sun.COM * add it to the sodirect uioa pending free list. 838*8348SEric.Yu@Sun.COM * 839*8348SEric.Yu@Sun.COM * Note, a b_cont chain headed by a DBLK_UIOA enable 840*8348SEric.Yu@Sun.COM * mblk_t must have all mblk_t(s) DBLK_UIOA enabled. 841*8348SEric.Yu@Sun.COM */ 842*8348SEric.Yu@Sun.COM mblk_t *bpt = sodp->sod_uioaft; 843*8348SEric.Yu@Sun.COM 844*8348SEric.Yu@Sun.COM ASSERT(sodp != NULL); 845*8348SEric.Yu@Sun.COM 846*8348SEric.Yu@Sun.COM /* 847*8348SEric.Yu@Sun.COM * Add first mblk_t of "bp" chain to current sodirect uioa 848*8348SEric.Yu@Sun.COM * free list tail mblk_t, if any, else empty list so new head. 849*8348SEric.Yu@Sun.COM */ 850*8348SEric.Yu@Sun.COM if (bpt == NULL) 851*8348SEric.Yu@Sun.COM sodp->sod_uioafh = bp; 852*8348SEric.Yu@Sun.COM else 853*8348SEric.Yu@Sun.COM bpt->b_cont = bp; 854*8348SEric.Yu@Sun.COM 855*8348SEric.Yu@Sun.COM /* 856*8348SEric.Yu@Sun.COM * Walk mblk_t "bp" chain to find tail and adjust rptr of 857*8348SEric.Yu@Sun.COM * each to reflect that uioamove() has consumed all data. 
858*8348SEric.Yu@Sun.COM */ 859*8348SEric.Yu@Sun.COM bpt = bp; 860*8348SEric.Yu@Sun.COM for (;;) { 861*8348SEric.Yu@Sun.COM ASSERT(bpt->b_datap->db_flags & DBLK_UIOA); 862*8348SEric.Yu@Sun.COM 863*8348SEric.Yu@Sun.COM bpt->b_rptr = bpt->b_wptr; 864*8348SEric.Yu@Sun.COM if (bpt->b_cont == NULL) 865*8348SEric.Yu@Sun.COM break; 866*8348SEric.Yu@Sun.COM bpt = bpt->b_cont; 867*8348SEric.Yu@Sun.COM } 868*8348SEric.Yu@Sun.COM /* New sodirect uioa free list tail */ 869*8348SEric.Yu@Sun.COM sodp->sod_uioaft = bpt; 870*8348SEric.Yu@Sun.COM 871*8348SEric.Yu@Sun.COM /* Only dequeue once with data returned per uioa_t */ 872*8348SEric.Yu@Sun.COM if (sodp->sod_uioa.uioa_state & UIOA_ENABLED) { 873*8348SEric.Yu@Sun.COM sodp->sod_uioa.uioa_state &= UIOA_CLR; 874*8348SEric.Yu@Sun.COM sodp->sod_uioa.uioa_state |= UIOA_FINI; 875*8348SEric.Yu@Sun.COM } 876*8348SEric.Yu@Sun.COM } 877*8348SEric.Yu@Sun.COM } 878*8348SEric.Yu@Sun.COM 879*8348SEric.Yu@Sun.COM /* 880*8348SEric.Yu@Sun.COM * When transit from UIOA_INIT state to UIOA_ENABLE state in recvmsg(), call 881*8348SEric.Yu@Sun.COM * this function on a non-STREAMS socket to schedule uioamove() on the data 882*8348SEric.Yu@Sun.COM * that has already queued in this socket. 
883*8348SEric.Yu@Sun.COM */ 884*8348SEric.Yu@Sun.COM void 885*8348SEric.Yu@Sun.COM sod_uioa_so_init(struct sonode *so, struct sodirect_s *sodp, struct uio *uiop) 886*8348SEric.Yu@Sun.COM { 887*8348SEric.Yu@Sun.COM uioa_t *uioap = (uioa_t *)uiop; 888*8348SEric.Yu@Sun.COM mblk_t *lbp; 889*8348SEric.Yu@Sun.COM mblk_t *wbp; 890*8348SEric.Yu@Sun.COM mblk_t *bp; 891*8348SEric.Yu@Sun.COM int len; 892*8348SEric.Yu@Sun.COM int error; 893*8348SEric.Yu@Sun.COM boolean_t in_rcv_q = B_TRUE; 894*8348SEric.Yu@Sun.COM 895*8348SEric.Yu@Sun.COM ASSERT(MUTEX_HELD(sodp->sod_lockp)); 896*8348SEric.Yu@Sun.COM ASSERT(&sodp->sod_uioa == uioap); 897*8348SEric.Yu@Sun.COM 898*8348SEric.Yu@Sun.COM /* 899*8348SEric.Yu@Sun.COM * Walk first b_cont chain in sod_q 900*8348SEric.Yu@Sun.COM * and schedule any M_DATA mblk_t's for uio asynchronous move. 901*8348SEric.Yu@Sun.COM */ 902*8348SEric.Yu@Sun.COM bp = so->so_rcv_q_head; 903*8348SEric.Yu@Sun.COM 904*8348SEric.Yu@Sun.COM again: 905*8348SEric.Yu@Sun.COM /* Walk the chain */ 906*8348SEric.Yu@Sun.COM lbp = NULL; 907*8348SEric.Yu@Sun.COM wbp = bp; 908*8348SEric.Yu@Sun.COM 909*8348SEric.Yu@Sun.COM do { 910*8348SEric.Yu@Sun.COM if (bp == NULL) 911*8348SEric.Yu@Sun.COM break; 912*8348SEric.Yu@Sun.COM 913*8348SEric.Yu@Sun.COM if (wbp->b_datap->db_type != M_DATA) { 914*8348SEric.Yu@Sun.COM /* Not M_DATA, no more uioa */ 915*8348SEric.Yu@Sun.COM goto nouioa; 916*8348SEric.Yu@Sun.COM } 917*8348SEric.Yu@Sun.COM if ((len = wbp->b_wptr - wbp->b_rptr) > 0) { 918*8348SEric.Yu@Sun.COM /* Have a M_DATA mblk_t with data */ 919*8348SEric.Yu@Sun.COM if (len > uioap->uio_resid || (so->so_oobmark > 0 && 920*8348SEric.Yu@Sun.COM len + uioap->uioa_mbytes >= so->so_oobmark)) { 921*8348SEric.Yu@Sun.COM /* Not enough uio sapce, or beyond oobmark */ 922*8348SEric.Yu@Sun.COM goto nouioa; 923*8348SEric.Yu@Sun.COM } 924*8348SEric.Yu@Sun.COM ASSERT(!(wbp->b_datap->db_flags & DBLK_UIOA)); 925*8348SEric.Yu@Sun.COM error = uioamove(wbp->b_rptr, len, 926*8348SEric.Yu@Sun.COM 
UIO_READ, uioap); 927*8348SEric.Yu@Sun.COM if (!error) { 928*8348SEric.Yu@Sun.COM /* Scheduled, mark dblk_t as such */ 929*8348SEric.Yu@Sun.COM wbp->b_datap->db_flags |= DBLK_UIOA; 930*8348SEric.Yu@Sun.COM } else { 931*8348SEric.Yu@Sun.COM /* Break the mblk chain */ 932*8348SEric.Yu@Sun.COM goto nouioa; 933*8348SEric.Yu@Sun.COM } 934*8348SEric.Yu@Sun.COM } 935*8348SEric.Yu@Sun.COM /* Save last wbp processed */ 936*8348SEric.Yu@Sun.COM lbp = wbp; 937*8348SEric.Yu@Sun.COM } while ((wbp = wbp->b_cont) != NULL); 938*8348SEric.Yu@Sun.COM 939*8348SEric.Yu@Sun.COM if (in_rcv_q && (bp == NULL || bp->b_next == NULL)) { 940*8348SEric.Yu@Sun.COM /* 941*8348SEric.Yu@Sun.COM * We get here only once to process the sonode dump area 942*8348SEric.Yu@Sun.COM * if so_rcv_q_head is NULL or all the mblks have been 943*8348SEric.Yu@Sun.COM * successfully uioamoved()ed. 944*8348SEric.Yu@Sun.COM */ 945*8348SEric.Yu@Sun.COM in_rcv_q = B_FALSE; 946*8348SEric.Yu@Sun.COM 947*8348SEric.Yu@Sun.COM /* move to dump area */ 948*8348SEric.Yu@Sun.COM bp = so->so_rcv_head; 949*8348SEric.Yu@Sun.COM goto again; 950*8348SEric.Yu@Sun.COM } 951*8348SEric.Yu@Sun.COM 952*8348SEric.Yu@Sun.COM return; 953*8348SEric.Yu@Sun.COM 954*8348SEric.Yu@Sun.COM nouioa: 955*8348SEric.Yu@Sun.COM /* No more uioa */ 956*8348SEric.Yu@Sun.COM uioap->uioa_state &= UIOA_CLR; 957*8348SEric.Yu@Sun.COM uioap->uioa_state |= UIOA_FINI; 958*8348SEric.Yu@Sun.COM 959*8348SEric.Yu@Sun.COM /* 960*8348SEric.Yu@Sun.COM * If we processed 1 or more mblk_t(s) then we need to split the 961*8348SEric.Yu@Sun.COM * current mblk_t chain in 2 so that all the uioamove()ed mblk_t(s) 962*8348SEric.Yu@Sun.COM * are in the current chain and the rest are in the following new 963*8348SEric.Yu@Sun.COM * chain. 
964*8348SEric.Yu@Sun.COM */ 965*8348SEric.Yu@Sun.COM if (lbp != NULL) { 966*8348SEric.Yu@Sun.COM /* New end of current chain */ 967*8348SEric.Yu@Sun.COM lbp->b_cont = NULL; 968*8348SEric.Yu@Sun.COM 969*8348SEric.Yu@Sun.COM /* Insert new chain wbp after bp */ 970*8348SEric.Yu@Sun.COM if ((wbp->b_next = bp->b_next) == NULL) { 971*8348SEric.Yu@Sun.COM /* 972*8348SEric.Yu@Sun.COM * No need to grab so_lock, since sod_lockp 973*8348SEric.Yu@Sun.COM * points to so_lock. 974*8348SEric.Yu@Sun.COM */ 975*8348SEric.Yu@Sun.COM if (in_rcv_q) 976*8348SEric.Yu@Sun.COM so->so_rcv_q_last_head = wbp; 977*8348SEric.Yu@Sun.COM else 978*8348SEric.Yu@Sun.COM so->so_rcv_last_head = wbp; 979*8348SEric.Yu@Sun.COM } 980*8348SEric.Yu@Sun.COM bp->b_next = wbp; 981*8348SEric.Yu@Sun.COM bp->b_next->b_prev = bp->b_prev; 982*8348SEric.Yu@Sun.COM bp->b_prev = lbp; 983*8348SEric.Yu@Sun.COM } 984*8348SEric.Yu@Sun.COM } 985*8348SEric.Yu@Sun.COM 986*8348SEric.Yu@Sun.COM /* 987*8348SEric.Yu@Sun.COM * Initialize sodirect data structures on a socket. 988*8348SEric.Yu@Sun.COM */ 989*8348SEric.Yu@Sun.COM void 990*8348SEric.Yu@Sun.COM sod_sock_init(struct sonode *so, struct stdata *stp, sod_enq_func enq_func, 991*8348SEric.Yu@Sun.COM sod_wakeup_func wake_func, kmutex_t *lockp) 992*8348SEric.Yu@Sun.COM { 993*8348SEric.Yu@Sun.COM sodirect_t *sodp; 994*8348SEric.Yu@Sun.COM 995*8348SEric.Yu@Sun.COM ASSERT(so->so_direct == NULL); 996*8348SEric.Yu@Sun.COM 997*8348SEric.Yu@Sun.COM so->so_state |= SS_SODIRECT; 998*8348SEric.Yu@Sun.COM 999*8348SEric.Yu@Sun.COM sodp = kmem_cache_alloc(sock_sod_cache, KM_SLEEP); 1000*8348SEric.Yu@Sun.COM sodp->sod_state = SOD_ENABLED | SOD_WAKE_NOT; 1001*8348SEric.Yu@Sun.COM sodp->sod_want = 0; 1002*8348SEric.Yu@Sun.COM sodp->sod_q = (stp != NULL) ? 
RD(stp->sd_wrq) : NULL; 1003*8348SEric.Yu@Sun.COM sodp->sod_enqueue = enq_func; 1004*8348SEric.Yu@Sun.COM sodp->sod_wakeup = wake_func; 1005*8348SEric.Yu@Sun.COM sodp->sod_uioafh = NULL; 1006*8348SEric.Yu@Sun.COM sodp->sod_uioaft = NULL; 1007*8348SEric.Yu@Sun.COM sodp->sod_lockp = lockp; 1008*8348SEric.Yu@Sun.COM /* 1009*8348SEric.Yu@Sun.COM * Remainder of the sod_uioa members are left uninitialized 1010*8348SEric.Yu@Sun.COM * but will be initialized later by uioainit() before uioa 1011*8348SEric.Yu@Sun.COM * is enabled. 1012*8348SEric.Yu@Sun.COM */ 1013*8348SEric.Yu@Sun.COM sodp->sod_uioa.uioa_state = UIOA_ALLOC; 1014*8348SEric.Yu@Sun.COM so->so_direct = sodp; 1015*8348SEric.Yu@Sun.COM if (stp != NULL) 1016*8348SEric.Yu@Sun.COM stp->sd_sodirect = sodp; 1017*8348SEric.Yu@Sun.COM } 1018*8348SEric.Yu@Sun.COM 1019*8348SEric.Yu@Sun.COM /* 1020*8348SEric.Yu@Sun.COM * Init the sodirect kmem cache while sockfs is loading. 1021*8348SEric.Yu@Sun.COM */ 1022*8348SEric.Yu@Sun.COM void 1023*8348SEric.Yu@Sun.COM sod_init() 1024*8348SEric.Yu@Sun.COM { 1025*8348SEric.Yu@Sun.COM /* Allocate sodirect_t kmem_cache */ 1026*8348SEric.Yu@Sun.COM sock_sod_cache = kmem_cache_create("sock_sod_cache", 1027*8348SEric.Yu@Sun.COM sizeof (sodirect_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 1028*8348SEric.Yu@Sun.COM } 1029*8348SEric.Yu@Sun.COM 1030*8348SEric.Yu@Sun.COM ssize_t 1031*8348SEric.Yu@Sun.COM sod_uioa_mblk(struct sonode *so, mblk_t *mp) 1032*8348SEric.Yu@Sun.COM { 1033*8348SEric.Yu@Sun.COM sodirect_t *sodp = so->so_direct; 1034*8348SEric.Yu@Sun.COM 1035*8348SEric.Yu@Sun.COM ASSERT(sodp != NULL); 1036*8348SEric.Yu@Sun.COM ASSERT(MUTEX_HELD(sodp->sod_lockp)); 1037*8348SEric.Yu@Sun.COM 1038*8348SEric.Yu@Sun.COM ASSERT(sodp->sod_state & SOD_ENABLED); 1039*8348SEric.Yu@Sun.COM ASSERT(sodp->sod_uioa.uioa_state != (UIOA_ALLOC|UIOA_INIT)); 1040*8348SEric.Yu@Sun.COM 1041*8348SEric.Yu@Sun.COM ASSERT(sodp->sod_uioa.uioa_state & (UIOA_ENABLED|UIOA_FINI)); 1042*8348SEric.Yu@Sun.COM 
1043*8348SEric.Yu@Sun.COM if (mp == NULL && so->so_rcv_q_head != NULL) { 1044*8348SEric.Yu@Sun.COM mp = so->so_rcv_q_head; 1045*8348SEric.Yu@Sun.COM ASSERT(mp->b_prev != NULL); 1046*8348SEric.Yu@Sun.COM mp->b_prev = NULL; 1047*8348SEric.Yu@Sun.COM so->so_rcv_q_head = mp->b_next; 1048*8348SEric.Yu@Sun.COM if (so->so_rcv_q_head == NULL) { 1049*8348SEric.Yu@Sun.COM so->so_rcv_q_last_head = NULL; 1050*8348SEric.Yu@Sun.COM } 1051*8348SEric.Yu@Sun.COM mp->b_next = NULL; 1052*8348SEric.Yu@Sun.COM } 1053*8348SEric.Yu@Sun.COM 1054*8348SEric.Yu@Sun.COM sod_uioa_mblk_done(sodp, mp); 1055*8348SEric.Yu@Sun.COM 1056*8348SEric.Yu@Sun.COM if (so->so_rcv_q_head == NULL && so->so_rcv_head != NULL && 1057*8348SEric.Yu@Sun.COM DB_TYPE(so->so_rcv_head) == M_DATA && 1058*8348SEric.Yu@Sun.COM (DB_FLAGS(so->so_rcv_head) & DBLK_UIOA)) { 1059*8348SEric.Yu@Sun.COM /* more arrived */ 1060*8348SEric.Yu@Sun.COM ASSERT(so->so_rcv_q_head == NULL); 1061*8348SEric.Yu@Sun.COM mp = so->so_rcv_head; 1062*8348SEric.Yu@Sun.COM so->so_rcv_head = mp->b_next; 1063*8348SEric.Yu@Sun.COM if (so->so_rcv_head == NULL) 1064*8348SEric.Yu@Sun.COM so->so_rcv_last_head = NULL; 1065*8348SEric.Yu@Sun.COM mp->b_prev = mp->b_next = NULL; 1066*8348SEric.Yu@Sun.COM sod_uioa_mblk_done(sodp, mp); 1067*8348SEric.Yu@Sun.COM } 1068*8348SEric.Yu@Sun.COM 1069*8348SEric.Yu@Sun.COM #ifdef DEBUG 1070*8348SEric.Yu@Sun.COM if (so->so_rcv_q_head != NULL) { 1071*8348SEric.Yu@Sun.COM mblk_t *m = so->so_rcv_q_head; 1072*8348SEric.Yu@Sun.COM while (m != NULL) { 1073*8348SEric.Yu@Sun.COM if (DB_FLAGS(m) & DBLK_UIOA) { 1074*8348SEric.Yu@Sun.COM cmn_err(CE_PANIC, "Unexpected I/OAT mblk %p" 1075*8348SEric.Yu@Sun.COM " in so_rcv_q_head.\n", (void *)m); 1076*8348SEric.Yu@Sun.COM } 1077*8348SEric.Yu@Sun.COM m = m->b_next; 1078*8348SEric.Yu@Sun.COM } 1079*8348SEric.Yu@Sun.COM } 1080*8348SEric.Yu@Sun.COM if (so->so_rcv_head != NULL) { 1081*8348SEric.Yu@Sun.COM mblk_t *m = so->so_rcv_head; 1082*8348SEric.Yu@Sun.COM while (m != NULL) { 
1083*8348SEric.Yu@Sun.COM if (DB_FLAGS(m) & DBLK_UIOA) { 1084*8348SEric.Yu@Sun.COM cmn_err(CE_PANIC, "Unexpected I/OAT mblk %p" 1085*8348SEric.Yu@Sun.COM " in so_rcv_head.\n", (void *)m); 1086*8348SEric.Yu@Sun.COM } 1087*8348SEric.Yu@Sun.COM m = m->b_next; 1088*8348SEric.Yu@Sun.COM } 1089*8348SEric.Yu@Sun.COM } 1090*8348SEric.Yu@Sun.COM #endif 1091*8348SEric.Yu@Sun.COM return (sodp->sod_uioa.uioa_mbytes); 1092*8348SEric.Yu@Sun.COM } 1093