1*8348SEric.Yu@Sun.COM /* 2*8348SEric.Yu@Sun.COM * CDDL HEADER START 3*8348SEric.Yu@Sun.COM * 4*8348SEric.Yu@Sun.COM * The contents of this file are subject to the terms of the 5*8348SEric.Yu@Sun.COM * Common Development and Distribution License (the "License"). 6*8348SEric.Yu@Sun.COM * You may not use this file except in compliance with the License. 7*8348SEric.Yu@Sun.COM * 8*8348SEric.Yu@Sun.COM * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*8348SEric.Yu@Sun.COM * or http://www.opensolaris.org/os/licensing. 10*8348SEric.Yu@Sun.COM * See the License for the specific language governing permissions 11*8348SEric.Yu@Sun.COM * and limitations under the License. 12*8348SEric.Yu@Sun.COM * 13*8348SEric.Yu@Sun.COM * When distributing Covered Code, include this CDDL HEADER in each 14*8348SEric.Yu@Sun.COM * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*8348SEric.Yu@Sun.COM * If applicable, add the following below this CDDL HEADER, with the 16*8348SEric.Yu@Sun.COM * fields enclosed by brackets "[]" replaced with your own identifying 17*8348SEric.Yu@Sun.COM * information: Portions Copyright [yyyy] [name of copyright owner] 18*8348SEric.Yu@Sun.COM * 19*8348SEric.Yu@Sun.COM * CDDL HEADER END 20*8348SEric.Yu@Sun.COM */ 21*8348SEric.Yu@Sun.COM 22*8348SEric.Yu@Sun.COM /* 23*8348SEric.Yu@Sun.COM * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24*8348SEric.Yu@Sun.COM * Use is subject to license terms. 25*8348SEric.Yu@Sun.COM */ 26*8348SEric.Yu@Sun.COM 27*8348SEric.Yu@Sun.COM #include <sys/types.h> 28*8348SEric.Yu@Sun.COM #include <sys/param.h> 29*8348SEric.Yu@Sun.COM #include <sys/signal.h> 30*8348SEric.Yu@Sun.COM #include <sys/cmn_err.h> 31*8348SEric.Yu@Sun.COM 32*8348SEric.Yu@Sun.COM #include <sys/stropts.h> 33*8348SEric.Yu@Sun.COM #include <sys/socket.h> 34*8348SEric.Yu@Sun.COM #include <sys/socketvar.h> 35*8348SEric.Yu@Sun.COM #include <sys/sockio.h> 36*8348SEric.Yu@Sun.COM #include <sys/sodirect.h> 37*8348SEric.Yu@Sun.COM #include <sys/strsubr.h> 38*8348SEric.Yu@Sun.COM #include <sys/strsun.h> 39*8348SEric.Yu@Sun.COM #include <sys/atomic.h> 40*8348SEric.Yu@Sun.COM 41*8348SEric.Yu@Sun.COM #include <fs/sockfs/sockcommon.h> 42*8348SEric.Yu@Sun.COM #include <fs/sockfs/socktpi.h> 43*8348SEric.Yu@Sun.COM #include <sys/ddi.h> 44*8348SEric.Yu@Sun.COM #include <inet/ip.h> 45*8348SEric.Yu@Sun.COM #include <sys/time.h> 46*8348SEric.Yu@Sun.COM #include <sys/cmn_err.h> 47*8348SEric.Yu@Sun.COM 48*8348SEric.Yu@Sun.COM #ifdef SOCK_TEST 49*8348SEric.Yu@Sun.COM extern int do_useracc; 50*8348SEric.Yu@Sun.COM extern clock_t sock_test_timelimit; 51*8348SEric.Yu@Sun.COM #endif /* SOCK_TEST */ 52*8348SEric.Yu@Sun.COM 53*8348SEric.Yu@Sun.COM #define MBLK_PULL_LEN 64 54*8348SEric.Yu@Sun.COM uint32_t so_mblk_pull_len = MBLK_PULL_LEN; 55*8348SEric.Yu@Sun.COM 56*8348SEric.Yu@Sun.COM #ifdef DEBUG 57*8348SEric.Yu@Sun.COM boolean_t so_debug_length = B_FALSE; 58*8348SEric.Yu@Sun.COM static boolean_t so_check_length(sonode_t *so); 59*8348SEric.Yu@Sun.COM #endif 60*8348SEric.Yu@Sun.COM 61*8348SEric.Yu@Sun.COM int 62*8348SEric.Yu@Sun.COM so_acceptq_enqueue_locked(struct sonode *so, struct sonode *nso) 63*8348SEric.Yu@Sun.COM { 64*8348SEric.Yu@Sun.COM ASSERT(MUTEX_HELD(&so->so_acceptq_lock)); 65*8348SEric.Yu@Sun.COM ASSERT(nso->so_acceptq_next == NULL); 66*8348SEric.Yu@Sun.COM 67*8348SEric.Yu@Sun.COM *so->so_acceptq_tail = nso; 68*8348SEric.Yu@Sun.COM so->so_acceptq_tail = &nso->so_acceptq_next; 69*8348SEric.Yu@Sun.COM so->so_acceptq_len++; 70*8348SEric.Yu@Sun.COM 71*8348SEric.Yu@Sun.COM /* Wakeup a single consumer */ 72*8348SEric.Yu@Sun.COM cv_signal(&so->so_acceptq_cv); 73*8348SEric.Yu@Sun.COM 74*8348SEric.Yu@Sun.COM return (so->so_acceptq_len); 75*8348SEric.Yu@Sun.COM } 76*8348SEric.Yu@Sun.COM 77*8348SEric.Yu@Sun.COM /* 78*8348SEric.Yu@Sun.COM * int so_acceptq_enqueue(struct sonode *so, struct sonode *nso) 79*8348SEric.Yu@Sun.COM * 80*8348SEric.Yu@Sun.COM * Enqueue an incoming connection on a listening socket. 81*8348SEric.Yu@Sun.COM * 82*8348SEric.Yu@Sun.COM * Arguments: 83*8348SEric.Yu@Sun.COM * so - listening socket 84*8348SEric.Yu@Sun.COM * nso - new connection 85*8348SEric.Yu@Sun.COM * 86*8348SEric.Yu@Sun.COM * Returns: 87*8348SEric.Yu@Sun.COM * Number of queued connections, including the new connection 88*8348SEric.Yu@Sun.COM */ 89*8348SEric.Yu@Sun.COM int 90*8348SEric.Yu@Sun.COM so_acceptq_enqueue(struct sonode *so, struct sonode *nso) 91*8348SEric.Yu@Sun.COM { 92*8348SEric.Yu@Sun.COM int conns; 93*8348SEric.Yu@Sun.COM 94*8348SEric.Yu@Sun.COM mutex_enter(&so->so_acceptq_lock); 95*8348SEric.Yu@Sun.COM conns = so_acceptq_enqueue_locked(so, nso); 96*8348SEric.Yu@Sun.COM mutex_exit(&so->so_acceptq_lock); 97*8348SEric.Yu@Sun.COM 98*8348SEric.Yu@Sun.COM return (conns); 99*8348SEric.Yu@Sun.COM } 100*8348SEric.Yu@Sun.COM 101*8348SEric.Yu@Sun.COM static int 102*8348SEric.Yu@Sun.COM so_acceptq_dequeue_locked(struct sonode *so, boolean_t dontblock, 103*8348SEric.Yu@Sun.COM struct sonode **nsop) 104*8348SEric.Yu@Sun.COM { 105*8348SEric.Yu@Sun.COM struct sonode *nso = NULL; 106*8348SEric.Yu@Sun.COM 107*8348SEric.Yu@Sun.COM *nsop = NULL; 108*8348SEric.Yu@Sun.COM ASSERT(MUTEX_HELD(&so->so_acceptq_lock)); 109*8348SEric.Yu@Sun.COM while ((nso = so->so_acceptq_head) == NULL) { 110*8348SEric.Yu@Sun.COM /* 111*8348SEric.Yu@Sun.COM * No need to check so_error here, because it is not 112*8348SEric.Yu@Sun.COM * possible for a listening socket to be reset or otherwise 113*8348SEric.Yu@Sun.COM * disconnected. 114*8348SEric.Yu@Sun.COM * 115*8348SEric.Yu@Sun.COM * So now we just need check if it's ok to wait. 116*8348SEric.Yu@Sun.COM */ 117*8348SEric.Yu@Sun.COM if (dontblock) 118*8348SEric.Yu@Sun.COM return (EWOULDBLOCK); 119*8348SEric.Yu@Sun.COM if (so->so_state & (SS_CLOSING | SS_FALLBACK_PENDING)) 120*8348SEric.Yu@Sun.COM return (EINTR); 121*8348SEric.Yu@Sun.COM 122*8348SEric.Yu@Sun.COM if (cv_wait_sig_swap(&so->so_acceptq_cv, 123*8348SEric.Yu@Sun.COM &so->so_acceptq_lock) == 0) 124*8348SEric.Yu@Sun.COM return (EINTR); 125*8348SEric.Yu@Sun.COM } 126*8348SEric.Yu@Sun.COM 127*8348SEric.Yu@Sun.COM ASSERT(nso != NULL); 128*8348SEric.Yu@Sun.COM so->so_acceptq_head = nso->so_acceptq_next; 129*8348SEric.Yu@Sun.COM nso->so_acceptq_next = NULL; 130*8348SEric.Yu@Sun.COM 131*8348SEric.Yu@Sun.COM if (so->so_acceptq_head == NULL) { 132*8348SEric.Yu@Sun.COM ASSERT(so->so_acceptq_tail == &nso->so_acceptq_next); 133*8348SEric.Yu@Sun.COM so->so_acceptq_tail = &so->so_acceptq_head; 134*8348SEric.Yu@Sun.COM } 135*8348SEric.Yu@Sun.COM ASSERT(so->so_acceptq_len > 0); 136*8348SEric.Yu@Sun.COM --so->so_acceptq_len; 137*8348SEric.Yu@Sun.COM 138*8348SEric.Yu@Sun.COM *nsop = nso; 139*8348SEric.Yu@Sun.COM 140*8348SEric.Yu@Sun.COM return (0); 141*8348SEric.Yu@Sun.COM } 142*8348SEric.Yu@Sun.COM 143*8348SEric.Yu@Sun.COM /* 144*8348SEric.Yu@Sun.COM * int so_acceptq_dequeue(struct sonode *, boolean_t, struct sonode **) 145*8348SEric.Yu@Sun.COM * 146*8348SEric.Yu@Sun.COM * Pulls a connection off of the accept queue. 147*8348SEric.Yu@Sun.COM * 148*8348SEric.Yu@Sun.COM * Arguments: 149*8348SEric.Yu@Sun.COM * so - listening socket 150*8348SEric.Yu@Sun.COM * dontblock - indicate whether it's ok to sleep if there are no 151*8348SEric.Yu@Sun.COM * connections on the queue 152*8348SEric.Yu@Sun.COM * nsop - Value-return argument 153*8348SEric.Yu@Sun.COM * 154*8348SEric.Yu@Sun.COM * Return values: 155*8348SEric.Yu@Sun.COM * 0 when a connection is successfully dequeued, in which case nsop 156*8348SEric.Yu@Sun.COM * is set to point to the new connection. Upon failure a non-zero 157*8348SEric.Yu@Sun.COM * value is returned, and the value of nsop is set to NULL. 158*8348SEric.Yu@Sun.COM * 159*8348SEric.Yu@Sun.COM * Note: 160*8348SEric.Yu@Sun.COM * so_acceptq_dequeue() may return prematurly if the socket is falling 161*8348SEric.Yu@Sun.COM * back to TPI. 162*8348SEric.Yu@Sun.COM */ 163*8348SEric.Yu@Sun.COM int 164*8348SEric.Yu@Sun.COM so_acceptq_dequeue(struct sonode *so, boolean_t dontblock, 165*8348SEric.Yu@Sun.COM struct sonode **nsop) 166*8348SEric.Yu@Sun.COM { 167*8348SEric.Yu@Sun.COM int error; 168*8348SEric.Yu@Sun.COM 169*8348SEric.Yu@Sun.COM mutex_enter(&so->so_acceptq_lock); 170*8348SEric.Yu@Sun.COM error = so_acceptq_dequeue_locked(so, dontblock, nsop); 171*8348SEric.Yu@Sun.COM mutex_exit(&so->so_acceptq_lock); 172*8348SEric.Yu@Sun.COM 173*8348SEric.Yu@Sun.COM return (error); 174*8348SEric.Yu@Sun.COM } 175*8348SEric.Yu@Sun.COM 176*8348SEric.Yu@Sun.COM /* 177*8348SEric.Yu@Sun.COM * void so_acceptq_flush(struct sonode *so) 178*8348SEric.Yu@Sun.COM * 179*8348SEric.Yu@Sun.COM * Removes all pending connections from a listening socket, and 180*8348SEric.Yu@Sun.COM * frees the associated resources. 181*8348SEric.Yu@Sun.COM * 182*8348SEric.Yu@Sun.COM * Arguments 183*8348SEric.Yu@Sun.COM * so - listening socket 184*8348SEric.Yu@Sun.COM * 185*8348SEric.Yu@Sun.COM * Return values: 186*8348SEric.Yu@Sun.COM * None. 187*8348SEric.Yu@Sun.COM * 188*8348SEric.Yu@Sun.COM * Note: 189*8348SEric.Yu@Sun.COM * The caller has to ensure that no calls to so_acceptq_enqueue() or 190*8348SEric.Yu@Sun.COM * so_acceptq_dequeue() occur while the accept queue is being flushed. 191*8348SEric.Yu@Sun.COM * So either the socket needs to be in a state where no operations 192*8348SEric.Yu@Sun.COM * would come in, or so_lock needs to be obtained. 193*8348SEric.Yu@Sun.COM */ 194*8348SEric.Yu@Sun.COM void 195*8348SEric.Yu@Sun.COM so_acceptq_flush(struct sonode *so) 196*8348SEric.Yu@Sun.COM { 197*8348SEric.Yu@Sun.COM struct sonode *nso; 198*8348SEric.Yu@Sun.COM 199*8348SEric.Yu@Sun.COM nso = so->so_acceptq_head; 200*8348SEric.Yu@Sun.COM 201*8348SEric.Yu@Sun.COM while (nso != NULL) { 202*8348SEric.Yu@Sun.COM struct sonode *nnso = NULL; 203*8348SEric.Yu@Sun.COM 204*8348SEric.Yu@Sun.COM nnso = nso->so_acceptq_next; 205*8348SEric.Yu@Sun.COM nso->so_acceptq_next = NULL; 206*8348SEric.Yu@Sun.COM /* 207*8348SEric.Yu@Sun.COM * Since the socket is on the accept queue, there can 208*8348SEric.Yu@Sun.COM * only be one reference. We drop the reference and 209*8348SEric.Yu@Sun.COM * just blow off the socket. 210*8348SEric.Yu@Sun.COM */ 211*8348SEric.Yu@Sun.COM ASSERT(nso->so_count == 1); 212*8348SEric.Yu@Sun.COM nso->so_count--; 213*8348SEric.Yu@Sun.COM socket_destroy(nso); 214*8348SEric.Yu@Sun.COM nso = nnso; 215*8348SEric.Yu@Sun.COM } 216*8348SEric.Yu@Sun.COM 217*8348SEric.Yu@Sun.COM so->so_acceptq_head = NULL; 218*8348SEric.Yu@Sun.COM so->so_acceptq_tail = &so->so_acceptq_head; 219*8348SEric.Yu@Sun.COM so->so_acceptq_len = 0; 220*8348SEric.Yu@Sun.COM } 221*8348SEric.Yu@Sun.COM 222*8348SEric.Yu@Sun.COM int 223*8348SEric.Yu@Sun.COM so_wait_connected_locked(struct sonode *so, boolean_t nonblock, 224*8348SEric.Yu@Sun.COM sock_connid_t id) 225*8348SEric.Yu@Sun.COM { 226*8348SEric.Yu@Sun.COM ASSERT(MUTEX_HELD(&so->so_lock)); 227*8348SEric.Yu@Sun.COM 228*8348SEric.Yu@Sun.COM /* 229*8348SEric.Yu@Sun.COM * The protocol has notified us that a connection attempt is being 230*8348SEric.Yu@Sun.COM * made, so before we wait for a notification to arrive we must 231*8348SEric.Yu@Sun.COM * clear out any errors associated with earlier connection attempts. 232*8348SEric.Yu@Sun.COM */ 233*8348SEric.Yu@Sun.COM if (so->so_error != 0 && SOCK_CONNID_LT(so->so_proto_connid, id)) 234*8348SEric.Yu@Sun.COM so->so_error = 0; 235*8348SEric.Yu@Sun.COM 236*8348SEric.Yu@Sun.COM while (SOCK_CONNID_LT(so->so_proto_connid, id)) { 237*8348SEric.Yu@Sun.COM if (nonblock) 238*8348SEric.Yu@Sun.COM return (EINPROGRESS); 239*8348SEric.Yu@Sun.COM 240*8348SEric.Yu@Sun.COM if (so->so_state & (SS_CLOSING | SS_FALLBACK_PENDING)) 241*8348SEric.Yu@Sun.COM return (EINTR); 242*8348SEric.Yu@Sun.COM 243*8348SEric.Yu@Sun.COM if (cv_wait_sig_swap(&so->so_state_cv, &so->so_lock) == 0) 244*8348SEric.Yu@Sun.COM return (EINTR); 245*8348SEric.Yu@Sun.COM } 246*8348SEric.Yu@Sun.COM 247*8348SEric.Yu@Sun.COM if (so->so_error != 0) 248*8348SEric.Yu@Sun.COM return (sogeterr(so, B_TRUE)); 249*8348SEric.Yu@Sun.COM /* 250*8348SEric.Yu@Sun.COM * Under normal circumstances, so_error should contain an error 251*8348SEric.Yu@Sun.COM * in case the connect failed. However, it is possible for another 252*8348SEric.Yu@Sun.COM * thread to come in a consume the error, so generate a sensible 253*8348SEric.Yu@Sun.COM * error in that case. 254*8348SEric.Yu@Sun.COM */ 255*8348SEric.Yu@Sun.COM if ((so->so_state & SS_ISCONNECTED) == 0) 256*8348SEric.Yu@Sun.COM return (ECONNREFUSED); 257*8348SEric.Yu@Sun.COM 258*8348SEric.Yu@Sun.COM return (0); 259*8348SEric.Yu@Sun.COM } 260*8348SEric.Yu@Sun.COM 261*8348SEric.Yu@Sun.COM /* 262*8348SEric.Yu@Sun.COM * int so_wait_connected(struct sonode *so, boolean_t nonblock, 263*8348SEric.Yu@Sun.COM * sock_connid_t id) 264*8348SEric.Yu@Sun.COM * 265*8348SEric.Yu@Sun.COM * Wait until the socket is connected or an error has occured. 266*8348SEric.Yu@Sun.COM * 267*8348SEric.Yu@Sun.COM * Arguments: 268*8348SEric.Yu@Sun.COM * so - socket 269*8348SEric.Yu@Sun.COM * nonblock - indicate whether it's ok to sleep if the connection has 270*8348SEric.Yu@Sun.COM * not yet been established 271*8348SEric.Yu@Sun.COM * gen - generation number that was returned by the protocol 272*8348SEric.Yu@Sun.COM * when the operation was started 273*8348SEric.Yu@Sun.COM * 274*8348SEric.Yu@Sun.COM * Returns: 275*8348SEric.Yu@Sun.COM * 0 if the connection attempt was successful, or an error indicating why 276*8348SEric.Yu@Sun.COM * the connection attempt failed. 277*8348SEric.Yu@Sun.COM */ 278*8348SEric.Yu@Sun.COM int 279*8348SEric.Yu@Sun.COM so_wait_connected(struct sonode *so, boolean_t nonblock, sock_connid_t id) 280*8348SEric.Yu@Sun.COM { 281*8348SEric.Yu@Sun.COM int error; 282*8348SEric.Yu@Sun.COM 283*8348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 284*8348SEric.Yu@Sun.COM error = so_wait_connected_locked(so, nonblock, id); 285*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 286*8348SEric.Yu@Sun.COM 287*8348SEric.Yu@Sun.COM return (error); 288*8348SEric.Yu@Sun.COM } 289*8348SEric.Yu@Sun.COM 290*8348SEric.Yu@Sun.COM int 291*8348SEric.Yu@Sun.COM so_snd_wait_qnotfull_locked(struct sonode *so, boolean_t dontblock) 292*8348SEric.Yu@Sun.COM { 293*8348SEric.Yu@Sun.COM int error; 294*8348SEric.Yu@Sun.COM 295*8348SEric.Yu@Sun.COM ASSERT(MUTEX_HELD(&so->so_lock)); 296*8348SEric.Yu@Sun.COM while (so->so_snd_qfull) { 297*8348SEric.Yu@Sun.COM if (so->so_state & SS_CANTSENDMORE) 298*8348SEric.Yu@Sun.COM return (EPIPE); 299*8348SEric.Yu@Sun.COM if (dontblock) 300*8348SEric.Yu@Sun.COM return (EWOULDBLOCK); 301*8348SEric.Yu@Sun.COM 302*8348SEric.Yu@Sun.COM if (so->so_state & (SS_CLOSING | SS_FALLBACK_PENDING)) 303*8348SEric.Yu@Sun.COM return (EINTR); 304*8348SEric.Yu@Sun.COM 305*8348SEric.Yu@Sun.COM if (so->so_sndtimeo == 0) { 306*8348SEric.Yu@Sun.COM /* 307*8348SEric.Yu@Sun.COM * Zero means disable timeout. 308*8348SEric.Yu@Sun.COM */ 309*8348SEric.Yu@Sun.COM error = cv_wait_sig(&so->so_snd_cv, &so->so_lock); 310*8348SEric.Yu@Sun.COM } else { 311*8348SEric.Yu@Sun.COM clock_t now; 312*8348SEric.Yu@Sun.COM 313*8348SEric.Yu@Sun.COM time_to_wait(&now, so->so_sndtimeo); 314*8348SEric.Yu@Sun.COM error = cv_timedwait_sig(&so->so_snd_cv, &so->so_lock, 315*8348SEric.Yu@Sun.COM now); 316*8348SEric.Yu@Sun.COM } 317*8348SEric.Yu@Sun.COM if (error == 0) 318*8348SEric.Yu@Sun.COM return (EINTR); 319*8348SEric.Yu@Sun.COM else if (error == -1) 320*8348SEric.Yu@Sun.COM return (ETIME); 321*8348SEric.Yu@Sun.COM } 322*8348SEric.Yu@Sun.COM return (0); 323*8348SEric.Yu@Sun.COM } 324*8348SEric.Yu@Sun.COM 325*8348SEric.Yu@Sun.COM /* 326*8348SEric.Yu@Sun.COM * int so_wait_sendbuf(struct sonode *so, boolean_t dontblock) 327*8348SEric.Yu@Sun.COM * 328*8348SEric.Yu@Sun.COM * Wait for the transport to notify us about send buffers becoming 329*8348SEric.Yu@Sun.COM * available. 330*8348SEric.Yu@Sun.COM */ 331*8348SEric.Yu@Sun.COM int 332*8348SEric.Yu@Sun.COM so_snd_wait_qnotfull(struct sonode *so, boolean_t dontblock) 333*8348SEric.Yu@Sun.COM { 334*8348SEric.Yu@Sun.COM int error = 0; 335*8348SEric.Yu@Sun.COM 336*8348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 337*8348SEric.Yu@Sun.COM if (so->so_snd_qfull) { 338*8348SEric.Yu@Sun.COM so->so_snd_wakeup = B_TRUE; 339*8348SEric.Yu@Sun.COM error = so_snd_wait_qnotfull_locked(so, dontblock); 340*8348SEric.Yu@Sun.COM so->so_snd_wakeup = B_FALSE; 341*8348SEric.Yu@Sun.COM } 342*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 343*8348SEric.Yu@Sun.COM 344*8348SEric.Yu@Sun.COM return (error); 345*8348SEric.Yu@Sun.COM } 346*8348SEric.Yu@Sun.COM 347*8348SEric.Yu@Sun.COM void 348*8348SEric.Yu@Sun.COM so_snd_qfull(struct sonode *so) 349*8348SEric.Yu@Sun.COM { 350*8348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 351*8348SEric.Yu@Sun.COM so->so_snd_qfull = B_TRUE; 352*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 353*8348SEric.Yu@Sun.COM } 354*8348SEric.Yu@Sun.COM 355*8348SEric.Yu@Sun.COM void 356*8348SEric.Yu@Sun.COM so_snd_qnotfull(struct sonode *so) 357*8348SEric.Yu@Sun.COM { 358*8348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 359*8348SEric.Yu@Sun.COM so->so_snd_qfull = B_FALSE; 360*8348SEric.Yu@Sun.COM /* wake up everyone waiting for buffers */ 361*8348SEric.Yu@Sun.COM cv_broadcast(&so->so_snd_cv); 362*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 363*8348SEric.Yu@Sun.COM } 364*8348SEric.Yu@Sun.COM 365*8348SEric.Yu@Sun.COM /* 366*8348SEric.Yu@Sun.COM * Change the process/process group to which SIGIO is sent. 367*8348SEric.Yu@Sun.COM */ 368*8348SEric.Yu@Sun.COM int 369*8348SEric.Yu@Sun.COM socket_chgpgrp(struct sonode *so, pid_t pid) 370*8348SEric.Yu@Sun.COM { 371*8348SEric.Yu@Sun.COM int error; 372*8348SEric.Yu@Sun.COM 373*8348SEric.Yu@Sun.COM ASSERT(MUTEX_HELD(&so->so_lock)); 374*8348SEric.Yu@Sun.COM if (pid != 0) { 375*8348SEric.Yu@Sun.COM /* 376*8348SEric.Yu@Sun.COM * Permissions check by sending signal 0. 377*8348SEric.Yu@Sun.COM * Note that when kill fails it does a 378*8348SEric.Yu@Sun.COM * set_errno causing the system call to fail. 379*8348SEric.Yu@Sun.COM */ 380*8348SEric.Yu@Sun.COM error = kill(pid, 0); 381*8348SEric.Yu@Sun.COM if (error != 0) { 382*8348SEric.Yu@Sun.COM return (error); 383*8348SEric.Yu@Sun.COM } 384*8348SEric.Yu@Sun.COM } 385*8348SEric.Yu@Sun.COM so->so_pgrp = pid; 386*8348SEric.Yu@Sun.COM return (0); 387*8348SEric.Yu@Sun.COM } 388*8348SEric.Yu@Sun.COM 389*8348SEric.Yu@Sun.COM 390*8348SEric.Yu@Sun.COM /* 391*8348SEric.Yu@Sun.COM * Generate a SIGIO, for 'writable' events include siginfo structure, 392*8348SEric.Yu@Sun.COM * for read events just send the signal. 393*8348SEric.Yu@Sun.COM */ 394*8348SEric.Yu@Sun.COM /*ARGSUSED*/ 395*8348SEric.Yu@Sun.COM static void 396*8348SEric.Yu@Sun.COM socket_sigproc(proc_t *proc, int event) 397*8348SEric.Yu@Sun.COM { 398*8348SEric.Yu@Sun.COM k_siginfo_t info; 399*8348SEric.Yu@Sun.COM 400*8348SEric.Yu@Sun.COM ASSERT(event & (SOCKETSIG_WRITE | SOCKETSIG_READ | SOCKETSIG_URG)); 401*8348SEric.Yu@Sun.COM 402*8348SEric.Yu@Sun.COM if (event & SOCKETSIG_WRITE) { 403*8348SEric.Yu@Sun.COM info.si_signo = SIGPOLL; 404*8348SEric.Yu@Sun.COM info.si_code = POLL_OUT; 405*8348SEric.Yu@Sun.COM info.si_errno = 0; 406*8348SEric.Yu@Sun.COM info.si_fd = 0; 407*8348SEric.Yu@Sun.COM info.si_band = 0; 408*8348SEric.Yu@Sun.COM sigaddq(proc, NULL, &info, KM_NOSLEEP); 409*8348SEric.Yu@Sun.COM } 410*8348SEric.Yu@Sun.COM if (event & SOCKETSIG_READ) { 411*8348SEric.Yu@Sun.COM sigtoproc(proc, NULL, SIGPOLL); 412*8348SEric.Yu@Sun.COM } 413*8348SEric.Yu@Sun.COM if (event & SOCKETSIG_URG) { 414*8348SEric.Yu@Sun.COM sigtoproc(proc, NULL, SIGURG); 415*8348SEric.Yu@Sun.COM } 416*8348SEric.Yu@Sun.COM } 417*8348SEric.Yu@Sun.COM 418*8348SEric.Yu@Sun.COM void 419*8348SEric.Yu@Sun.COM socket_sendsig(struct sonode *so, int event) 420*8348SEric.Yu@Sun.COM { 421*8348SEric.Yu@Sun.COM proc_t *proc; 422*8348SEric.Yu@Sun.COM 423*8348SEric.Yu@Sun.COM ASSERT(MUTEX_HELD(&so->so_lock)); 424*8348SEric.Yu@Sun.COM 425*8348SEric.Yu@Sun.COM if (so->so_pgrp == 0 || (!(so->so_state & SS_ASYNC) && 426*8348SEric.Yu@Sun.COM event != SOCKETSIG_URG)) { 427*8348SEric.Yu@Sun.COM return; 428*8348SEric.Yu@Sun.COM } 429*8348SEric.Yu@Sun.COM 430*8348SEric.Yu@Sun.COM dprint(3, ("sending sig %d to %d\n", event, so->so_pgrp)); 431*8348SEric.Yu@Sun.COM 432*8348SEric.Yu@Sun.COM if (so->so_pgrp > 0) { 433*8348SEric.Yu@Sun.COM /* 434*8348SEric.Yu@Sun.COM * XXX This unfortunately still generates 435*8348SEric.Yu@Sun.COM * a signal when a fd is closed but 436*8348SEric.Yu@Sun.COM * the proc is active. 437*8348SEric.Yu@Sun.COM */ 438*8348SEric.Yu@Sun.COM mutex_enter(&pidlock); 439*8348SEric.Yu@Sun.COM proc = prfind(so->so_pgrp); 440*8348SEric.Yu@Sun.COM if (proc == NULL) { 441*8348SEric.Yu@Sun.COM mutex_exit(&pidlock); 442*8348SEric.Yu@Sun.COM return; 443*8348SEric.Yu@Sun.COM } 444*8348SEric.Yu@Sun.COM mutex_enter(&proc->p_lock); 445*8348SEric.Yu@Sun.COM mutex_exit(&pidlock); 446*8348SEric.Yu@Sun.COM socket_sigproc(proc, event); 447*8348SEric.Yu@Sun.COM mutex_exit(&proc->p_lock); 448*8348SEric.Yu@Sun.COM } else { 449*8348SEric.Yu@Sun.COM /* 450*8348SEric.Yu@Sun.COM * Send to process group. Hold pidlock across 451*8348SEric.Yu@Sun.COM * calls to socket_sigproc(). 452*8348SEric.Yu@Sun.COM */ 453*8348SEric.Yu@Sun.COM pid_t pgrp = -so->so_pgrp; 454*8348SEric.Yu@Sun.COM 455*8348SEric.Yu@Sun.COM mutex_enter(&pidlock); 456*8348SEric.Yu@Sun.COM proc = pgfind(pgrp); 457*8348SEric.Yu@Sun.COM while (proc != NULL) { 458*8348SEric.Yu@Sun.COM mutex_enter(&proc->p_lock); 459*8348SEric.Yu@Sun.COM socket_sigproc(proc, event); 460*8348SEric.Yu@Sun.COM mutex_exit(&proc->p_lock); 461*8348SEric.Yu@Sun.COM proc = proc->p_pglink; 462*8348SEric.Yu@Sun.COM } 463*8348SEric.Yu@Sun.COM mutex_exit(&pidlock); 464*8348SEric.Yu@Sun.COM } 465*8348SEric.Yu@Sun.COM } 466*8348SEric.Yu@Sun.COM 467*8348SEric.Yu@Sun.COM #define MIN(a, b) ((a) < (b) ? (a) : (b)) 468*8348SEric.Yu@Sun.COM /* Copy userdata into a new mblk_t */ 469*8348SEric.Yu@Sun.COM mblk_t * 470*8348SEric.Yu@Sun.COM socopyinuio(uio_t *uiop, ssize_t iosize, size_t wroff, ssize_t maxblk, 471*8348SEric.Yu@Sun.COM size_t tail_len, int *errorp) 472*8348SEric.Yu@Sun.COM { 473*8348SEric.Yu@Sun.COM mblk_t *head = NULL, **tail = &head; 474*8348SEric.Yu@Sun.COM 475*8348SEric.Yu@Sun.COM ASSERT(iosize == INFPSZ || iosize > 0); 476*8348SEric.Yu@Sun.COM 477*8348SEric.Yu@Sun.COM if (iosize == INFPSZ || iosize > uiop->uio_resid) 478*8348SEric.Yu@Sun.COM iosize = uiop->uio_resid; 479*8348SEric.Yu@Sun.COM 480*8348SEric.Yu@Sun.COM if (maxblk == INFPSZ) 481*8348SEric.Yu@Sun.COM maxblk = iosize; 482*8348SEric.Yu@Sun.COM 483*8348SEric.Yu@Sun.COM /* Nothing to do in these cases, so we're done */ 484*8348SEric.Yu@Sun.COM if (iosize < 0 || maxblk < 0 || (maxblk == 0 && iosize > 0)) 485*8348SEric.Yu@Sun.COM goto done; 486*8348SEric.Yu@Sun.COM 487*8348SEric.Yu@Sun.COM /* 488*8348SEric.Yu@Sun.COM * We will enter the loop below if iosize is 0; it will allocate an 489*8348SEric.Yu@Sun.COM * empty message block and call uiomove(9F) which will just return. 490*8348SEric.Yu@Sun.COM * We could avoid that with an extra check but would only slow 491*8348SEric.Yu@Sun.COM * down the much more likely case where iosize is larger than 0. 492*8348SEric.Yu@Sun.COM */ 493*8348SEric.Yu@Sun.COM do { 494*8348SEric.Yu@Sun.COM ssize_t blocksize; 495*8348SEric.Yu@Sun.COM mblk_t *mp; 496*8348SEric.Yu@Sun.COM 497*8348SEric.Yu@Sun.COM blocksize = MIN(iosize, maxblk); 498*8348SEric.Yu@Sun.COM ASSERT(blocksize >= 0); 499*8348SEric.Yu@Sun.COM if ((mp = allocb(wroff + blocksize + tail_len, 500*8348SEric.Yu@Sun.COM BPRI_MED)) == NULL) { 501*8348SEric.Yu@Sun.COM *errorp = ENOMEM; 502*8348SEric.Yu@Sun.COM return (head); 503*8348SEric.Yu@Sun.COM } 504*8348SEric.Yu@Sun.COM mp->b_rptr += wroff; 505*8348SEric.Yu@Sun.COM mp->b_wptr = mp->b_rptr + blocksize; 506*8348SEric.Yu@Sun.COM 507*8348SEric.Yu@Sun.COM *tail = mp; 508*8348SEric.Yu@Sun.COM tail = &mp->b_cont; 509*8348SEric.Yu@Sun.COM 510*8348SEric.Yu@Sun.COM /* uiomove(9F) either returns 0 or EFAULT */ 511*8348SEric.Yu@Sun.COM if ((*errorp = uiomove(mp->b_rptr, (size_t)blocksize, 512*8348SEric.Yu@Sun.COM UIO_WRITE, uiop)) != 0) { 513*8348SEric.Yu@Sun.COM ASSERT(*errorp != ENOMEM); 514*8348SEric.Yu@Sun.COM freemsg(head); 515*8348SEric.Yu@Sun.COM return (NULL); 516*8348SEric.Yu@Sun.COM } 517*8348SEric.Yu@Sun.COM 518*8348SEric.Yu@Sun.COM iosize -= blocksize; 519*8348SEric.Yu@Sun.COM } while (iosize > 0); 520*8348SEric.Yu@Sun.COM 521*8348SEric.Yu@Sun.COM done: 522*8348SEric.Yu@Sun.COM *errorp = 0; 523*8348SEric.Yu@Sun.COM return (head); 524*8348SEric.Yu@Sun.COM } 525*8348SEric.Yu@Sun.COM 526*8348SEric.Yu@Sun.COM mblk_t * 527*8348SEric.Yu@Sun.COM socopyoutuio(mblk_t *mp, struct uio *uiop, ssize_t max_read, int *errorp) 528*8348SEric.Yu@Sun.COM { 529*8348SEric.Yu@Sun.COM int error; 530*8348SEric.Yu@Sun.COM ptrdiff_t n; 531*8348SEric.Yu@Sun.COM mblk_t *nmp; 532*8348SEric.Yu@Sun.COM 533*8348SEric.Yu@Sun.COM ASSERT(mp->b_wptr >= mp->b_rptr); 534*8348SEric.Yu@Sun.COM 535*8348SEric.Yu@Sun.COM /* 536*8348SEric.Yu@Sun.COM * max_read is the offset of the oobmark and read can not go pass 537*8348SEric.Yu@Sun.COM * the oobmark. 538*8348SEric.Yu@Sun.COM */ 539*8348SEric.Yu@Sun.COM if (max_read == INFPSZ || max_read > uiop->uio_resid) 540*8348SEric.Yu@Sun.COM max_read = uiop->uio_resid; 541*8348SEric.Yu@Sun.COM 542*8348SEric.Yu@Sun.COM do { 543*8348SEric.Yu@Sun.COM if ((n = MIN(max_read, MBLKL(mp))) != 0) { 544*8348SEric.Yu@Sun.COM ASSERT(n > 0); 545*8348SEric.Yu@Sun.COM 546*8348SEric.Yu@Sun.COM error = uiomove(mp->b_rptr, n, UIO_READ, uiop); 547*8348SEric.Yu@Sun.COM if (error != 0) { 548*8348SEric.Yu@Sun.COM freemsg(mp); 549*8348SEric.Yu@Sun.COM *errorp = error; 550*8348SEric.Yu@Sun.COM return (NULL); 551*8348SEric.Yu@Sun.COM } 552*8348SEric.Yu@Sun.COM } 553*8348SEric.Yu@Sun.COM 554*8348SEric.Yu@Sun.COM mp->b_rptr += n; 555*8348SEric.Yu@Sun.COM max_read -= n; 556*8348SEric.Yu@Sun.COM while (mp != NULL && (mp->b_rptr >= mp->b_wptr)) { 557*8348SEric.Yu@Sun.COM /* 558*8348SEric.Yu@Sun.COM * get rid of zero length mblks 559*8348SEric.Yu@Sun.COM */ 560*8348SEric.Yu@Sun.COM nmp = mp; 561*8348SEric.Yu@Sun.COM mp = mp->b_cont; 562*8348SEric.Yu@Sun.COM freeb(nmp); 563*8348SEric.Yu@Sun.COM } 564*8348SEric.Yu@Sun.COM } while (mp != NULL && max_read > 0); 565*8348SEric.Yu@Sun.COM 566*8348SEric.Yu@Sun.COM *errorp = 0; 567*8348SEric.Yu@Sun.COM return (mp); 568*8348SEric.Yu@Sun.COM } 569*8348SEric.Yu@Sun.COM 570*8348SEric.Yu@Sun.COM static void 571*8348SEric.Yu@Sun.COM so_prepend_msg(struct sonode *so, mblk_t *mp, mblk_t *last_tail) 572*8348SEric.Yu@Sun.COM { 573*8348SEric.Yu@Sun.COM ASSERT(last_tail != NULL); 574*8348SEric.Yu@Sun.COM mp->b_next = so->so_rcv_q_head; 575*8348SEric.Yu@Sun.COM mp->b_prev = last_tail; 576*8348SEric.Yu@Sun.COM ASSERT(!(DB_FLAGS(mp) & DBLK_UIOA)); 577*8348SEric.Yu@Sun.COM 578*8348SEric.Yu@Sun.COM if (so->so_rcv_q_head == NULL) { 579*8348SEric.Yu@Sun.COM ASSERT(so->so_rcv_q_last_head == NULL); 580*8348SEric.Yu@Sun.COM so->so_rcv_q_last_head = mp; 581*8348SEric.Yu@Sun.COM #ifdef DEBUG 582*8348SEric.Yu@Sun.COM } else { 583*8348SEric.Yu@Sun.COM ASSERT(!(DB_FLAGS(so->so_rcv_q_head) & DBLK_UIOA)); 584*8348SEric.Yu@Sun.COM #endif 585*8348SEric.Yu@Sun.COM } 586*8348SEric.Yu@Sun.COM so->so_rcv_q_head = mp; 587*8348SEric.Yu@Sun.COM 588*8348SEric.Yu@Sun.COM #ifdef DEBUG 589*8348SEric.Yu@Sun.COM if (so_debug_length) { 590*8348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 591*8348SEric.Yu@Sun.COM ASSERT(so_check_length(so)); 592*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 593*8348SEric.Yu@Sun.COM } 594*8348SEric.Yu@Sun.COM #endif 595*8348SEric.Yu@Sun.COM } 596*8348SEric.Yu@Sun.COM 597*8348SEric.Yu@Sun.COM static void 598*8348SEric.Yu@Sun.COM process_new_message(struct sonode *so, mblk_t *mp_head, mblk_t *mp_last_head) 599*8348SEric.Yu@Sun.COM { 600*8348SEric.Yu@Sun.COM ASSERT(mp_head->b_prev != NULL); 601*8348SEric.Yu@Sun.COM if (so->so_rcv_q_head == NULL) { 602*8348SEric.Yu@Sun.COM so->so_rcv_q_head = mp_head; 603*8348SEric.Yu@Sun.COM so->so_rcv_q_last_head = mp_last_head; 604*8348SEric.Yu@Sun.COM ASSERT(so->so_rcv_q_last_head->b_prev != NULL); 605*8348SEric.Yu@Sun.COM } else { 606*8348SEric.Yu@Sun.COM boolean_t flag_equal = ((DB_FLAGS(mp_head) & DBLK_UIOA) == 607*8348SEric.Yu@Sun.COM (DB_FLAGS(so->so_rcv_q_last_head) & DBLK_UIOA)); 608*8348SEric.Yu@Sun.COM 609*8348SEric.Yu@Sun.COM if (mp_head->b_next == NULL && 610*8348SEric.Yu@Sun.COM DB_TYPE(mp_head) == M_DATA && 611*8348SEric.Yu@Sun.COM DB_TYPE(so->so_rcv_q_last_head) == M_DATA && flag_equal) { 612*8348SEric.Yu@Sun.COM so->so_rcv_q_last_head->b_prev->b_cont = mp_head; 613*8348SEric.Yu@Sun.COM so->so_rcv_q_last_head->b_prev = mp_head->b_prev; 614*8348SEric.Yu@Sun.COM mp_head->b_prev = NULL; 615*8348SEric.Yu@Sun.COM } else if (flag_equal && (DB_FLAGS(mp_head) & DBLK_UIOA)) { 616*8348SEric.Yu@Sun.COM /* 617*8348SEric.Yu@Sun.COM * Append to last_head if more than one mblks, and both 618*8348SEric.Yu@Sun.COM * mp_head and last_head are I/OAT mblks. 619*8348SEric.Yu@Sun.COM */ 620*8348SEric.Yu@Sun.COM ASSERT(mp_head->b_next != NULL); 621*8348SEric.Yu@Sun.COM so->so_rcv_q_last_head->b_prev->b_cont = mp_head; 622*8348SEric.Yu@Sun.COM so->so_rcv_q_last_head->b_prev = mp_head->b_prev; 623*8348SEric.Yu@Sun.COM mp_head->b_prev = NULL; 624*8348SEric.Yu@Sun.COM 625*8348SEric.Yu@Sun.COM so->so_rcv_q_last_head->b_next = mp_head->b_next; 626*8348SEric.Yu@Sun.COM mp_head->b_next = NULL; 627*8348SEric.Yu@Sun.COM so->so_rcv_q_last_head = mp_last_head; 628*8348SEric.Yu@Sun.COM } else { 629*8348SEric.Yu@Sun.COM #ifdef DEBUG 630*8348SEric.Yu@Sun.COM { 631*8348SEric.Yu@Sun.COM mblk_t *tmp_mblk; 632*8348SEric.Yu@Sun.COM tmp_mblk = mp_head; 633*8348SEric.Yu@Sun.COM while (tmp_mblk != NULL) { 634*8348SEric.Yu@Sun.COM ASSERT(tmp_mblk->b_prev != NULL); 635*8348SEric.Yu@Sun.COM tmp_mblk = tmp_mblk->b_next; 636*8348SEric.Yu@Sun.COM } 637*8348SEric.Yu@Sun.COM } 638*8348SEric.Yu@Sun.COM #endif 639*8348SEric.Yu@Sun.COM so->so_rcv_q_last_head->b_next = mp_head; 640*8348SEric.Yu@Sun.COM so->so_rcv_q_last_head = mp_last_head; 641*8348SEric.Yu@Sun.COM } 642*8348SEric.Yu@Sun.COM } 643*8348SEric.Yu@Sun.COM } 644*8348SEric.Yu@Sun.COM 645*8348SEric.Yu@Sun.COM int 646*8348SEric.Yu@Sun.COM so_dequeue_msg(struct sonode *so, mblk_t **mctlp, struct uio *uiop, 647*8348SEric.Yu@Sun.COM rval_t *rvalp, int flags) 648*8348SEric.Yu@Sun.COM { 649*8348SEric.Yu@Sun.COM mblk_t *mp, *nmp; 650*8348SEric.Yu@Sun.COM mblk_t *savemp, *savemptail; 651*8348SEric.Yu@Sun.COM mblk_t *new_msg_head; 652*8348SEric.Yu@Sun.COM mblk_t *new_msg_last_head; 653*8348SEric.Yu@Sun.COM mblk_t *last_tail; 654*8348SEric.Yu@Sun.COM boolean_t partial_read; 655*8348SEric.Yu@Sun.COM boolean_t reset_atmark = B_FALSE; 656*8348SEric.Yu@Sun.COM int more = 0; 657*8348SEric.Yu@Sun.COM int error; 658*8348SEric.Yu@Sun.COM ssize_t oobmark; 659*8348SEric.Yu@Sun.COM sodirect_t *sodp = so->so_direct; 660*8348SEric.Yu@Sun.COM 661*8348SEric.Yu@Sun.COM partial_read = B_FALSE; 662*8348SEric.Yu@Sun.COM *mctlp = NULL; 663*8348SEric.Yu@Sun.COM again: 664*8348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 665*8348SEric.Yu@Sun.COM again1: 666*8348SEric.Yu@Sun.COM #ifdef DEBUG 667*8348SEric.Yu@Sun.COM if (so_debug_length) { 668*8348SEric.Yu@Sun.COM ASSERT(so_check_length(so)); 669*8348SEric.Yu@Sun.COM } 670*8348SEric.Yu@Sun.COM #endif 671*8348SEric.Yu@Sun.COM /* 672*8348SEric.Yu@Sun.COM * First move messages from the dump area to processing area 673*8348SEric.Yu@Sun.COM */ 674*8348SEric.Yu@Sun.COM if (sodp != NULL) { 675*8348SEric.Yu@Sun.COM /* No need to grab sod_lockp since it pointers to so_lock */ 676*8348SEric.Yu@Sun.COM if (sodp->sod_state & SOD_ENABLED) { 677*8348SEric.Yu@Sun.COM ASSERT(sodp->sod_lockp == &so->so_lock); 678*8348SEric.Yu@Sun.COM 679*8348SEric.Yu@Sun.COM if (sodp->sod_uioa.uioa_state & UIOA_ALLOC) { 680*8348SEric.Yu@Sun.COM /* nothing to uioamove */ 681*8348SEric.Yu@Sun.COM sodp = NULL; 682*8348SEric.Yu@Sun.COM } else if (sodp->sod_uioa.uioa_state & UIOA_INIT) { 683*8348SEric.Yu@Sun.COM sodp->sod_uioa.uioa_state &= UIOA_CLR; 684*8348SEric.Yu@Sun.COM sodp->sod_uioa.uioa_state |= UIOA_ENABLED; 685*8348SEric.Yu@Sun.COM /* 686*8348SEric.Yu@Sun.COM * try to uioamove() the data that 687*8348SEric.Yu@Sun.COM * has already queued. 688*8348SEric.Yu@Sun.COM */ 689*8348SEric.Yu@Sun.COM sod_uioa_so_init(so, sodp, uiop); 690*8348SEric.Yu@Sun.COM } 691*8348SEric.Yu@Sun.COM } else { 692*8348SEric.Yu@Sun.COM sodp = NULL; 693*8348SEric.Yu@Sun.COM } 694*8348SEric.Yu@Sun.COM } 695*8348SEric.Yu@Sun.COM new_msg_head = so->so_rcv_head; 696*8348SEric.Yu@Sun.COM new_msg_last_head = so->so_rcv_last_head; 697*8348SEric.Yu@Sun.COM so->so_rcv_head = NULL; 698*8348SEric.Yu@Sun.COM so->so_rcv_last_head = NULL; 699*8348SEric.Yu@Sun.COM oobmark = so->so_oobmark; 700*8348SEric.Yu@Sun.COM /* 701*8348SEric.Yu@Sun.COM * We can release the lock as there can only be one reader 702*8348SEric.Yu@Sun.COM */ 703*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 704*8348SEric.Yu@Sun.COM 705*8348SEric.Yu@Sun.COM if (so->so_state & SS_RCVATMARK) { 706*8348SEric.Yu@Sun.COM reset_atmark = B_TRUE; 707*8348SEric.Yu@Sun.COM } 708*8348SEric.Yu@Sun.COM if (new_msg_head != NULL) { 709*8348SEric.Yu@Sun.COM process_new_message(so, new_msg_head, new_msg_last_head); 710*8348SEric.Yu@Sun.COM } 711*8348SEric.Yu@Sun.COM savemp = savemptail = NULL; 712*8348SEric.Yu@Sun.COM rvalp->r_val1 = 0; 713*8348SEric.Yu@Sun.COM error = 0; 714*8348SEric.Yu@Sun.COM mp = so->so_rcv_q_head; 715*8348SEric.Yu@Sun.COM 716*8348SEric.Yu@Sun.COM if (mp != NULL && 717*8348SEric.Yu@Sun.COM (so->so_rcv_timer_tid == 0 || 718*8348SEric.Yu@Sun.COM so->so_rcv_queued >= so->so_rcv_thresh)) { 719*8348SEric.Yu@Sun.COM partial_read = B_FALSE; 720*8348SEric.Yu@Sun.COM 721*8348SEric.Yu@Sun.COM if (flags & MSG_PEEK) { 722*8348SEric.Yu@Sun.COM if ((nmp = dupmsg(mp)) == NULL && 723*8348SEric.Yu@Sun.COM (nmp = copymsg(mp)) == NULL) { 724*8348SEric.Yu@Sun.COM size_t size = msgsize(mp); 725*8348SEric.Yu@Sun.COM 726*8348SEric.Yu@Sun.COM error = strwaitbuf(size, BPRI_HI); 727*8348SEric.Yu@Sun.COM if (error) { 728*8348SEric.Yu@Sun.COM return (error); 729*8348SEric.Yu@Sun.COM } 730*8348SEric.Yu@Sun.COM goto again; 731*8348SEric.Yu@Sun.COM } 732*8348SEric.Yu@Sun.COM mp = nmp; 733*8348SEric.Yu@Sun.COM } else { 734*8348SEric.Yu@Sun.COM ASSERT(mp->b_prev != NULL); 735*8348SEric.Yu@Sun.COM last_tail = mp->b_prev; 736*8348SEric.Yu@Sun.COM mp->b_prev = NULL; 737*8348SEric.Yu@Sun.COM so->so_rcv_q_head = mp->b_next; 738*8348SEric.Yu@Sun.COM if (so->so_rcv_q_head == NULL) { 739*8348SEric.Yu@Sun.COM so->so_rcv_q_last_head = NULL; 740*8348SEric.Yu@Sun.COM } 741*8348SEric.Yu@Sun.COM mp->b_next = NULL; 742*8348SEric.Yu@Sun.COM } 743*8348SEric.Yu@Sun.COM 744*8348SEric.Yu@Sun.COM ASSERT(mctlp != NULL); 745*8348SEric.Yu@Sun.COM /* 746*8348SEric.Yu@Sun.COM * First process PROTO or PCPROTO blocks, if any. 747*8348SEric.Yu@Sun.COM */ 748*8348SEric.Yu@Sun.COM if (DB_TYPE(mp) != M_DATA) { 749*8348SEric.Yu@Sun.COM *mctlp = mp; 750*8348SEric.Yu@Sun.COM savemp = mp; 751*8348SEric.Yu@Sun.COM savemptail = mp; 752*8348SEric.Yu@Sun.COM ASSERT(DB_TYPE(mp) == M_PROTO || 753*8348SEric.Yu@Sun.COM DB_TYPE(mp) == M_PCPROTO); 754*8348SEric.Yu@Sun.COM while (mp->b_cont != NULL && 755*8348SEric.Yu@Sun.COM DB_TYPE(mp->b_cont) != M_DATA) { 756*8348SEric.Yu@Sun.COM ASSERT(DB_TYPE(mp->b_cont) == M_PROTO || 757*8348SEric.Yu@Sun.COM DB_TYPE(mp->b_cont) == M_PCPROTO); 758*8348SEric.Yu@Sun.COM mp = mp->b_cont; 759*8348SEric.Yu@Sun.COM savemptail = mp; 760*8348SEric.Yu@Sun.COM } 761*8348SEric.Yu@Sun.COM mp = savemptail->b_cont; 762*8348SEric.Yu@Sun.COM savemptail->b_cont = NULL; 763*8348SEric.Yu@Sun.COM } 764*8348SEric.Yu@Sun.COM 765*8348SEric.Yu@Sun.COM ASSERT(DB_TYPE(mp) == M_DATA); 766*8348SEric.Yu@Sun.COM /* 767*8348SEric.Yu@Sun.COM * Now process DATA blocks, if any. Note that for sodirect 768*8348SEric.Yu@Sun.COM * enabled socket, uio_resid can be 0. 769*8348SEric.Yu@Sun.COM */ 770*8348SEric.Yu@Sun.COM if (uiop->uio_resid >= 0) { 771*8348SEric.Yu@Sun.COM ssize_t copied = 0; 772*8348SEric.Yu@Sun.COM 773*8348SEric.Yu@Sun.COM if (sodp != NULL && (DB_FLAGS(mp) & DBLK_UIOA)) { 774*8348SEric.Yu@Sun.COM mutex_enter(sodp->sod_lockp); 775*8348SEric.Yu@Sun.COM ASSERT(uiop == (uio_t *)&sodp->sod_uioa); 776*8348SEric.Yu@Sun.COM copied = sod_uioa_mblk(so, mp); 777*8348SEric.Yu@Sun.COM if (copied > 0) 778*8348SEric.Yu@Sun.COM partial_read = B_TRUE; 779*8348SEric.Yu@Sun.COM mutex_exit(sodp->sod_lockp); 780*8348SEric.Yu@Sun.COM /* mark this mblk as processed */ 781*8348SEric.Yu@Sun.COM mp = NULL; 782*8348SEric.Yu@Sun.COM } else { 783*8348SEric.Yu@Sun.COM ssize_t oldresid = uiop->uio_resid; 784*8348SEric.Yu@Sun.COM 785*8348SEric.Yu@Sun.COM if (MBLKL(mp) < so_mblk_pull_len) { 786*8348SEric.Yu@Sun.COM if (pullupmsg(mp, -1) == 1) { 787*8348SEric.Yu@Sun.COM last_tail = mp; 788*8348SEric.Yu@Sun.COM } 789*8348SEric.Yu@Sun.COM } 790*8348SEric.Yu@Sun.COM /* 791*8348SEric.Yu@Sun.COM * Can not read beyond the oobmark 792*8348SEric.Yu@Sun.COM */ 793*8348SEric.Yu@Sun.COM mp = socopyoutuio(mp, uiop, 794*8348SEric.Yu@Sun.COM oobmark == 0 ? INFPSZ : oobmark, &error); 795*8348SEric.Yu@Sun.COM if (error != 0) { 796*8348SEric.Yu@Sun.COM freemsg(*mctlp); 797*8348SEric.Yu@Sun.COM *mctlp = NULL; 798*8348SEric.Yu@Sun.COM more = 0; 799*8348SEric.Yu@Sun.COM goto done; 800*8348SEric.Yu@Sun.COM } 801*8348SEric.Yu@Sun.COM ASSERT(oldresid >= uiop->uio_resid); 802*8348SEric.Yu@Sun.COM copied = oldresid - uiop->uio_resid; 803*8348SEric.Yu@Sun.COM if (oldresid > uiop->uio_resid) 804*8348SEric.Yu@Sun.COM partial_read = B_TRUE; 805*8348SEric.Yu@Sun.COM } 806*8348SEric.Yu@Sun.COM ASSERT(copied >= 0); 807*8348SEric.Yu@Sun.COM if (copied > 0 && !(flags & MSG_PEEK)) { 808*8348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 809*8348SEric.Yu@Sun.COM so->so_rcv_queued -= copied; 810*8348SEric.Yu@Sun.COM ASSERT(so->so_oobmark >= 0); 811*8348SEric.Yu@Sun.COM if (so->so_oobmark > 0) { 812*8348SEric.Yu@Sun.COM so->so_oobmark -= copied; 813*8348SEric.Yu@Sun.COM ASSERT(so->so_oobmark >= 0); 814*8348SEric.Yu@Sun.COM if (so->so_oobmark == 0) { 815*8348SEric.Yu@Sun.COM ASSERT(so->so_state & 816*8348SEric.Yu@Sun.COM SS_OOBPEND); 817*8348SEric.Yu@Sun.COM so->so_oobmark = 0; 818*8348SEric.Yu@Sun.COM so->so_state |= SS_RCVATMARK; 819*8348SEric.Yu@Sun.COM } 820*8348SEric.Yu@Sun.COM } 821*8348SEric.Yu@Sun.COM if (so->so_flowctrld && so->so_rcv_queued < 822*8348SEric.Yu@Sun.COM so->so_rcvlowat) { 823*8348SEric.Yu@Sun.COM so->so_flowctrld = B_FALSE; 824*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 825*8348SEric.Yu@Sun.COM /* 826*8348SEric.Yu@Sun.COM * open up flow control 827*8348SEric.Yu@Sun.COM */ 828*8348SEric.Yu@Sun.COM (*so->so_downcalls->sd_clr_flowctrl) 829*8348SEric.Yu@Sun.COM (so->so_proto_handle); 830*8348SEric.Yu@Sun.COM } else { 831*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 832*8348SEric.Yu@Sun.COM } 833*8348SEric.Yu@Sun.COM } 834*8348SEric.Yu@Sun.COM } 835*8348SEric.Yu@Sun.COM if (mp != NULL) { /* more data blocks in msg */ 836*8348SEric.Yu@Sun.COM more |= MOREDATA; 837*8348SEric.Yu@Sun.COM if ((flags & (MSG_PEEK|MSG_TRUNC))) { 838*8348SEric.Yu@Sun.COM if (flags & MSG_TRUNC) { 839*8348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 840*8348SEric.Yu@Sun.COM so->so_rcv_queued -= msgdsize(mp); 841*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 842*8348SEric.Yu@Sun.COM } 843*8348SEric.Yu@Sun.COM freemsg(mp); 844*8348SEric.Yu@Sun.COM } else if (partial_read && !somsghasdata(mp)) { 845*8348SEric.Yu@Sun.COM /* 846*8348SEric.Yu@Sun.COM * Avoid queuing a zero-length tail part of 847*8348SEric.Yu@Sun.COM * a message. partial_read == 1 indicates that 848*8348SEric.Yu@Sun.COM * we read some of the message. 849*8348SEric.Yu@Sun.COM */ 850*8348SEric.Yu@Sun.COM freemsg(mp); 851*8348SEric.Yu@Sun.COM more &= ~MOREDATA; 852*8348SEric.Yu@Sun.COM } else { 853*8348SEric.Yu@Sun.COM if (savemp != NULL && 854*8348SEric.Yu@Sun.COM (flags & MSG_DUPCTRL)) { 855*8348SEric.Yu@Sun.COM mblk_t *nmp; 856*8348SEric.Yu@Sun.COM /* 857*8348SEric.Yu@Sun.COM * There should only be non data mblks 858*8348SEric.Yu@Sun.COM */ 859*8348SEric.Yu@Sun.COM ASSERT(DB_TYPE(savemp) != M_DATA && 860*8348SEric.Yu@Sun.COM DB_TYPE(savemptail) != M_DATA); 861*8348SEric.Yu@Sun.COM try_again: 862*8348SEric.Yu@Sun.COM if ((nmp = dupmsg(savemp)) == NULL && 863*8348SEric.Yu@Sun.COM (nmp = copymsg(savemp)) == NULL) { 864*8348SEric.Yu@Sun.COM 865*8348SEric.Yu@Sun.COM size_t size = msgsize(savemp); 866*8348SEric.Yu@Sun.COM 867*8348SEric.Yu@Sun.COM error = strwaitbuf(size, 868*8348SEric.Yu@Sun.COM BPRI_HI); 869*8348SEric.Yu@Sun.COM if (error != 0) { 870*8348SEric.Yu@Sun.COM /* 871*8348SEric.Yu@Sun.COM * In case we 872*8348SEric.Yu@Sun.COM * cannot copy 873*8348SEric.Yu@Sun.COM * control data 874*8348SEric.Yu@Sun.COM * free the remaining 875*8348SEric.Yu@Sun.COM * data. 876*8348SEric.Yu@Sun.COM */ 877*8348SEric.Yu@Sun.COM freemsg(mp); 878*8348SEric.Yu@Sun.COM goto done; 879*8348SEric.Yu@Sun.COM } 880*8348SEric.Yu@Sun.COM goto try_again; 881*8348SEric.Yu@Sun.COM } 882*8348SEric.Yu@Sun.COM 883*8348SEric.Yu@Sun.COM ASSERT(nmp != NULL); 884*8348SEric.Yu@Sun.COM ASSERT(DB_TYPE(nmp) != M_DATA); 885*8348SEric.Yu@Sun.COM savemptail->b_cont = mp; 886*8348SEric.Yu@Sun.COM *mctlp = nmp; 887*8348SEric.Yu@Sun.COM mp = savemp; 888*8348SEric.Yu@Sun.COM } 889*8348SEric.Yu@Sun.COM /* 890*8348SEric.Yu@Sun.COM * putback mp 891*8348SEric.Yu@Sun.COM */ 892*8348SEric.Yu@Sun.COM so_prepend_msg(so, mp, last_tail); 893*8348SEric.Yu@Sun.COM } 894*8348SEric.Yu@Sun.COM } 895*8348SEric.Yu@Sun.COM 896*8348SEric.Yu@Sun.COM /* fast check so_rcv_head if there is more data */ 897*8348SEric.Yu@Sun.COM if (partial_read && !(so->so_state & SS_RCVATMARK) && 898*8348SEric.Yu@Sun.COM *mctlp == NULL && uiop->uio_resid > 0 && 899*8348SEric.Yu@Sun.COM !(flags & MSG_PEEK) && so->so_rcv_head != NULL) { 900*8348SEric.Yu@Sun.COM goto again; 901*8348SEric.Yu@Sun.COM } 902*8348SEric.Yu@Sun.COM } else if (!partial_read) { 903*8348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 904*8348SEric.Yu@Sun.COM if (so->so_error != 0) { 905*8348SEric.Yu@Sun.COM error = sogeterr(so, !(flags & MSG_PEEK)); 906*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 907*8348SEric.Yu@Sun.COM return (error); 908*8348SEric.Yu@Sun.COM } 909*8348SEric.Yu@Sun.COM /* 910*8348SEric.Yu@Sun.COM * No pending data. Return right away for nonblocking 911*8348SEric.Yu@Sun.COM * socket, otherwise sleep waiting for data. 912*8348SEric.Yu@Sun.COM */ 913*8348SEric.Yu@Sun.COM if (!(so->so_state & SS_CANTRCVMORE)) { 914*8348SEric.Yu@Sun.COM if ((uiop->uio_fmode & (FNDELAY|FNONBLOCK)) || 915*8348SEric.Yu@Sun.COM (flags & MSG_DONTWAIT)) { 916*8348SEric.Yu@Sun.COM error = EWOULDBLOCK; 917*8348SEric.Yu@Sun.COM } else { 918*8348SEric.Yu@Sun.COM if (so->so_state & (SS_CLOSING | 919*8348SEric.Yu@Sun.COM SS_FALLBACK_PENDING)) { 920*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 921*8348SEric.Yu@Sun.COM error = EINTR; 922*8348SEric.Yu@Sun.COM goto done; 923*8348SEric.Yu@Sun.COM } 924*8348SEric.Yu@Sun.COM 925*8348SEric.Yu@Sun.COM if (so->so_rcv_head != NULL) { 926*8348SEric.Yu@Sun.COM goto again1; 927*8348SEric.Yu@Sun.COM } 928*8348SEric.Yu@Sun.COM so->so_rcv_wakeup = B_TRUE; 929*8348SEric.Yu@Sun.COM so->so_rcv_wanted = uiop->uio_resid; 930*8348SEric.Yu@Sun.COM if (so->so_rcvtimeo == 0) { 931*8348SEric.Yu@Sun.COM /* 932*8348SEric.Yu@Sun.COM * Zero means disable timeout. 933*8348SEric.Yu@Sun.COM */ 934*8348SEric.Yu@Sun.COM error = cv_wait_sig(&so->so_rcv_cv, 935*8348SEric.Yu@Sun.COM &so->so_lock); 936*8348SEric.Yu@Sun.COM } else { 937*8348SEric.Yu@Sun.COM clock_t now; 938*8348SEric.Yu@Sun.COM time_to_wait(&now, so->so_rcvtimeo); 939*8348SEric.Yu@Sun.COM error = cv_timedwait_sig(&so->so_rcv_cv, 940*8348SEric.Yu@Sun.COM &so->so_lock, now); 941*8348SEric.Yu@Sun.COM } 942*8348SEric.Yu@Sun.COM so->so_rcv_wakeup = B_FALSE; 943*8348SEric.Yu@Sun.COM so->so_rcv_wanted = 0; 944*8348SEric.Yu@Sun.COM 945*8348SEric.Yu@Sun.COM if (error == 0) { 946*8348SEric.Yu@Sun.COM error = EINTR; 947*8348SEric.Yu@Sun.COM } else if (error == -1) { 948*8348SEric.Yu@Sun.COM error = ETIME; 949*8348SEric.Yu@Sun.COM } else { 950*8348SEric.Yu@Sun.COM goto again1; 951*8348SEric.Yu@Sun.COM } 952*8348SEric.Yu@Sun.COM } 953*8348SEric.Yu@Sun.COM } 954*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 955*8348SEric.Yu@Sun.COM } 956*8348SEric.Yu@Sun.COM if (reset_atmark && partial_read && !(flags & MSG_PEEK)) { 957*8348SEric.Yu@Sun.COM /* 958*8348SEric.Yu@Sun.COM * We are passed the mark, update state 959*8348SEric.Yu@Sun.COM * 4.3BSD and 4.4BSD clears the mark when peeking across it. 960*8348SEric.Yu@Sun.COM * The draft Posix socket spec states that the mark should 961*8348SEric.Yu@Sun.COM * not be cleared when peeking. We follow the latter. 962*8348SEric.Yu@Sun.COM */ 963*8348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 964*8348SEric.Yu@Sun.COM ASSERT(so_verify_oobstate(so)); 965*8348SEric.Yu@Sun.COM so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK); 966*8348SEric.Yu@Sun.COM freemsg(so->so_oobmsg); 967*8348SEric.Yu@Sun.COM so->so_oobmsg = NULL; 968*8348SEric.Yu@Sun.COM ASSERT(so_verify_oobstate(so)); 969*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 970*8348SEric.Yu@Sun.COM } 971*8348SEric.Yu@Sun.COM ASSERT(so->so_rcv_wakeup == B_FALSE); 972*8348SEric.Yu@Sun.COM done: 973*8348SEric.Yu@Sun.COM if (sodp != NULL) { 974*8348SEric.Yu@Sun.COM mutex_enter(sodp->sod_lockp); 975*8348SEric.Yu@Sun.COM if ((sodp->sod_state & SOD_ENABLED) && 976*8348SEric.Yu@Sun.COM (sodp->sod_uioa.uioa_state & UIOA_ENABLED)) { 977*8348SEric.Yu@Sun.COM SOD_UIOAFINI(sodp); 978*8348SEric.Yu@Sun.COM if (sodp->sod_uioa.uioa_mbytes > 0) { 979*8348SEric.Yu@Sun.COM ASSERT(so->so_rcv_q_head != NULL || 980*8348SEric.Yu@Sun.COM so->so_rcv_head != NULL); 981*8348SEric.Yu@Sun.COM so->so_rcv_queued -= sod_uioa_mblk(so, NULL); 982*8348SEric.Yu@Sun.COM if (error == EWOULDBLOCK) 983*8348SEric.Yu@Sun.COM error = 0; 984*8348SEric.Yu@Sun.COM } 985*8348SEric.Yu@Sun.COM } 986*8348SEric.Yu@Sun.COM mutex_exit(sodp->sod_lockp); 987*8348SEric.Yu@Sun.COM } 988*8348SEric.Yu@Sun.COM #ifdef DEBUG 989*8348SEric.Yu@Sun.COM if (so_debug_length) { 990*8348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 991*8348SEric.Yu@Sun.COM ASSERT(so_check_length(so)); 992*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 993*8348SEric.Yu@Sun.COM } 994*8348SEric.Yu@Sun.COM #endif 995*8348SEric.Yu@Sun.COM rvalp->r_val1 = more; 996*8348SEric.Yu@Sun.COM return (error); 997*8348SEric.Yu@Sun.COM } 998*8348SEric.Yu@Sun.COM 999*8348SEric.Yu@Sun.COM void 1000*8348SEric.Yu@Sun.COM so_enqueue_msg(struct sonode *so, mblk_t *mp, size_t msg_size) 1001*8348SEric.Yu@Sun.COM { 1002*8348SEric.Yu@Sun.COM ASSERT(MUTEX_HELD(&so->so_lock)); 1003*8348SEric.Yu@Sun.COM 1004*8348SEric.Yu@Sun.COM #ifdef DEBUG 1005*8348SEric.Yu@Sun.COM if (so_debug_length) { 1006*8348SEric.Yu@Sun.COM ASSERT(so_check_length(so)); 1007*8348SEric.Yu@Sun.COM } 1008*8348SEric.Yu@Sun.COM #endif 1009*8348SEric.Yu@Sun.COM so->so_rcv_queued += msg_size; 1010*8348SEric.Yu@Sun.COM 1011*8348SEric.Yu@Sun.COM if (so->so_rcv_head == NULL) { 1012*8348SEric.Yu@Sun.COM ASSERT(so->so_rcv_last_head == NULL); 1013*8348SEric.Yu@Sun.COM so->so_rcv_head = mp; 1014*8348SEric.Yu@Sun.COM so->so_rcv_last_head = mp; 1015*8348SEric.Yu@Sun.COM } else if ((DB_TYPE(mp) == M_DATA && 1016*8348SEric.Yu@Sun.COM DB_TYPE(so->so_rcv_last_head) == M_DATA) && 1017*8348SEric.Yu@Sun.COM ((DB_FLAGS(mp) & DBLK_UIOA) == 1018*8348SEric.Yu@Sun.COM (DB_FLAGS(so->so_rcv_last_head) & DBLK_UIOA))) { 1019*8348SEric.Yu@Sun.COM /* Added to the end */ 1020*8348SEric.Yu@Sun.COM ASSERT(so->so_rcv_last_head != NULL); 1021*8348SEric.Yu@Sun.COM ASSERT(so->so_rcv_last_head->b_prev != NULL); 1022*8348SEric.Yu@Sun.COM so->so_rcv_last_head->b_prev->b_cont = mp; 1023*8348SEric.Yu@Sun.COM } else { 1024*8348SEric.Yu@Sun.COM /* Start a new end */ 1025*8348SEric.Yu@Sun.COM so->so_rcv_last_head->b_next = mp; 1026*8348SEric.Yu@Sun.COM so->so_rcv_last_head = mp; 1027*8348SEric.Yu@Sun.COM } 1028*8348SEric.Yu@Sun.COM while (mp->b_cont != NULL) 1029*8348SEric.Yu@Sun.COM mp = mp->b_cont; 1030*8348SEric.Yu@Sun.COM 1031*8348SEric.Yu@Sun.COM so->so_rcv_last_head->b_prev = mp; 1032*8348SEric.Yu@Sun.COM #ifdef DEBUG 1033*8348SEric.Yu@Sun.COM if (so_debug_length) { 1034*8348SEric.Yu@Sun.COM ASSERT(so_check_length(so)); 1035*8348SEric.Yu@Sun.COM } 1036*8348SEric.Yu@Sun.COM #endif 1037*8348SEric.Yu@Sun.COM } 1038*8348SEric.Yu@Sun.COM 1039*8348SEric.Yu@Sun.COM /* 1040*8348SEric.Yu@Sun.COM * Return B_TRUE if there is data in the message, B_FALSE otherwise. 1041*8348SEric.Yu@Sun.COM */ 1042*8348SEric.Yu@Sun.COM boolean_t 1043*8348SEric.Yu@Sun.COM somsghasdata(mblk_t *mp) 1044*8348SEric.Yu@Sun.COM { 1045*8348SEric.Yu@Sun.COM for (; mp; mp = mp->b_cont) 1046*8348SEric.Yu@Sun.COM if (mp->b_datap->db_type == M_DATA) { 1047*8348SEric.Yu@Sun.COM ASSERT(mp->b_wptr >= mp->b_rptr); 1048*8348SEric.Yu@Sun.COM if (mp->b_wptr > mp->b_rptr) 1049*8348SEric.Yu@Sun.COM return (B_TRUE); 1050*8348SEric.Yu@Sun.COM } 1051*8348SEric.Yu@Sun.COM return (B_FALSE); 1052*8348SEric.Yu@Sun.COM } 1053*8348SEric.Yu@Sun.COM 1054*8348SEric.Yu@Sun.COM /* 1055*8348SEric.Yu@Sun.COM * Flush the read side of sockfs. 1056*8348SEric.Yu@Sun.COM * 1057*8348SEric.Yu@Sun.COM * The caller must be sure that a reader is not already active when the 1058*8348SEric.Yu@Sun.COM * buffer is being flushed. 1059*8348SEric.Yu@Sun.COM */ 1060*8348SEric.Yu@Sun.COM void 1061*8348SEric.Yu@Sun.COM so_rcv_flush(struct sonode *so) 1062*8348SEric.Yu@Sun.COM { 1063*8348SEric.Yu@Sun.COM mblk_t *mp; 1064*8348SEric.Yu@Sun.COM 1065*8348SEric.Yu@Sun.COM ASSERT(MUTEX_HELD(&so->so_lock)); 1066*8348SEric.Yu@Sun.COM 1067*8348SEric.Yu@Sun.COM if (so->so_oobmsg != NULL) { 1068*8348SEric.Yu@Sun.COM freemsg(so->so_oobmsg); 1069*8348SEric.Yu@Sun.COM so->so_oobmsg = NULL; 1070*8348SEric.Yu@Sun.COM so->so_oobmark = 0; 1071*8348SEric.Yu@Sun.COM so->so_state &= 1072*8348SEric.Yu@Sun.COM ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA|SS_RCVATMARK); 1073*8348SEric.Yu@Sun.COM } 1074*8348SEric.Yu@Sun.COM 1075*8348SEric.Yu@Sun.COM /* 1076*8348SEric.Yu@Sun.COM * Free messages sitting in the send and recv queue 1077*8348SEric.Yu@Sun.COM */ 1078*8348SEric.Yu@Sun.COM while (so->so_rcv_q_head != NULL) { 1079*8348SEric.Yu@Sun.COM mp = so->so_rcv_q_head; 1080*8348SEric.Yu@Sun.COM so->so_rcv_q_head = mp->b_next; 1081*8348SEric.Yu@Sun.COM mp->b_next = mp->b_prev = NULL; 1082*8348SEric.Yu@Sun.COM freemsg(mp); 1083*8348SEric.Yu@Sun.COM } 1084*8348SEric.Yu@Sun.COM while (so->so_rcv_head != NULL) { 1085*8348SEric.Yu@Sun.COM mp = so->so_rcv_head; 1086*8348SEric.Yu@Sun.COM so->so_rcv_head = mp->b_next; 1087*8348SEric.Yu@Sun.COM mp->b_next = mp->b_prev = NULL; 1088*8348SEric.Yu@Sun.COM freemsg(mp); 1089*8348SEric.Yu@Sun.COM } 1090*8348SEric.Yu@Sun.COM so->so_rcv_queued = 0; 1091*8348SEric.Yu@Sun.COM so->so_rcv_q_head = NULL; 1092*8348SEric.Yu@Sun.COM so->so_rcv_q_last_head = NULL; 1093*8348SEric.Yu@Sun.COM so->so_rcv_head = NULL; 1094*8348SEric.Yu@Sun.COM so->so_rcv_last_head = NULL; 1095*8348SEric.Yu@Sun.COM } 1096*8348SEric.Yu@Sun.COM 1097*8348SEric.Yu@Sun.COM /* 1098*8348SEric.Yu@Sun.COM * Handle recv* calls that set MSG_OOB or MSG_OOB together with MSG_PEEK. 1099*8348SEric.Yu@Sun.COM */ 1100*8348SEric.Yu@Sun.COM int 1101*8348SEric.Yu@Sun.COM sorecvoob(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, int flags, 1102*8348SEric.Yu@Sun.COM boolean_t oob_inline) 1103*8348SEric.Yu@Sun.COM { 1104*8348SEric.Yu@Sun.COM mblk_t *mp, *nmp; 1105*8348SEric.Yu@Sun.COM int error; 1106*8348SEric.Yu@Sun.COM 1107*8348SEric.Yu@Sun.COM dprintso(so, 1, ("sorecvoob(%p, %p, 0x%x)\n", (void *)so, (void *)msg, 1108*8348SEric.Yu@Sun.COM flags)); 1109*8348SEric.Yu@Sun.COM 1110*8348SEric.Yu@Sun.COM if (msg != NULL) { 1111*8348SEric.Yu@Sun.COM /* 1112*8348SEric.Yu@Sun.COM * There is never any oob data with addresses or control since 1113*8348SEric.Yu@Sun.COM * the T_EXDATA_IND does not carry any options. 1114*8348SEric.Yu@Sun.COM */ 1115*8348SEric.Yu@Sun.COM msg->msg_controllen = 0; 1116*8348SEric.Yu@Sun.COM msg->msg_namelen = 0; 1117*8348SEric.Yu@Sun.COM msg->msg_flags = 0; 1118*8348SEric.Yu@Sun.COM } 1119*8348SEric.Yu@Sun.COM 1120*8348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 1121*8348SEric.Yu@Sun.COM ASSERT(so_verify_oobstate(so)); 1122*8348SEric.Yu@Sun.COM if (oob_inline || 1123*8348SEric.Yu@Sun.COM (so->so_state & (SS_OOBPEND|SS_HADOOBDATA)) != SS_OOBPEND) { 1124*8348SEric.Yu@Sun.COM dprintso(so, 1, ("sorecvoob: inline or data consumed\n")); 1125*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 1126*8348SEric.Yu@Sun.COM return (EINVAL); 1127*8348SEric.Yu@Sun.COM } 1128*8348SEric.Yu@Sun.COM if (!(so->so_state & SS_HAVEOOBDATA)) { 1129*8348SEric.Yu@Sun.COM dprintso(so, 1, ("sorecvoob: no data yet\n")); 1130*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 1131*8348SEric.Yu@Sun.COM return (EWOULDBLOCK); 1132*8348SEric.Yu@Sun.COM } 1133*8348SEric.Yu@Sun.COM ASSERT(so->so_oobmsg != NULL); 1134*8348SEric.Yu@Sun.COM mp = so->so_oobmsg; 1135*8348SEric.Yu@Sun.COM if (flags & MSG_PEEK) { 1136*8348SEric.Yu@Sun.COM /* 1137*8348SEric.Yu@Sun.COM * Since recv* can not return ENOBUFS we can not use dupmsg. 1138*8348SEric.Yu@Sun.COM * Instead we revert to the consolidation private 1139*8348SEric.Yu@Sun.COM * allocb_wait plus bcopy. 1140*8348SEric.Yu@Sun.COM */ 1141*8348SEric.Yu@Sun.COM mblk_t *mp1; 1142*8348SEric.Yu@Sun.COM 1143*8348SEric.Yu@Sun.COM mp1 = allocb_wait(msgdsize(mp), BPRI_MED, STR_NOSIG, NULL); 1144*8348SEric.Yu@Sun.COM ASSERT(mp1); 1145*8348SEric.Yu@Sun.COM 1146*8348SEric.Yu@Sun.COM while (mp != NULL) { 1147*8348SEric.Yu@Sun.COM ssize_t size; 1148*8348SEric.Yu@Sun.COM 1149*8348SEric.Yu@Sun.COM size = MBLKL(mp); 1150*8348SEric.Yu@Sun.COM bcopy(mp->b_rptr, mp1->b_wptr, size); 1151*8348SEric.Yu@Sun.COM mp1->b_wptr += size; 1152*8348SEric.Yu@Sun.COM ASSERT(mp1->b_wptr <= mp1->b_datap->db_lim); 1153*8348SEric.Yu@Sun.COM mp = mp->b_cont; 1154*8348SEric.Yu@Sun.COM } 1155*8348SEric.Yu@Sun.COM mp = mp1; 1156*8348SEric.Yu@Sun.COM } else { 1157*8348SEric.Yu@Sun.COM /* 1158*8348SEric.Yu@Sun.COM * Update the state indicating that the data has been consumed. 1159*8348SEric.Yu@Sun.COM * Keep SS_OOBPEND set until data is consumed past the mark. 1160*8348SEric.Yu@Sun.COM */ 1161*8348SEric.Yu@Sun.COM so->so_oobmsg = NULL; 1162*8348SEric.Yu@Sun.COM so->so_state ^= SS_HAVEOOBDATA|SS_HADOOBDATA; 1163*8348SEric.Yu@Sun.COM } 1164*8348SEric.Yu@Sun.COM ASSERT(so_verify_oobstate(so)); 1165*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 1166*8348SEric.Yu@Sun.COM 1167*8348SEric.Yu@Sun.COM error = 0; 1168*8348SEric.Yu@Sun.COM nmp = mp; 1169*8348SEric.Yu@Sun.COM while (nmp != NULL && uiop->uio_resid > 0) { 1170*8348SEric.Yu@Sun.COM ssize_t n = MBLKL(nmp); 1171*8348SEric.Yu@Sun.COM 1172*8348SEric.Yu@Sun.COM n = MIN(n, uiop->uio_resid); 1173*8348SEric.Yu@Sun.COM if (n > 0) 1174*8348SEric.Yu@Sun.COM error = uiomove(nmp->b_rptr, n, 1175*8348SEric.Yu@Sun.COM UIO_READ, uiop); 1176*8348SEric.Yu@Sun.COM if (error) 1177*8348SEric.Yu@Sun.COM break; 1178*8348SEric.Yu@Sun.COM nmp = nmp->b_cont; 1179*8348SEric.Yu@Sun.COM } 1180*8348SEric.Yu@Sun.COM ASSERT(mp->b_next == NULL && mp->b_prev == NULL); 1181*8348SEric.Yu@Sun.COM freemsg(mp); 1182*8348SEric.Yu@Sun.COM return (error); 1183*8348SEric.Yu@Sun.COM } 1184*8348SEric.Yu@Sun.COM 1185*8348SEric.Yu@Sun.COM /* 1186*8348SEric.Yu@Sun.COM * Allocate and initializ sonode 1187*8348SEric.Yu@Sun.COM */ 1188*8348SEric.Yu@Sun.COM /* ARGSUSED */ 1189*8348SEric.Yu@Sun.COM struct sonode * 1190*8348SEric.Yu@Sun.COM socket_sonode_create(struct sockparams *sp, int family, int type, 1191*8348SEric.Yu@Sun.COM int protocol, int version, int sflags, int *errorp, struct cred *cr) 1192*8348SEric.Yu@Sun.COM { 1193*8348SEric.Yu@Sun.COM sonode_t *so; 1194*8348SEric.Yu@Sun.COM int kmflags; 1195*8348SEric.Yu@Sun.COM 1196*8348SEric.Yu@Sun.COM /* 1197*8348SEric.Yu@Sun.COM * Choose the right set of sonodeops based on the upcall and 1198*8348SEric.Yu@Sun.COM * down call version that the protocol has provided 1199*8348SEric.Yu@Sun.COM */ 1200*8348SEric.Yu@Sun.COM if (SOCK_UC_VERSION != sp->sp_smod_info->smod_uc_version || 1201*8348SEric.Yu@Sun.COM SOCK_DC_VERSION != sp->sp_smod_info->smod_dc_version) { 1202*8348SEric.Yu@Sun.COM /* 1203*8348SEric.Yu@Sun.COM * mismatch 1204*8348SEric.Yu@Sun.COM */ 1205*8348SEric.Yu@Sun.COM #ifdef DEBUG 1206*8348SEric.Yu@Sun.COM cmn_err(CE_CONT, "protocol and socket module version mismatch"); 1207*8348SEric.Yu@Sun.COM #endif 1208*8348SEric.Yu@Sun.COM *errorp = EINVAL; 1209*8348SEric.Yu@Sun.COM return (NULL); 1210*8348SEric.Yu@Sun.COM } 1211*8348SEric.Yu@Sun.COM 1212*8348SEric.Yu@Sun.COM kmflags = (sflags & SOCKET_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP; 1213*8348SEric.Yu@Sun.COM 1214*8348SEric.Yu@Sun.COM so = kmem_cache_alloc(socket_cache, kmflags); 1215*8348SEric.Yu@Sun.COM if (so == NULL) { 1216*8348SEric.Yu@Sun.COM *errorp = ENOMEM; 1217*8348SEric.Yu@Sun.COM return (NULL); 1218*8348SEric.Yu@Sun.COM } 1219*8348SEric.Yu@Sun.COM 1220*8348SEric.Yu@Sun.COM sonode_init(so, sp, family, type, protocol, &so_sonodeops); 1221*8348SEric.Yu@Sun.COM 1222*8348SEric.Yu@Sun.COM if (version == SOV_DEFAULT) 1223*8348SEric.Yu@Sun.COM version = so_default_version; 1224*8348SEric.Yu@Sun.COM 1225*8348SEric.Yu@Sun.COM so->so_version = (short)version; 1226*8348SEric.Yu@Sun.COM 1227*8348SEric.Yu@Sun.COM /* 1228*8348SEric.Yu@Sun.COM * set the default values to be INFPSZ 1229*8348SEric.Yu@Sun.COM * if a protocol desires it can change the value later 1230*8348SEric.Yu@Sun.COM */ 1231*8348SEric.Yu@Sun.COM so->so_proto_props.sopp_rxhiwat = SOCKET_RECVHIWATER; 1232*8348SEric.Yu@Sun.COM so->so_proto_props.sopp_rxlowat = SOCKET_RECVLOWATER; 1233*8348SEric.Yu@Sun.COM so->so_proto_props.sopp_maxpsz = INFPSZ; 1234*8348SEric.Yu@Sun.COM so->so_proto_props.sopp_maxblk = INFPSZ; 1235*8348SEric.Yu@Sun.COM 1236*8348SEric.Yu@Sun.COM return (so); 1237*8348SEric.Yu@Sun.COM } 1238*8348SEric.Yu@Sun.COM 1239*8348SEric.Yu@Sun.COM int 1240*8348SEric.Yu@Sun.COM socket_init_common(struct sonode *so, struct sonode *pso, int flags, cred_t *cr) 1241*8348SEric.Yu@Sun.COM { 1242*8348SEric.Yu@Sun.COM int error = 0; 1243*8348SEric.Yu@Sun.COM 1244*8348SEric.Yu@Sun.COM if (pso != NULL) { 1245*8348SEric.Yu@Sun.COM /* 1246*8348SEric.Yu@Sun.COM * We have a passive open, so inherit basic state from 1247*8348SEric.Yu@Sun.COM * the parent (listener). 1248*8348SEric.Yu@Sun.COM * 1249*8348SEric.Yu@Sun.COM * No need to grab the new sonode's lock, since there is no 1250*8348SEric.Yu@Sun.COM * one that can have a reference to it. 1251*8348SEric.Yu@Sun.COM */ 1252*8348SEric.Yu@Sun.COM mutex_enter(&pso->so_lock); 1253*8348SEric.Yu@Sun.COM 1254*8348SEric.Yu@Sun.COM so->so_state |= SS_ISCONNECTED | (pso->so_state & SS_ASYNC); 1255*8348SEric.Yu@Sun.COM so->so_pgrp = pso->so_pgrp; 1256*8348SEric.Yu@Sun.COM so->so_rcvtimeo = pso->so_rcvtimeo; 1257*8348SEric.Yu@Sun.COM so->so_sndtimeo = pso->so_sndtimeo; 1258*8348SEric.Yu@Sun.COM /* 1259*8348SEric.Yu@Sun.COM * Make note of the socket level options. TCP and IP level 1260*8348SEric.Yu@Sun.COM * options are already inherited. We could do all this after 1261*8348SEric.Yu@Sun.COM * accept is successful but doing it here simplifies code and 1262*8348SEric.Yu@Sun.COM * no harm done for error case. 1263*8348SEric.Yu@Sun.COM */ 1264*8348SEric.Yu@Sun.COM so->so_options = pso->so_options & (SO_DEBUG|SO_REUSEADDR| 1265*8348SEric.Yu@Sun.COM SO_KEEPALIVE| SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 1266*8348SEric.Yu@Sun.COM SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 1267*8348SEric.Yu@Sun.COM so->so_proto_props = pso->so_proto_props; 1268*8348SEric.Yu@Sun.COM so->so_mode = pso->so_mode; 1269*8348SEric.Yu@Sun.COM 1270*8348SEric.Yu@Sun.COM mutex_exit(&pso->so_lock); 1271*8348SEric.Yu@Sun.COM 1272*8348SEric.Yu@Sun.COM if (uioasync.enabled) { 1273*8348SEric.Yu@Sun.COM sod_sock_init(so, NULL, NULL, NULL, &so->so_lock); 1274*8348SEric.Yu@Sun.COM } 1275*8348SEric.Yu@Sun.COM return (0); 1276*8348SEric.Yu@Sun.COM } else { 1277*8348SEric.Yu@Sun.COM struct sockparams *sp = so->so_sockparams; 1278*8348SEric.Yu@Sun.COM sock_upcalls_t *upcalls_to_use; 1279*8348SEric.Yu@Sun.COM 1280*8348SEric.Yu@Sun.COM /* 1281*8348SEric.Yu@Sun.COM * Based on the version number select the right upcalls to 1282*8348SEric.Yu@Sun.COM * pass down. Currently we only have one version so choose 1283*8348SEric.Yu@Sun.COM * default 1284*8348SEric.Yu@Sun.COM */ 1285*8348SEric.Yu@Sun.COM upcalls_to_use = &so_upcalls; 1286*8348SEric.Yu@Sun.COM 1287*8348SEric.Yu@Sun.COM /* active open, so create a lower handle */ 1288*8348SEric.Yu@Sun.COM so->so_proto_handle = 1289*8348SEric.Yu@Sun.COM sp->sp_smod_info->smod_proto_create_func(so->so_family, 1290*8348SEric.Yu@Sun.COM so->so_type, so->so_protocol, &so->so_downcalls, 1291*8348SEric.Yu@Sun.COM &so->so_mode, &error, flags, cr); 1292*8348SEric.Yu@Sun.COM 1293*8348SEric.Yu@Sun.COM if (so->so_proto_handle == NULL) { 1294*8348SEric.Yu@Sun.COM ASSERT(error != 0); 1295*8348SEric.Yu@Sun.COM /* 1296*8348SEric.Yu@Sun.COM * To be safe; if a lower handle cannot be created, and 1297*8348SEric.Yu@Sun.COM * the proto does not give a reason why, assume there 1298*8348SEric.Yu@Sun.COM * was a lack of memory. 1299*8348SEric.Yu@Sun.COM */ 1300*8348SEric.Yu@Sun.COM return ((error == 0) ? ENOMEM : error); 1301*8348SEric.Yu@Sun.COM } 1302*8348SEric.Yu@Sun.COM ASSERT(so->so_downcalls != NULL); 1303*8348SEric.Yu@Sun.COM ASSERT(so->so_downcalls->sd_send != NULL || 1304*8348SEric.Yu@Sun.COM so->so_downcalls->sd_send_uio != NULL); 1305*8348SEric.Yu@Sun.COM if (so->so_downcalls->sd_recv_uio != NULL) { 1306*8348SEric.Yu@Sun.COM ASSERT(so->so_downcalls->sd_poll != NULL); 1307*8348SEric.Yu@Sun.COM so->so_pollev |= SO_POLLEV_ALWAYS; 1308*8348SEric.Yu@Sun.COM } 1309*8348SEric.Yu@Sun.COM 1310*8348SEric.Yu@Sun.COM (*so->so_downcalls->sd_activate)(so->so_proto_handle, 1311*8348SEric.Yu@Sun.COM (sock_upper_handle_t)so, upcalls_to_use, 0, cr); 1312*8348SEric.Yu@Sun.COM 1313*8348SEric.Yu@Sun.COM /* Wildcard */ 1314*8348SEric.Yu@Sun.COM 1315*8348SEric.Yu@Sun.COM /* 1316*8348SEric.Yu@Sun.COM * FIXME No need for this, the protocol can deal with it in 1317*8348SEric.Yu@Sun.COM * sd_create(). Should update ICMP. 1318*8348SEric.Yu@Sun.COM */ 1319*8348SEric.Yu@Sun.COM if (so->so_protocol != so->so_sockparams->sp_protocol) { 1320*8348SEric.Yu@Sun.COM int protocol = so->so_protocol; 1321*8348SEric.Yu@Sun.COM int error; 1322*8348SEric.Yu@Sun.COM /* 1323*8348SEric.Yu@Sun.COM * Issue SO_PROTOTYPE setsockopt. 1324*8348SEric.Yu@Sun.COM */ 1325*8348SEric.Yu@Sun.COM error = socket_setsockopt(so, SOL_SOCKET, SO_PROTOTYPE, 1326*8348SEric.Yu@Sun.COM &protocol, (t_uscalar_t)sizeof (protocol), cr); 1327*8348SEric.Yu@Sun.COM if (error) { 1328*8348SEric.Yu@Sun.COM (void) (*so->so_downcalls->sd_close) 1329*8348SEric.Yu@Sun.COM (so->so_proto_handle, 0, cr); 1330*8348SEric.Yu@Sun.COM 1331*8348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 1332*8348SEric.Yu@Sun.COM so_rcv_flush(so); 1333*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 1334*8348SEric.Yu@Sun.COM /* 1335*8348SEric.Yu@Sun.COM * Setsockopt often fails with ENOPROTOOPT but 1336*8348SEric.Yu@Sun.COM * socket() should fail with 1337*8348SEric.Yu@Sun.COM * EPROTONOSUPPORT/EPROTOTYPE. 1338*8348SEric.Yu@Sun.COM */ 1339*8348SEric.Yu@Sun.COM return (EPROTONOSUPPORT); 1340*8348SEric.Yu@Sun.COM } 1341*8348SEric.Yu@Sun.COM } 1342*8348SEric.Yu@Sun.COM return (0); 1343*8348SEric.Yu@Sun.COM } 1344*8348SEric.Yu@Sun.COM } 1345*8348SEric.Yu@Sun.COM 1346*8348SEric.Yu@Sun.COM /* 1347*8348SEric.Yu@Sun.COM * int socket_ioctl_common(struct sonode *so, int cmd, intptr_t arg, int mode, 1348*8348SEric.Yu@Sun.COM * struct cred *cr, int32_t *rvalp) 1349*8348SEric.Yu@Sun.COM * 1350*8348SEric.Yu@Sun.COM * Handle ioctls that manipulate basic socket state; non-blocking, 1351*8348SEric.Yu@Sun.COM * async, etc. 1352*8348SEric.Yu@Sun.COM * 1353*8348SEric.Yu@Sun.COM * Returns: 1354*8348SEric.Yu@Sun.COM * < 0 - ioctl was not handle 1355*8348SEric.Yu@Sun.COM * >= 0 - ioctl was handled, if > 0, then it is an errno 1356*8348SEric.Yu@Sun.COM * 1357*8348SEric.Yu@Sun.COM * Notes: 1358*8348SEric.Yu@Sun.COM * Assumes the standard receive buffer is used to obtain info for 1359*8348SEric.Yu@Sun.COM * NREAD. 1360*8348SEric.Yu@Sun.COM */ 1361*8348SEric.Yu@Sun.COM /* ARGSUSED */ 1362*8348SEric.Yu@Sun.COM int 1363*8348SEric.Yu@Sun.COM socket_ioctl_common(struct sonode *so, int cmd, intptr_t arg, int mode, 1364*8348SEric.Yu@Sun.COM struct cred *cr, int32_t *rvalp) 1365*8348SEric.Yu@Sun.COM { 1366*8348SEric.Yu@Sun.COM switch (cmd) { 1367*8348SEric.Yu@Sun.COM case FIONBIO: { 1368*8348SEric.Yu@Sun.COM int32_t value; 1369*8348SEric.Yu@Sun.COM 1370*8348SEric.Yu@Sun.COM if (so_copyin((void *)arg, &value, sizeof (int32_t), 1371*8348SEric.Yu@Sun.COM (mode & (int)FKIOCTL))) 1372*8348SEric.Yu@Sun.COM return (EFAULT); 1373*8348SEric.Yu@Sun.COM 1374*8348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 1375*8348SEric.Yu@Sun.COM if (value) { 1376*8348SEric.Yu@Sun.COM so->so_state |= SS_NDELAY; 1377*8348SEric.Yu@Sun.COM } else { 1378*8348SEric.Yu@Sun.COM so->so_state &= ~SS_NDELAY; 1379*8348SEric.Yu@Sun.COM } 1380*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 1381*8348SEric.Yu@Sun.COM return (0); 1382*8348SEric.Yu@Sun.COM } 1383*8348SEric.Yu@Sun.COM case FIOASYNC: { 1384*8348SEric.Yu@Sun.COM int32_t value; 1385*8348SEric.Yu@Sun.COM 1386*8348SEric.Yu@Sun.COM if (so_copyin((void *)arg, &value, sizeof (int32_t), 1387*8348SEric.Yu@Sun.COM (mode & (int)FKIOCTL))) 1388*8348SEric.Yu@Sun.COM return (EFAULT); 1389*8348SEric.Yu@Sun.COM 1390*8348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 1391*8348SEric.Yu@Sun.COM 1392*8348SEric.Yu@Sun.COM if (value) { 1393*8348SEric.Yu@Sun.COM /* Turn on SIGIO */ 1394*8348SEric.Yu@Sun.COM so->so_state |= SS_ASYNC; 1395*8348SEric.Yu@Sun.COM } else { 1396*8348SEric.Yu@Sun.COM /* Turn off SIGIO */ 1397*8348SEric.Yu@Sun.COM so->so_state &= ~SS_ASYNC; 1398*8348SEric.Yu@Sun.COM } 1399*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 1400*8348SEric.Yu@Sun.COM 1401*8348SEric.Yu@Sun.COM return (0); 1402*8348SEric.Yu@Sun.COM } 1403*8348SEric.Yu@Sun.COM 1404*8348SEric.Yu@Sun.COM case SIOCSPGRP: 1405*8348SEric.Yu@Sun.COM case FIOSETOWN: { 1406*8348SEric.Yu@Sun.COM int error; 1407*8348SEric.Yu@Sun.COM pid_t pid; 1408*8348SEric.Yu@Sun.COM 1409*8348SEric.Yu@Sun.COM if (so_copyin((void *)arg, &pid, sizeof (pid_t), 1410*8348SEric.Yu@Sun.COM (mode & (int)FKIOCTL))) 1411*8348SEric.Yu@Sun.COM return (EFAULT); 1412*8348SEric.Yu@Sun.COM 1413*8348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 1414*8348SEric.Yu@Sun.COM error = (pid != so->so_pgrp) ? socket_chgpgrp(so, pid) : 0; 1415*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 1416*8348SEric.Yu@Sun.COM return (error); 1417*8348SEric.Yu@Sun.COM } 1418*8348SEric.Yu@Sun.COM case SIOCGPGRP: 1419*8348SEric.Yu@Sun.COM case FIOGETOWN: 1420*8348SEric.Yu@Sun.COM if (so_copyout(&so->so_pgrp, (void *)arg, 1421*8348SEric.Yu@Sun.COM sizeof (pid_t), (mode & (int)FKIOCTL))) 1422*8348SEric.Yu@Sun.COM return (EFAULT); 1423*8348SEric.Yu@Sun.COM 1424*8348SEric.Yu@Sun.COM return (0); 1425*8348SEric.Yu@Sun.COM case SIOCATMARK: { 1426*8348SEric.Yu@Sun.COM int retval; 1427*8348SEric.Yu@Sun.COM 1428*8348SEric.Yu@Sun.COM /* 1429*8348SEric.Yu@Sun.COM * Only protocols that support urgent data can handle ATMARK. 1430*8348SEric.Yu@Sun.COM */ 1431*8348SEric.Yu@Sun.COM if ((so->so_mode & SM_EXDATA) == 0) 1432*8348SEric.Yu@Sun.COM return (EINVAL); 1433*8348SEric.Yu@Sun.COM 1434*8348SEric.Yu@Sun.COM /* 1435*8348SEric.Yu@Sun.COM * If the protocol is maintaining its own buffer, then the 1436*8348SEric.Yu@Sun.COM * request must be passed down. 1437*8348SEric.Yu@Sun.COM */ 1438*8348SEric.Yu@Sun.COM if (so->so_downcalls->sd_recv_uio != NULL) 1439*8348SEric.Yu@Sun.COM return (-1); 1440*8348SEric.Yu@Sun.COM 1441*8348SEric.Yu@Sun.COM retval = (so->so_state & SS_RCVATMARK) != 0; 1442*8348SEric.Yu@Sun.COM 1443*8348SEric.Yu@Sun.COM if (so_copyout(&retval, (void *)arg, sizeof (int), 1444*8348SEric.Yu@Sun.COM (mode & (int)FKIOCTL))) { 1445*8348SEric.Yu@Sun.COM return (EFAULT); 1446*8348SEric.Yu@Sun.COM } 1447*8348SEric.Yu@Sun.COM return (0); 1448*8348SEric.Yu@Sun.COM } 1449*8348SEric.Yu@Sun.COM 1450*8348SEric.Yu@Sun.COM case FIONREAD: { 1451*8348SEric.Yu@Sun.COM int retval; 1452*8348SEric.Yu@Sun.COM 1453*8348SEric.Yu@Sun.COM /* 1454*8348SEric.Yu@Sun.COM * If the protocol is maintaining its own buffer, then the 1455*8348SEric.Yu@Sun.COM * request must be passed down. 1456*8348SEric.Yu@Sun.COM */ 1457*8348SEric.Yu@Sun.COM if (so->so_downcalls->sd_recv_uio != NULL) 1458*8348SEric.Yu@Sun.COM return (-1); 1459*8348SEric.Yu@Sun.COM 1460*8348SEric.Yu@Sun.COM retval = MIN(so->so_rcv_queued, INT_MAX); 1461*8348SEric.Yu@Sun.COM 1462*8348SEric.Yu@Sun.COM if (so_copyout(&retval, (void *)arg, 1463*8348SEric.Yu@Sun.COM sizeof (retval), (mode & (int)FKIOCTL))) { 1464*8348SEric.Yu@Sun.COM return (EFAULT); 1465*8348SEric.Yu@Sun.COM } 1466*8348SEric.Yu@Sun.COM return (0); 1467*8348SEric.Yu@Sun.COM } 1468*8348SEric.Yu@Sun.COM 1469*8348SEric.Yu@Sun.COM case _I_GETPEERCRED: { 1470*8348SEric.Yu@Sun.COM int error = 0; 1471*8348SEric.Yu@Sun.COM 1472*8348SEric.Yu@Sun.COM if ((mode & FKIOCTL) == 0) 1473*8348SEric.Yu@Sun.COM return (EINVAL); 1474*8348SEric.Yu@Sun.COM 1475*8348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 1476*8348SEric.Yu@Sun.COM if ((so->so_mode & SM_CONNREQUIRED) == 0) { 1477*8348SEric.Yu@Sun.COM error = ENOTSUP; 1478*8348SEric.Yu@Sun.COM } else if ((so->so_state & SS_ISCONNECTED) == 0) { 1479*8348SEric.Yu@Sun.COM error = ENOTCONN; 1480*8348SEric.Yu@Sun.COM } else if (so->so_peercred != NULL) { 1481*8348SEric.Yu@Sun.COM k_peercred_t *kp = (k_peercred_t *)arg; 1482*8348SEric.Yu@Sun.COM kp->pc_cr = so->so_peercred; 1483*8348SEric.Yu@Sun.COM kp->pc_cpid = so->so_cpid; 1484*8348SEric.Yu@Sun.COM crhold(so->so_peercred); 1485*8348SEric.Yu@Sun.COM } else { 1486*8348SEric.Yu@Sun.COM error = EINVAL; 1487*8348SEric.Yu@Sun.COM } 1488*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 1489*8348SEric.Yu@Sun.COM return (error); 1490*8348SEric.Yu@Sun.COM } 1491*8348SEric.Yu@Sun.COM default: 1492*8348SEric.Yu@Sun.COM return (-1); 1493*8348SEric.Yu@Sun.COM } 1494*8348SEric.Yu@Sun.COM } 1495*8348SEric.Yu@Sun.COM 1496*8348SEric.Yu@Sun.COM /* 1497*8348SEric.Yu@Sun.COM * Process STREAMS related ioctls. If a I_PUSH/POP operation is specified 1498*8348SEric.Yu@Sun.COM * then the socket will fall back to TPI. 1499*8348SEric.Yu@Sun.COM * 1500*8348SEric.Yu@Sun.COM * Returns: 1501*8348SEric.Yu@Sun.COM * < 0 - ioctl was not handle 1502*8348SEric.Yu@Sun.COM * >= 0 - ioctl was handled, if > 0, then it is an errno 1503*8348SEric.Yu@Sun.COM */ 1504*8348SEric.Yu@Sun.COM int 1505*8348SEric.Yu@Sun.COM socket_strioc_common(struct sonode *so, int cmd, intptr_t arg, int mode, 1506*8348SEric.Yu@Sun.COM struct cred *cr, int32_t *rvalp) 1507*8348SEric.Yu@Sun.COM { 1508*8348SEric.Yu@Sun.COM switch (cmd) { 1509*8348SEric.Yu@Sun.COM case _I_INSERT: 1510*8348SEric.Yu@Sun.COM case _I_REMOVE: 1511*8348SEric.Yu@Sun.COM case I_FIND: 1512*8348SEric.Yu@Sun.COM case I_LIST: 1513*8348SEric.Yu@Sun.COM return (EOPNOTSUPP); 1514*8348SEric.Yu@Sun.COM 1515*8348SEric.Yu@Sun.COM case I_PUSH: 1516*8348SEric.Yu@Sun.COM case I_POP: { 1517*8348SEric.Yu@Sun.COM int retval; 1518*8348SEric.Yu@Sun.COM 1519*8348SEric.Yu@Sun.COM if ((retval = so_tpi_fallback(so, cr)) == 0) { 1520*8348SEric.Yu@Sun.COM /* Reissue the ioctl */ 1521*8348SEric.Yu@Sun.COM ASSERT(so->so_rcv_q_head == NULL); 1522*8348SEric.Yu@Sun.COM return (SOP_IOCTL(so, cmd, arg, mode, cr, rvalp)); 1523*8348SEric.Yu@Sun.COM } 1524*8348SEric.Yu@Sun.COM return (retval); 1525*8348SEric.Yu@Sun.COM } 1526*8348SEric.Yu@Sun.COM case I_LOOK: 1527*8348SEric.Yu@Sun.COM if (so_copyout("sockmod", (void *)arg, strlen("sockmod") + 1, 1528*8348SEric.Yu@Sun.COM (mode & (int)FKIOCTL))) { 1529*8348SEric.Yu@Sun.COM return (EFAULT); 1530*8348SEric.Yu@Sun.COM } 1531*8348SEric.Yu@Sun.COM return (0); 1532*8348SEric.Yu@Sun.COM default: 1533*8348SEric.Yu@Sun.COM return (-1); 1534*8348SEric.Yu@Sun.COM } 1535*8348SEric.Yu@Sun.COM } 1536*8348SEric.Yu@Sun.COM 1537*8348SEric.Yu@Sun.COM int 1538*8348SEric.Yu@Sun.COM socket_getopt_common(struct sonode *so, int level, int option_name, 1539*8348SEric.Yu@Sun.COM void *optval, socklen_t *optlenp) 1540*8348SEric.Yu@Sun.COM { 1541*8348SEric.Yu@Sun.COM if (level != SOL_SOCKET) 1542*8348SEric.Yu@Sun.COM return (-1); 1543*8348SEric.Yu@Sun.COM 1544*8348SEric.Yu@Sun.COM switch (option_name) { 1545*8348SEric.Yu@Sun.COM case SO_ERROR: 1546*8348SEric.Yu@Sun.COM case SO_DOMAIN: 1547*8348SEric.Yu@Sun.COM case SO_TYPE: 1548*8348SEric.Yu@Sun.COM case SO_ACCEPTCONN: { 1549*8348SEric.Yu@Sun.COM int32_t value; 1550*8348SEric.Yu@Sun.COM socklen_t optlen = *optlenp; 1551*8348SEric.Yu@Sun.COM 1552*8348SEric.Yu@Sun.COM if (optlen < (t_uscalar_t)sizeof (int32_t)) { 1553*8348SEric.Yu@Sun.COM return (EINVAL); 1554*8348SEric.Yu@Sun.COM } 1555*8348SEric.Yu@Sun.COM 1556*8348SEric.Yu@Sun.COM switch (option_name) { 1557*8348SEric.Yu@Sun.COM case SO_ERROR: 1558*8348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 1559*8348SEric.Yu@Sun.COM value = sogeterr(so, B_TRUE); 1560*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 1561*8348SEric.Yu@Sun.COM break; 1562*8348SEric.Yu@Sun.COM case SO_DOMAIN: 1563*8348SEric.Yu@Sun.COM value = so->so_family; 1564*8348SEric.Yu@Sun.COM break; 1565*8348SEric.Yu@Sun.COM case SO_TYPE: 1566*8348SEric.Yu@Sun.COM value = so->so_type; 1567*8348SEric.Yu@Sun.COM break; 1568*8348SEric.Yu@Sun.COM case SO_ACCEPTCONN: 1569*8348SEric.Yu@Sun.COM if (so->so_state & SS_ACCEPTCONN) 1570*8348SEric.Yu@Sun.COM value = SO_ACCEPTCONN; 1571*8348SEric.Yu@Sun.COM else 1572*8348SEric.Yu@Sun.COM value = 0; 1573*8348SEric.Yu@Sun.COM break; 1574*8348SEric.Yu@Sun.COM } 1575*8348SEric.Yu@Sun.COM 1576*8348SEric.Yu@Sun.COM bcopy(&value, optval, sizeof (value)); 1577*8348SEric.Yu@Sun.COM *optlenp = sizeof (value); 1578*8348SEric.Yu@Sun.COM 1579*8348SEric.Yu@Sun.COM return (0); 1580*8348SEric.Yu@Sun.COM } 1581*8348SEric.Yu@Sun.COM case SO_SNDTIMEO: 1582*8348SEric.Yu@Sun.COM case SO_RCVTIMEO: { 1583*8348SEric.Yu@Sun.COM clock_t value; 1584*8348SEric.Yu@Sun.COM socklen_t optlen = *optlenp; 1585*8348SEric.Yu@Sun.COM 1586*8348SEric.Yu@Sun.COM if (optlen < (t_uscalar_t)sizeof (struct timeval)) { 1587*8348SEric.Yu@Sun.COM return (EINVAL); 1588*8348SEric.Yu@Sun.COM } 1589*8348SEric.Yu@Sun.COM if (option_name == SO_RCVTIMEO) 1590*8348SEric.Yu@Sun.COM value = drv_hztousec(so->so_rcvtimeo); 1591*8348SEric.Yu@Sun.COM else 1592*8348SEric.Yu@Sun.COM value = drv_hztousec(so->so_sndtimeo); 1593*8348SEric.Yu@Sun.COM ((struct timeval *)(optval))->tv_sec = value / (1000 * 1000); 1594*8348SEric.Yu@Sun.COM ((struct timeval *)(optval))->tv_usec = value % (1000 * 1000); 1595*8348SEric.Yu@Sun.COM *optlenp = sizeof (struct timeval); 1596*8348SEric.Yu@Sun.COM return (0); 1597*8348SEric.Yu@Sun.COM } 1598*8348SEric.Yu@Sun.COM case SO_DEBUG: 1599*8348SEric.Yu@Sun.COM case SO_REUSEADDR: 1600*8348SEric.Yu@Sun.COM case SO_KEEPALIVE: 1601*8348SEric.Yu@Sun.COM case SO_DONTROUTE: 1602*8348SEric.Yu@Sun.COM case SO_BROADCAST: 1603*8348SEric.Yu@Sun.COM case SO_USELOOPBACK: 1604*8348SEric.Yu@Sun.COM case SO_OOBINLINE: 1605*8348SEric.Yu@Sun.COM case SO_SNDBUF: 1606*8348SEric.Yu@Sun.COM case SO_RCVBUF: 1607*8348SEric.Yu@Sun.COM #ifdef notyet 1608*8348SEric.Yu@Sun.COM case SO_SNDLOWAT: 1609*8348SEric.Yu@Sun.COM case SO_RCVLOWAT: 1610*8348SEric.Yu@Sun.COM #endif /* notyet */ 1611*8348SEric.Yu@Sun.COM case SO_DGRAM_ERRIND: { 1612*8348SEric.Yu@Sun.COM socklen_t optlen = *optlenp; 1613*8348SEric.Yu@Sun.COM 1614*8348SEric.Yu@Sun.COM if (optlen < (t_uscalar_t)sizeof (int32_t)) 1615*8348SEric.Yu@Sun.COM return (EINVAL); 1616*8348SEric.Yu@Sun.COM break; 1617*8348SEric.Yu@Sun.COM } 1618*8348SEric.Yu@Sun.COM case SO_LINGER: { 1619*8348SEric.Yu@Sun.COM socklen_t optlen = *optlenp; 1620*8348SEric.Yu@Sun.COM 1621*8348SEric.Yu@Sun.COM if (optlen < (t_uscalar_t)sizeof (struct linger)) 1622*8348SEric.Yu@Sun.COM return (EINVAL); 1623*8348SEric.Yu@Sun.COM break; 1624*8348SEric.Yu@Sun.COM } 1625*8348SEric.Yu@Sun.COM case SO_SND_BUFINFO: { 1626*8348SEric.Yu@Sun.COM socklen_t optlen = *optlenp; 1627*8348SEric.Yu@Sun.COM 1628*8348SEric.Yu@Sun.COM if (optlen < (t_uscalar_t)sizeof (struct so_snd_bufinfo)) 1629*8348SEric.Yu@Sun.COM return (EINVAL); 1630*8348SEric.Yu@Sun.COM ((struct so_snd_bufinfo *)(optval))->sbi_wroff = 1631*8348SEric.Yu@Sun.COM (so->so_proto_props).sopp_wroff; 1632*8348SEric.Yu@Sun.COM ((struct so_snd_bufinfo *)(optval))->sbi_maxblk = 1633*8348SEric.Yu@Sun.COM (so->so_proto_props).sopp_maxblk; 1634*8348SEric.Yu@Sun.COM ((struct so_snd_bufinfo *)(optval))->sbi_maxpsz = 1635*8348SEric.Yu@Sun.COM (so->so_proto_props).sopp_maxpsz; 1636*8348SEric.Yu@Sun.COM ((struct so_snd_bufinfo *)(optval))->sbi_tail = 1637*8348SEric.Yu@Sun.COM (so->so_proto_props).sopp_tail; 1638*8348SEric.Yu@Sun.COM *optlenp = sizeof (struct so_snd_bufinfo); 1639*8348SEric.Yu@Sun.COM return (0); 1640*8348SEric.Yu@Sun.COM } 1641*8348SEric.Yu@Sun.COM default: 1642*8348SEric.Yu@Sun.COM break; 1643*8348SEric.Yu@Sun.COM } 1644*8348SEric.Yu@Sun.COM 1645*8348SEric.Yu@Sun.COM /* Unknown Option */ 1646*8348SEric.Yu@Sun.COM return (-1); 1647*8348SEric.Yu@Sun.COM } 1648*8348SEric.Yu@Sun.COM 1649*8348SEric.Yu@Sun.COM void 1650*8348SEric.Yu@Sun.COM socket_sonode_destroy(struct sonode *so) 1651*8348SEric.Yu@Sun.COM { 1652*8348SEric.Yu@Sun.COM sonode_fini(so); 1653*8348SEric.Yu@Sun.COM kmem_cache_free(socket_cache, so); 1654*8348SEric.Yu@Sun.COM } 1655*8348SEric.Yu@Sun.COM 1656*8348SEric.Yu@Sun.COM int 1657*8348SEric.Yu@Sun.COM so_zcopy_wait(struct sonode *so) 1658*8348SEric.Yu@Sun.COM { 1659*8348SEric.Yu@Sun.COM int error = 0; 1660*8348SEric.Yu@Sun.COM 1661*8348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 1662*8348SEric.Yu@Sun.COM while (!(so->so_copyflag & STZCNOTIFY)) { 1663*8348SEric.Yu@Sun.COM if (so->so_state & SS_CLOSING) { 1664*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 1665*8348SEric.Yu@Sun.COM return (EINTR); 1666*8348SEric.Yu@Sun.COM } 1667*8348SEric.Yu@Sun.COM if (cv_wait_sig(&so->so_copy_cv, &so->so_lock) == 0) { 1668*8348SEric.Yu@Sun.COM error = EINTR; 1669*8348SEric.Yu@Sun.COM break; 1670*8348SEric.Yu@Sun.COM } 1671*8348SEric.Yu@Sun.COM } 1672*8348SEric.Yu@Sun.COM so->so_copyflag &= ~STZCNOTIFY; 1673*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 1674*8348SEric.Yu@Sun.COM return (error); 1675*8348SEric.Yu@Sun.COM } 1676*8348SEric.Yu@Sun.COM 1677*8348SEric.Yu@Sun.COM void 1678*8348SEric.Yu@Sun.COM so_timer_callback(void *arg) 1679*8348SEric.Yu@Sun.COM { 1680*8348SEric.Yu@Sun.COM struct sonode *so = (struct sonode *)arg; 1681*8348SEric.Yu@Sun.COM 1682*8348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 1683*8348SEric.Yu@Sun.COM 1684*8348SEric.Yu@Sun.COM so->so_rcv_timer_tid = 0; 1685*8348SEric.Yu@Sun.COM if (so->so_rcv_queued > 0) { 1686*8348SEric.Yu@Sun.COM so_notify_data(so, so->so_rcv_queued); 1687*8348SEric.Yu@Sun.COM } else { 1688*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 1689*8348SEric.Yu@Sun.COM } 1690*8348SEric.Yu@Sun.COM } 1691*8348SEric.Yu@Sun.COM 1692*8348SEric.Yu@Sun.COM #ifdef DEBUG 1693*8348SEric.Yu@Sun.COM /* 1694*8348SEric.Yu@Sun.COM * Verify that the length stored in so_rcv_queued and the length of data blocks 1695*8348SEric.Yu@Sun.COM * queued is same. 1696*8348SEric.Yu@Sun.COM */ 1697*8348SEric.Yu@Sun.COM static boolean_t 1698*8348SEric.Yu@Sun.COM so_check_length(sonode_t *so) 1699*8348SEric.Yu@Sun.COM { 1700*8348SEric.Yu@Sun.COM mblk_t *mp = so->so_rcv_q_head; 1701*8348SEric.Yu@Sun.COM int len = 0; 1702*8348SEric.Yu@Sun.COM 1703*8348SEric.Yu@Sun.COM ASSERT(MUTEX_HELD(&so->so_lock)); 1704*8348SEric.Yu@Sun.COM 1705*8348SEric.Yu@Sun.COM if (mp != NULL) { 1706*8348SEric.Yu@Sun.COM len = msgdsize(mp); 1707*8348SEric.Yu@Sun.COM while ((mp = mp->b_next) != NULL) 1708*8348SEric.Yu@Sun.COM len += msgdsize(mp); 1709*8348SEric.Yu@Sun.COM } 1710*8348SEric.Yu@Sun.COM mp = so->so_rcv_head; 1711*8348SEric.Yu@Sun.COM if (mp != NULL) { 1712*8348SEric.Yu@Sun.COM len += msgdsize(mp); 1713*8348SEric.Yu@Sun.COM while ((mp = mp->b_next) != NULL) 1714*8348SEric.Yu@Sun.COM len += msgdsize(mp); 1715*8348SEric.Yu@Sun.COM } 1716*8348SEric.Yu@Sun.COM return ((len == so->so_rcv_queued) ? B_TRUE : B_FALSE); 1717*8348SEric.Yu@Sun.COM } 1718*8348SEric.Yu@Sun.COM #endif 1719*8348SEric.Yu@Sun.COM 1720*8348SEric.Yu@Sun.COM int 1721*8348SEric.Yu@Sun.COM so_get_mod_version(struct sockparams *sp) 1722*8348SEric.Yu@Sun.COM { 1723*8348SEric.Yu@Sun.COM ASSERT(sp != NULL && sp->sp_smod_info != NULL); 1724*8348SEric.Yu@Sun.COM return (sp->sp_smod_info->smod_version); 1725*8348SEric.Yu@Sun.COM } 1726*8348SEric.Yu@Sun.COM 1727*8348SEric.Yu@Sun.COM /* 1728*8348SEric.Yu@Sun.COM * so_start_fallback() 1729*8348SEric.Yu@Sun.COM * 1730*8348SEric.Yu@Sun.COM * Block new socket operations from coming in, and wait for active operations 1731*8348SEric.Yu@Sun.COM * to complete. Threads that are sleeping will be woken up so they can get 1732*8348SEric.Yu@Sun.COM * out of the way. 1733*8348SEric.Yu@Sun.COM * 1734*8348SEric.Yu@Sun.COM * The caller must be a reader on so_fallback_rwlock. 1735*8348SEric.Yu@Sun.COM */ 1736*8348SEric.Yu@Sun.COM static boolean_t 1737*8348SEric.Yu@Sun.COM so_start_fallback(struct sonode *so) 1738*8348SEric.Yu@Sun.COM { 1739*8348SEric.Yu@Sun.COM ASSERT(RW_READ_HELD(&so->so_fallback_rwlock)); 1740*8348SEric.Yu@Sun.COM 1741*8348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 1742*8348SEric.Yu@Sun.COM if (so->so_state & SS_FALLBACK_PENDING) { 1743*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 1744*8348SEric.Yu@Sun.COM return (B_FALSE); 1745*8348SEric.Yu@Sun.COM } 1746*8348SEric.Yu@Sun.COM so->so_state |= SS_FALLBACK_PENDING; 1747*8348SEric.Yu@Sun.COM /* 1748*8348SEric.Yu@Sun.COM * Poke all threads that might be sleeping. Any operation that comes 1749*8348SEric.Yu@Sun.COM * in after the cv_broadcast will observe the fallback pending flag 1750*8348SEric.Yu@Sun.COM * which cause the call to return where it would normally sleep. 1751*8348SEric.Yu@Sun.COM */ 1752*8348SEric.Yu@Sun.COM cv_broadcast(&so->so_state_cv); /* threads in connect() */ 1753*8348SEric.Yu@Sun.COM cv_broadcast(&so->so_rcv_cv); /* threads in recvmsg() */ 1754*8348SEric.Yu@Sun.COM cv_broadcast(&so->so_snd_cv); /* threads in sendmsg() */ 1755*8348SEric.Yu@Sun.COM mutex_enter(&so->so_acceptq_lock); 1756*8348SEric.Yu@Sun.COM cv_broadcast(&so->so_acceptq_cv); /* threads in accept() */ 1757*8348SEric.Yu@Sun.COM mutex_exit(&so->so_acceptq_lock); 1758*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 1759*8348SEric.Yu@Sun.COM 1760*8348SEric.Yu@Sun.COM /* 1761*8348SEric.Yu@Sun.COM * The main reason for the rw_tryupgrade call is to provide 1762*8348SEric.Yu@Sun.COM * observability during the fallback process. We want to 1763*8348SEric.Yu@Sun.COM * be able to see if there are pending operations. 1764*8348SEric.Yu@Sun.COM */ 1765*8348SEric.Yu@Sun.COM if (rw_tryupgrade(&so->so_fallback_rwlock) == 0) { 1766*8348SEric.Yu@Sun.COM /* 1767*8348SEric.Yu@Sun.COM * It is safe to drop and reaquire the fallback lock, because 1768*8348SEric.Yu@Sun.COM * we are guaranteed that another fallback cannot take place. 1769*8348SEric.Yu@Sun.COM */ 1770*8348SEric.Yu@Sun.COM rw_exit(&so->so_fallback_rwlock); 1771*8348SEric.Yu@Sun.COM DTRACE_PROBE1(pending__ops__wait, (struct sonode *), so); 1772*8348SEric.Yu@Sun.COM rw_enter(&so->so_fallback_rwlock, RW_WRITER); 1773*8348SEric.Yu@Sun.COM DTRACE_PROBE1(pending__ops__complete, (struct sonode *), so); 1774*8348SEric.Yu@Sun.COM } 1775*8348SEric.Yu@Sun.COM 1776*8348SEric.Yu@Sun.COM return (B_TRUE); 1777*8348SEric.Yu@Sun.COM } 1778*8348SEric.Yu@Sun.COM 1779*8348SEric.Yu@Sun.COM /* 1780*8348SEric.Yu@Sun.COM * so_end_fallback() 1781*8348SEric.Yu@Sun.COM * 1782*8348SEric.Yu@Sun.COM * Allow socket opertions back in. 1783*8348SEric.Yu@Sun.COM * 1784*8348SEric.Yu@Sun.COM * The caller must be a writer on so_fallback_rwlock. 1785*8348SEric.Yu@Sun.COM */ 1786*8348SEric.Yu@Sun.COM static void 1787*8348SEric.Yu@Sun.COM so_end_fallback(struct sonode *so) 1788*8348SEric.Yu@Sun.COM { 1789*8348SEric.Yu@Sun.COM ASSERT(RW_ISWRITER(&so->so_fallback_rwlock)); 1790*8348SEric.Yu@Sun.COM 1791*8348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 1792*8348SEric.Yu@Sun.COM so->so_state &= ~SS_FALLBACK_PENDING; 1793*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 1794*8348SEric.Yu@Sun.COM 1795*8348SEric.Yu@Sun.COM rw_downgrade(&so->so_fallback_rwlock); 1796*8348SEric.Yu@Sun.COM } 1797*8348SEric.Yu@Sun.COM 1798*8348SEric.Yu@Sun.COM /* 1799*8348SEric.Yu@Sun.COM * so_quiesced_cb() 1800*8348SEric.Yu@Sun.COM * 1801*8348SEric.Yu@Sun.COM * Callback passed to the protocol during fallback. It is called once 1802*8348SEric.Yu@Sun.COM * the endpoint is quiescent. 1803*8348SEric.Yu@Sun.COM * 1804*8348SEric.Yu@Sun.COM * No requests from the user, no notifications from the protocol, so it 1805*8348SEric.Yu@Sun.COM * is safe to synchronize the state. Data can also be moved without 1806*8348SEric.Yu@Sun.COM * risk for reordering. 1807*8348SEric.Yu@Sun.COM * 1808*8348SEric.Yu@Sun.COM * NOTE: urgent data is dropped on the floor. 1809*8348SEric.Yu@Sun.COM * 1810*8348SEric.Yu@Sun.COM * We do not need to hold so_lock, since there can be only one thread 1811*8348SEric.Yu@Sun.COM * operating on the sonode. 1812*8348SEric.Yu@Sun.COM */ 1813*8348SEric.Yu@Sun.COM static void 1814*8348SEric.Yu@Sun.COM so_quiesced_cb(sock_upper_handle_t sock_handle, queue_t *q, 1815*8348SEric.Yu@Sun.COM struct T_capability_ack *tcap, struct sockaddr *laddr, socklen_t laddrlen, 1816*8348SEric.Yu@Sun.COM struct sockaddr *faddr, socklen_t faddrlen, short opts) 1817*8348SEric.Yu@Sun.COM { 1818*8348SEric.Yu@Sun.COM struct sonode *so = (struct sonode *)sock_handle; 1819*8348SEric.Yu@Sun.COM 1820*8348SEric.Yu@Sun.COM sotpi_update_state(so, tcap, laddr, laddrlen, faddr, faddrlen, opts); 1821*8348SEric.Yu@Sun.COM 1822*8348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 1823*8348SEric.Yu@Sun.COM SOCKET_TIMER_CANCEL(so); 1824*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 1825*8348SEric.Yu@Sun.COM /* 1826*8348SEric.Yu@Sun.COM * Move data to the STREAM head. 1827*8348SEric.Yu@Sun.COM */ 1828*8348SEric.Yu@Sun.COM if (so->so_rcv_head != NULL) { 1829*8348SEric.Yu@Sun.COM if (so->so_rcv_q_last_head == NULL) 1830*8348SEric.Yu@Sun.COM so->so_rcv_q_head = so->so_rcv_head; 1831*8348SEric.Yu@Sun.COM else 1832*8348SEric.Yu@Sun.COM so->so_rcv_q_last_head->b_next = so->so_rcv_head; 1833*8348SEric.Yu@Sun.COM so->so_rcv_q_last_head = so->so_rcv_last_head; 1834*8348SEric.Yu@Sun.COM } 1835*8348SEric.Yu@Sun.COM 1836*8348SEric.Yu@Sun.COM while (so->so_rcv_q_head != NULL) { 1837*8348SEric.Yu@Sun.COM mblk_t *mp = so->so_rcv_q_head; 1838*8348SEric.Yu@Sun.COM size_t mlen = msgdsize(mp); 1839*8348SEric.Yu@Sun.COM 1840*8348SEric.Yu@Sun.COM so->so_rcv_q_head = mp->b_next; 1841*8348SEric.Yu@Sun.COM mp->b_next = NULL; 1842*8348SEric.Yu@Sun.COM mp->b_prev = NULL; 1843*8348SEric.Yu@Sun.COM so->so_rcv_queued -= mlen; 1844*8348SEric.Yu@Sun.COM putnext(q, mp); 1845*8348SEric.Yu@Sun.COM } 1846*8348SEric.Yu@Sun.COM ASSERT(so->so_rcv_queued == 0); 1847*8348SEric.Yu@Sun.COM so->so_rcv_head = NULL; 1848*8348SEric.Yu@Sun.COM so->so_rcv_last_head = NULL; 1849*8348SEric.Yu@Sun.COM so->so_rcv_q_head = NULL; 1850*8348SEric.Yu@Sun.COM so->so_rcv_q_last_head = NULL; 1851*8348SEric.Yu@Sun.COM 1852*8348SEric.Yu@Sun.COM #ifdef DEBUG 1853*8348SEric.Yu@Sun.COM if (so->so_oobmsg != NULL || so->so_oobmark > 0) { 1854*8348SEric.Yu@Sun.COM cmn_err(CE_NOTE, "losing oob data due to tpi fallback\n"); 1855*8348SEric.Yu@Sun.COM } 1856*8348SEric.Yu@Sun.COM #endif 1857*8348SEric.Yu@Sun.COM if (so->so_oobmsg != NULL) { 1858*8348SEric.Yu@Sun.COM freemsg(so->so_oobmsg); 1859*8348SEric.Yu@Sun.COM so->so_oobmsg = NULL; 1860*8348SEric.Yu@Sun.COM } 1861*8348SEric.Yu@Sun.COM so->so_oobmark = 0; 1862*8348SEric.Yu@Sun.COM 1863*8348SEric.Yu@Sun.COM ASSERT(so->so_rcv_queued == 0); 1864*8348SEric.Yu@Sun.COM } 1865*8348SEric.Yu@Sun.COM 1866*8348SEric.Yu@Sun.COM /* 1867*8348SEric.Yu@Sun.COM * so_tpi_fallback() 1868*8348SEric.Yu@Sun.COM * 1869*8348SEric.Yu@Sun.COM * This is fallback initation routine; things start here. 1870*8348SEric.Yu@Sun.COM * 1871*8348SEric.Yu@Sun.COM * Basic strategy: 1872*8348SEric.Yu@Sun.COM * o Block new socket operations from coming in 1873*8348SEric.Yu@Sun.COM * o Allocate/initate info needed by TPI 1874*8348SEric.Yu@Sun.COM * o Quiesce the connection, at which point we sync 1875*8348SEric.Yu@Sun.COM * state and move data 1876*8348SEric.Yu@Sun.COM * o Change operations (sonodeops) associated with the socket 1877*8348SEric.Yu@Sun.COM * o Unblock threads waiting for the fallback to finish 1878*8348SEric.Yu@Sun.COM */ 1879*8348SEric.Yu@Sun.COM int 1880*8348SEric.Yu@Sun.COM so_tpi_fallback(struct sonode *so, struct cred *cr) 1881*8348SEric.Yu@Sun.COM { 1882*8348SEric.Yu@Sun.COM int error; 1883*8348SEric.Yu@Sun.COM queue_t *q; 1884*8348SEric.Yu@Sun.COM struct sockparams *sp; 1885*8348SEric.Yu@Sun.COM struct sockparams *newsp; 1886*8348SEric.Yu@Sun.COM so_proto_fallback_func_t fbfunc; 1887*8348SEric.Yu@Sun.COM boolean_t direct; 1888*8348SEric.Yu@Sun.COM 1889*8348SEric.Yu@Sun.COM error = 0; 1890*8348SEric.Yu@Sun.COM sp = so->so_sockparams; 1891*8348SEric.Yu@Sun.COM fbfunc = sp->sp_smod_info->smod_proto_fallback_func; 1892*8348SEric.Yu@Sun.COM 1893*8348SEric.Yu@Sun.COM /* 1894*8348SEric.Yu@Sun.COM * Fallback can only happen if there is a device associated 1895*8348SEric.Yu@Sun.COM * with the sonode, and the socket module has a fallback function. 1896*8348SEric.Yu@Sun.COM */ 1897*8348SEric.Yu@Sun.COM if (!SOCKPARAMS_HAS_DEVICE(sp) || fbfunc == NULL) 1898*8348SEric.Yu@Sun.COM return (EINVAL); 1899*8348SEric.Yu@Sun.COM 1900*8348SEric.Yu@Sun.COM /* 1901*8348SEric.Yu@Sun.COM * Initiate fallback; upon success we know that no new requests 1902*8348SEric.Yu@Sun.COM * will come in from the user. 1903*8348SEric.Yu@Sun.COM */ 1904*8348SEric.Yu@Sun.COM if (!so_start_fallback(so)) 1905*8348SEric.Yu@Sun.COM return (EAGAIN); 1906*8348SEric.Yu@Sun.COM 1907*8348SEric.Yu@Sun.COM newsp = sockparams_hold_ephemeral_bydev(so->so_family, so->so_type, 1908*8348SEric.Yu@Sun.COM so->so_protocol, so->so_sockparams->sp_sdev_info.sd_devpath, 1909*8348SEric.Yu@Sun.COM KM_SLEEP, &error); 1910*8348SEric.Yu@Sun.COM if (error != 0) 1911*8348SEric.Yu@Sun.COM goto out; 1912*8348SEric.Yu@Sun.COM 1913*8348SEric.Yu@Sun.COM if (so->so_direct != NULL) { 1914*8348SEric.Yu@Sun.COM sodirect_t *sodp = so->so_direct; 1915*8348SEric.Yu@Sun.COM mutex_enter(sodp->sod_lockp); 1916*8348SEric.Yu@Sun.COM 1917*8348SEric.Yu@Sun.COM so->so_direct->sod_state &= ~SOD_ENABLED; 1918*8348SEric.Yu@Sun.COM so->so_state &= ~SS_SODIRECT; 1919*8348SEric.Yu@Sun.COM ASSERT(sodp->sod_uioafh == NULL); 1920*8348SEric.Yu@Sun.COM mutex_exit(sodp->sod_lockp); 1921*8348SEric.Yu@Sun.COM } 1922*8348SEric.Yu@Sun.COM 1923*8348SEric.Yu@Sun.COM /* Turn sonode into a TPI socket */ 1924*8348SEric.Yu@Sun.COM q = sotpi_convert_sonode(so, newsp, &direct, cr); 1925*8348SEric.Yu@Sun.COM if (q == NULL) { 1926*8348SEric.Yu@Sun.COM zcmn_err(getzoneid(), CE_WARN, 1927*8348SEric.Yu@Sun.COM "Failed to convert socket to TPI. Pid = %d\n", 1928*8348SEric.Yu@Sun.COM curproc->p_pid); 1929*8348SEric.Yu@Sun.COM SOCKPARAMS_DEC_REF(newsp); 1930*8348SEric.Yu@Sun.COM error = EINVAL; 1931*8348SEric.Yu@Sun.COM goto out; 1932*8348SEric.Yu@Sun.COM } 1933*8348SEric.Yu@Sun.COM 1934*8348SEric.Yu@Sun.COM /* 1935*8348SEric.Yu@Sun.COM * Now tell the protocol to start using TPI. so_quiesced_cb be 1936*8348SEric.Yu@Sun.COM * called once it's safe to synchronize state. 1937*8348SEric.Yu@Sun.COM */ 1938*8348SEric.Yu@Sun.COM DTRACE_PROBE1(proto__fallback__begin, struct sonode *, so); 1939*8348SEric.Yu@Sun.COM /* FIXME assumes this cannot fail. TCP can fail to enter squeue */ 1940*8348SEric.Yu@Sun.COM (*fbfunc)(so->so_proto_handle, q, direct, so_quiesced_cb); 1941*8348SEric.Yu@Sun.COM DTRACE_PROBE1(proto__fallback__end, struct sonode *, so); 1942*8348SEric.Yu@Sun.COM 1943*8348SEric.Yu@Sun.COM /* 1944*8348SEric.Yu@Sun.COM * Free all pending connection indications, i.e., socket_accept() has 1945*8348SEric.Yu@Sun.COM * not yet pulled the connection of the queue. The transport sent 1946*8348SEric.Yu@Sun.COM * a T_CONN_IND message for each pending connection to the STREAM head. 1947*8348SEric.Yu@Sun.COM */ 1948*8348SEric.Yu@Sun.COM so_acceptq_flush(so); 1949*8348SEric.Yu@Sun.COM 1950*8348SEric.Yu@Sun.COM mutex_enter(&so->so_lock); 1951*8348SEric.Yu@Sun.COM so->so_state |= SS_FALLBACK_COMP; 1952*8348SEric.Yu@Sun.COM mutex_exit(&so->so_lock); 1953*8348SEric.Yu@Sun.COM 1954*8348SEric.Yu@Sun.COM /* 1955*8348SEric.Yu@Sun.COM * Swap the sonode ops. Socket opertations that come in once this 1956*8348SEric.Yu@Sun.COM * is done will proceed without blocking. 1957*8348SEric.Yu@Sun.COM */ 1958*8348SEric.Yu@Sun.COM so->so_ops = &sotpi_sonodeops; 1959*8348SEric.Yu@Sun.COM 1960*8348SEric.Yu@Sun.COM /* 1961*8348SEric.Yu@Sun.COM * Wake up any threads stuck in poll. This is needed since the poll 1962*8348SEric.Yu@Sun.COM * head changes when the fallback happens (moves from the sonode to 1963*8348SEric.Yu@Sun.COM * the STREAMS head). 1964*8348SEric.Yu@Sun.COM */ 1965*8348SEric.Yu@Sun.COM pollwakeup(&so->so_poll_list, POLLERR); 1966*8348SEric.Yu@Sun.COM out: 1967*8348SEric.Yu@Sun.COM so_end_fallback(so); 1968*8348SEric.Yu@Sun.COM 1969*8348SEric.Yu@Sun.COM return (error); 1970*8348SEric.Yu@Sun.COM } 1971