138414Smckusick /* 247574Skarels * Copyright (c) 1989, 1991 The Regents of the University of California. 338414Smckusick * All rights reserved. 438414Smckusick * 538414Smckusick * This code is derived from software contributed to Berkeley by 638414Smckusick * Rick Macklem at The University of Guelph. 738414Smckusick * 844511Sbostic * %sccs.include.redist.c% 938414Smckusick * 10*53426Smckusick * @(#)nfs_socket.c 7.30 (Berkeley) 05/11/92 1138414Smckusick */ 1238414Smckusick 1338414Smckusick /* 1441900Smckusick * Socket operations for use by nfs 1538414Smckusick */ 1638414Smckusick 1752196Smckusick #include "types.h" 1838414Smckusick #include "param.h" 1952196Smckusick #include "uio.h" 2040117Smckusick #include "proc.h" 2152196Smckusick #include "signal.h" 2238414Smckusick #include "mount.h" 2338414Smckusick #include "kernel.h" 2438414Smckusick #include "malloc.h" 2538414Smckusick #include "mbuf.h" 2638414Smckusick #include "vnode.h" 2738414Smckusick #include "domain.h" 2838414Smckusick #include "protosw.h" 2938414Smckusick #include "socket.h" 3038414Smckusick #include "socketvar.h" 3147574Skarels #include "syslog.h" 3247737Skarels #include "tprintf.h" 3352196Smckusick #include "machine/endian.h" 3452196Smckusick #include "netinet/in.h" 3552196Smckusick #include "netinet/tcp.h" 3652196Smckusick #ifdef ISO 3752196Smckusick #include "netiso/iso.h" 3852196Smckusick #endif 3952196Smckusick #include "ufs/ufs/quota.h" 4052196Smckusick #include "ufs/ufs/ufsmount.h" 4138414Smckusick #include "rpcv2.h" 4238414Smckusick #include "nfsv2.h" 4338414Smckusick #include "nfs.h" 4438414Smckusick #include "xdr_subs.h" 4538414Smckusick #include "nfsm_subs.h" 4638414Smckusick #include "nfsmount.h" 4752196Smckusick #include "nfsnode.h" 4852196Smckusick #include "nfsrtt.h" 4952196Smckusick #include "nqnfs.h" 5038414Smckusick 5138414Smckusick #define TRUE 1 5243351Smckusick #define FALSE 0 5338414Smckusick 5452196Smckusick int netnetnet = sizeof (struct netaddrhash); 5540117Smckusick /* 
5652196Smckusick * Estimate rto for an nfs rpc sent via. an unreliable datagram. 5752196Smckusick * Use the mean and mean deviation of rtt for the appropriate type of rpc 5852196Smckusick * for the frequent rpcs and a default for the others. 5952196Smckusick * The justification for doing "other" this way is that these rpcs 6052196Smckusick * happen so infrequently that timer est. would probably be stale. 6152196Smckusick * Also, since many of these rpcs are 6252196Smckusick * non-idempotent, a conservative timeout is desired. 6352196Smckusick * getattr, lookup - A+2D 6452196Smckusick * read, write - A+4D 6552196Smckusick * other - nm_timeo 6652196Smckusick */ 6752196Smckusick #define NFS_RTO(n, t) \ 6852196Smckusick ((t) == 0 ? (n)->nm_timeo : \ 6952196Smckusick ((t) < 3 ? \ 7052196Smckusick (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \ 7152196Smckusick ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1))) 7252196Smckusick #define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1] 7352196Smckusick #define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1] 7452196Smckusick /* 7538414Smckusick * External data, mostly RPC constants in XDR form 7638414Smckusick */ 7738414Smckusick extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix, 7852196Smckusick rpc_msgaccepted, rpc_call, rpc_autherr, rpc_rejectedcred, 7952196Smckusick rpc_auth_kerb; 8052196Smckusick extern u_long nfs_prog, nfs_vers, nqnfs_prog, nqnfs_vers; 8152196Smckusick extern time_t nqnfsstarttime; 8241900Smckusick extern int nonidempotent[NFS_NPROCS]; 8352196Smckusick 8452196Smckusick /* 8552196Smckusick * Maps errno values to nfs error numbers. 8652196Smckusick * Use NFSERR_IO as the catch all for ones not specifically defined in 8752196Smckusick * RFC 1094. 
8852196Smckusick */ 8952196Smckusick static int nfsrv_errmap[ELAST] = { 9052196Smckusick NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO, 9152196Smckusick NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 9252196Smckusick NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO, 9352196Smckusick NFSERR_IO, NFSERR_EXIST, NFSERR_IO, NFSERR_NODEV, NFSERR_NOTDIR, 9452196Smckusick NFSERR_ISDIR, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 9552196Smckusick NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS, 9652196Smckusick NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 9752196Smckusick NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 9852196Smckusick NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 9952196Smckusick NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 10052196Smckusick NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 10152196Smckusick NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 10252196Smckusick NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO, 10352196Smckusick NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE, 10452196Smckusick NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 10552196Smckusick NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 10652196Smckusick NFSERR_IO, 10745281Smckusick }; 10852196Smckusick 10952196Smckusick /* 11052196Smckusick * Defines which timer to use for the procnum. 11152196Smckusick * 0 - default 11252196Smckusick * 1 - getattr 11352196Smckusick * 2 - lookup 11452196Smckusick * 3 - read 11552196Smckusick * 4 - write 11652196Smckusick */ 11752196Smckusick static int proct[NFS_NPROCS] = { 11852196Smckusick 0, 1, 0, 0, 2, 3, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 0, 0, 11952196Smckusick }; 12052196Smckusick 12152196Smckusick /* 12252196Smckusick * There is a congestion window for outstanding rpcs maintained per mount 12352196Smckusick * point. 
The cwnd size is adjusted in roughly the way that: 12452196Smckusick * Van Jacobson, Congestion avoidance and Control, In "Proceedings of 12552196Smckusick * SIGCOMM '88". ACM, August 1988. 12652196Smckusick * describes for TCP. The cwnd size is chopped in half on a retransmit timeout 12752196Smckusick * and incremented by 1/cwnd when each rpc reply is received and a full cwnd 12852196Smckusick * of rpcs is in progress. 12952196Smckusick * (The sent count and cwnd are scaled for integer arith.) 13052196Smckusick * Variants of "slow start" were tried and were found to be too much of a 13152196Smckusick * performance hit (ave. rtt 3 times larger), 13252196Smckusick * I suspect due to the large rtt that nfs rpcs have. 13352196Smckusick */ 13452196Smckusick #define NFS_CWNDSCALE 256 13552196Smckusick #define NFS_MAXCWND (NFS_CWNDSCALE * 32) 13652196Smckusick static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, }; 13741900Smckusick int nfs_sbwait(); 13852196Smckusick void nfs_disconnect(), nfs_realign(), nfsrv_wakenfsd(), nfs_sndunlock(); 13952196Smckusick void nfs_rcvunlock(), nqnfs_serverd(); 14052196Smckusick struct mbuf *nfsm_rpchead(); 14152196Smckusick int nfsrtton = 0; 14252196Smckusick struct nfsrtt nfsrtt; 14352196Smckusick struct nfsd nfsd_head; 14441900Smckusick 14538414Smckusick int nfsrv_null(), 14638414Smckusick nfsrv_getattr(), 14738414Smckusick nfsrv_setattr(), 14838414Smckusick nfsrv_lookup(), 14938414Smckusick nfsrv_readlink(), 15038414Smckusick nfsrv_read(), 15138414Smckusick nfsrv_write(), 15238414Smckusick nfsrv_create(), 15338414Smckusick nfsrv_remove(), 15438414Smckusick nfsrv_rename(), 15538414Smckusick nfsrv_link(), 15638414Smckusick nfsrv_symlink(), 15738414Smckusick nfsrv_mkdir(), 15838414Smckusick nfsrv_rmdir(), 15938414Smckusick nfsrv_readdir(), 16038414Smckusick nfsrv_statfs(), 16152196Smckusick nfsrv_noop(), 16252196Smckusick nqnfsrv_readdirlook(), 16352196Smckusick nqnfsrv_getlease(), 16452196Smckusick nqnfsrv_vacated(); 
16538414Smckusick 16638414Smckusick int (*nfsrv_procs[NFS_NPROCS])() = { 16738414Smckusick nfsrv_null, 16838414Smckusick nfsrv_getattr, 16938414Smckusick nfsrv_setattr, 17038414Smckusick nfsrv_noop, 17138414Smckusick nfsrv_lookup, 17238414Smckusick nfsrv_readlink, 17338414Smckusick nfsrv_read, 17438414Smckusick nfsrv_noop, 17538414Smckusick nfsrv_write, 17638414Smckusick nfsrv_create, 17738414Smckusick nfsrv_remove, 17838414Smckusick nfsrv_rename, 17938414Smckusick nfsrv_link, 18038414Smckusick nfsrv_symlink, 18138414Smckusick nfsrv_mkdir, 18238414Smckusick nfsrv_rmdir, 18338414Smckusick nfsrv_readdir, 18438414Smckusick nfsrv_statfs, 18552196Smckusick nqnfsrv_readdirlook, 18652196Smckusick nqnfsrv_getlease, 18752196Smckusick nqnfsrv_vacated, 18838414Smckusick }; 18938414Smckusick 19040117Smckusick struct nfsreq nfsreqh; 19138414Smckusick 19238414Smckusick /* 19341900Smckusick * Initialize sockets and congestion for a new NFS connection. 19440117Smckusick * We do not free the sockaddr if error. 19538414Smckusick */ 19652196Smckusick nfs_connect(nmp, rep) 19740117Smckusick register struct nfsmount *nmp; 19852196Smckusick struct nfsreq *rep; 19940117Smckusick { 20041900Smckusick register struct socket *so; 20152196Smckusick int s, error, rcvreserve, sndreserve; 20252988Smckusick struct sockaddr *saddr; 20352988Smckusick struct sockaddr_in *sin; 20440117Smckusick struct mbuf *m; 20552988Smckusick u_short tport; 20640117Smckusick 20741900Smckusick nmp->nm_so = (struct socket *)0; 20852988Smckusick saddr = mtod(nmp->nm_nam, struct sockaddr *); 20952988Smckusick if (error = socreate(saddr->sa_family, 21041900Smckusick &nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto)) 21140117Smckusick goto bad; 21241900Smckusick so = nmp->nm_so; 21341900Smckusick nmp->nm_soflags = so->so_proto->pr_flags; 21440117Smckusick 21541900Smckusick /* 21652988Smckusick * Some servers require that the client port be a reserved port number. 
21752988Smckusick */ 21852988Smckusick if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) { 21952988Smckusick MGET(m, M_WAIT, MT_SONAME); 22052988Smckusick sin = mtod(m, struct sockaddr_in *); 22152988Smckusick sin->sin_len = m->m_len = sizeof (struct sockaddr_in); 22252988Smckusick sin->sin_family = AF_INET; 22352988Smckusick sin->sin_addr.s_addr = INADDR_ANY; 22452988Smckusick tport = IPPORT_RESERVED - 1; 22552988Smckusick sin->sin_port = htons(tport); 22652988Smckusick while ((error = sobind(so, m)) == EADDRINUSE && 22752988Smckusick --tport > IPPORT_RESERVED / 2) 22852988Smckusick sin->sin_port = htons(tport); 22952988Smckusick m_freem(m); 23052988Smckusick if (error) 23152988Smckusick goto bad; 23252988Smckusick } 23352988Smckusick 23452988Smckusick /* 23541900Smckusick * Protocols that do not require connections may be optionally left 23641900Smckusick * unconnected for servers that reply from a port other than NFS_PORT. 23741900Smckusick */ 23841900Smckusick if (nmp->nm_flag & NFSMNT_NOCONN) { 23941900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) { 24041900Smckusick error = ENOTCONN; 24140117Smckusick goto bad; 24240117Smckusick } 24341900Smckusick } else { 24441900Smckusick if (error = soconnect(so, nmp->nm_nam)) 24540117Smckusick goto bad; 24641900Smckusick 24741900Smckusick /* 24841900Smckusick * Wait for the connection to complete. Cribbed from the 24952196Smckusick * connect system call but with the wait timing out so 25052196Smckusick * that interruptible mounts don't hang here for a long time. 
25141900Smckusick */ 25241900Smckusick s = splnet(); 25352196Smckusick while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 25452196Smckusick (void) tsleep((caddr_t)&so->so_timeo, PSOCK, 25552196Smckusick "nfscon", 2 * hz); 25652196Smckusick if ((so->so_state & SS_ISCONNECTING) && 25752196Smckusick so->so_error == 0 && rep && 25852196Smckusick (error = nfs_sigintr(nmp, rep, rep->r_procp))) { 25952196Smckusick so->so_state &= ~SS_ISCONNECTING; 26052196Smckusick splx(s); 26152196Smckusick goto bad; 26252196Smckusick } 26352196Smckusick } 26441900Smckusick if (so->so_error) { 26541900Smckusick error = so->so_error; 26652196Smckusick so->so_error = 0; 26752196Smckusick splx(s); 26841900Smckusick goto bad; 26941900Smckusick } 27052196Smckusick splx(s); 27140117Smckusick } 27252196Smckusick if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) { 27352196Smckusick so->so_rcv.sb_timeo = (5 * hz); 27452196Smckusick so->so_snd.sb_timeo = (5 * hz); 27552196Smckusick } else { 27652196Smckusick so->so_rcv.sb_timeo = 0; 27752196Smckusick so->so_snd.sb_timeo = 0; 27852196Smckusick } 27941900Smckusick if (nmp->nm_sotype == SOCK_DGRAM) { 28052196Smckusick sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR; 28152196Smckusick rcvreserve = nmp->nm_rsize + NFS_MAXPKTHDR; 28252196Smckusick } else if (nmp->nm_sotype == SOCK_SEQPACKET) { 28352196Smckusick sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2; 28452196Smckusick rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * 2; 28541900Smckusick } else { 28652196Smckusick if (nmp->nm_sotype != SOCK_STREAM) 28752196Smckusick panic("nfscon sotype"); 28841900Smckusick if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 28941900Smckusick MGET(m, M_WAIT, MT_SOOPTS); 29041900Smckusick *mtod(m, int *) = 1; 29141900Smckusick m->m_len = sizeof(int); 29241900Smckusick sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m); 29341900Smckusick } 29452196Smckusick if (so->so_proto->pr_protocol == IPPROTO_TCP) { 29541900Smckusick MGET(m, M_WAIT, MT_SOOPTS); 29641900Smckusick 
*mtod(m, int *) = 1; 29741900Smckusick m->m_len = sizeof(int); 29841900Smckusick sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m); 29941900Smckusick } 30052196Smckusick sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long)) 30152196Smckusick * 2; 30252196Smckusick rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long)) 30352196Smckusick * 2; 30441900Smckusick } 30552196Smckusick if (error = soreserve(so, sndreserve, rcvreserve)) 30652196Smckusick goto bad; 30741900Smckusick so->so_rcv.sb_flags |= SB_NOINTR; 30841900Smckusick so->so_snd.sb_flags |= SB_NOINTR; 30940117Smckusick 31041900Smckusick /* Initialize other non-zero congestion variables */ 31152196Smckusick nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] = 31252196Smckusick nmp->nm_srtt[4] = (NFS_TIMEO << 3); 31352196Smckusick nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] = 31452196Smckusick nmp->nm_sdrtt[3] = nmp->nm_sdrtt[4] = 0; 31552196Smckusick nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */ 31641900Smckusick nmp->nm_sent = 0; 31752196Smckusick nmp->nm_timeouts = 0; 31841900Smckusick return (0); 31940117Smckusick 32041900Smckusick bad: 32141900Smckusick nfs_disconnect(nmp); 32241900Smckusick return (error); 32341900Smckusick } 32440117Smckusick 32541900Smckusick /* 32641900Smckusick * Reconnect routine: 32741900Smckusick * Called when a connection is broken on a reliable protocol. 32841900Smckusick * - clean up the old socket 32941900Smckusick * - nfs_connect() again 33041900Smckusick * - set R_MUSTRESEND for all outstanding requests on mount point 33141900Smckusick * If this fails the mount point is DEAD! 33252196Smckusick * nb: Must be called with the nfs_sndlock() set on the mount point. 
33341900Smckusick */ 33452196Smckusick nfs_reconnect(rep) 33541900Smckusick register struct nfsreq *rep; 33641900Smckusick { 33741900Smckusick register struct nfsreq *rp; 33852196Smckusick register struct nfsmount *nmp = rep->r_nmp; 33941900Smckusick int error; 34040117Smckusick 34152196Smckusick nfs_disconnect(nmp); 34252196Smckusick while (error = nfs_connect(nmp, rep)) { 34352196Smckusick if (error == EINTR || error == ERESTART) 34441900Smckusick return (EINTR); 34543351Smckusick (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0); 34640117Smckusick } 34741900Smckusick 34841900Smckusick /* 34941900Smckusick * Loop through outstanding request list and fix up all requests 35041900Smckusick * on old socket. 35141900Smckusick */ 35241900Smckusick rp = nfsreqh.r_next; 35341900Smckusick while (rp != &nfsreqh) { 35441900Smckusick if (rp->r_nmp == nmp) 35541900Smckusick rp->r_flags |= R_MUSTRESEND; 35641900Smckusick rp = rp->r_next; 35740117Smckusick } 35840117Smckusick return (0); 35940117Smckusick } 36040117Smckusick 36140117Smckusick /* 36240117Smckusick * NFS disconnect. Clean up and unlink. 36340117Smckusick */ 36441900Smckusick void 36540117Smckusick nfs_disconnect(nmp) 36640117Smckusick register struct nfsmount *nmp; 36740117Smckusick { 36841900Smckusick register struct socket *so; 36940117Smckusick 37041900Smckusick if (nmp->nm_so) { 37141900Smckusick so = nmp->nm_so; 37241900Smckusick nmp->nm_so = (struct socket *)0; 37341900Smckusick soshutdown(so, 2); 37441900Smckusick soclose(so); 37540117Smckusick } 37640117Smckusick } 37740117Smckusick 37840117Smckusick /* 37941900Smckusick * This is the nfs send routine. For connection based socket types, it 38052196Smckusick * must be called with an nfs_sndlock() on the socket. 38141900Smckusick * "rep == NULL" indicates that it has been called from a server. 
38252196Smckusick * For the client side: 38352196Smckusick * - return EINTR if the RPC is terminated, 0 otherwise 38452196Smckusick * - set R_MUSTRESEND if the send fails for any reason 38552196Smckusick * - do any cleanup required by recoverable socket errors (???) 38652196Smckusick * For the server side: 38752196Smckusick * - return EINTR or ERESTART if interrupted by a signal 38852196Smckusick * - return EPIPE if a connection is lost for connection based sockets (TCP...) 38952196Smckusick * - do any cleanup required by recoverable socket errors (???) 39040117Smckusick */ 39141900Smckusick nfs_send(so, nam, top, rep) 39238414Smckusick register struct socket *so; 39338414Smckusick struct mbuf *nam; 39441900Smckusick register struct mbuf *top; 39541900Smckusick struct nfsreq *rep; 39638414Smckusick { 39741900Smckusick struct mbuf *sendnam; 39852196Smckusick int error, soflags, flags; 39938414Smckusick 40041900Smckusick if (rep) { 40141900Smckusick if (rep->r_flags & R_SOFTTERM) { 40240117Smckusick m_freem(top); 40341900Smckusick return (EINTR); 40440117Smckusick } 40552196Smckusick if ((so = rep->r_nmp->nm_so) == NULL) { 40652196Smckusick rep->r_flags |= R_MUSTRESEND; 40752196Smckusick m_freem(top); 40852196Smckusick return (0); 40952196Smckusick } 41041900Smckusick rep->r_flags &= ~R_MUSTRESEND; 41141900Smckusick soflags = rep->r_nmp->nm_soflags; 41241900Smckusick } else 41341900Smckusick soflags = so->so_proto->pr_flags; 41441900Smckusick if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED)) 41541900Smckusick sendnam = (struct mbuf *)0; 41641900Smckusick else 41741900Smckusick sendnam = nam; 41852196Smckusick if (so->so_type == SOCK_SEQPACKET) 41952196Smckusick flags = MSG_EOR; 42052196Smckusick else 42152196Smckusick flags = 0; 42241900Smckusick 42341900Smckusick error = sosend(so, sendnam, (struct uio *)0, top, 42452196Smckusick (struct mbuf *)0, flags); 42552196Smckusick if (error) { 42652196Smckusick if (rep) { 42752934Smckusick log(LOG_INFO, 
"nfs send error %d for server %s\n",error, 42852934Smckusick rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 42952196Smckusick /* 43052196Smckusick * Deal with errors for the client side. 43152196Smckusick */ 43252196Smckusick if (rep->r_flags & R_SOFTTERM) 43352196Smckusick error = EINTR; 43452196Smckusick else 43552196Smckusick rep->r_flags |= R_MUSTRESEND; 43652934Smckusick } else 43752934Smckusick log(LOG_INFO, "nfsd send error %d\n", error); 43852196Smckusick 43952196Smckusick /* 44052196Smckusick * Handle any recoverable (soft) socket errors here. (???) 44152196Smckusick */ 44252196Smckusick if (error != EINTR && error != ERESTART && 44352196Smckusick error != EWOULDBLOCK && error != EPIPE) 44441900Smckusick error = 0; 44538414Smckusick } 44638414Smckusick return (error); 44738414Smckusick } 44838414Smckusick 44938414Smckusick /* 45041900Smckusick * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all 45141900Smckusick * done by soreceive(), but for SOCK_STREAM we must deal with the Record 45241900Smckusick * Mark and consolidate the data into a new mbuf list. 45341900Smckusick * nb: Sometimes TCP passes the data up to soreceive() in long lists of 45441900Smckusick * small mbufs. 45541900Smckusick * For SOCK_STREAM we must be very careful to read an entire record once 45641900Smckusick * we have read any of it, even if the system call has been interrupted. 
45738414Smckusick */ 45852196Smckusick nfs_receive(rep, aname, mp) 45952196Smckusick register struct nfsreq *rep; 46038414Smckusick struct mbuf **aname; 46138414Smckusick struct mbuf **mp; 46238414Smckusick { 46352196Smckusick register struct socket *so; 46441900Smckusick struct uio auio; 46541900Smckusick struct iovec aio; 46638414Smckusick register struct mbuf *m; 46752196Smckusick struct mbuf *control; 46841900Smckusick u_long len; 46941900Smckusick struct mbuf **getnam; 47052196Smckusick int error, sotype, rcvflg; 47152932Smckusick struct proc *p = curproc; /* XXX */ 47238414Smckusick 47341900Smckusick /* 47441900Smckusick * Set up arguments for soreceive() 47541900Smckusick */ 47641900Smckusick *mp = (struct mbuf *)0; 47741900Smckusick *aname = (struct mbuf *)0; 47852196Smckusick sotype = rep->r_nmp->nm_sotype; 47938414Smckusick 48041900Smckusick /* 48141900Smckusick * For reliable protocols, lock against other senders/receivers 48241900Smckusick * in case a reconnect is necessary. 48341900Smckusick * For SOCK_STREAM, first get the Record Mark to find out how much 48441900Smckusick * more there is to get. 48541900Smckusick * We must lock the socket against other receivers 48641900Smckusick * until we have an entire rpc request/reply. 48741900Smckusick */ 48852196Smckusick if (sotype != SOCK_DGRAM) { 48952196Smckusick if (error = nfs_sndlock(&rep->r_nmp->nm_flag, rep)) 49052196Smckusick return (error); 49141900Smckusick tryagain: 49241900Smckusick /* 49341900Smckusick * Check for fatal errors and resending request. 49441900Smckusick */ 49552196Smckusick /* 49652196Smckusick * Ugh: If a reconnect attempt just happened, nm_so 49752196Smckusick * would have changed. NULL indicates a failed 49852196Smckusick * attempt that has essentially shut down this 49952196Smckusick * mount point. 
50052196Smckusick */ 50152196Smckusick if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) { 50252196Smckusick nfs_sndunlock(&rep->r_nmp->nm_flag); 50352196Smckusick return (EINTR); 50452196Smckusick } 50552196Smckusick if ((so = rep->r_nmp->nm_so) == NULL) { 50652196Smckusick if (error = nfs_reconnect(rep)) { 50752196Smckusick nfs_sndunlock(&rep->r_nmp->nm_flag); 50852196Smckusick return (error); 50940117Smckusick } 51052196Smckusick goto tryagain; 51141900Smckusick } 51252196Smckusick while (rep->r_flags & R_MUSTRESEND) { 51352196Smckusick m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); 51452196Smckusick nfsstats.rpcretries++; 51552196Smckusick if (error = nfs_send(so, rep->r_nmp->nm_nam, m, rep)) { 51652196Smckusick if (error == EINTR || error == ERESTART || 51752196Smckusick (error = nfs_reconnect(rep))) { 51852196Smckusick nfs_sndunlock(&rep->r_nmp->nm_flag); 51952196Smckusick return (error); 52052196Smckusick } 52152196Smckusick goto tryagain; 52252196Smckusick } 52352196Smckusick } 52452196Smckusick nfs_sndunlock(&rep->r_nmp->nm_flag); 52552196Smckusick if (sotype == SOCK_STREAM) { 52641900Smckusick aio.iov_base = (caddr_t) &len; 52741900Smckusick aio.iov_len = sizeof(u_long); 52841900Smckusick auio.uio_iov = &aio; 52941900Smckusick auio.uio_iovcnt = 1; 53041900Smckusick auio.uio_segflg = UIO_SYSSPACE; 53141900Smckusick auio.uio_rw = UIO_READ; 53241900Smckusick auio.uio_offset = 0; 53341900Smckusick auio.uio_resid = sizeof(u_long); 53452932Smckusick auio.uio_procp = p; 53541900Smckusick do { 53652196Smckusick rcvflg = MSG_WAITALL; 53752196Smckusick error = soreceive(so, (struct mbuf **)0, &auio, 53841900Smckusick (struct mbuf **)0, (struct mbuf **)0, &rcvflg); 53952196Smckusick if (error == EWOULDBLOCK && rep) { 54041900Smckusick if (rep->r_flags & R_SOFTTERM) 54141900Smckusick return (EINTR); 54252196Smckusick } 54341900Smckusick } while (error == EWOULDBLOCK); 54447737Skarels if (!error && auio.uio_resid > 0) { 54552934Smckusick log(LOG_INFO, 54652934Smckusick 
"short receive (%d/%d) from nfs server %s\n", 54752934Smckusick sizeof(u_long) - auio.uio_resid, 54852934Smckusick sizeof(u_long), 54947737Skarels rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 55047737Skarels error = EPIPE; 55147737Skarels } 55240761Skarels if (error) 55341900Smckusick goto errout; 55441900Smckusick len = ntohl(len) & ~0x80000000; 55541900Smckusick /* 55641900Smckusick * This is SERIOUS! We are out of sync with the sender 55741900Smckusick * and forcing a disconnect/reconnect is all I can do. 55841900Smckusick */ 55941900Smckusick if (len > NFS_MAXPACKET) { 56052934Smckusick log(LOG_ERR, "%s (%d) from nfs server %s\n", 56152934Smckusick "impossible packet length", 56252934Smckusick len, 56352934Smckusick rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 56447737Skarels error = EFBIG; 56547737Skarels goto errout; 56641900Smckusick } 56741900Smckusick auio.uio_resid = len; 56841900Smckusick do { 56947737Skarels rcvflg = MSG_WAITALL; 57041900Smckusick error = soreceive(so, (struct mbuf **)0, 57141900Smckusick &auio, mp, (struct mbuf **)0, &rcvflg); 57241900Smckusick } while (error == EWOULDBLOCK || error == EINTR || 57341900Smckusick error == ERESTART); 57447737Skarels if (!error && auio.uio_resid > 0) { 57552934Smckusick log(LOG_INFO, 57652934Smckusick "short receive (%d/%d) from nfs server %s\n", 57752934Smckusick len - auio.uio_resid, len, 57852934Smckusick rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 57947737Skarels error = EPIPE; 58047737Skarels } 58140117Smckusick } else { 58252196Smckusick /* 58352196Smckusick * NB: Since uio_resid is big, MSG_WAITALL is ignored 58452196Smckusick * and soreceive() will return when it has either a 58552196Smckusick * control msg or a data msg. 58652196Smckusick * We have no use for control msg., but must grab them 58752196Smckusick * and then throw them away so we know what is going 58852196Smckusick * on. 
58952196Smckusick */ 59052196Smckusick auio.uio_resid = len = 100000000; /* Anything Big */ 59152932Smckusick auio.uio_procp = p; 59241900Smckusick do { 59347737Skarels rcvflg = 0; 59441900Smckusick error = soreceive(so, (struct mbuf **)0, 59552196Smckusick &auio, mp, &control, &rcvflg); 59652196Smckusick if (control) 59752196Smckusick m_freem(control); 59841900Smckusick if (error == EWOULDBLOCK && rep) { 59941900Smckusick if (rep->r_flags & R_SOFTTERM) 60041900Smckusick return (EINTR); 60141900Smckusick } 60252196Smckusick } while (error == EWOULDBLOCK || 60352196Smckusick (!error && *mp == NULL && control)); 60452196Smckusick if ((rcvflg & MSG_EOR) == 0) 60552196Smckusick printf("Egad!!\n"); 60641900Smckusick if (!error && *mp == NULL) 60741900Smckusick error = EPIPE; 60841900Smckusick len -= auio.uio_resid; 60940117Smckusick } 61041900Smckusick errout: 61152196Smckusick if (error && error != EINTR && error != ERESTART) { 61241900Smckusick m_freem(*mp); 61341900Smckusick *mp = (struct mbuf *)0; 61452934Smckusick if (error != EPIPE) 61547737Skarels log(LOG_INFO, 61647737Skarels "receive error %d from nfs server %s\n", 61747737Skarels error, 61847737Skarels rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 61952196Smckusick error = nfs_sndlock(&rep->r_nmp->nm_flag, rep); 62041900Smckusick if (!error) 62152196Smckusick error = nfs_reconnect(rep); 62252196Smckusick if (!error) 62341900Smckusick goto tryagain; 62440117Smckusick } 62541900Smckusick } else { 62652196Smckusick if ((so = rep->r_nmp->nm_so) == NULL) 62752196Smckusick return (EACCES); 62841900Smckusick if (so->so_state & SS_ISCONNECTED) 62941900Smckusick getnam = (struct mbuf **)0; 63041900Smckusick else 63141900Smckusick getnam = aname; 63241900Smckusick auio.uio_resid = len = 1000000; 63352932Smckusick auio.uio_procp = p; 63441900Smckusick do { 63547737Skarels rcvflg = 0; 63641900Smckusick error = soreceive(so, getnam, &auio, mp, 63741900Smckusick (struct mbuf **)0, &rcvflg); 63852196Smckusick if (error == 
EWOULDBLOCK && 63941900Smckusick (rep->r_flags & R_SOFTTERM)) 64041900Smckusick return (EINTR); 64141900Smckusick } while (error == EWOULDBLOCK); 64241900Smckusick len -= auio.uio_resid; 64341900Smckusick } 64441900Smckusick if (error) { 64541900Smckusick m_freem(*mp); 64641900Smckusick *mp = (struct mbuf *)0; 64741900Smckusick } 64841900Smckusick /* 64952196Smckusick * Search for any mbufs that are not a multiple of 4 bytes long 65052196Smckusick * or with m_data not longword aligned. 65141900Smckusick * These could cause pointer alignment problems, so copy them to 65241900Smckusick * well aligned mbufs. 65341900Smckusick */ 65452196Smckusick nfs_realign(*mp, 5 * NFSX_UNSIGNED); 65538414Smckusick return (error); 65638414Smckusick } 65738414Smckusick 65838414Smckusick /* 65941900Smckusick * Implement receipt of reply on a socket. 66038414Smckusick * We must search through the list of received datagrams matching them 66138414Smckusick * with outstanding requests using the xid, until ours is found. 66238414Smckusick */ 66341900Smckusick /* ARGSUSED */ 66452196Smckusick nfs_reply(myrep) 66539344Smckusick struct nfsreq *myrep; 66638414Smckusick { 66738414Smckusick register struct nfsreq *rep; 66852196Smckusick register struct nfsmount *nmp = myrep->r_nmp; 66952196Smckusick register long t1; 67052196Smckusick struct mbuf *mrep, *nam, *md; 67152196Smckusick u_long rxid, *tl; 67252196Smckusick caddr_t dpos, cp2; 67352196Smckusick int error; 67438414Smckusick 67541900Smckusick /* 67641900Smckusick * Loop around until we get our own reply 67741900Smckusick */ 67841900Smckusick for (;;) { 67941900Smckusick /* 68041900Smckusick * Lock against other receivers so that I don't get stuck in 68141900Smckusick * sbwait() after someone else has received my reply for me. 68241900Smckusick * Also necessary for connection based protocols to avoid 68341900Smckusick * race conditions during a reconnect. 
68441900Smckusick */ 68552196Smckusick if (error = nfs_rcvlock(myrep)) 68652196Smckusick return (error); 68741900Smckusick /* Already received, bye bye */ 68841900Smckusick if (myrep->r_mrep != NULL) { 68952196Smckusick nfs_rcvunlock(&nmp->nm_flag); 69041900Smckusick return (0); 69140117Smckusick } 69241900Smckusick /* 69341900Smckusick * Get the next Rpc reply off the socket 69441900Smckusick */ 69552196Smckusick error = nfs_receive(myrep, &nam, &mrep); 69652196Smckusick nfs_rcvunlock(&nmp->nm_flag); 69752196Smckusick if (error) printf("rcv err=%d\n",error); 69852196Smckusick if (error) { 69938414Smckusick 70041900Smckusick /* 70141900Smckusick * Ignore routing errors on connectionless protocols?? 70241900Smckusick */ 70341900Smckusick if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) { 70441900Smckusick nmp->nm_so->so_error = 0; 70541900Smckusick continue; 70641900Smckusick } 70741900Smckusick return (error); 70838414Smckusick } 70952196Smckusick if (nam) 71052196Smckusick m_freem(nam); 71141900Smckusick 71241900Smckusick /* 71341900Smckusick * Get the xid and check that it is an rpc reply 71441900Smckusick */ 71552196Smckusick md = mrep; 71652196Smckusick dpos = mtod(md, caddr_t); 71752196Smckusick nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED); 71852196Smckusick rxid = *tl++; 71952196Smckusick if (*tl != rpc_reply) { 72052196Smckusick if (nmp->nm_flag & NFSMNT_NQNFS) { 72152196Smckusick if (nqnfs_callback(nmp, mrep, md, dpos)) 72252196Smckusick nfsstats.rpcinvalid++; 72352196Smckusick } else { 72452196Smckusick nfsstats.rpcinvalid++; 72552196Smckusick m_freem(mrep); 72652196Smckusick } 72752196Smckusick nfsmout: 72841900Smckusick continue; 72938414Smckusick } 73052196Smckusick 73141900Smckusick /* 73241900Smckusick * Loop through the request list to match up the reply 73341900Smckusick * Iff no match, just drop the datagram 73441900Smckusick */ 73541900Smckusick rep = nfsreqh.r_next; 73641900Smckusick while (rep != &nfsreqh) { 73745281Smckusick if (rep->r_mrep == 
NULL && rxid == rep->r_xid) { 73841900Smckusick /* Found it.. */ 73952196Smckusick rep->r_mrep = mrep; 74052196Smckusick rep->r_md = md; 74152196Smckusick rep->r_dpos = dpos; 74252196Smckusick if (nfsrtton) { 74352196Smckusick struct rttl *rt; 74452196Smckusick 74552196Smckusick rt = &nfsrtt.rttl[nfsrtt.pos]; 74652196Smckusick rt->proc = rep->r_procnum; 74752196Smckusick rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]); 74852196Smckusick rt->sent = nmp->nm_sent; 74952196Smckusick rt->cwnd = nmp->nm_cwnd; 75052196Smckusick rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1]; 75152196Smckusick rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1]; 75252196Smckusick rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid; 75352196Smckusick rt->tstamp = time; 75452196Smckusick if (rep->r_flags & R_TIMING) 75552196Smckusick rt->rtt = rep->r_rtt; 75652196Smckusick else 75752196Smckusick rt->rtt = 1000000; 75852196Smckusick nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ; 75952196Smckusick } 76041900Smckusick /* 76152196Smckusick * Update congestion window. 76252196Smckusick * Do the additive increase of 76352196Smckusick * one rpc/rtt. 76441900Smckusick */ 76552196Smckusick if (nmp->nm_cwnd <= nmp->nm_sent) { 76652196Smckusick nmp->nm_cwnd += 76752196Smckusick (NFS_CWNDSCALE * NFS_CWNDSCALE + 76852196Smckusick (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd; 76952196Smckusick if (nmp->nm_cwnd > NFS_MAXCWND) 77052196Smckusick nmp->nm_cwnd = NFS_MAXCWND; 77152196Smckusick } 77252196Smckusick nmp->nm_sent -= NFS_CWNDSCALE; 77352196Smckusick /* 77452196Smckusick * Update rtt using a gain of 0.125 on the mean 77552196Smckusick * and a gain of 0.25 on the deviation. 77652196Smckusick */ 77741900Smckusick if (rep->r_flags & R_TIMING) { 77852196Smckusick /* 77952196Smckusick * Since the timer resolution of 78052196Smckusick * NFS_HZ is so course, it can often 78152196Smckusick * result in r_rtt == 0. 
Since 78252196Smckusick * r_rtt == N means that the actual 78352196Smckusick * rtt is between N+dt and N+2-dt ticks, 78452196Smckusick * add 1. 78552196Smckusick */ 78652196Smckusick t1 = rep->r_rtt + 1; 78752196Smckusick t1 -= (NFS_SRTT(rep) >> 3); 78852196Smckusick NFS_SRTT(rep) += t1; 78952196Smckusick if (t1 < 0) 79052196Smckusick t1 = -t1; 79152196Smckusick t1 -= (NFS_SDRTT(rep) >> 2); 79252196Smckusick NFS_SDRTT(rep) += t1; 79341900Smckusick } 79452196Smckusick nmp->nm_timeouts = 0; 79540117Smckusick break; 79638414Smckusick } 79741900Smckusick rep = rep->r_next; 79838414Smckusick } 79941900Smckusick /* 80041900Smckusick * If not matched to a request, drop it. 80141900Smckusick * If it's mine, get out. 80241900Smckusick */ 80341900Smckusick if (rep == &nfsreqh) { 80441900Smckusick nfsstats.rpcunexpected++; 80552196Smckusick m_freem(mrep); 806*53426Smckusick } else if (rep == myrep) { 807*53426Smckusick if (rep->r_mrep == NULL) 808*53426Smckusick panic("nfsreply nil"); 80941900Smckusick return (0); 810*53426Smckusick } 81138414Smckusick } 81238414Smckusick } 81338414Smckusick 81438414Smckusick /* 81538414Smckusick * nfs_request - goes something like this 81638414Smckusick * - fill in request struct 81738414Smckusick * - links it into list 81841900Smckusick * - calls nfs_send() for first transmit 81941900Smckusick * - calls nfs_receive() to get reply 82038414Smckusick * - break down rpc header and return with nfs reply pointed to 82138414Smckusick * by mrep or error 82238414Smckusick * nb: always frees up mreq mbuf list 82338414Smckusick */ 82452196Smckusick nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp) 82538414Smckusick struct vnode *vp; 82652196Smckusick struct mbuf *mrest; 82741900Smckusick int procnum; 82841900Smckusick struct proc *procp; 82952196Smckusick struct ucred *cred; 83038414Smckusick struct mbuf **mrp; 83138414Smckusick struct mbuf **mdp; 83238414Smckusick caddr_t *dposp; 83338414Smckusick { 83438414Smckusick register struct mbuf 
*m, *mrep; 83538414Smckusick register struct nfsreq *rep; 83648048Smckusick register u_long *tl; 83752196Smckusick register int i; 83841900Smckusick struct nfsmount *nmp; 83952196Smckusick struct mbuf *md, *mheadend; 84039344Smckusick struct nfsreq *reph; 84152196Smckusick struct nfsnode *tp, *np; 84252196Smckusick time_t reqtime, waituntil; 84352196Smckusick caddr_t dpos, cp2; 84452196Smckusick int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type; 84552196Smckusick int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0; 84652196Smckusick u_long xid; 84752196Smckusick char *auth_str; 84838414Smckusick 84952196Smckusick nmp = VFSTONFS(vp->v_mount); 85038414Smckusick MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK); 85141900Smckusick rep->r_nmp = nmp; 85238414Smckusick rep->r_vp = vp; 85341900Smckusick rep->r_procp = procp; 85452196Smckusick rep->r_procnum = procnum; 85552196Smckusick i = 0; 85652196Smckusick m = mrest; 85738414Smckusick while (m) { 85852196Smckusick i += m->m_len; 85938414Smckusick m = m->m_next; 86038414Smckusick } 86152196Smckusick mrest_len = i; 86252196Smckusick 86352196Smckusick /* 86452196Smckusick * Get the RPC header with authorization. 
86552196Smckusick */ 86652196Smckusick kerbauth: 86752196Smckusick auth_str = (char *)0; 86852196Smckusick if (nmp->nm_flag & NFSMNT_KERB) { 86952196Smckusick if (failed_auth) { 87052196Smckusick error = nfs_getauth(nmp, rep, cred, &auth_type, 87152196Smckusick &auth_str, &auth_len); 87252196Smckusick if (error) { 87352196Smckusick free((caddr_t)rep, M_NFSREQ); 87452196Smckusick m_freem(mrest); 87552196Smckusick return (error); 87652196Smckusick } 87752196Smckusick } else { 87852196Smckusick auth_type = RPCAUTH_UNIX; 87952196Smckusick auth_len = 5 * NFSX_UNSIGNED; 88045281Smckusick } 88152196Smckusick } else { 88252196Smckusick auth_type = RPCAUTH_UNIX; 883*53426Smckusick if (cred->cr_ngroups < 1) 884*53426Smckusick panic("nfsreq nogrps"); 88552196Smckusick auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ? 88652196Smckusick nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) + 88752196Smckusick 5 * NFSX_UNSIGNED; 88845281Smckusick } 88952196Smckusick m = nfsm_rpchead(cred, (nmp->nm_flag & NFSMNT_NQNFS), procnum, 89052196Smckusick auth_type, auth_len, auth_str, mrest, mrest_len, &mheadend, &xid); 89152196Smckusick if (auth_str) 89252196Smckusick free(auth_str, M_TEMP); 89352196Smckusick 89441900Smckusick /* 89552196Smckusick * For stream protocols, insert a Sun RPC Record Mark. 
89641900Smckusick */ 89752196Smckusick if (nmp->nm_sotype == SOCK_STREAM) { 89852196Smckusick M_PREPEND(m, NFSX_UNSIGNED, M_WAIT); 89952196Smckusick *mtod(m, u_long *) = htonl(0x80000000 | 90052196Smckusick (m->m_pkthdr.len - NFSX_UNSIGNED)); 90141900Smckusick } 90252196Smckusick rep->r_mreq = m; 90352196Smckusick rep->r_xid = xid; 90452196Smckusick tryagain: 90552196Smckusick if (nmp->nm_flag & NFSMNT_SOFT) 90652196Smckusick rep->r_retry = nmp->nm_retry; 90752196Smckusick else 90852196Smckusick rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */ 90952196Smckusick rep->r_rtt = rep->r_rexmit = 0; 91052196Smckusick if (proct[procnum] > 0) 91152196Smckusick rep->r_flags = R_TIMING; 91252196Smckusick else 91352196Smckusick rep->r_flags = 0; 91452196Smckusick rep->r_mrep = NULL; 91538414Smckusick 91640117Smckusick /* 91740117Smckusick * Do the client side RPC. 91840117Smckusick */ 91940117Smckusick nfsstats.rpcrequests++; 92041900Smckusick /* 92141900Smckusick * Chain request into list of outstanding requests. Be sure 92241900Smckusick * to put it LAST so timer finds oldest requests first. 92341900Smckusick */ 92452196Smckusick s = splsoftclock(); 92539344Smckusick reph = &nfsreqh; 92641900Smckusick reph->r_prev->r_next = rep; 92741900Smckusick rep->r_prev = reph->r_prev; 92839344Smckusick reph->r_prev = rep; 92939344Smckusick rep->r_next = reph; 93052196Smckusick 93152196Smckusick /* Get send time for nqnfs */ 93252196Smckusick reqtime = time.tv_sec; 93352196Smckusick 93440117Smckusick /* 93540117Smckusick * If backing off another request or avoiding congestion, don't 93640117Smckusick * send this one now but let timer do it. If not timing a request, 93740117Smckusick * do it now. 
93840117Smckusick */ 93952196Smckusick if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM || 94052196Smckusick (nmp->nm_flag & NFSMNT_DUMBTIMR) || 94152196Smckusick nmp->nm_sent < nmp->nm_cwnd)) { 94240117Smckusick splx(s); 94341900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) 94452196Smckusick error = nfs_sndlock(&nmp->nm_flag, rep); 94552196Smckusick if (!error) { 94652196Smckusick m = m_copym(m, 0, M_COPYALL, M_WAIT); 94752196Smckusick error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep); 94852196Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) 94952196Smckusick nfs_sndunlock(&nmp->nm_flag); 95052196Smckusick } 95152196Smckusick if (!error && (rep->r_flags & R_MUSTRESEND) == 0) { 95252196Smckusick nmp->nm_sent += NFS_CWNDSCALE; 95352196Smckusick rep->r_flags |= R_SENT; 95452196Smckusick } 95552196Smckusick } else { 95641900Smckusick splx(s); 95752196Smckusick rep->r_rtt = -1; 95852196Smckusick } 95938414Smckusick 96038414Smckusick /* 96140117Smckusick * Wait for the reply from our send or the timer's. 96240117Smckusick */ 96341900Smckusick if (!error) 96452196Smckusick error = nfs_reply(rep); 96538414Smckusick 96640117Smckusick /* 96740117Smckusick * RPC done, unlink the request. 96840117Smckusick */ 96952196Smckusick s = splsoftclock(); 97038414Smckusick rep->r_prev->r_next = rep->r_next; 97139344Smckusick rep->r_next->r_prev = rep->r_prev; 97238414Smckusick splx(s); 97341900Smckusick 97441900Smckusick /* 97541900Smckusick * If there was a successful reply and a tprintf msg. 97641900Smckusick * tprintf a response. 
97741900Smckusick */ 97847737Skarels if (!error && (rep->r_flags & R_TPRINTFMSG)) 97947737Skarels nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname, 98047737Skarels "is alive again"); 98145281Smckusick mrep = rep->r_mrep; 98252196Smckusick md = rep->r_md; 98352196Smckusick dpos = rep->r_dpos; 98452196Smckusick if (error) { 98552196Smckusick m_freem(rep->r_mreq); 98652196Smckusick free((caddr_t)rep, M_NFSREQ); 98738414Smckusick return (error); 98852196Smckusick } 98938414Smckusick 99038414Smckusick /* 99138414Smckusick * break down the rpc header and check if ok 99238414Smckusick */ 99352196Smckusick nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED); 99448048Smckusick if (*tl++ == rpc_msgdenied) { 99548048Smckusick if (*tl == rpc_mismatch) 99638414Smckusick error = EOPNOTSUPP; 99752196Smckusick else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) { 99852196Smckusick if (*tl == rpc_rejectedcred && failed_auth == 0) { 99952196Smckusick failed_auth++; 100052196Smckusick mheadend->m_next = (struct mbuf *)0; 100152196Smckusick m_freem(mrep); 100252196Smckusick m_freem(rep->r_mreq); 100352196Smckusick goto kerbauth; 100452196Smckusick } else 100552196Smckusick error = EAUTH; 100652196Smckusick } else 100738414Smckusick error = EACCES; 100838414Smckusick m_freem(mrep); 100952196Smckusick m_freem(rep->r_mreq); 101052196Smckusick free((caddr_t)rep, M_NFSREQ); 101138414Smckusick return (error); 101238414Smckusick } 101352196Smckusick 101438414Smckusick /* 101538414Smckusick * skip over the auth_verf, someday we may want to cache auth_short's 101638414Smckusick * for nfs_reqhead(), but for now just dump it 101738414Smckusick */ 101848048Smckusick if (*++tl != 0) { 101952196Smckusick i = nfsm_rndup(fxdr_unsigned(long, *tl)); 102052196Smckusick nfsm_adv(i); 102138414Smckusick } 102252196Smckusick nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); 102338414Smckusick /* 0 == ok */ 102448048Smckusick if (*tl == 0) { 102552196Smckusick nfsm_dissect(tl, u_long *, 
NFSX_UNSIGNED); 102648048Smckusick if (*tl != 0) { 102748048Smckusick error = fxdr_unsigned(int, *tl); 102838414Smckusick m_freem(mrep); 102952196Smckusick if ((nmp->nm_flag & NFSMNT_NQNFS) && 103052196Smckusick error == NQNFS_TRYLATER) { 103152196Smckusick error = 0; 103252196Smckusick waituntil = time.tv_sec + trylater_delay; 103352196Smckusick while (time.tv_sec < waituntil) 103452196Smckusick (void) tsleep((caddr_t)&lbolt, 103552196Smckusick PSOCK, "nqnfstry", 0); 103652196Smckusick trylater_delay *= nfs_backoff[trylater_cnt]; 103752196Smckusick if (trylater_cnt < 7) 103852196Smckusick trylater_cnt++; 103952196Smckusick goto tryagain; 104052196Smckusick } 104152196Smckusick m_freem(rep->r_mreq); 104252196Smckusick free((caddr_t)rep, M_NFSREQ); 104338414Smckusick return (error); 104438414Smckusick } 104552196Smckusick 104652196Smckusick /* 104752196Smckusick * For nqnfs, get any lease in reply 104852196Smckusick */ 104952196Smckusick if (nmp->nm_flag & NFSMNT_NQNFS) { 105052196Smckusick nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); 105152196Smckusick if (*tl) { 105252196Smckusick np = VTONFS(vp); 105352196Smckusick nqlflag = fxdr_unsigned(int, *tl); 105452196Smckusick nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED); 105552196Smckusick cachable = fxdr_unsigned(int, *tl++); 105652196Smckusick reqtime += fxdr_unsigned(int, *tl++); 105752196Smckusick if (reqtime > time.tv_sec) { 105852196Smckusick if (np->n_tnext) { 105952196Smckusick if (np->n_tnext == (struct nfsnode *)nmp) 106052196Smckusick nmp->nm_tprev = np->n_tprev; 106152196Smckusick else 106252196Smckusick np->n_tnext->n_tprev = np->n_tprev; 106352196Smckusick if (np->n_tprev == (struct nfsnode *)nmp) 106452196Smckusick nmp->nm_tnext = np->n_tnext; 106552196Smckusick else 106652196Smckusick np->n_tprev->n_tnext = np->n_tnext; 106752196Smckusick if (nqlflag == NQL_WRITE) 106852196Smckusick np->n_flag |= NQNFSWRITE; 106952196Smckusick } else if (nqlflag == NQL_READ) 107052196Smckusick np->n_flag &= ~NQNFSWRITE; 
107152196Smckusick else 107252196Smckusick np->n_flag |= NQNFSWRITE; 107352196Smckusick if (cachable) 107452196Smckusick np->n_flag &= ~NQNFSNONCACHE; 107552196Smckusick else 107652196Smckusick np->n_flag |= NQNFSNONCACHE; 107752196Smckusick np->n_expiry = reqtime; 107852196Smckusick fxdr_hyper(tl, &np->n_lrev); 107952196Smckusick tp = nmp->nm_tprev; 108052196Smckusick while (tp != (struct nfsnode *)nmp && 108152196Smckusick tp->n_expiry > np->n_expiry) 108252196Smckusick tp = tp->n_tprev; 108352196Smckusick if (tp == (struct nfsnode *)nmp) { 108452196Smckusick np->n_tnext = nmp->nm_tnext; 108552196Smckusick nmp->nm_tnext = np; 108652196Smckusick } else { 108752196Smckusick np->n_tnext = tp->n_tnext; 108852196Smckusick tp->n_tnext = np; 108952196Smckusick } 109052196Smckusick np->n_tprev = tp; 109152196Smckusick if (np->n_tnext == (struct nfsnode *)nmp) 109252196Smckusick nmp->nm_tprev = np; 109352196Smckusick else 109452196Smckusick np->n_tnext->n_tprev = np; 109552196Smckusick } 109652196Smckusick } 109752196Smckusick } 109838414Smckusick *mrp = mrep; 109938414Smckusick *mdp = md; 110038414Smckusick *dposp = dpos; 110152196Smckusick m_freem(rep->r_mreq); 110252196Smckusick FREE((caddr_t)rep, M_NFSREQ); 110338414Smckusick return (0); 110438414Smckusick } 110538414Smckusick m_freem(mrep); 110652196Smckusick m_freem(rep->r_mreq); 110752196Smckusick free((caddr_t)rep, M_NFSREQ); 110852196Smckusick error = EPROTONOSUPPORT; 110938414Smckusick nfsmout: 111038414Smckusick return (error); 111138414Smckusick } 111238414Smckusick 111338414Smckusick /* 111438414Smckusick * Generate the rpc reply header 111538414Smckusick * siz arg. 
is used to decide if adding a cluster is worthwhile 111638414Smckusick */ 111752196Smckusick nfs_rephead(siz, nd, err, cache, frev, mrq, mbp, bposp) 111838414Smckusick int siz; 111952196Smckusick struct nfsd *nd; 112038414Smckusick int err; 112152196Smckusick int cache; 112252196Smckusick u_quad_t *frev; 112338414Smckusick struct mbuf **mrq; 112438414Smckusick struct mbuf **mbp; 112538414Smckusick caddr_t *bposp; 112638414Smckusick { 112748048Smckusick register u_long *tl; 112852196Smckusick register struct mbuf *mreq; 112939494Smckusick caddr_t bpos; 113052196Smckusick struct mbuf *mb, *mb2; 113138414Smckusick 113252196Smckusick MGETHDR(mreq, M_WAIT, MT_DATA); 113338414Smckusick mb = mreq; 113452196Smckusick /* 113552196Smckusick * If this is a big reply, use a cluster else 113652196Smckusick * try and leave leading space for the lower level headers. 113752196Smckusick */ 113852196Smckusick siz += RPC_REPLYSIZ; 113952196Smckusick if (siz >= MINCLSIZE) { 114041900Smckusick MCLGET(mreq, M_WAIT); 114152196Smckusick } else 114252196Smckusick mreq->m_data += max_hdr; 114348048Smckusick tl = mtod(mreq, u_long *); 114438414Smckusick mreq->m_len = 6*NFSX_UNSIGNED; 114548048Smckusick bpos = ((caddr_t)tl)+mreq->m_len; 114652196Smckusick *tl++ = nd->nd_retxid; 114748048Smckusick *tl++ = rpc_reply; 114852196Smckusick if (err == ERPCMISMATCH || err == NQNFS_AUTHERR) { 114948048Smckusick *tl++ = rpc_msgdenied; 115052196Smckusick if (err == NQNFS_AUTHERR) { 115152196Smckusick *tl++ = rpc_autherr; 115252196Smckusick *tl = rpc_rejectedcred; 115352196Smckusick mreq->m_len -= NFSX_UNSIGNED; 115452196Smckusick bpos -= NFSX_UNSIGNED; 115552196Smckusick } else { 115652196Smckusick *tl++ = rpc_mismatch; 115752196Smckusick *tl++ = txdr_unsigned(2); 115852196Smckusick *tl = txdr_unsigned(2); 115952196Smckusick } 116038414Smckusick } else { 116148048Smckusick *tl++ = rpc_msgaccepted; 116248048Smckusick *tl++ = 0; 116348048Smckusick *tl++ = 0; 116438414Smckusick switch (err) { 
116538414Smckusick case EPROGUNAVAIL: 116648048Smckusick *tl = txdr_unsigned(RPC_PROGUNAVAIL); 116738414Smckusick break; 116838414Smckusick case EPROGMISMATCH: 116948048Smckusick *tl = txdr_unsigned(RPC_PROGMISMATCH); 117048048Smckusick nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED); 117148048Smckusick *tl++ = txdr_unsigned(2); 117248048Smckusick *tl = txdr_unsigned(2); /* someday 3 */ 117338414Smckusick break; 117438414Smckusick case EPROCUNAVAIL: 117548048Smckusick *tl = txdr_unsigned(RPC_PROCUNAVAIL); 117638414Smckusick break; 117738414Smckusick default: 117848048Smckusick *tl = 0; 117938414Smckusick if (err != VNOVAL) { 118048048Smckusick nfsm_build(tl, u_long *, NFSX_UNSIGNED); 118152196Smckusick if (err) 118252196Smckusick *tl = txdr_unsigned(nfsrv_errmap[err - 1]); 118352196Smckusick else 118452196Smckusick *tl = 0; 118538414Smckusick } 118638414Smckusick break; 118738414Smckusick }; 118838414Smckusick } 118952196Smckusick 119052196Smckusick /* 119152196Smckusick * For nqnfs, piggyback lease as requested. 
119252196Smckusick */ 119352196Smckusick if (nd->nd_nqlflag != NQL_NOVAL && err == 0) { 119452196Smckusick if (nd->nd_nqlflag) { 119552196Smckusick nfsm_build(tl, u_long *, 5*NFSX_UNSIGNED); 119652196Smckusick *tl++ = txdr_unsigned(nd->nd_nqlflag); 119752196Smckusick *tl++ = txdr_unsigned(cache); 119852196Smckusick *tl++ = txdr_unsigned(nd->nd_duration); 119952196Smckusick txdr_hyper(frev, tl); 120052196Smckusick } else { 120152196Smckusick if (nd->nd_nqlflag != 0) 120252196Smckusick panic("nqreph"); 120352196Smckusick nfsm_build(tl, u_long *, NFSX_UNSIGNED); 120452196Smckusick *tl = 0; 120552196Smckusick } 120652196Smckusick } 120738414Smckusick *mrq = mreq; 120838414Smckusick *mbp = mb; 120938414Smckusick *bposp = bpos; 121038414Smckusick if (err != 0 && err != VNOVAL) 121138414Smckusick nfsstats.srvrpc_errs++; 121238414Smckusick return (0); 121338414Smckusick } 121438414Smckusick 121538414Smckusick /* 121638414Smckusick * Nfs timer routine 121738414Smckusick * Scan the nfsreq list and retranmit any requests that have timed out 121838414Smckusick * To avoid retransmission attempts on STREAM sockets (in the future) make 121940117Smckusick * sure to set the r_retry field to 0 (implies nm_retry == 0). 
122038414Smckusick */ 122138414Smckusick nfs_timer() 122238414Smckusick { 122338414Smckusick register struct nfsreq *rep; 122438414Smckusick register struct mbuf *m; 122538414Smckusick register struct socket *so; 122641900Smckusick register struct nfsmount *nmp; 122752196Smckusick register int timeo; 122852196Smckusick static long lasttime = 0; 122940117Smckusick int s, error; 123038414Smckusick 123138414Smckusick s = splnet(); 123241900Smckusick for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) { 123341900Smckusick nmp = rep->r_nmp; 123452196Smckusick if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) 123541900Smckusick continue; 123652196Smckusick if (nfs_sigintr(nmp, rep, rep->r_procp)) { 123741900Smckusick rep->r_flags |= R_SOFTTERM; 123841900Smckusick continue; 123941900Smckusick } 124052196Smckusick if (rep->r_rtt >= 0) { 124152196Smckusick rep->r_rtt++; 124252196Smckusick if (nmp->nm_flag & NFSMNT_DUMBTIMR) 124352196Smckusick timeo = nmp->nm_timeo; 124452196Smckusick else 124552196Smckusick timeo = NFS_RTO(nmp, proct[rep->r_procnum]); 124652196Smckusick if (nmp->nm_timeouts > 0) 124752196Smckusick timeo *= nfs_backoff[nmp->nm_timeouts - 1]; 124852196Smckusick if (rep->r_rtt <= timeo) 124952196Smckusick continue; 125052196Smckusick if (nmp->nm_timeouts < 8) 125152196Smckusick nmp->nm_timeouts++; 125240117Smckusick } 125341900Smckusick /* 125441900Smckusick * Check for server not responding 125541900Smckusick */ 125641900Smckusick if ((rep->r_flags & R_TPRINTFMSG) == 0 && 125752196Smckusick rep->r_rexmit > nmp->nm_deadthresh) { 125847737Skarels nfs_msg(rep->r_procp, 125947737Skarels nmp->nm_mountp->mnt_stat.f_mntfromname, 126047737Skarels "not responding"); 126141900Smckusick rep->r_flags |= R_TPRINTFMSG; 126241900Smckusick } 126343351Smckusick if (rep->r_rexmit >= rep->r_retry) { /* too many */ 126441900Smckusick nfsstats.rpctimeouts++; 126541900Smckusick rep->r_flags |= R_SOFTTERM; 126641900Smckusick continue; 126741900Smckusick } 
126852196Smckusick if (nmp->nm_sotype != SOCK_DGRAM) { 126952196Smckusick if (++rep->r_rexmit > NFS_MAXREXMIT) 127052196Smckusick rep->r_rexmit = NFS_MAXREXMIT; 127143351Smckusick continue; 127252196Smckusick } 127352196Smckusick if ((so = nmp->nm_so) == NULL) 127452196Smckusick continue; 127541900Smckusick 127641900Smckusick /* 127741900Smckusick * If there is enough space and the window allows.. 127841900Smckusick * Resend it 127952196Smckusick * Set r_rtt to -1 in case we fail to send it now. 128041900Smckusick */ 128152196Smckusick rep->r_rtt = -1; 128241900Smckusick if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len && 128352196Smckusick ((nmp->nm_flag & NFSMNT_DUMBTIMR) || 128452196Smckusick (rep->r_flags & R_SENT) || 128552196Smckusick nmp->nm_sent < nmp->nm_cwnd) && 128652196Smckusick (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){ 128741900Smckusick if ((nmp->nm_flag & NFSMNT_NOCONN) == 0) 128841900Smckusick error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, 128952196Smckusick (struct mbuf *)0, (struct mbuf *)0); 129041900Smckusick else 129141900Smckusick error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, 129252196Smckusick nmp->nm_nam, (struct mbuf *)0); 129341900Smckusick if (error) { 129441900Smckusick if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) 129541900Smckusick so->so_error = 0; 129641900Smckusick } else { 129741900Smckusick /* 129852196Smckusick * Iff first send, start timing 129952196Smckusick * else turn timing off, backoff timer 130052196Smckusick * and divide congestion window by 2. 
130141900Smckusick */ 130252196Smckusick if (rep->r_flags & R_SENT) { 130352196Smckusick rep->r_flags &= ~R_TIMING; 130452196Smckusick if (++rep->r_rexmit > NFS_MAXREXMIT) 130552196Smckusick rep->r_rexmit = NFS_MAXREXMIT; 130652196Smckusick nmp->nm_cwnd >>= 1; 130752196Smckusick if (nmp->nm_cwnd < NFS_CWNDSCALE) 130852196Smckusick nmp->nm_cwnd = NFS_CWNDSCALE; 130952196Smckusick nfsstats.rpcretries++; 131052196Smckusick } else { 131152196Smckusick rep->r_flags |= R_SENT; 131252196Smckusick nmp->nm_sent += NFS_CWNDSCALE; 131352196Smckusick } 131452196Smckusick rep->r_rtt = 0; 131541900Smckusick } 131641900Smckusick } 131740117Smckusick } 131852196Smckusick 131952196Smckusick /* 132052196Smckusick * Call the nqnfs server timer once a second to handle leases. 132152196Smckusick */ 132252196Smckusick if (lasttime != time.tv_sec) { 132352196Smckusick lasttime = time.tv_sec; 132452196Smckusick nqnfs_serverd(); 132552196Smckusick } 132640117Smckusick splx(s); 132740117Smckusick timeout(nfs_timer, (caddr_t)0, hz/NFS_HZ); 132840117Smckusick } 132940117Smckusick 133040117Smckusick /* 133152196Smckusick * Test for a termination condition pending on the process. 133252196Smckusick * This is used for NFSMNT_INT mounts. 133340117Smckusick */ 133452196Smckusick nfs_sigintr(nmp, rep, p) 133552196Smckusick struct nfsmount *nmp; 133652196Smckusick struct nfsreq *rep; 133752196Smckusick register struct proc *p; 133852196Smckusick { 133940117Smckusick 134052196Smckusick if (rep && (rep->r_flags & R_SOFTTERM)) 134152196Smckusick return (EINTR); 134252196Smckusick if (!(nmp->nm_flag & NFSMNT_INT)) 134352196Smckusick return (0); 134452196Smckusick if (p && p->p_sig && (((p->p_sig &~ p->p_sigmask) &~ p->p_sigignore) & 134552196Smckusick NFSINT_SIGMASK)) 134652196Smckusick return (EINTR); 134752196Smckusick return (0); 134852196Smckusick } 134952196Smckusick 135040117Smckusick /* 135152196Smckusick * Lock a socket against others. 
135252196Smckusick * Necessary for STREAM sockets to ensure you get an entire rpc request/reply 135352196Smckusick * and also to avoid race conditions between the processes with nfs requests 135452196Smckusick * in progress when a reconnect is necessary. 135540117Smckusick */ 135652196Smckusick nfs_sndlock(flagp, rep) 135752196Smckusick register int *flagp; 135852196Smckusick struct nfsreq *rep; 135952196Smckusick { 136052196Smckusick struct proc *p; 136140117Smckusick 136252196Smckusick if (rep) 136352196Smckusick p = rep->r_procp; 136452196Smckusick else 136552196Smckusick p = (struct proc *)0; 136652196Smckusick while (*flagp & NFSMNT_SNDLOCK) { 136752196Smckusick if (nfs_sigintr(rep->r_nmp, rep, p)) 136852196Smckusick return (EINTR); 136952196Smckusick *flagp |= NFSMNT_WANTSND; 137052196Smckusick (void) tsleep((caddr_t)flagp, PZERO-1, "nfsndlck", 0); 137152196Smckusick } 137252196Smckusick *flagp |= NFSMNT_SNDLOCK; 137352196Smckusick return (0); 137452196Smckusick } 137552196Smckusick 137652196Smckusick /* 137752196Smckusick * Unlock the stream socket for others. 
137852196Smckusick */ 137952196Smckusick void 138052196Smckusick nfs_sndunlock(flagp) 138152196Smckusick register int *flagp; 138240117Smckusick { 138340117Smckusick 138452196Smckusick if ((*flagp & NFSMNT_SNDLOCK) == 0) 138552196Smckusick panic("nfs sndunlock"); 138652196Smckusick *flagp &= ~NFSMNT_SNDLOCK; 138752196Smckusick if (*flagp & NFSMNT_WANTSND) { 138852196Smckusick *flagp &= ~NFSMNT_WANTSND; 138952196Smckusick wakeup((caddr_t)flagp); 139040117Smckusick } 139152196Smckusick } 139252196Smckusick 139352196Smckusick nfs_rcvlock(rep) 139452196Smckusick register struct nfsreq *rep; 139552196Smckusick { 139652196Smckusick register int *flagp = &rep->r_nmp->nm_flag; 139752196Smckusick 139852196Smckusick while (*flagp & NFSMNT_RCVLOCK) { 139952196Smckusick if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp)) 140052196Smckusick return (EINTR); 140152196Smckusick *flagp |= NFSMNT_WANTRCV; 140252196Smckusick (void) tsleep((caddr_t)flagp, PZERO-1, "nfsrcvlck", 0); 140340117Smckusick } 140452196Smckusick *flagp |= NFSMNT_RCVLOCK; 140552196Smckusick return (0); 140652196Smckusick } 140740117Smckusick 140852196Smckusick /* 140952196Smckusick * Unlock the stream socket for others. 141052196Smckusick */ 141152196Smckusick void 141252196Smckusick nfs_rcvunlock(flagp) 141352196Smckusick register int *flagp; 141452196Smckusick { 141552196Smckusick 141652196Smckusick if ((*flagp & NFSMNT_RCVLOCK) == 0) 141752196Smckusick panic("nfs rcvunlock"); 141852196Smckusick *flagp &= ~NFSMNT_RCVLOCK; 141952196Smckusick if (*flagp & NFSMNT_WANTRCV) { 142052196Smckusick *flagp &= ~NFSMNT_WANTRCV; 142152196Smckusick wakeup((caddr_t)flagp); 142252196Smckusick } 142352196Smckusick } 142452196Smckusick 142552196Smckusick /* 142652196Smckusick * This function compares two net addresses by family and returns TRUE 142752196Smckusick * if they are the same host. 142852196Smckusick * If there is any doubt, return FALSE. 
142952196Smckusick * The AF_INET family is handled as a special case so that address mbufs 143052196Smckusick * don't need to be saved to store "struct in_addr", which is only 4 bytes. 143152196Smckusick */ 143252196Smckusick nfs_netaddr_match(family, haddr, hmask, nam) 143352196Smckusick int family; 143452196Smckusick union nethostaddr *haddr; 143552196Smckusick union nethostaddr *hmask; 143652196Smckusick struct mbuf *nam; 143752196Smckusick { 143852196Smckusick register struct sockaddr_in *inetaddr; 143952196Smckusick #ifdef ISO 144052196Smckusick register struct sockaddr_iso *isoaddr1, *isoaddr2; 144152196Smckusick #endif 144252196Smckusick 144352196Smckusick 144452196Smckusick switch (family) { 144552196Smckusick case AF_INET: 144652196Smckusick inetaddr = mtod(nam, struct sockaddr_in *); 144752196Smckusick if (inetaddr->sin_family != AF_INET) 144852196Smckusick return (0); 144952196Smckusick if (hmask) { 145052196Smckusick if ((inetaddr->sin_addr.s_addr & hmask->had_inetaddr) == 145152196Smckusick (haddr->had_inetaddr & hmask->had_inetaddr)) 145252196Smckusick return (1); 145352196Smckusick } else if (inetaddr->sin_addr.s_addr == haddr->had_inetaddr) 145452196Smckusick return (1); 145552196Smckusick break; 145652196Smckusick #ifdef ISO 145752196Smckusick case AF_ISO: 145852196Smckusick isoaddr1 = mtod(nam, struct sockaddr_iso *); 145952196Smckusick if (isoaddr1->siso_family != AF_ISO) 146052196Smckusick return (0); 146152196Smckusick isoaddr2 = mtod(haddr->had_nam, struct sockaddr_iso *); 146252196Smckusick if (isoaddr1->siso_nlen > 0 && 146352196Smckusick isoaddr1->siso_nlen == isoaddr2->siso_nlen && 146452196Smckusick SAME_ISOADDR(isoaddr1, isoaddr2)) 146552196Smckusick return (1); 146652196Smckusick break; 146752196Smckusick #endif /* ISO */ 146852196Smckusick default: 146952196Smckusick break; 147052196Smckusick }; 147152196Smckusick return (0); 147252196Smckusick } 147352196Smckusick 147452196Smckusick /* 147552196Smckusick * Build hash lists of net 
addresses and hang them off the mount point. 147652196Smckusick * Called by ufs_mount() to set up the lists of export addresses. 147752196Smckusick */ 147852196Smckusick hang_addrlist(mp, argp) 147952196Smckusick struct mount *mp; 148052196Smckusick struct ufs_args *argp; 148152196Smckusick { 148252196Smckusick register struct netaddrhash *np, **hnp; 148352196Smckusick register int i; 148452196Smckusick struct ufsmount *ump; 148552196Smckusick struct sockaddr *saddr; 148652196Smckusick struct mbuf *nam, *msk = (struct mbuf *)0; 148752196Smckusick union nethostaddr netmsk; 148852196Smckusick int error; 148952196Smckusick 149052196Smckusick if (error = sockargs(&nam, (caddr_t)argp->saddr, argp->slen, 149152196Smckusick MT_SONAME)) 149252196Smckusick return (error); 149352196Smckusick saddr = mtod(nam, struct sockaddr *); 149452196Smckusick ump = VFSTOUFS(mp); 149552196Smckusick if (saddr->sa_family == AF_INET && 149652196Smckusick ((struct sockaddr_in *)saddr)->sin_addr.s_addr == INADDR_ANY) { 149752196Smckusick m_freem(nam); 149852196Smckusick if (mp->mnt_flag & MNT_DEFEXPORTED) 149952196Smckusick return (EPERM); 150052196Smckusick np = &ump->um_defexported; 150152196Smckusick np->neth_exflags = argp->exflags; 150252196Smckusick np->neth_anon = argp->anon; 150352196Smckusick np->neth_anon.cr_ref = 1; 150452196Smckusick mp->mnt_flag |= MNT_DEFEXPORTED; 150552196Smckusick return (0); 150652196Smckusick } 150752196Smckusick if (argp->msklen > 0) { 150852196Smckusick if (error = sockargs(&msk, (caddr_t)argp->smask, argp->msklen, 150952196Smckusick MT_SONAME)) { 151052196Smckusick m_freem(nam); 151152196Smckusick return (error); 151252196Smckusick } 151352196Smckusick 151452196Smckusick /* 151552196Smckusick * Scan all the hash lists to check against duplications. 151652196Smckusick * For the net list, try both masks to catch a subnet 151752196Smckusick * of another network. 
151852196Smckusick */ 151952196Smckusick hnp = &ump->um_netaddr[NETMASK_HASH]; 152052196Smckusick np = *hnp; 152152196Smckusick if (saddr->sa_family == AF_INET) 152252196Smckusick netmsk.had_inetaddr = 152352196Smckusick mtod(msk, struct sockaddr_in *)->sin_addr.s_addr; 152452196Smckusick else 152552196Smckusick netmsk.had_nam = msk; 152652196Smckusick while (np) { 152752196Smckusick if (nfs_netaddr_match(np->neth_family, &np->neth_haddr, 152852196Smckusick &np->neth_hmask, nam) || 152952196Smckusick nfs_netaddr_match(np->neth_family, &np->neth_haddr, 153052196Smckusick &netmsk, nam)) { 153152196Smckusick m_freem(nam); 153252196Smckusick m_freem(msk); 153352196Smckusick return (EPERM); 153440117Smckusick } 153552196Smckusick np = np->neth_next; 153652196Smckusick } 153752196Smckusick for (i = 0; i < NETHASHSZ; i++) { 153852196Smckusick np = ump->um_netaddr[i]; 153952196Smckusick while (np) { 154052196Smckusick if (nfs_netaddr_match(np->neth_family, &np->neth_haddr, 154152196Smckusick &netmsk, nam)) { 154252196Smckusick m_freem(nam); 154352196Smckusick m_freem(msk); 154452196Smckusick return (EPERM); 154552196Smckusick } 154652196Smckusick np = np->neth_next; 154752196Smckusick } 154852196Smckusick } 154952196Smckusick } else { 155052196Smckusick hnp = &ump->um_netaddr[NETADDRHASH(saddr)]; 155152196Smckusick np = ump->um_netaddr[NETMASK_HASH]; 155252196Smckusick while (np) { 155352196Smckusick if (nfs_netaddr_match(np->neth_family, &np->neth_haddr, 155452196Smckusick &np->neth_hmask, nam)) { 155552196Smckusick m_freem(nam); 155652196Smckusick return (EPERM); 155752196Smckusick } 155852196Smckusick np = np->neth_next; 155952196Smckusick } 156052196Smckusick np = *hnp; 156152196Smckusick while (np) { 156252196Smckusick if (nfs_netaddr_match(np->neth_family, &np->neth_haddr, 156352196Smckusick (union nethostaddr *)0, nam)) { 156452196Smckusick m_freem(nam); 156552196Smckusick return (EPERM); 156652196Smckusick } 156752196Smckusick np = np->neth_next; 156852196Smckusick 
} 156940117Smckusick } 157052196Smckusick np = (struct netaddrhash *) malloc(sizeof(struct netaddrhash), M_NETADDR, 157152196Smckusick M_WAITOK); 157252196Smckusick np->neth_family = saddr->sa_family; 157352196Smckusick if (saddr->sa_family == AF_INET) { 157452196Smckusick np->neth_inetaddr = ((struct sockaddr_in *)saddr)->sin_addr.s_addr; 157552196Smckusick m_freem(nam); 157652196Smckusick if (msk) { 157752196Smckusick np->neth_inetmask = netmsk.had_inetaddr; 157852196Smckusick m_freem(msk); 157952196Smckusick if (np->neth_inetaddr &~ np->neth_inetmask) 158052196Smckusick return (EPERM); 158152196Smckusick } else 158252196Smckusick np->neth_inetmask = 0xffffffff; 158352196Smckusick } else { 158452196Smckusick np->neth_nam = nam; 158552196Smckusick np->neth_msk = msk; 158652196Smckusick } 158752196Smckusick np->neth_exflags = argp->exflags; 158852196Smckusick np->neth_anon = argp->anon; 158952196Smckusick np->neth_anon.cr_ref = 1; 159052196Smckusick np->neth_next = *hnp; 159152196Smckusick *hnp = np; 159252196Smckusick return (0); 159340117Smckusick } 159440117Smckusick 159552196Smckusick /* 159652196Smckusick * Free the net address hash lists that are hanging off the mount points. 
159752196Smckusick */ 159852196Smckusick free_addrlist(ump) 159952196Smckusick struct ufsmount *ump; 160040117Smckusick { 160152196Smckusick register struct netaddrhash *np, *onp; 160252196Smckusick register int i; 160340117Smckusick 160452196Smckusick for (i = 0; i <= NETHASHSZ; i++) { 160552196Smckusick np = ump->um_netaddr[i]; 160652196Smckusick ump->um_netaddr[i] = (struct netaddrhash *)0; 160752196Smckusick while (np) { 160852196Smckusick onp = np; 160952196Smckusick np = np->neth_next; 161052196Smckusick if (onp->neth_family != AF_INET) { 161152196Smckusick m_freem(onp->neth_nam); 161252196Smckusick m_freem(onp->neth_msk); 161338414Smckusick } 161452196Smckusick free((caddr_t)onp, M_NETADDR); 161538414Smckusick } 161638414Smckusick } 161738414Smckusick } 161838414Smckusick 161938414Smckusick /* 162052196Smckusick * Generate a hash code for an iso host address. Used by NETADDRHASH() for 162152196Smckusick * iso addresses. 162238414Smckusick */ 162352196Smckusick iso_addrhash(saddr) 162452196Smckusick struct sockaddr *saddr; 162541900Smckusick { 162652196Smckusick #ifdef ISO 162752196Smckusick register struct sockaddr_iso *siso; 162852196Smckusick register int i, sum; 162952196Smckusick 163052196Smckusick sum = 0; 163152196Smckusick for (i = 0; i < siso->siso_nlen; i++) 163252196Smckusick sum += siso->siso_data[i]; 163352196Smckusick return (sum & (NETHASHSZ - 1)); 163452196Smckusick #else 163552196Smckusick return (0); 163652196Smckusick #endif /* ISO */ 163741900Smckusick } 163840117Smckusick 163952196Smckusick /* 164052196Smckusick * Check for badly aligned mbuf data areas and 164152196Smckusick * realign data in an mbuf list by copying the data areas up, as required. 
164252196Smckusick */
/*
 * nfs_realign(m, hsiz):
 *   m    - head of the mbuf chain to check; the chain is modified in place.
 *   hsiz - byte count used as the fill limit of the first rewritten mbuf
 *          when that mbuf currently holds no more than hsiz bytes.
 * Scans for the first mbuf whose m_len or data address has either of its
 * low two bits set.  Starting at that mbuf, each mbuf's m_data is reset to
 * the start of its own storage (m_ext.ext_buf if M_EXT, else m_dat),
 * M_PKTHDR is cleared on it, and the chain's data is copied forward into
 * the reset mbufs with bcopy().  Mbufs left unused after the copy get
 * m_len = 0.  The function returns as soon as that one pass is done, or
 * after walking the whole chain if no misaligned mbuf was found.
 */
164352196Smckusick void 164452196Smckusick nfs_realign(m, hsiz) 164552196Smckusick register struct mbuf *m; 164652196Smckusick int hsiz; 164747737Skarels { 164852196Smckusick register struct mbuf *m2; 164952196Smckusick register int siz, mlen, olen; 165052196Smckusick register caddr_t tcp, fcp; 165152196Smckusick struct mbuf *mnew; 165247737Skarels 165352196Smckusick while (m) { 165452196Smckusick /* 165552196Smckusick * This never happens for UDP, rarely happens for TCP 165652196Smckusick * but frequently happens for iso transport. 165752196Smckusick */ 165852196Smckusick if ((m->m_len & 0x3) || (mtod(m, int) & 0x3)) { 165952196Smckusick olen = m->m_len; 166052196Smckusick fcp = mtod(m, caddr_t); 166152196Smckusick m->m_flags &= ~M_PKTHDR; 166252196Smckusick if (m->m_flags & M_EXT) 166352196Smckusick m->m_data = m->m_ext.ext_buf; 166452196Smckusick else 166552196Smckusick m->m_data = m->m_dat; 166652196Smckusick m->m_len = 0; 166752196Smckusick tcp = mtod(m, caddr_t); 166852196Smckusick mnew = m; 166952196Smckusick m2 = m->m_next; 167052196Smckusick 167152196Smckusick /* 167252196Smckusick * If possible, only put the first invariant part 167352196Smckusick * of the RPC header in the first mbuf. 167452196Smckusick */ 167552196Smckusick if (olen <= hsiz) 167652196Smckusick mlen = hsiz; 167752196Smckusick else 167852196Smckusick mlen = M_TRAILINGSPACE(m); 167952196Smckusick 168052196Smckusick /* 168152196Smckusick * Loop through the mbuf list consolidating data.
168252196Smckusick */ 168352196Smckusick while (m) { 168452196Smckusick while (olen > 0) {
/*
 * mlen == 0 means the current destination mbuf is full: reset the next
 * mbuf (m2) and make it the new copy target.
 * NOTE(review): m2 is dereferenced without a NULL check; presumably the
 * chain always has room because data only moves toward the front -- confirm.
 */
168552196Smckusick if (mlen == 0) { 168652196Smckusick m2->m_flags &= ~M_PKTHDR; 168752196Smckusick if (m2->m_flags & M_EXT) 168852196Smckusick m2->m_data = m2->m_ext.ext_buf; 168952196Smckusick else 169052196Smckusick m2->m_data = m2->m_dat; 169152196Smckusick m2->m_len = 0; 169252196Smckusick mlen = M_TRAILINGSPACE(m2); 169352196Smckusick tcp = mtod(m2, caddr_t); 169452196Smckusick mnew = m2; 169552196Smckusick m2 = m2->m_next; 169652196Smckusick } 169752196Smckusick siz = MIN(mlen, olen);
/* Nothing needs to move when source and destination addresses coincide. */
169852196Smckusick if (tcp != fcp) 169952196Smckusick bcopy(fcp, tcp, siz); 170052196Smckusick mnew->m_len += siz; 170152196Smckusick mlen -= siz; 170252196Smckusick olen -= siz; 170352196Smckusick tcp += siz; 170452196Smckusick fcp += siz; 170552196Smckusick } 170652196Smckusick m = m->m_next; 170752196Smckusick if (m) { 170852196Smckusick olen = m->m_len; 170952196Smckusick fcp = mtod(m, caddr_t); 171052196Smckusick } 171152196Smckusick } 171252196Smckusick 171352196Smckusick /* 171452196Smckusick * Finally, set m_len == 0 for any trailing mbufs that have 171552196Smckusick * been copied out of. 171652196Smckusick */ 171752196Smckusick while (m2) { 171852196Smckusick m2->m_len = 0; 171952196Smckusick m2 = m2->m_next; 172052196Smckusick } 172152196Smckusick return; 172252196Smckusick } 172352196Smckusick m = m->m_next; 172452196Smckusick } 172547737Skarels } 172647737Skarels 172741900Smckusick /* 172852196Smckusick * Socket upcall routine for the nfsd sockets. 172952196Smckusick * The caddr_t arg is a pointer to the "struct nfssvc_sock". 173052196Smckusick * Essentially do as much as possible non-blocking, else punt and it will 173152196Smckusick * be called with M_WAIT from an nfsd.
173241900Smckusick */ 173352196Smckusick void 173452196Smckusick nfsrv_rcv(so, arg, waitflag) 173552196Smckusick struct socket *so; 173652196Smckusick caddr_t arg; 173752196Smckusick int waitflag; 173838414Smckusick { 173952196Smckusick register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg; 174052196Smckusick register struct mbuf *m; 174152196Smckusick struct mbuf *mp, *nam; 174252196Smckusick struct uio auio; 174352196Smckusick int flags, error; 174440117Smckusick 174552903Smckusick if ((slp->ns_flag & SLP_VALID) == 0) 174652903Smckusick return; 174752903Smckusick #ifdef notdef 174852903Smckusick /* 174952903Smckusick * Define this to test for nfsds handling this under heavy load. 175052903Smckusick */ 175152903Smckusick if (waitflag == M_DONTWAIT) { 175252903Smckusick slp->ns_flag |= SLP_NEEDQ; goto dorecs; 175352903Smckusick } 175452903Smckusick #endif 175552932Smckusick auio.uio_procp = NULL; 175652196Smckusick if (so->so_type == SOCK_STREAM) { 175752196Smckusick /* 175852196Smckusick * If there are already records on the queue, defer soreceive() 175952196Smckusick * to an nfsd so that there is feedback to the TCP layer that 176052196Smckusick * the nfs servers are heavily loaded. 176152196Smckusick */ 176252196Smckusick if (slp->ns_rec && waitflag == M_DONTWAIT) { 176352196Smckusick slp->ns_flag |= SLP_NEEDQ; 176452903Smckusick goto dorecs; 176552196Smckusick } 176652196Smckusick 176752196Smckusick /* 176852196Smckusick * Do soreceive(). 
176952196Smckusick */ 177052196Smckusick auio.uio_resid = 1000000000; 177152196Smckusick flags = MSG_DONTWAIT; 177252196Smckusick error = soreceive(so, &nam, &auio, &mp, (struct mbuf **)0, &flags); 177352196Smckusick if (error || mp == (struct mbuf *)0) { 177452903Smckusick if (error == EWOULDBLOCK) 177552903Smckusick slp->ns_flag |= SLP_NEEDQ; 177652903Smckusick else 177752196Smckusick slp->ns_flag |= SLP_DISCONN; 177852196Smckusick goto dorecs; 177952196Smckusick } 178052196Smckusick m = mp; 178152196Smckusick if (slp->ns_rawend) { 178252196Smckusick slp->ns_rawend->m_next = m; 178352196Smckusick slp->ns_cc += 1000000000 - auio.uio_resid; 178452196Smckusick } else { 178552196Smckusick slp->ns_raw = m; 178652196Smckusick slp->ns_cc = 1000000000 - auio.uio_resid; 178752196Smckusick } 178852196Smckusick while (m->m_next) 178952196Smckusick m = m->m_next; 179052196Smckusick slp->ns_rawend = m; 179152196Smckusick 179252196Smckusick /* 179352196Smckusick * Now try and parse record(s) out of the raw stream data. 
179452196Smckusick */ 179552196Smckusick if (error = nfsrv_getstream(slp, waitflag)) { 179652196Smckusick if (error == EPERM) 179752196Smckusick slp->ns_flag |= SLP_DISCONN; 179852903Smckusick else 179952196Smckusick slp->ns_flag |= SLP_NEEDQ; 180052196Smckusick } 180152196Smckusick } else { 180252196Smckusick do { 180352196Smckusick auio.uio_resid = 1000000000; 180452196Smckusick flags = MSG_DONTWAIT; 180552196Smckusick error = soreceive(so, &nam, &auio, &mp, 180652196Smckusick (struct mbuf **)0, &flags); 180752196Smckusick if (mp) { 180852196Smckusick nfs_realign(mp, 10 * NFSX_UNSIGNED); 180952196Smckusick if (nam) { 181052196Smckusick m = nam; 181152196Smckusick m->m_next = mp; 181252196Smckusick } else 181352196Smckusick m = mp; 181452196Smckusick if (slp->ns_recend) 181552196Smckusick slp->ns_recend->m_nextpkt = m; 181652196Smckusick else 181752196Smckusick slp->ns_rec = m; 181852196Smckusick slp->ns_recend = m; 181952196Smckusick m->m_nextpkt = (struct mbuf *)0; 182052196Smckusick } 182152196Smckusick if (error) { 182252196Smckusick if ((so->so_proto->pr_flags & PR_CONNREQUIRED) 182352196Smckusick && error != EWOULDBLOCK) { 182452196Smckusick slp->ns_flag |= SLP_DISCONN; 182552903Smckusick goto dorecs; 182652196Smckusick } 182752196Smckusick } 182852196Smckusick } while (mp); 182940117Smckusick } 183052196Smckusick 183152196Smckusick /* 183252196Smckusick * Now try and process the request records, non-blocking. 183352196Smckusick */ 183452196Smckusick dorecs: 183552903Smckusick if (waitflag == M_DONTWAIT && 183652903Smckusick (slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN)))) 183752196Smckusick nfsrv_wakenfsd(slp); 183841900Smckusick } 183940117Smckusick 184041900Smckusick /* 184152196Smckusick * Try and extract an RPC request from the mbuf data list received on a 184252196Smckusick * stream socket. The "waitflag" argument indicates whether or not it 184352196Smckusick * can sleep. 
184441900Smckusick */
/*
 * nfsrv_getstream(slp, waitflag):
 *   slp      - per-socket state.  ns_raw/ns_rawend/ns_cc hold the unparsed
 *              stream data and its byte count, ns_reclen the length of the
 *              record being assembled (0 when the next record mark has not
 *              been read yet), ns_rec/ns_recend the list of finished records.
 *   waitflag - passed to m_copym() when an mbuf has to be split.
 * Returns 0 when no further complete record can be extracted, EPERM when a
 * record mark gives a length outside NFS_MINPACKET..NFS_MAXPACKET, and
 * EWOULDBLOCK when m_copym() returns no mbuf.  SLP_GETSTREAM is set for the
 * duration of the call (panic if it is already set on entry) and cleared
 * before every return.  Each finished record is passed through
 * nfs_realign() and appended to ns_rec via m_nextpkt.
 */
184552196Smckusick nfsrv_getstream(slp, waitflag) 184652196Smckusick register struct nfssvc_sock *slp; 184752196Smckusick int waitflag; 184841900Smckusick { 184952196Smckusick register struct mbuf *m; 185052196Smckusick register char *cp1, *cp2; 185152196Smckusick register int len; 185252196Smckusick struct mbuf *om, *m2, *recm; 185352196Smckusick u_long recmark; 185441900Smckusick 185552196Smckusick if (slp->ns_flag & SLP_GETSTREAM) 185652196Smckusick panic("nfs getstream"); 185752196Smckusick slp->ns_flag |= SLP_GETSTREAM; 185852196Smckusick for (;;) { 185952196Smckusick if (slp->ns_reclen == 0) { 186052196Smckusick if (slp->ns_cc < NFSX_UNSIGNED) { 186152196Smckusick slp->ns_flag &= ~SLP_GETSTREAM; 186252196Smckusick return (0); 186352196Smckusick } 186452196Smckusick m = slp->ns_raw; 186552196Smckusick if (m->m_len >= NFSX_UNSIGNED) { 186652196Smckusick bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED); 186752196Smckusick m->m_data += NFSX_UNSIGNED; 186852196Smckusick m->m_len -= NFSX_UNSIGNED; 186952196Smckusick } else {
/*
 * The record mark is spread over more than one mbuf: gather it one byte at
 * a time, stepping past mbufs whose m_len has reached 0.
 */
187052196Smckusick cp1 = (caddr_t)&recmark; 187152196Smckusick cp2 = mtod(m, caddr_t); 187252196Smckusick while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) { 187352196Smckusick while (m->m_len == 0) { 187452196Smckusick m = m->m_next; 187552196Smckusick cp2 = mtod(m, caddr_t); 187652196Smckusick } 187752196Smckusick *cp1++ = *cp2++; 187852196Smckusick m->m_data++; 187952196Smckusick m->m_len--; 188052196Smckusick } 188152196Smckusick } 188252196Smckusick slp->ns_cc -= NFSX_UNSIGNED;
/*
 * The top bit of the mark is masked off (presumably the RPC record-marking
 * "last fragment" flag -- confirm); the remainder is the record length.
 */
188352196Smckusick slp->ns_reclen = ntohl(recmark) & ~0x80000000; 188452196Smckusick if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) { 188552196Smckusick slp->ns_flag &= ~SLP_GETSTREAM; 188652196Smckusick return (EPERM); 188752196Smckusick } 188852196Smckusick } 188952196Smckusick 189052196Smckusick /* 189152196Smckusick * Now get the record part.
189252196Smckusick */ 189352196Smckusick if (slp->ns_cc == slp->ns_reclen) { 189452196Smckusick recm = slp->ns_raw; 189552196Smckusick slp->ns_raw = slp->ns_rawend = (struct mbuf *)0; 189652196Smckusick slp->ns_cc = slp->ns_reclen = 0; 189752196Smckusick } else if (slp->ns_cc > slp->ns_reclen) { 189852196Smckusick len = 0; 189952196Smckusick m = slp->ns_raw; 190052196Smckusick om = (struct mbuf *)0; 190152196Smckusick while (len < slp->ns_reclen) {
/*
 * This mbuf extends past the end of the record: the record's share is
 * duplicated with m_copym() and that many bytes are trimmed off the front
 * of the original, which stays at the head of the raw data.
 */
190252196Smckusick if ((len + m->m_len) > slp->ns_reclen) { 190352196Smckusick m2 = m_copym(m, 0, slp->ns_reclen - len, 190452196Smckusick waitflag); 190552196Smckusick if (m2) { 190652196Smckusick if (om) { 190752196Smckusick om->m_next = m2; 190852196Smckusick recm = slp->ns_raw; 190952196Smckusick } else 191052196Smckusick recm = m2; 191152196Smckusick m->m_data += slp->ns_reclen - len; 191252196Smckusick m->m_len -= slp->ns_reclen - len; 191352196Smckusick len = slp->ns_reclen; 191452196Smckusick } else { 191552196Smckusick slp->ns_flag &= ~SLP_GETSTREAM; 191652196Smckusick return (EWOULDBLOCK); 191752196Smckusick } 191852196Smckusick } else if ((len + m->m_len) == slp->ns_reclen) { 191952196Smckusick om = m; 192052196Smckusick len += m->m_len; 192152196Smckusick m = m->m_next; 192252196Smckusick recm = slp->ns_raw; 192352196Smckusick om->m_next = (struct mbuf *)0; 192452196Smckusick } else { 192552196Smckusick om = m; 192652196Smckusick len += m->m_len; 192752196Smckusick m = m->m_next; 192852196Smckusick } 192952196Smckusick } 193052196Smckusick slp->ns_raw = m; 193152196Smckusick slp->ns_cc -= len; 193252196Smckusick slp->ns_reclen = 0; 193352196Smckusick } else { 193452196Smckusick slp->ns_flag &= ~SLP_GETSTREAM; 193552196Smckusick return (0); 193652196Smckusick } 193752196Smckusick nfs_realign(recm, 10 * NFSX_UNSIGNED); 193852196Smckusick if (slp->ns_recend) 193952196Smckusick slp->ns_recend->m_nextpkt = recm; 194052196Smckusick else 194152196Smckusick slp->ns_rec = recm; 194252196Smckusick slp->ns_recend =
recm; 194340117Smckusick } 194438414Smckusick } 194541900Smckusick 194641900Smckusick /* 194752196Smckusick * Parse an RPC header. 194841900Smckusick */
/*
 * nfsrv_dorec(slp, nd): take the first record off slp->ns_rec and hand it
 * to nfs_getreq().
 *   slp - socket state holding the queued records.
 *   nd  - request descriptor that receives nd_nam, nd_mrep, nd_md, nd_dpos.
 * Returns ENOBUFS when the socket is not SLP_VALID or no record is queued,
 * the nfs_getreq() error (after freeing nd->nd_nam) when parsing fails, and
 * 0 otherwise.  A leading MT_SONAME mbuf is detached into nd->nd_nam; the
 * rest of the chain becomes nd->nd_mrep and nd->nd_md, and nd->nd_dpos is
 * set to its first data byte.
 */
194952196Smckusick nfsrv_dorec(slp, nd) 195052196Smckusick register struct nfssvc_sock *slp; 195152196Smckusick register struct nfsd *nd; 195241900Smckusick { 195352196Smckusick register struct mbuf *m; 195452196Smckusick int error; 195541900Smckusick 195652903Smckusick if ((slp->ns_flag & SLP_VALID) == 0 || 195752196Smckusick (m = slp->ns_rec) == (struct mbuf *)0) 195852196Smckusick return (ENOBUFS); 195952196Smckusick if (slp->ns_rec = m->m_nextpkt) 196052196Smckusick m->m_nextpkt = (struct mbuf *)0; 196152196Smckusick else 196252196Smckusick slp->ns_recend = (struct mbuf *)0; 196352196Smckusick if (m->m_type == MT_SONAME) { 196452196Smckusick nd->nd_nam = m; 196552196Smckusick nd->nd_md = nd->nd_mrep = m->m_next; 196652196Smckusick m->m_next = (struct mbuf *)0; 196752196Smckusick } else { 196852196Smckusick nd->nd_nam = (struct mbuf *)0; 196952196Smckusick nd->nd_md = nd->nd_mrep = m; 197052196Smckusick } 197152196Smckusick nd->nd_dpos = mtod(nd->nd_md, caddr_t); 197252196Smckusick if (error = nfs_getreq(nd, TRUE)) { 197352196Smckusick m_freem(nd->nd_nam); 197452196Smckusick return (error); 197552196Smckusick } 197652196Smckusick return (0); 197752196Smckusick } 197852196Smckusick 197952196Smckusick /* 198052196Smckusick * Parse an RPC request 198152196Smckusick * - verify it 198252196Smckusick * - fill in the cred struct.
198352196Smckusick */
/*
 * nfs_getreq(nd, has_header):
 *   nd         - request state.  nd_mrep/nd_md/nd_dpos locate the request
 *                on entry; nd_retxid, nd_procnum, nd_repstat, nd_cr,
 *                nd_authlen/nd_authstr, nd_flag, nd_nqlflag and nd_duration
 *                are filled in here.
 *   has_header - when non-zero the xid and message-direction words are
 *                present and checked (10 words are dissected instead of 8).
 * Return value:
 *   0       - parsed.  For an RPC version, program, program version, or
 *             procedure/auth-flavor problem, nd_repstat is set to the
 *             matching error and nd_procnum to NFSPROC_NOOP; the mbufs are
 *             not freed on these paths.  NFSPROC_NULL also returns 0 at
 *             once, before any credential parsing.
 *   EBADRPC - malformed request; mrep has been freed with m_freem().
 *   error   - value returned through the nfsmout label (presumably reached
 *             from inside the nfsm_* macros, which are defined elsewhere --
 *             confirm there what they free).
 * nd_md and nd_dpos are written back only on the final success path.
 */
198452196Smckusick nfs_getreq(nd, has_header) 198552196Smckusick register struct nfsd *nd; 198652196Smckusick int has_header; 198752196Smckusick { 198852196Smckusick register int len, i; 198952196Smckusick register u_long *tl; 199052196Smckusick register long t1; 199152196Smckusick struct uio uio; 199252196Smckusick struct iovec iov; 199352196Smckusick caddr_t dpos, cp2; 199452196Smckusick u_long nfsvers, auth_type; 199552196Smckusick int error = 0, nqnfs = 0; 199652196Smckusick struct mbuf *mrep, *md; 199752196Smckusick 199852196Smckusick mrep = nd->nd_mrep; 199952196Smckusick md = nd->nd_md; 200052196Smckusick dpos = nd->nd_dpos; 200152196Smckusick if (has_header) { 200252196Smckusick nfsm_dissect(tl, u_long *, 10*NFSX_UNSIGNED); 200352196Smckusick nd->nd_retxid = *tl++; 200452196Smckusick if (*tl++ != rpc_call) { 200552196Smckusick m_freem(mrep); 200652196Smckusick return (EBADRPC); 200752196Smckusick } 200852196Smckusick } else { 200952196Smckusick nfsm_dissect(tl, u_long *, 8*NFSX_UNSIGNED); 201052196Smckusick } 201152196Smckusick nd->nd_repstat = 0; 201252196Smckusick if (*tl++ != rpc_vers) { 201352196Smckusick nd->nd_repstat = ERPCMISMATCH; 201452196Smckusick nd->nd_procnum = NFSPROC_NOOP; 201541900Smckusick return (0); 201652196Smckusick } 201752196Smckusick nfsvers = nfs_vers; 201852196Smckusick if (*tl != nfs_prog) { 201952196Smckusick if (*tl == nqnfs_prog) { 202052196Smckusick nqnfs++; 202152196Smckusick nfsvers = nqnfs_vers; 202252196Smckusick } else { 202352196Smckusick nd->nd_repstat = EPROGUNAVAIL; 202452196Smckusick nd->nd_procnum = NFSPROC_NOOP; 202552196Smckusick return (0); 202652196Smckusick } 202752196Smckusick } 202852196Smckusick tl++; 202952196Smckusick if (*tl++ != nfsvers) { 203052196Smckusick nd->nd_repstat = EPROGMISMATCH; 203152196Smckusick nd->nd_procnum = NFSPROC_NOOP; 203252196Smckusick return (0); 203352196Smckusick } 203452196Smckusick nd->nd_procnum = fxdr_unsigned(u_long, *tl++); 203552196Smckusick if
(nd->nd_procnum == NFSPROC_NULL) 203652196Smckusick return (0); 203752196Smckusick if (nd->nd_procnum >= NFS_NPROCS || 203852196Smckusick (!nqnfs && nd->nd_procnum > NFSPROC_STATFS) || 203952196Smckusick (*tl != rpc_auth_unix && *tl != rpc_auth_kerb)) { 204052196Smckusick nd->nd_repstat = EPROCUNAVAIL; 204152196Smckusick nd->nd_procnum = NFSPROC_NOOP; 204252196Smckusick return (0); 204352196Smckusick } 204452196Smckusick auth_type = *tl++; 204552196Smckusick len = fxdr_unsigned(int, *tl++); 204652196Smckusick if (len < 0 || len > RPCAUTH_MAXSIZ) { 204752196Smckusick m_freem(mrep); 204852196Smckusick return (EBADRPC); 204952196Smckusick } 205041900Smckusick 205141900Smckusick /* 205252196Smckusick * Handle auth_unix or auth_kerb. 205341900Smckusick */ 205452196Smckusick if (auth_type == rpc_auth_unix) { 205552196Smckusick len = fxdr_unsigned(int, *++tl); 205652196Smckusick if (len < 0 || len > NFS_MAXNAMLEN) { 205752196Smckusick m_freem(mrep); 205852196Smckusick return (EBADRPC); 205952196Smckusick } 206052196Smckusick nfsm_adv(nfsm_rndup(len)); 206152196Smckusick nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED); 206252196Smckusick nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++); 206352196Smckusick nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++); 206452196Smckusick len = fxdr_unsigned(int, *tl); 206552196Smckusick if (len < 0 || len > RPCAUTH_UNIXGIDS) { 206652196Smckusick m_freem(mrep); 206752196Smckusick return (EBADRPC); 206852196Smckusick } 206952196Smckusick nfsm_dissect(tl, u_long *, (len + 2)*NFSX_UNSIGNED); 207052196Smckusick for (i = 1; i <= len; i++) 207152196Smckusick if (i < NGROUPS) 207252196Smckusick nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++); 207352196Smckusick else 207452196Smckusick tl++; 207552196Smckusick nd->nd_cr.cr_ngroups = (len >= NGROUPS) ?
NGROUPS : (len + 1); 207652196Smckusick } else if (auth_type == rpc_auth_kerb) { 207752196Smckusick nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++); 207852196Smckusick nd->nd_authlen = fxdr_unsigned(int, *tl);
/*
 * NOTE(review): nd_authlen comes straight from the request and only its
 * rounded-up size is bounded from above by the credential length; a
 * negative value does not look like it is rejected here, and the capacity
 * of nd_authstr is not visible in this part of the file -- confirm both.
 */
207952196Smckusick iov.iov_len = uio.uio_resid = nfsm_rndup(nd->nd_authlen); 208052196Smckusick if (uio.uio_resid > (len - 2*NFSX_UNSIGNED)) { 208152196Smckusick m_freem(mrep); 208252196Smckusick return (EBADRPC); 208352196Smckusick } 208452196Smckusick uio.uio_offset = 0; 208552196Smckusick uio.uio_iov = &iov; 208652196Smckusick uio.uio_iovcnt = 1; 208752196Smckusick uio.uio_segflg = UIO_SYSSPACE; 208852196Smckusick iov.iov_base = (caddr_t)nd->nd_authstr; 208952196Smckusick nfsm_mtouio(&uio, uio.uio_resid); 209052196Smckusick nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED); 209152196Smckusick nd->nd_flag |= NFSD_NEEDAUTH; 209252196Smckusick } 209352196Smckusick 209452196Smckusick /* 209552196Smckusick * Do we have any use for the verifier. 209652196Smckusick * According to the "Remote Procedure Call Protocol Spec." it 209752196Smckusick * should be AUTH_NULL, but some clients make it AUTH_UNIX? 209852196Smckusick * For now, just skip over it 209952196Smckusick */ 210052196Smckusick len = fxdr_unsigned(int, *++tl); 210152196Smckusick if (len < 0 || len > RPCAUTH_MAXSIZ) { 210252196Smckusick m_freem(mrep); 210352196Smckusick return (EBADRPC); 210452196Smckusick } 210552196Smckusick if (len > 0) { 210652196Smckusick nfsm_adv(nfsm_rndup(len)); 210752196Smckusick } 210852196Smckusick 210952196Smckusick /* 211052196Smckusick * For nqnfs, get piggybacked lease request.
211152196Smckusick */ 211252196Smckusick if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) { 211352196Smckusick nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); 211452196Smckusick nd->nd_nqlflag = fxdr_unsigned(int, *tl); 211552196Smckusick if (nd->nd_nqlflag) { 211652196Smckusick nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); 211752196Smckusick nd->nd_duration = fxdr_unsigned(int, *tl); 211852196Smckusick } else 211952196Smckusick nd->nd_duration = NQ_MINLEASE; 212052196Smckusick } else { 212152196Smckusick nd->nd_nqlflag = NQL_NOVAL; 212252196Smckusick nd->nd_duration = NQ_MINLEASE; 212352196Smckusick } 212452196Smckusick nd->nd_md = md; 212552196Smckusick nd->nd_dpos = dpos; 212641900Smckusick return (0); 212752196Smckusick nfsmout: 212852196Smckusick return (error); 212941900Smckusick } 213041900Smckusick 213141900Smckusick /* 213252196Smckusick * Search for a sleeping nfsd and wake it up. 213352196Smckusick * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the 213452196Smckusick * running nfsds will go look for the work in the nfssvc_sock list.
213541900Smckusick */ 213652196Smckusick void 213752196Smckusick nfsrv_wakenfsd(slp) 213852196Smckusick struct nfssvc_sock *slp; 213941900Smckusick { 214052196Smckusick register struct nfsd *nd = nfsd_head.nd_next; 214152196Smckusick 214252903Smckusick if ((slp->ns_flag & SLP_VALID) == 0) 214352903Smckusick return; 214452196Smckusick while (nd != (struct nfsd *)&nfsd_head) { 214552196Smckusick if (nd->nd_flag & NFSD_WAITING) { 214652196Smckusick nd->nd_flag &= ~NFSD_WAITING; 214752196Smckusick if (nd->nd_slp) 214852196Smckusick panic("nfsd wakeup"); 214952978Smckusick slp->ns_sref++; 215052196Smckusick nd->nd_slp = slp; 215152196Smckusick wakeup((caddr_t)nd); 215252196Smckusick return; 215352196Smckusick } 215452196Smckusick nd = nd->nd_next; 215552196Smckusick } 215652903Smckusick slp->ns_flag |= SLP_DOREC; 215752196Smckusick nfsd_head.nd_flag |= NFSD_CHECKSLP; 215841900Smckusick } 215952196Smckusick 216052196Smckusick nfs_msg(p, server, msg) 216152196Smckusick struct proc *p; 216252196Smckusick char *server, *msg; 216352196Smckusick { 216452196Smckusick tpr_t tpr; 216552196Smckusick 216652196Smckusick if (p) 216752196Smckusick tpr = tprintf_open(p); 216852196Smckusick else 216952196Smckusick tpr = NULL; 217052196Smckusick tprintf(tpr, "nfs server %s: %s\n", server, msg); 217152196Smckusick tprintf_close(tpr); 217252196Smckusick } 2173