138414Smckusick /* 247574Skarels * Copyright (c) 1989, 1991 The Regents of the University of California. 338414Smckusick * All rights reserved. 438414Smckusick * 538414Smckusick * This code is derived from software contributed to Berkeley by 638414Smckusick * Rick Macklem at The University of Guelph. 738414Smckusick * 844511Sbostic * %sccs.include.redist.c% 938414Smckusick * 10*52934Smckusick * @(#)nfs_socket.c 7.27 (Berkeley) 03/13/92 1138414Smckusick */ 1238414Smckusick 1338414Smckusick /* 1441900Smckusick * Socket operations for use by nfs 1538414Smckusick */ 1638414Smckusick 1752196Smckusick #include "types.h" 1838414Smckusick #include "param.h" 1952196Smckusick #include "uio.h" 2040117Smckusick #include "proc.h" 2152196Smckusick #include "signal.h" 2238414Smckusick #include "mount.h" 2338414Smckusick #include "kernel.h" 2438414Smckusick #include "malloc.h" 2538414Smckusick #include "mbuf.h" 2638414Smckusick #include "vnode.h" 2738414Smckusick #include "domain.h" 2838414Smckusick #include "protosw.h" 2938414Smckusick #include "socket.h" 3038414Smckusick #include "socketvar.h" 3147574Skarels #include "syslog.h" 3247737Skarels #include "tprintf.h" 3352196Smckusick #include "machine/endian.h" 3452196Smckusick #include "netinet/in.h" 3552196Smckusick #include "netinet/tcp.h" 3652196Smckusick #ifdef ISO 3752196Smckusick #include "netiso/iso.h" 3852196Smckusick #endif 3952196Smckusick #include "ufs/ufs/quota.h" 4052196Smckusick #include "ufs/ufs/ufsmount.h" 4138414Smckusick #include "rpcv2.h" 4238414Smckusick #include "nfsv2.h" 4338414Smckusick #include "nfs.h" 4438414Smckusick #include "xdr_subs.h" 4538414Smckusick #include "nfsm_subs.h" 4638414Smckusick #include "nfsmount.h" 4752196Smckusick #include "nfsnode.h" 4852196Smckusick #include "nfsrtt.h" 4952196Smckusick #include "nqnfs.h" 5038414Smckusick 5138414Smckusick #define TRUE 1 5243351Smckusick #define FALSE 0 5338414Smckusick 5452196Smckusick int netnetnet = sizeof (struct netaddrhash); 5540117Smckusick /* 5652196Smckusick * Estimate rto for an nfs rpc sent via. an unreliable datagram. 5752196Smckusick * Use the mean and mean deviation of rtt for the appropriate type of rpc 5852196Smckusick * for the frequent rpcs and a default for the others. 5952196Smckusick * The justification for doing "other" this way is that these rpcs 6052196Smckusick * happen so infrequently that timer est. would probably be stale. 6152196Smckusick * Also, since many of these rpcs are 6252196Smckusick * non-idempotent, a conservative timeout is desired. 6352196Smckusick * getattr, lookup - A+2D 6452196Smckusick * read, write - A+4D 6552196Smckusick * other - nm_timeo 6652196Smckusick */ 6752196Smckusick #define NFS_RTO(n, t) \ 6852196Smckusick ((t) == 0 ? (n)->nm_timeo : \ 6952196Smckusick ((t) < 3 ? \ 7052196Smckusick (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \ 7152196Smckusick ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1))) 7252196Smckusick #define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1] 7352196Smckusick #define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1] 7452196Smckusick /* 7538414Smckusick * External data, mostly RPC constants in XDR form 7638414Smckusick */ 7738414Smckusick extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix, 7852196Smckusick rpc_msgaccepted, rpc_call, rpc_autherr, rpc_rejectedcred, 7952196Smckusick rpc_auth_kerb; 8052196Smckusick extern u_long nfs_prog, nfs_vers, nqnfs_prog, nqnfs_vers; 8152196Smckusick extern time_t nqnfsstarttime; 8241900Smckusick extern int nonidempotent[NFS_NPROCS]; 8352196Smckusick 8452196Smckusick /* 8552196Smckusick * Maps errno values to nfs error numbers. 8652196Smckusick * Use NFSERR_IO as the catch all for ones not specifically defined in 8752196Smckusick * RFC 1094. 8852196Smckusick */ 8952196Smckusick static int nfsrv_errmap[ELAST] = { 9052196Smckusick NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO, 9152196Smckusick NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 9252196Smckusick NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO, 9352196Smckusick NFSERR_IO, NFSERR_EXIST, NFSERR_IO, NFSERR_NODEV, NFSERR_NOTDIR, 9452196Smckusick NFSERR_ISDIR, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 9552196Smckusick NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS, 9652196Smckusick NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 9752196Smckusick NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 9852196Smckusick NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 9952196Smckusick NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 10052196Smckusick NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 10152196Smckusick NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 10252196Smckusick NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO, 10352196Smckusick NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE, 10452196Smckusick NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 10552196Smckusick NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 10652196Smckusick NFSERR_IO, 10745281Smckusick }; 10852196Smckusick 10952196Smckusick /* 11052196Smckusick * Defines which timer to use for the procnum. 11152196Smckusick * 0 - default 11252196Smckusick * 1 - getattr 11352196Smckusick * 2 - lookup 11452196Smckusick * 3 - read 11552196Smckusick * 4 - write 11652196Smckusick */ 11752196Smckusick static int proct[NFS_NPROCS] = { 11852196Smckusick 0, 1, 0, 0, 2, 3, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 0, 0, 11952196Smckusick }; 12052196Smckusick 12152196Smckusick /* 12252196Smckusick * There is a congestion window for outstanding rpcs maintained per mount 12352196Smckusick * point. The cwnd size is adjusted in roughly the way that: 12452196Smckusick * Van Jacobson, Congestion avoidance and Control, In "Proceedings of 12552196Smckusick * SIGCOMM '88". ACM, August 1988. 12652196Smckusick * describes for TCP. The cwnd size is chopped in half on a retransmit timeout 12752196Smckusick * and incremented by 1/cwnd when each rpc reply is received and a full cwnd 12852196Smckusick * of rpcs is in progress. 12952196Smckusick * (The sent count and cwnd are scaled for integer arith.) 13052196Smckusick * Variants of "slow start" were tried and were found to be too much of a 13152196Smckusick * performance hit (ave. rtt 3 times larger), 13252196Smckusick * I suspect due to the large rtt that nfs rpcs have. 13352196Smckusick */ 13452196Smckusick #define NFS_CWNDSCALE 256 13552196Smckusick #define NFS_MAXCWND (NFS_CWNDSCALE * 32) 13652196Smckusick static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, }; 13741900Smckusick int nfs_sbwait(); 13852196Smckusick void nfs_disconnect(), nfs_realign(), nfsrv_wakenfsd(), nfs_sndunlock(); 13952196Smckusick void nfs_rcvunlock(), nqnfs_serverd(); 14052196Smckusick struct mbuf *nfsm_rpchead(); 14152196Smckusick int nfsrtton = 0; 14252196Smckusick struct nfsrtt nfsrtt; 14352196Smckusick struct nfsd nfsd_head; 14441900Smckusick 14538414Smckusick int nfsrv_null(), 14638414Smckusick nfsrv_getattr(), 14738414Smckusick nfsrv_setattr(), 14838414Smckusick nfsrv_lookup(), 14938414Smckusick nfsrv_readlink(), 15038414Smckusick nfsrv_read(), 15138414Smckusick nfsrv_write(), 15238414Smckusick nfsrv_create(), 15338414Smckusick nfsrv_remove(), 15438414Smckusick nfsrv_rename(), 15538414Smckusick nfsrv_link(), 15638414Smckusick nfsrv_symlink(), 15738414Smckusick nfsrv_mkdir(), 15838414Smckusick nfsrv_rmdir(), 15938414Smckusick nfsrv_readdir(), 16038414Smckusick nfsrv_statfs(), 16152196Smckusick nfsrv_noop(), 16252196Smckusick nqnfsrv_readdirlook(), 16352196Smckusick nqnfsrv_getlease(), 16452196Smckusick nqnfsrv_vacated(); 16538414Smckusick 16638414Smckusick int (*nfsrv_procs[NFS_NPROCS])() = { 16738414Smckusick nfsrv_null, 16838414Smckusick nfsrv_getattr, 16938414Smckusick nfsrv_setattr, 17038414Smckusick nfsrv_noop, 17138414Smckusick nfsrv_lookup, 17238414Smckusick nfsrv_readlink, 17338414Smckusick nfsrv_read, 17438414Smckusick nfsrv_noop, 17538414Smckusick nfsrv_write, 17638414Smckusick nfsrv_create, 17738414Smckusick nfsrv_remove, 17838414Smckusick nfsrv_rename, 17938414Smckusick nfsrv_link, 18038414Smckusick nfsrv_symlink, 18138414Smckusick nfsrv_mkdir, 18238414Smckusick nfsrv_rmdir, 18338414Smckusick nfsrv_readdir, 18438414Smckusick nfsrv_statfs, 18552196Smckusick nqnfsrv_readdirlook, 18652196Smckusick nqnfsrv_getlease, 18752196Smckusick nqnfsrv_vacated, 18838414Smckusick }; 18938414Smckusick 19040117Smckusick struct nfsreq nfsreqh; 19138414Smckusick 19238414Smckusick /* 19341900Smckusick * Initialize sockets and congestion for a new NFS connection. 19440117Smckusick * We do not free the sockaddr if error. 19538414Smckusick */ 19652196Smckusick nfs_connect(nmp, rep) 19740117Smckusick register struct nfsmount *nmp; 19852196Smckusick struct nfsreq *rep; 19940117Smckusick { 20041900Smckusick register struct socket *so; 20152196Smckusick int s, error, rcvreserve, sndreserve; 20240117Smckusick struct mbuf *m; 20340117Smckusick 20441900Smckusick nmp->nm_so = (struct socket *)0; 20541900Smckusick if (error = socreate(mtod(nmp->nm_nam, struct sockaddr *)->sa_family, 20641900Smckusick &nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto)) 20740117Smckusick goto bad; 20841900Smckusick so = nmp->nm_so; 20941900Smckusick nmp->nm_soflags = so->so_proto->pr_flags; 21040117Smckusick 21141900Smckusick /* 21241900Smckusick * Protocols that do not require connections may be optionally left 21341900Smckusick * unconnected for servers that reply from a port other than NFS_PORT. 21441900Smckusick */ 21541900Smckusick if (nmp->nm_flag & NFSMNT_NOCONN) { 21641900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) { 21741900Smckusick error = ENOTCONN; 21840117Smckusick goto bad; 21940117Smckusick } 22041900Smckusick } else { 22141900Smckusick if (error = soconnect(so, nmp->nm_nam)) 22240117Smckusick goto bad; 22341900Smckusick 22441900Smckusick /* 22541900Smckusick * Wait for the connection to complete. Cribbed from the 22652196Smckusick * connect system call but with the wait timing out so 22752196Smckusick * that interruptible mounts don't hang here for a long time. 22841900Smckusick */ 22941900Smckusick s = splnet(); 23052196Smckusick while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 23152196Smckusick (void) tsleep((caddr_t)&so->so_timeo, PSOCK, 23252196Smckusick "nfscon", 2 * hz); 23352196Smckusick if ((so->so_state & SS_ISCONNECTING) && 23452196Smckusick so->so_error == 0 && rep && 23552196Smckusick (error = nfs_sigintr(nmp, rep, rep->r_procp))) { 23652196Smckusick so->so_state &= ~SS_ISCONNECTING; 23752196Smckusick splx(s); 23852196Smckusick goto bad; 23952196Smckusick } 24052196Smckusick } 24141900Smckusick if (so->so_error) { 24241900Smckusick error = so->so_error; 24352196Smckusick so->so_error = 0; 24452196Smckusick splx(s); 24541900Smckusick goto bad; 24641900Smckusick } 24752196Smckusick splx(s); 24840117Smckusick } 24952196Smckusick if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) { 25052196Smckusick so->so_rcv.sb_timeo = (5 * hz); 25152196Smckusick so->so_snd.sb_timeo = (5 * hz); 25252196Smckusick } else { 25352196Smckusick so->so_rcv.sb_timeo = 0; 25452196Smckusick so->so_snd.sb_timeo = 0; 25552196Smckusick } 25641900Smckusick if (nmp->nm_sotype == SOCK_DGRAM) { 25752196Smckusick sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR; 25852196Smckusick rcvreserve = nmp->nm_rsize + NFS_MAXPKTHDR; 25952196Smckusick } else if (nmp->nm_sotype == SOCK_SEQPACKET) { 26052196Smckusick sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2; 26152196Smckusick rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * 2; 26241900Smckusick } else { 26352196Smckusick if (nmp->nm_sotype != SOCK_STREAM) 26452196Smckusick panic("nfscon sotype"); 26541900Smckusick if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 26641900Smckusick MGET(m, M_WAIT, MT_SOOPTS); 26741900Smckusick *mtod(m, int *) = 1; 26841900Smckusick m->m_len = sizeof(int); 26941900Smckusick sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m); 27041900Smckusick } 27152196Smckusick if (so->so_proto->pr_protocol == IPPROTO_TCP) { 27241900Smckusick MGET(m, M_WAIT, MT_SOOPTS); 27341900Smckusick *mtod(m, int *) = 1; 27441900Smckusick m->m_len = sizeof(int); 27541900Smckusick sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m); 27641900Smckusick } 27752196Smckusick sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long)) 27852196Smckusick * 2; 27952196Smckusick rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long)) 28052196Smckusick * 2; 28141900Smckusick } 28252196Smckusick if (error = soreserve(so, sndreserve, rcvreserve)) 28352196Smckusick goto bad; 28441900Smckusick so->so_rcv.sb_flags |= SB_NOINTR; 28541900Smckusick so->so_snd.sb_flags |= SB_NOINTR; 28640117Smckusick 28741900Smckusick /* Initialize other non-zero congestion variables */ 28852196Smckusick nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] = 28952196Smckusick nmp->nm_srtt[4] = (NFS_TIMEO << 3); 29052196Smckusick nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] = 29152196Smckusick nmp->nm_sdrtt[3] = nmp->nm_sdrtt[4] = 0; 29252196Smckusick nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */ 29341900Smckusick nmp->nm_sent = 0; 29452196Smckusick nmp->nm_timeouts = 0; 29541900Smckusick return (0); 29640117Smckusick 29741900Smckusick bad: 29841900Smckusick nfs_disconnect(nmp); 29941900Smckusick return (error); 30041900Smckusick } 30140117Smckusick 30241900Smckusick /* 30341900Smckusick * Reconnect routine: 30441900Smckusick * Called when a connection is broken on a reliable protocol. 30541900Smckusick * - clean up the old socket 30641900Smckusick * - nfs_connect() again 30741900Smckusick * - set R_MUSTRESEND for all outstanding requests on mount point 30841900Smckusick * If this fails the mount point is DEAD! 30952196Smckusick * nb: Must be called with the nfs_sndlock() set on the mount point. 31041900Smckusick */ 31152196Smckusick nfs_reconnect(rep) 31241900Smckusick register struct nfsreq *rep; 31341900Smckusick { 31441900Smckusick register struct nfsreq *rp; 31552196Smckusick register struct nfsmount *nmp = rep->r_nmp; 31641900Smckusick int error; 31740117Smckusick 31852196Smckusick nfs_disconnect(nmp); 31952196Smckusick while (error = nfs_connect(nmp, rep)) { 32052196Smckusick if (error == EINTR || error == ERESTART) 32141900Smckusick return (EINTR); 32243351Smckusick (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0); 32340117Smckusick } 32441900Smckusick 32541900Smckusick /* 32641900Smckusick * Loop through outstanding request list and fix up all requests 32741900Smckusick * on old socket. 32841900Smckusick */ 32941900Smckusick rp = nfsreqh.r_next; 33041900Smckusick while (rp != &nfsreqh) { 33141900Smckusick if (rp->r_nmp == nmp) 33241900Smckusick rp->r_flags |= R_MUSTRESEND; 33341900Smckusick rp = rp->r_next; 33440117Smckusick } 33540117Smckusick return (0); 33640117Smckusick } 33740117Smckusick 33840117Smckusick /* 33940117Smckusick * NFS disconnect. Clean up and unlink. 34040117Smckusick */ 34141900Smckusick void 34240117Smckusick nfs_disconnect(nmp) 34340117Smckusick register struct nfsmount *nmp; 34440117Smckusick { 34541900Smckusick register struct socket *so; 34640117Smckusick 34741900Smckusick if (nmp->nm_so) { 34841900Smckusick so = nmp->nm_so; 34941900Smckusick nmp->nm_so = (struct socket *)0; 35041900Smckusick soshutdown(so, 2); 35141900Smckusick soclose(so); 35240117Smckusick } 35340117Smckusick } 35440117Smckusick 35540117Smckusick /* 35641900Smckusick * This is the nfs send routine. For connection based socket types, it 35752196Smckusick * must be called with an nfs_sndlock() on the socket. 35841900Smckusick * "rep == NULL" indicates that it has been called from a server. 35952196Smckusick * For the client side: 36052196Smckusick * - return EINTR if the RPC is terminated, 0 otherwise 36152196Smckusick * - set R_MUSTRESEND if the send fails for any reason 36252196Smckusick * - do any cleanup required by recoverable socket errors (???) 36352196Smckusick * For the server side: 36452196Smckusick * - return EINTR or ERESTART if interrupted by a signal 36552196Smckusick * - return EPIPE if a connection is lost for connection based sockets (TCP...) 36652196Smckusick * - do any cleanup required by recoverable socket errors (???) 36740117Smckusick */ 36841900Smckusick nfs_send(so, nam, top, rep) 36938414Smckusick register struct socket *so; 37038414Smckusick struct mbuf *nam; 37141900Smckusick register struct mbuf *top; 37241900Smckusick struct nfsreq *rep; 37338414Smckusick { 37441900Smckusick struct mbuf *sendnam; 37552196Smckusick int error, soflags, flags; 37638414Smckusick 37741900Smckusick if (rep) { 37841900Smckusick if (rep->r_flags & R_SOFTTERM) { 37940117Smckusick m_freem(top); 38041900Smckusick return (EINTR); 38140117Smckusick } 38252196Smckusick if ((so = rep->r_nmp->nm_so) == NULL) { 38352196Smckusick rep->r_flags |= R_MUSTRESEND; 38452196Smckusick m_freem(top); 38552196Smckusick return (0); 38652196Smckusick } 38741900Smckusick rep->r_flags &= ~R_MUSTRESEND; 38841900Smckusick soflags = rep->r_nmp->nm_soflags; 38941900Smckusick } else 39041900Smckusick soflags = so->so_proto->pr_flags; 39141900Smckusick if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED)) 39241900Smckusick sendnam = (struct mbuf *)0; 39341900Smckusick else 39441900Smckusick sendnam = nam; 39552196Smckusick if (so->so_type == SOCK_SEQPACKET) 39652196Smckusick flags = MSG_EOR; 39752196Smckusick else 39852196Smckusick flags = 0; 39941900Smckusick 40041900Smckusick error = sosend(so, sendnam, (struct uio *)0, top, 40152196Smckusick (struct mbuf *)0, flags); 40252196Smckusick if (error) { 40352196Smckusick if (rep) { 404*52934Smckusick log(LOG_INFO, "nfs send error %d for server %s\n",error, 405*52934Smckusick rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 40652196Smckusick /* 40752196Smckusick * Deal with errors for the client side. 40852196Smckusick */ 40952196Smckusick if (rep->r_flags & R_SOFTTERM) 41052196Smckusick error = EINTR; 41152196Smckusick else 41252196Smckusick rep->r_flags |= R_MUSTRESEND; 413*52934Smckusick } else 414*52934Smckusick log(LOG_INFO, "nfsd send error %d\n", error); 41552196Smckusick 41652196Smckusick /* 41752196Smckusick * Handle any recoverable (soft) socket errors here. (???) 41852196Smckusick */ 41952196Smckusick if (error != EINTR && error != ERESTART && 42052196Smckusick error != EWOULDBLOCK && error != EPIPE) 42141900Smckusick error = 0; 42238414Smckusick } 42338414Smckusick return (error); 42438414Smckusick } 42538414Smckusick 42638414Smckusick /* 42741900Smckusick * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all 42841900Smckusick * done by soreceive(), but for SOCK_STREAM we must deal with the Record 42941900Smckusick * Mark and consolidate the data into a new mbuf list. 43041900Smckusick * nb: Sometimes TCP passes the data up to soreceive() in long lists of 43141900Smckusick * small mbufs. 43241900Smckusick * For SOCK_STREAM we must be very careful to read an entire record once 43341900Smckusick * we have read any of it, even if the system call has been interrupted. 43438414Smckusick */ 43552196Smckusick nfs_receive(rep, aname, mp) 43652196Smckusick register struct nfsreq *rep; 43738414Smckusick struct mbuf **aname; 43838414Smckusick struct mbuf **mp; 43938414Smckusick { 44052196Smckusick register struct socket *so; 44141900Smckusick struct uio auio; 44241900Smckusick struct iovec aio; 44338414Smckusick register struct mbuf *m; 44452196Smckusick struct mbuf *control; 44541900Smckusick u_long len; 44641900Smckusick struct mbuf **getnam; 44752196Smckusick int error, sotype, rcvflg; 44852932Smckusick struct proc *p = curproc; /* XXX */ 44938414Smckusick 45041900Smckusick /* 45141900Smckusick * Set up arguments for soreceive() 45241900Smckusick */ 45341900Smckusick *mp = (struct mbuf *)0; 45441900Smckusick *aname = (struct mbuf *)0; 45552196Smckusick sotype = rep->r_nmp->nm_sotype; 45638414Smckusick 45741900Smckusick /* 45841900Smckusick * For reliable protocols, lock against other senders/receivers 45941900Smckusick * in case a reconnect is necessary. 46041900Smckusick * For SOCK_STREAM, first get the Record Mark to find out how much 46141900Smckusick * more there is to get. 46241900Smckusick * We must lock the socket against other receivers 46341900Smckusick * until we have an entire rpc request/reply. 46441900Smckusick */ 46552196Smckusick if (sotype != SOCK_DGRAM) { 46652196Smckusick if (error = nfs_sndlock(&rep->r_nmp->nm_flag, rep)) 46752196Smckusick return (error); 46841900Smckusick tryagain: 46941900Smckusick /* 47041900Smckusick * Check for fatal errors and resending request. 47141900Smckusick */ 47252196Smckusick /* 47352196Smckusick * Ugh: If a reconnect attempt just happened, nm_so 47452196Smckusick * would have changed. NULL indicates a failed 47552196Smckusick * attempt that has essentially shut down this 47652196Smckusick * mount point. 47752196Smckusick */ 47852196Smckusick if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) { 47952196Smckusick nfs_sndunlock(&rep->r_nmp->nm_flag); 48052196Smckusick return (EINTR); 48152196Smckusick } 48252196Smckusick if ((so = rep->r_nmp->nm_so) == NULL) { 48352196Smckusick if (error = nfs_reconnect(rep)) { 48452196Smckusick nfs_sndunlock(&rep->r_nmp->nm_flag); 48552196Smckusick return (error); 48640117Smckusick } 48752196Smckusick goto tryagain; 48841900Smckusick } 48952196Smckusick while (rep->r_flags & R_MUSTRESEND) { 49052196Smckusick m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); 49152196Smckusick nfsstats.rpcretries++; 49252196Smckusick if (error = nfs_send(so, rep->r_nmp->nm_nam, m, rep)) { 49352196Smckusick if (error == EINTR || error == ERESTART || 49452196Smckusick (error = nfs_reconnect(rep))) { 49552196Smckusick nfs_sndunlock(&rep->r_nmp->nm_flag); 49652196Smckusick return (error); 49752196Smckusick } 49852196Smckusick goto tryagain; 49952196Smckusick } 50052196Smckusick } 50152196Smckusick nfs_sndunlock(&rep->r_nmp->nm_flag); 50252196Smckusick if (sotype == SOCK_STREAM) { 50341900Smckusick aio.iov_base = (caddr_t) &len; 50441900Smckusick aio.iov_len = sizeof(u_long); 50541900Smckusick auio.uio_iov = &aio; 50641900Smckusick auio.uio_iovcnt = 1; 50741900Smckusick auio.uio_segflg = UIO_SYSSPACE; 50841900Smckusick auio.uio_rw = UIO_READ; 50941900Smckusick auio.uio_offset = 0; 51041900Smckusick auio.uio_resid = sizeof(u_long); 51152932Smckusick auio.uio_procp = p; 51241900Smckusick do { 51352196Smckusick rcvflg = MSG_WAITALL; 51452196Smckusick error = soreceive(so, (struct mbuf **)0, &auio, 51541900Smckusick (struct mbuf **)0, (struct mbuf **)0, &rcvflg); 51652196Smckusick if (error == EWOULDBLOCK && rep) { 51741900Smckusick if (rep->r_flags & R_SOFTTERM) 51841900Smckusick return (EINTR); 51952196Smckusick } 52041900Smckusick } while (error == EWOULDBLOCK); 52147737Skarels if (!error && auio.uio_resid > 0) { 522*52934Smckusick log(LOG_INFO, 523*52934Smckusick "short receive (%d/%d) from nfs server %s\n", 524*52934Smckusick sizeof(u_long) - auio.uio_resid, 525*52934Smckusick sizeof(u_long), 52647737Skarels rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 52747737Skarels error = EPIPE; 52847737Skarels } 52940761Skarels if (error) 53041900Smckusick goto errout; 53141900Smckusick len = ntohl(len) & ~0x80000000; 53241900Smckusick /* 53341900Smckusick * This is SERIOUS! We are out of sync with the sender 53441900Smckusick * and forcing a disconnect/reconnect is all I can do. 53541900Smckusick */ 53641900Smckusick if (len > NFS_MAXPACKET) { 537*52934Smckusick log(LOG_ERR, "%s (%d) from nfs server %s\n", 538*52934Smckusick "impossible packet length", 539*52934Smckusick len, 540*52934Smckusick rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 54147737Skarels error = EFBIG; 54247737Skarels goto errout; 54341900Smckusick } 54441900Smckusick auio.uio_resid = len; 54541900Smckusick do { 54647737Skarels rcvflg = MSG_WAITALL; 54741900Smckusick error = soreceive(so, (struct mbuf **)0, 54841900Smckusick &auio, mp, (struct mbuf **)0, &rcvflg); 54941900Smckusick } while (error == EWOULDBLOCK || error == EINTR || 55041900Smckusick error == ERESTART); 55147737Skarels if (!error && auio.uio_resid > 0) { 552*52934Smckusick log(LOG_INFO, 553*52934Smckusick "short receive (%d/%d) from nfs server %s\n", 554*52934Smckusick len - auio.uio_resid, len, 555*52934Smckusick rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 55647737Skarels error = EPIPE; 55747737Skarels } 55840117Smckusick } else { 55952196Smckusick /* 56052196Smckusick * NB: Since uio_resid is big, MSG_WAITALL is ignored 56152196Smckusick * and soreceive() will return when it has either a 56252196Smckusick * control msg or a data msg. 56352196Smckusick * We have no use for control msg., but must grab them 56452196Smckusick * and then throw them away so we know what is going 56552196Smckusick * on. 56652196Smckusick */ 56752196Smckusick auio.uio_resid = len = 100000000; /* Anything Big */ 56852932Smckusick auio.uio_procp = p; 56941900Smckusick do { 57047737Skarels rcvflg = 0; 57141900Smckusick error = soreceive(so, (struct mbuf **)0, 57252196Smckusick &auio, mp, &control, &rcvflg); 57352196Smckusick if (control) 57452196Smckusick m_freem(control); 57541900Smckusick if (error == EWOULDBLOCK && rep) { 57641900Smckusick if (rep->r_flags & R_SOFTTERM) 57741900Smckusick return (EINTR); 57841900Smckusick } 57952196Smckusick } while (error == EWOULDBLOCK || 58052196Smckusick (!error && *mp == NULL && control)); 58152196Smckusick if ((rcvflg & MSG_EOR) == 0) 58252196Smckusick printf("Egad!!\n"); 58341900Smckusick if (!error && *mp == NULL) 58441900Smckusick error = EPIPE; 58541900Smckusick len -= auio.uio_resid; 58640117Smckusick } 58741900Smckusick errout: 58852196Smckusick if (error && error != EINTR && error != ERESTART) { 58941900Smckusick m_freem(*mp); 59041900Smckusick *mp = (struct mbuf *)0; 591*52934Smckusick if (error != EPIPE) 59247737Skarels log(LOG_INFO, 59347737Skarels "receive error %d from nfs server %s\n", 59447737Skarels error, 59547737Skarels rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 59652196Smckusick error = nfs_sndlock(&rep->r_nmp->nm_flag, rep); 59741900Smckusick if (!error) 59852196Smckusick error = nfs_reconnect(rep); 59952196Smckusick if (!error) 60041900Smckusick goto tryagain; 60140117Smckusick } 60241900Smckusick } else { 60352196Smckusick if ((so = rep->r_nmp->nm_so) == NULL) 60452196Smckusick return (EACCES); 60541900Smckusick if (so->so_state & SS_ISCONNECTED) 60641900Smckusick getnam = (struct mbuf **)0; 60741900Smckusick else 60841900Smckusick getnam = aname; 60941900Smckusick auio.uio_resid = len = 1000000; 61052932Smckusick auio.uio_procp = p; 61141900Smckusick do { 61247737Skarels rcvflg = 0; 61341900Smckusick error = soreceive(so, getnam, &auio, mp, 61441900Smckusick (struct mbuf **)0, &rcvflg); 61552196Smckusick if (error == EWOULDBLOCK && 61641900Smckusick (rep->r_flags & R_SOFTTERM)) 61741900Smckusick return (EINTR); 61841900Smckusick } while (error == EWOULDBLOCK); 61941900Smckusick len -= auio.uio_resid; 62041900Smckusick } 62141900Smckusick if (error) { 62241900Smckusick m_freem(*mp); 62341900Smckusick *mp = (struct mbuf *)0; 62441900Smckusick } 62541900Smckusick /* 62652196Smckusick * Search for any mbufs that are not a multiple of 4 bytes long 62752196Smckusick * or with m_data not longword aligned. 62841900Smckusick * These could cause pointer alignment problems, so copy them to 62941900Smckusick * well aligned mbufs. 63041900Smckusick */ 63152196Smckusick nfs_realign(*mp, 5 * NFSX_UNSIGNED); 63238414Smckusick return (error); 63338414Smckusick } 63438414Smckusick 63538414Smckusick /* 63641900Smckusick * Implement receipt of reply on a socket. 63738414Smckusick * We must search through the list of received datagrams matching them 63838414Smckusick * with outstanding requests using the xid, until ours is found. 63938414Smckusick */ 64041900Smckusick /* ARGSUSED */ 64152196Smckusick nfs_reply(myrep) 64239344Smckusick struct nfsreq *myrep; 64338414Smckusick { 64438414Smckusick register struct nfsreq *rep; 64552196Smckusick register struct nfsmount *nmp = myrep->r_nmp; 64652196Smckusick register long t1; 64752196Smckusick struct mbuf *mrep, *nam, *md; 64852196Smckusick u_long rxid, *tl; 64952196Smckusick caddr_t dpos, cp2; 65052196Smckusick int error; 65138414Smckusick 65241900Smckusick /* 65341900Smckusick * Loop around until we get our own reply 65441900Smckusick */ 65541900Smckusick for (;;) { 65641900Smckusick /* 65741900Smckusick * Lock against other receivers so that I don't get stuck in 65841900Smckusick * sbwait() after someone else has received my reply for me. 65941900Smckusick * Also necessary for connection based protocols to avoid 66041900Smckusick * race conditions during a reconnect. 66141900Smckusick */ 66252196Smckusick if (error = nfs_rcvlock(myrep)) 66352196Smckusick return (error); 66441900Smckusick /* Already received, bye bye */ 66541900Smckusick if (myrep->r_mrep != NULL) { 66652196Smckusick nfs_rcvunlock(&nmp->nm_flag); 66741900Smckusick return (0); 66840117Smckusick } 66941900Smckusick /* 67041900Smckusick * Get the next Rpc reply off the socket 67141900Smckusick */ 67252196Smckusick error = nfs_receive(myrep, &nam, &mrep); 67352196Smckusick nfs_rcvunlock(&nmp->nm_flag); 67452196Smckusick if (error) printf("rcv err=%d\n",error); 67552196Smckusick if (error) { 67638414Smckusick 67741900Smckusick /* 67841900Smckusick * Ignore routing errors on connectionless protocols?? 67941900Smckusick */ 68041900Smckusick if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) { 68141900Smckusick nmp->nm_so->so_error = 0; 68241900Smckusick continue; 68341900Smckusick } 68441900Smckusick return (error); 68538414Smckusick } 68652196Smckusick if (nam) 68752196Smckusick m_freem(nam); 68841900Smckusick 68941900Smckusick /* 69041900Smckusick * Get the xid and check that it is an rpc reply 69141900Smckusick */ 69252196Smckusick md = mrep; 69352196Smckusick dpos = mtod(md, caddr_t); 69452196Smckusick nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED); 69552196Smckusick rxid = *tl++; 69652196Smckusick if (*tl != rpc_reply) { 69752196Smckusick if (nmp->nm_flag & NFSMNT_NQNFS) { 69852196Smckusick if (nqnfs_callback(nmp, mrep, md, dpos)) 69952196Smckusick nfsstats.rpcinvalid++; 70052196Smckusick } else { 70152196Smckusick nfsstats.rpcinvalid++; 70252196Smckusick m_freem(mrep); 70352196Smckusick } 70452196Smckusick nfsmout: 70541900Smckusick continue; 70638414Smckusick } 70752196Smckusick 70841900Smckusick /* 70941900Smckusick * Loop through the request list to match up the reply 71041900Smckusick * Iff no match, just drop the datagram 71141900Smckusick */ 71241900Smckusick rep = nfsreqh.r_next; 71341900Smckusick while (rep != &nfsreqh) { 71445281Smckusick if (rep->r_mrep == NULL && rxid == rep->r_xid) { 71541900Smckusick /* Found it.. */ 71652196Smckusick rep->r_mrep = mrep; 71752196Smckusick rep->r_md = md; 71852196Smckusick rep->r_dpos = dpos; 71952196Smckusick if (nfsrtton) { 72052196Smckusick struct rttl *rt; 72152196Smckusick 72252196Smckusick rt = &nfsrtt.rttl[nfsrtt.pos]; 72352196Smckusick rt->proc = rep->r_procnum; 72452196Smckusick rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]); 72552196Smckusick rt->sent = nmp->nm_sent; 72652196Smckusick rt->cwnd = nmp->nm_cwnd; 72752196Smckusick rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1]; 72852196Smckusick rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1]; 72952196Smckusick rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid; 73052196Smckusick rt->tstamp = time; 73152196Smckusick if (rep->r_flags & R_TIMING) 73252196Smckusick rt->rtt = rep->r_rtt; 73352196Smckusick else 73452196Smckusick rt->rtt = 1000000; 73552196Smckusick nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ; 73652196Smckusick } 73741900Smckusick /* 73852196Smckusick * Update congestion window. 73952196Smckusick * Do the additive increase of 74052196Smckusick * one rpc/rtt. 74141900Smckusick */ 74252196Smckusick if (nmp->nm_cwnd <= nmp->nm_sent) { 74352196Smckusick nmp->nm_cwnd += 74452196Smckusick (NFS_CWNDSCALE * NFS_CWNDSCALE + 74552196Smckusick (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd; 74652196Smckusick if (nmp->nm_cwnd > NFS_MAXCWND) 74752196Smckusick nmp->nm_cwnd = NFS_MAXCWND; 74852196Smckusick } 74952196Smckusick nmp->nm_sent -= NFS_CWNDSCALE; 75052196Smckusick /* 75152196Smckusick * Update rtt using a gain of 0.125 on the mean 75252196Smckusick * and a gain of 0.25 on the deviation. 75352196Smckusick */ 75441900Smckusick if (rep->r_flags & R_TIMING) { 75552196Smckusick /* 75652196Smckusick * Since the timer resolution of 75752196Smckusick * NFS_HZ is so course, it can often 75852196Smckusick * result in r_rtt == 0. Since 75952196Smckusick * r_rtt == N means that the actual 76052196Smckusick * rtt is between N+dt and N+2-dt ticks, 76152196Smckusick * add 1. 76252196Smckusick */ 76352196Smckusick t1 = rep->r_rtt + 1; 76452196Smckusick t1 -= (NFS_SRTT(rep) >> 3); 76552196Smckusick NFS_SRTT(rep) += t1; 76652196Smckusick if (t1 < 0) 76752196Smckusick t1 = -t1; 76852196Smckusick t1 -= (NFS_SDRTT(rep) >> 2); 76952196Smckusick NFS_SDRTT(rep) += t1; 77041900Smckusick } 77152196Smckusick nmp->nm_timeouts = 0; 77240117Smckusick break; 77338414Smckusick } 77441900Smckusick rep = rep->r_next; 77538414Smckusick } 77641900Smckusick /* 77741900Smckusick * If not matched to a request, drop it. 77841900Smckusick * If it's mine, get out. 77941900Smckusick */ 78041900Smckusick if (rep == &nfsreqh) { 78141900Smckusick nfsstats.rpcunexpected++; 78252196Smckusick m_freem(mrep); 78341900Smckusick } else if (rep == myrep) 78441900Smckusick return (0); 78538414Smckusick } 78638414Smckusick } 78738414Smckusick 78838414Smckusick /* 78938414Smckusick * nfs_request - goes something like this 79038414Smckusick * - fill in request struct 79138414Smckusick * - links it into list 79241900Smckusick * - calls nfs_send() for first transmit 79341900Smckusick * - calls nfs_receive() to get reply 79438414Smckusick * - break down rpc header and return with nfs reply pointed to 79538414Smckusick * by mrep or error 79638414Smckusick * nb: always frees up mreq mbuf list 79738414Smckusick */ 79852196Smckusick nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp) 79938414Smckusick struct vnode *vp; 80052196Smckusick struct mbuf *mrest; 80141900Smckusick int procnum; 80241900Smckusick struct proc *procp; 80352196Smckusick struct ucred *cred; 80438414Smckusick struct mbuf **mrp; 80538414Smckusick struct mbuf **mdp; 80638414Smckusick caddr_t *dposp; 80738414Smckusick { 80838414Smckusick register struct mbuf *m, *mrep; 80938414Smckusick register struct nfsreq *rep; 81048048Smckusick register u_long *tl; 81152196Smckusick register int i; 81241900Smckusick struct nfsmount *nmp; 81352196Smckusick struct mbuf *md, *mheadend; 81439344Smckusick struct nfsreq *reph; 81552196Smckusick struct nfsnode *tp, *np; 81652196Smckusick time_t reqtime, waituntil; 81752196Smckusick caddr_t dpos, cp2; 81852196Smckusick int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type; 81952196Smckusick int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0; 82052196Smckusick u_long xid; 82152196Smckusick char *auth_str; 82238414Smckusick 82352196Smckusick nmp = VFSTONFS(vp->v_mount); 82438414Smckusick MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK); 82541900Smckusick rep->r_nmp = nmp; 82638414Smckusick rep->r_vp = vp; 82741900Smckusick rep->r_procp = procp; 82852196Smckusick rep->r_procnum = procnum; 82952196Smckusick i = 0; 83052196Smckusick m = mrest; 83138414Smckusick while (m) { 83252196Smckusick i += m->m_len; 83338414Smckusick m = m->m_next; 83438414Smckusick } 83552196Smckusick mrest_len = i; 83652196Smckusick 83752196Smckusick /* 83852196Smckusick * Get the RPC header with authorization. 83952196Smckusick */ 84052196Smckusick kerbauth: 84152196Smckusick auth_str = (char *)0; 84252196Smckusick if (nmp->nm_flag & NFSMNT_KERB) { 84352196Smckusick if (failed_auth) { 84452196Smckusick error = nfs_getauth(nmp, rep, cred, &auth_type, 84552196Smckusick &auth_str, &auth_len); 84652196Smckusick if (error) { 84752196Smckusick free((caddr_t)rep, M_NFSREQ); 84852196Smckusick m_freem(mrest); 84952196Smckusick return (error); 85052196Smckusick } 85152196Smckusick } else { 85252196Smckusick auth_type = RPCAUTH_UNIX; 85352196Smckusick auth_len = 5 * NFSX_UNSIGNED; 85445281Smckusick } 85552196Smckusick } else { 85652196Smckusick auth_type = RPCAUTH_UNIX; 85752196Smckusick auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ? 85852196Smckusick nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) + 85952196Smckusick 5 * NFSX_UNSIGNED; 86045281Smckusick } 86152196Smckusick m = nfsm_rpchead(cred, (nmp->nm_flag & NFSMNT_NQNFS), procnum, 86252196Smckusick auth_type, auth_len, auth_str, mrest, mrest_len, &mheadend, &xid); 86352196Smckusick if (auth_str) 86452196Smckusick free(auth_str, M_TEMP); 86552196Smckusick 86641900Smckusick /* 86752196Smckusick * For stream protocols, insert a Sun RPC Record Mark. 86841900Smckusick */ 86952196Smckusick if (nmp->nm_sotype == SOCK_STREAM) { 87052196Smckusick M_PREPEND(m, NFSX_UNSIGNED, M_WAIT); 87152196Smckusick *mtod(m, u_long *) = htonl(0x80000000 | 87252196Smckusick (m->m_pkthdr.len - NFSX_UNSIGNED)); 87341900Smckusick } 87452196Smckusick rep->r_mreq = m; 87552196Smckusick rep->r_xid = xid; 87652196Smckusick tryagain: 87752196Smckusick if (nmp->nm_flag & NFSMNT_SOFT) 87852196Smckusick rep->r_retry = nmp->nm_retry; 87952196Smckusick else 88052196Smckusick rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */ 88152196Smckusick rep->r_rtt = rep->r_rexmit = 0; 88252196Smckusick if (proct[procnum] > 0) 88352196Smckusick rep->r_flags = R_TIMING; 88452196Smckusick else 88552196Smckusick rep->r_flags = 0; 88652196Smckusick rep->r_mrep = NULL; 88738414Smckusick 88840117Smckusick /* 88940117Smckusick * Do the client side RPC. 89040117Smckusick */ 89140117Smckusick nfsstats.rpcrequests++; 89241900Smckusick /* 89341900Smckusick * Chain request into list of outstanding requests. Be sure 89441900Smckusick * to put it LAST so timer finds oldest requests first. 89541900Smckusick */ 89652196Smckusick s = splsoftclock(); 89739344Smckusick reph = &nfsreqh; 89841900Smckusick reph->r_prev->r_next = rep; 89941900Smckusick rep->r_prev = reph->r_prev; 90039344Smckusick reph->r_prev = rep; 90139344Smckusick rep->r_next = reph; 90252196Smckusick 90352196Smckusick /* Get send time for nqnfs */ 90452196Smckusick reqtime = time.tv_sec; 90552196Smckusick 90640117Smckusick /* 90740117Smckusick * If backing off another request or avoiding congestion, don't 90840117Smckusick * send this one now but let timer do it. If not timing a request, 90940117Smckusick * do it now. 91040117Smckusick */ 91152196Smckusick if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM || 91252196Smckusick (nmp->nm_flag & NFSMNT_DUMBTIMR) || 91352196Smckusick nmp->nm_sent < nmp->nm_cwnd)) { 91440117Smckusick splx(s); 91541900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) 91652196Smckusick error = nfs_sndlock(&nmp->nm_flag, rep); 91752196Smckusick if (!error) { 91852196Smckusick m = m_copym(m, 0, M_COPYALL, M_WAIT); 91952196Smckusick error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep); 92052196Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) 92152196Smckusick nfs_sndunlock(&nmp->nm_flag); 92252196Smckusick } 92352196Smckusick if (!error && (rep->r_flags & R_MUSTRESEND) == 0) { 92452196Smckusick nmp->nm_sent += NFS_CWNDSCALE; 92552196Smckusick rep->r_flags |= R_SENT; 92652196Smckusick } 92752196Smckusick } else { 92841900Smckusick splx(s); 92952196Smckusick rep->r_rtt = -1; 93052196Smckusick } 93138414Smckusick 93238414Smckusick /* 93340117Smckusick * Wait for the reply from our send or the timer's. 93440117Smckusick */ 93541900Smckusick if (!error) 93652196Smckusick error = nfs_reply(rep); 93738414Smckusick 93840117Smckusick /* 93940117Smckusick * RPC done, unlink the request. 94040117Smckusick */ 94152196Smckusick s = splsoftclock(); 94238414Smckusick rep->r_prev->r_next = rep->r_next; 94339344Smckusick rep->r_next->r_prev = rep->r_prev; 94438414Smckusick splx(s); 94541900Smckusick 94641900Smckusick /* 94741900Smckusick * If there was a successful reply and a tprintf msg. 94841900Smckusick * tprintf a response. 94941900Smckusick */ 95047737Skarels if (!error && (rep->r_flags & R_TPRINTFMSG)) 95147737Skarels nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname, 95247737Skarels "is alive again"); 95345281Smckusick mrep = rep->r_mrep; 95452196Smckusick md = rep->r_md; 95552196Smckusick dpos = rep->r_dpos; 95652196Smckusick if (error) { 95752196Smckusick m_freem(rep->r_mreq); 95852196Smckusick free((caddr_t)rep, M_NFSREQ); 95938414Smckusick return (error); 96052196Smckusick } 96138414Smckusick 96238414Smckusick /* 96338414Smckusick * break down the rpc header and check if ok 96438414Smckusick */ 96552196Smckusick nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED); 96648048Smckusick if (*tl++ == rpc_msgdenied) { 96748048Smckusick if (*tl == rpc_mismatch) 96838414Smckusick error = EOPNOTSUPP; 96952196Smckusick else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) { 97052196Smckusick if (*tl == rpc_rejectedcred && failed_auth == 0) { 97152196Smckusick failed_auth++; 97252196Smckusick mheadend->m_next = (struct mbuf *)0; 97352196Smckusick m_freem(mrep); 97452196Smckusick m_freem(rep->r_mreq); 97552196Smckusick goto kerbauth; 97652196Smckusick } else 97752196Smckusick error = EAUTH; 97852196Smckusick } else 97938414Smckusick error = EACCES; 98038414Smckusick m_freem(mrep); 98152196Smckusick m_freem(rep->r_mreq); 98252196Smckusick free((caddr_t)rep, M_NFSREQ); 98338414Smckusick return (error); 98438414Smckusick } 98552196Smckusick 98638414Smckusick /* 98738414Smckusick * skip over the auth_verf, someday we may want to cache auth_short's 98838414Smckusick * for nfs_reqhead(), but for now just dump it 98938414Smckusick */ 99048048Smckusick if (*++tl != 0) { 99152196Smckusick i = nfsm_rndup(fxdr_unsigned(long, *tl)); 99252196Smckusick nfsm_adv(i); 99338414Smckusick } 99452196Smckusick nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); 99538414Smckusick /* 0 == ok */ 99648048Smckusick if (*tl == 0) { 99752196Smckusick nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); 99848048Smckusick if (*tl != 0) { 99948048Smckusick error = fxdr_unsigned(int, *tl); 100038414Smckusick m_freem(mrep); 100152196Smckusick if ((nmp->nm_flag & NFSMNT_NQNFS) && 100252196Smckusick error == NQNFS_TRYLATER) { 100352196Smckusick error = 0; 100452196Smckusick waituntil = time.tv_sec + trylater_delay; 100552196Smckusick while (time.tv_sec < waituntil) 100652196Smckusick (void) tsleep((caddr_t)&lbolt, 100752196Smckusick PSOCK, "nqnfstry", 0); 100852196Smckusick trylater_delay *= nfs_backoff[trylater_cnt]; 100952196Smckusick if (trylater_cnt < 7) 101052196Smckusick trylater_cnt++; 101152196Smckusick goto tryagain; 101252196Smckusick } 101352196Smckusick m_freem(rep->r_mreq); 101452196Smckusick free((caddr_t)rep, M_NFSREQ); 101538414Smckusick return (error); 101638414Smckusick } 101752196Smckusick 101852196Smckusick /* 101952196Smckusick * For nqnfs, get any lease in reply 102052196Smckusick */ 102152196Smckusick if (nmp->nm_flag & NFSMNT_NQNFS) { 102252196Smckusick nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); 102352196Smckusick if (*tl) { 102452196Smckusick np = VTONFS(vp); 102552196Smckusick nqlflag = fxdr_unsigned(int, *tl); 102652196Smckusick nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED); 102752196Smckusick cachable = fxdr_unsigned(int, *tl++); 102852196Smckusick reqtime += fxdr_unsigned(int, *tl++); 102952196Smckusick if (reqtime > time.tv_sec) { 103052196Smckusick if (np->n_tnext) { 103152196Smckusick if (np->n_tnext == (struct nfsnode *)nmp) 103252196Smckusick nmp->nm_tprev = np->n_tprev; 103352196Smckusick else 103452196Smckusick np->n_tnext->n_tprev = np->n_tprev; 103552196Smckusick if (np->n_tprev == (struct nfsnode *)nmp) 103652196Smckusick nmp->nm_tnext = np->n_tnext; 103752196Smckusick else 103852196Smckusick np->n_tprev->n_tnext = np->n_tnext; 103952196Smckusick if (nqlflag == NQL_WRITE) 104052196Smckusick np->n_flag |= NQNFSWRITE; 104152196Smckusick } else if (nqlflag == NQL_READ) 104252196Smckusick np->n_flag &= ~NQNFSWRITE; 104352196Smckusick else 104452196Smckusick np->n_flag |= NQNFSWRITE; 104552196Smckusick if (cachable) 104652196Smckusick np->n_flag &= ~NQNFSNONCACHE; 104752196Smckusick else 104852196Smckusick np->n_flag |= NQNFSNONCACHE; 104952196Smckusick np->n_expiry = reqtime; 105052196Smckusick fxdr_hyper(tl, &np->n_lrev); 105152196Smckusick tp = nmp->nm_tprev; 105252196Smckusick while (tp != (struct nfsnode *)nmp && 105352196Smckusick tp->n_expiry > np->n_expiry) 105452196Smckusick tp = tp->n_tprev; 105552196Smckusick if (tp == (struct nfsnode *)nmp) { 105652196Smckusick np->n_tnext = nmp->nm_tnext; 105752196Smckusick nmp->nm_tnext = np; 105852196Smckusick } else { 105952196Smckusick np->n_tnext = tp->n_tnext; 106052196Smckusick tp->n_tnext = np; 106152196Smckusick } 106252196Smckusick np->n_tprev = tp; 106352196Smckusick if (np->n_tnext == (struct nfsnode *)nmp) 106452196Smckusick nmp->nm_tprev = np; 106552196Smckusick else 106652196Smckusick np->n_tnext->n_tprev = np; 106752196Smckusick } 106852196Smckusick } 106952196Smckusick } 107038414Smckusick *mrp = mrep; 107138414Smckusick *mdp = md; 107238414Smckusick *dposp = dpos; 107352196Smckusick m_freem(rep->r_mreq); 107452196Smckusick FREE((caddr_t)rep, M_NFSREQ); 107538414Smckusick return (0); 107638414Smckusick } 107738414Smckusick m_freem(mrep); 107852196Smckusick m_freem(rep->r_mreq); 107952196Smckusick free((caddr_t)rep, M_NFSREQ); 108052196Smckusick error = EPROTONOSUPPORT; 108138414Smckusick nfsmout: 108238414Smckusick return (error); 108338414Smckusick } 108438414Smckusick 108538414Smckusick /* 108638414Smckusick * Generate the rpc reply header 108738414Smckusick * siz arg. is used to decide if adding a cluster is worthwhile 108838414Smckusick */ 108952196Smckusick nfs_rephead(siz, nd, err, cache, frev, mrq, mbp, bposp) 109038414Smckusick int siz; 109152196Smckusick struct nfsd *nd; 109238414Smckusick int err; 109352196Smckusick int cache; 109452196Smckusick u_quad_t *frev; 109538414Smckusick struct mbuf **mrq; 109638414Smckusick struct mbuf **mbp; 109738414Smckusick caddr_t *bposp; 109838414Smckusick { 109948048Smckusick register u_long *tl; 110052196Smckusick register struct mbuf *mreq; 110139494Smckusick caddr_t bpos; 110252196Smckusick struct mbuf *mb, *mb2; 110338414Smckusick 110452196Smckusick MGETHDR(mreq, M_WAIT, MT_DATA); 110538414Smckusick mb = mreq; 110652196Smckusick /* 110752196Smckusick * If this is a big reply, use a cluster else 110852196Smckusick * try and leave leading space for the lower level headers. 110952196Smckusick */ 111052196Smckusick siz += RPC_REPLYSIZ; 111152196Smckusick if (siz >= MINCLSIZE) { 111241900Smckusick MCLGET(mreq, M_WAIT); 111352196Smckusick } else 111452196Smckusick mreq->m_data += max_hdr; 111548048Smckusick tl = mtod(mreq, u_long *); 111638414Smckusick mreq->m_len = 6*NFSX_UNSIGNED; 111748048Smckusick bpos = ((caddr_t)tl)+mreq->m_len; 111852196Smckusick *tl++ = nd->nd_retxid; 111948048Smckusick *tl++ = rpc_reply; 112052196Smckusick if (err == ERPCMISMATCH || err == NQNFS_AUTHERR) { 112148048Smckusick *tl++ = rpc_msgdenied; 112252196Smckusick if (err == NQNFS_AUTHERR) { 112352196Smckusick *tl++ = rpc_autherr; 112452196Smckusick *tl = rpc_rejectedcred; 112552196Smckusick mreq->m_len -= NFSX_UNSIGNED; 112652196Smckusick bpos -= NFSX_UNSIGNED; 112752196Smckusick } else { 112852196Smckusick *tl++ = rpc_mismatch; 112952196Smckusick *tl++ = txdr_unsigned(2); 113052196Smckusick *tl = txdr_unsigned(2); 113152196Smckusick } 113238414Smckusick } else { 113348048Smckusick *tl++ = rpc_msgaccepted; 113448048Smckusick *tl++ = 0; 113548048Smckusick *tl++ = 0; 113638414Smckusick switch (err) { 113738414Smckusick case EPROGUNAVAIL: 113848048Smckusick *tl = txdr_unsigned(RPC_PROGUNAVAIL); 113938414Smckusick break; 114038414Smckusick case EPROGMISMATCH: 114148048Smckusick *tl = txdr_unsigned(RPC_PROGMISMATCH); 114248048Smckusick nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED); 114348048Smckusick *tl++ = txdr_unsigned(2); 114448048Smckusick *tl = txdr_unsigned(2); /* someday 3 */ 114538414Smckusick break; 114638414Smckusick case EPROCUNAVAIL: 114748048Smckusick *tl = txdr_unsigned(RPC_PROCUNAVAIL); 114838414Smckusick break; 114938414Smckusick default: 115048048Smckusick *tl = 0; 115138414Smckusick if (err != VNOVAL) { 115248048Smckusick nfsm_build(tl, u_long *, NFSX_UNSIGNED); 115352196Smckusick if (err) 115452196Smckusick *tl = txdr_unsigned(nfsrv_errmap[err - 1]); 115552196Smckusick else 115652196Smckusick *tl = 0; 115738414Smckusick } 115838414Smckusick break; 115938414Smckusick }; 116038414Smckusick } 116152196Smckusick 116252196Smckusick /* 116352196Smckusick * For nqnfs, piggyback lease as requested. 116452196Smckusick */ 116552196Smckusick if (nd->nd_nqlflag != NQL_NOVAL && err == 0) { 116652196Smckusick if (nd->nd_nqlflag) { 116752196Smckusick nfsm_build(tl, u_long *, 5*NFSX_UNSIGNED); 116852196Smckusick *tl++ = txdr_unsigned(nd->nd_nqlflag); 116952196Smckusick *tl++ = txdr_unsigned(cache); 117052196Smckusick *tl++ = txdr_unsigned(nd->nd_duration); 117152196Smckusick txdr_hyper(frev, tl); 117252196Smckusick } else { 117352196Smckusick if (nd->nd_nqlflag != 0) 117452196Smckusick panic("nqreph"); 117552196Smckusick nfsm_build(tl, u_long *, NFSX_UNSIGNED); 117652196Smckusick *tl = 0; 117752196Smckusick } 117852196Smckusick } 117938414Smckusick *mrq = mreq; 118038414Smckusick *mbp = mb; 118138414Smckusick *bposp = bpos; 118238414Smckusick if (err != 0 && err != VNOVAL) 118338414Smckusick nfsstats.srvrpc_errs++; 118438414Smckusick return (0); 118538414Smckusick } 118638414Smckusick 118738414Smckusick /* 118838414Smckusick * Nfs timer routine 118938414Smckusick * Scan the nfsreq list and retranmit any requests that have timed out 119038414Smckusick * To avoid retransmission attempts on STREAM sockets (in the future) make 119140117Smckusick * sure to set the r_retry field to 0 (implies nm_retry == 0). 119238414Smckusick */ 119338414Smckusick nfs_timer() 119438414Smckusick { 119538414Smckusick register struct nfsreq *rep; 119638414Smckusick register struct mbuf *m; 119738414Smckusick register struct socket *so; 119841900Smckusick register struct nfsmount *nmp; 119952196Smckusick register int timeo; 120052196Smckusick static long lasttime = 0; 120140117Smckusick int s, error; 120238414Smckusick 120338414Smckusick s = splnet(); 120441900Smckusick for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) { 120541900Smckusick nmp = rep->r_nmp; 120652196Smckusick if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) 120741900Smckusick continue; 120852196Smckusick if (nfs_sigintr(nmp, rep, rep->r_procp)) { 120941900Smckusick rep->r_flags |= R_SOFTTERM; 121041900Smckusick continue; 121141900Smckusick } 121252196Smckusick if (rep->r_rtt >= 0) { 121352196Smckusick rep->r_rtt++; 121452196Smckusick if (nmp->nm_flag & NFSMNT_DUMBTIMR) 121552196Smckusick timeo = nmp->nm_timeo; 121652196Smckusick else 121752196Smckusick timeo = NFS_RTO(nmp, proct[rep->r_procnum]); 121852196Smckusick if (nmp->nm_timeouts > 0) 121952196Smckusick timeo *= nfs_backoff[nmp->nm_timeouts - 1]; 122052196Smckusick if (rep->r_rtt <= timeo) 122152196Smckusick continue; 122252196Smckusick if (nmp->nm_timeouts < 8) 122352196Smckusick nmp->nm_timeouts++; 122440117Smckusick } 122541900Smckusick /* 122641900Smckusick * Check for server not responding 122741900Smckusick */ 122841900Smckusick if ((rep->r_flags & R_TPRINTFMSG) == 0 && 122952196Smckusick rep->r_rexmit > nmp->nm_deadthresh) { 123047737Skarels nfs_msg(rep->r_procp, 123147737Skarels nmp->nm_mountp->mnt_stat.f_mntfromname, 123247737Skarels "not responding"); 123341900Smckusick rep->r_flags |= R_TPRINTFMSG; 123441900Smckusick } 123543351Smckusick if (rep->r_rexmit >= rep->r_retry) { /* too many */ 123641900Smckusick nfsstats.rpctimeouts++; 123741900Smckusick rep->r_flags |= R_SOFTTERM; 123841900Smckusick continue; 123941900Smckusick } 124052196Smckusick if (nmp->nm_sotype != SOCK_DGRAM) { 124152196Smckusick if (++rep->r_rexmit > NFS_MAXREXMIT) 124252196Smckusick rep->r_rexmit = NFS_MAXREXMIT; 124343351Smckusick continue; 124452196Smckusick } 124552196Smckusick if ((so = nmp->nm_so) == NULL) 124652196Smckusick continue; 124741900Smckusick 124841900Smckusick /* 124941900Smckusick * If there is enough space and the window allows.. 125041900Smckusick * Resend it 125152196Smckusick * Set r_rtt to -1 in case we fail to send it now. 125241900Smckusick */ 125352196Smckusick rep->r_rtt = -1; 125441900Smckusick if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len && 125552196Smckusick ((nmp->nm_flag & NFSMNT_DUMBTIMR) || 125652196Smckusick (rep->r_flags & R_SENT) || 125752196Smckusick nmp->nm_sent < nmp->nm_cwnd) && 125852196Smckusick (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){ 125941900Smckusick if ((nmp->nm_flag & NFSMNT_NOCONN) == 0) 126041900Smckusick error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, 126152196Smckusick (struct mbuf *)0, (struct mbuf *)0); 126241900Smckusick else 126341900Smckusick error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, 126452196Smckusick nmp->nm_nam, (struct mbuf *)0); 126541900Smckusick if (error) { 126641900Smckusick if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) 126741900Smckusick so->so_error = 0; 126841900Smckusick } else { 126941900Smckusick /* 127052196Smckusick * Iff first send, start timing 127152196Smckusick * else turn timing off, backoff timer 127252196Smckusick * and divide congestion window by 2. 127341900Smckusick */ 127452196Smckusick if (rep->r_flags & R_SENT) { 127552196Smckusick rep->r_flags &= ~R_TIMING; 127652196Smckusick if (++rep->r_rexmit > NFS_MAXREXMIT) 127752196Smckusick rep->r_rexmit = NFS_MAXREXMIT; 127852196Smckusick nmp->nm_cwnd >>= 1; 127952196Smckusick if (nmp->nm_cwnd < NFS_CWNDSCALE) 128052196Smckusick nmp->nm_cwnd = NFS_CWNDSCALE; 128152196Smckusick nfsstats.rpcretries++; 128252196Smckusick } else { 128352196Smckusick rep->r_flags |= R_SENT; 128452196Smckusick nmp->nm_sent += NFS_CWNDSCALE; 128552196Smckusick } 128652196Smckusick rep->r_rtt = 0; 128741900Smckusick } 128841900Smckusick } 128940117Smckusick } 129052196Smckusick 129152196Smckusick /* 129252196Smckusick * Call the nqnfs server timer once a second to handle leases. 129352196Smckusick */ 129452196Smckusick if (lasttime != time.tv_sec) { 129552196Smckusick lasttime = time.tv_sec; 129652196Smckusick nqnfs_serverd(); 129752196Smckusick } 129840117Smckusick splx(s); 129940117Smckusick timeout(nfs_timer, (caddr_t)0, hz/NFS_HZ); 130040117Smckusick } 130140117Smckusick 130240117Smckusick /* 130352196Smckusick * Test for a termination condition pending on the process. 130452196Smckusick * This is used for NFSMNT_INT mounts. 130540117Smckusick */ 130652196Smckusick nfs_sigintr(nmp, rep, p) 130752196Smckusick struct nfsmount *nmp; 130852196Smckusick struct nfsreq *rep; 130952196Smckusick register struct proc *p; 131052196Smckusick { 131140117Smckusick 131252196Smckusick if (rep && (rep->r_flags & R_SOFTTERM)) 131352196Smckusick return (EINTR); 131452196Smckusick if (!(nmp->nm_flag & NFSMNT_INT)) 131552196Smckusick return (0); 131652196Smckusick if (p && p->p_sig && (((p->p_sig &~ p->p_sigmask) &~ p->p_sigignore) & 131752196Smckusick NFSINT_SIGMASK)) 131852196Smckusick return (EINTR); 131952196Smckusick return (0); 132052196Smckusick } 132152196Smckusick 132240117Smckusick /* 132352196Smckusick * Lock a socket against others. 132452196Smckusick * Necessary for STREAM sockets to ensure you get an entire rpc request/reply 132552196Smckusick * and also to avoid race conditions between the processes with nfs requests 132652196Smckusick * in progress when a reconnect is necessary. 132740117Smckusick */ 132852196Smckusick nfs_sndlock(flagp, rep) 132952196Smckusick register int *flagp; 133052196Smckusick struct nfsreq *rep; 133152196Smckusick { 133252196Smckusick struct proc *p; 133340117Smckusick 133452196Smckusick if (rep) 133552196Smckusick p = rep->r_procp; 133652196Smckusick else 133752196Smckusick p = (struct proc *)0; 133852196Smckusick while (*flagp & NFSMNT_SNDLOCK) { 133952196Smckusick if (nfs_sigintr(rep->r_nmp, rep, p)) 134052196Smckusick return (EINTR); 134152196Smckusick *flagp |= NFSMNT_WANTSND; 134252196Smckusick (void) tsleep((caddr_t)flagp, PZERO-1, "nfsndlck", 0); 134352196Smckusick } 134452196Smckusick *flagp |= NFSMNT_SNDLOCK; 134552196Smckusick return (0); 134652196Smckusick } 134752196Smckusick 134852196Smckusick /* 134952196Smckusick * Unlock the stream socket for others. 135052196Smckusick */ 135152196Smckusick void 135252196Smckusick nfs_sndunlock(flagp) 135352196Smckusick register int *flagp; 135440117Smckusick { 135540117Smckusick 135652196Smckusick if ((*flagp & NFSMNT_SNDLOCK) == 0) 135752196Smckusick panic("nfs sndunlock"); 135852196Smckusick *flagp &= ~NFSMNT_SNDLOCK; 135952196Smckusick if (*flagp & NFSMNT_WANTSND) { 136052196Smckusick *flagp &= ~NFSMNT_WANTSND; 136152196Smckusick wakeup((caddr_t)flagp); 136240117Smckusick } 136352196Smckusick } 136452196Smckusick 136552196Smckusick nfs_rcvlock(rep) 136652196Smckusick register struct nfsreq *rep; 136752196Smckusick { 136852196Smckusick register int *flagp = &rep->r_nmp->nm_flag; 136952196Smckusick 137052196Smckusick while (*flagp & NFSMNT_RCVLOCK) { 137152196Smckusick if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp)) 137252196Smckusick return (EINTR); 137352196Smckusick *flagp |= NFSMNT_WANTRCV; 137452196Smckusick (void) tsleep((caddr_t)flagp, PZERO-1, "nfsrcvlck", 0); 137540117Smckusick } 137652196Smckusick *flagp |= NFSMNT_RCVLOCK; 137752196Smckusick return (0); 137852196Smckusick } 137940117Smckusick 138052196Smckusick /* 138152196Smckusick * Unlock the stream socket for others. 138252196Smckusick */ 138352196Smckusick void 138452196Smckusick nfs_rcvunlock(flagp) 138552196Smckusick register int *flagp; 138652196Smckusick { 138752196Smckusick 138852196Smckusick if ((*flagp & NFSMNT_RCVLOCK) == 0) 138952196Smckusick panic("nfs rcvunlock"); 139052196Smckusick *flagp &= ~NFSMNT_RCVLOCK; 139152196Smckusick if (*flagp & NFSMNT_WANTRCV) { 139252196Smckusick *flagp &= ~NFSMNT_WANTRCV; 139352196Smckusick wakeup((caddr_t)flagp); 139452196Smckusick } 139552196Smckusick } 139652196Smckusick 139752196Smckusick /* 139852196Smckusick * This function compares two net addresses by family and returns TRUE 139952196Smckusick * if they are the same host. 140052196Smckusick * If there is any doubt, return FALSE. 140152196Smckusick * The AF_INET family is handled as a special case so that address mbufs 140252196Smckusick * don't need to be saved to store "struct in_addr", which is only 4 bytes. 140352196Smckusick */ 140452196Smckusick nfs_netaddr_match(family, haddr, hmask, nam) 140552196Smckusick int family; 140652196Smckusick union nethostaddr *haddr; 140752196Smckusick union nethostaddr *hmask; 140852196Smckusick struct mbuf *nam; 140952196Smckusick { 141052196Smckusick register struct sockaddr_in *inetaddr; 141152196Smckusick #ifdef ISO 141252196Smckusick register struct sockaddr_iso *isoaddr1, *isoaddr2; 141352196Smckusick #endif 141452196Smckusick 141552196Smckusick 141652196Smckusick switch (family) { 141752196Smckusick case AF_INET: 141852196Smckusick inetaddr = mtod(nam, struct sockaddr_in *); 141952196Smckusick if (inetaddr->sin_family != AF_INET) 142052196Smckusick return (0); 142152196Smckusick if (hmask) { 142252196Smckusick if ((inetaddr->sin_addr.s_addr & hmask->had_inetaddr) == 142352196Smckusick (haddr->had_inetaddr & hmask->had_inetaddr)) 142452196Smckusick return (1); 142552196Smckusick } else if (inetaddr->sin_addr.s_addr == haddr->had_inetaddr) 142652196Smckusick return (1); 142752196Smckusick break; 142852196Smckusick #ifdef ISO 142952196Smckusick case AF_ISO: 143052196Smckusick isoaddr1 = mtod(nam, struct sockaddr_iso *); 143152196Smckusick if (isoaddr1->siso_family != AF_ISO) 143252196Smckusick return (0); 143352196Smckusick isoaddr2 = mtod(haddr->had_nam, struct sockaddr_iso *); 143452196Smckusick if (isoaddr1->siso_nlen > 0 && 143552196Smckusick isoaddr1->siso_nlen == isoaddr2->siso_nlen && 143652196Smckusick SAME_ISOADDR(isoaddr1, isoaddr2)) 143752196Smckusick return (1); 143852196Smckusick break; 143952196Smckusick #endif /* ISO */ 144052196Smckusick default: 144152196Smckusick break; 144252196Smckusick }; 144352196Smckusick return (0); 144452196Smckusick } 144552196Smckusick 144652196Smckusick /* 144752196Smckusick * Build hash lists of net addresses and hang them off the mount point. 144852196Smckusick * Called by ufs_mount() to set up the lists of export addresses. 144952196Smckusick */ 145052196Smckusick hang_addrlist(mp, argp) 145152196Smckusick struct mount *mp; 145252196Smckusick struct ufs_args *argp; 145352196Smckusick { 145452196Smckusick register struct netaddrhash *np, **hnp; 145552196Smckusick register int i; 145652196Smckusick struct ufsmount *ump; 145752196Smckusick struct sockaddr *saddr; 145852196Smckusick struct mbuf *nam, *msk = (struct mbuf *)0; 145952196Smckusick union nethostaddr netmsk; 146052196Smckusick int error; 146152196Smckusick 146252196Smckusick if (error = sockargs(&nam, (caddr_t)argp->saddr, argp->slen, 146352196Smckusick MT_SONAME)) 146452196Smckusick return (error); 146552196Smckusick saddr = mtod(nam, struct sockaddr *); 146652196Smckusick ump = VFSTOUFS(mp); 146752196Smckusick if (saddr->sa_family == AF_INET && 146852196Smckusick ((struct sockaddr_in *)saddr)->sin_addr.s_addr == INADDR_ANY) { 146952196Smckusick m_freem(nam); 147052196Smckusick if (mp->mnt_flag & MNT_DEFEXPORTED) 147152196Smckusick return (EPERM); 147252196Smckusick np = &ump->um_defexported; 147352196Smckusick np->neth_exflags = argp->exflags; 147452196Smckusick np->neth_anon = argp->anon; 147552196Smckusick np->neth_anon.cr_ref = 1; 147652196Smckusick mp->mnt_flag |= MNT_DEFEXPORTED; 147752196Smckusick return (0); 147852196Smckusick } 147952196Smckusick if (argp->msklen > 0) { 148052196Smckusick if (error = sockargs(&msk, (caddr_t)argp->smask, argp->msklen, 148152196Smckusick MT_SONAME)) { 148252196Smckusick m_freem(nam); 148352196Smckusick return (error); 148452196Smckusick } 148552196Smckusick 148652196Smckusick /* 148752196Smckusick * Scan all the hash lists to check against duplications. 148852196Smckusick * For the net list, try both masks to catch a subnet 148952196Smckusick * of another network. 149052196Smckusick */ 149152196Smckusick hnp = &ump->um_netaddr[NETMASK_HASH]; 149252196Smckusick np = *hnp; 149352196Smckusick if (saddr->sa_family == AF_INET) 149452196Smckusick netmsk.had_inetaddr = 149552196Smckusick mtod(msk, struct sockaddr_in *)->sin_addr.s_addr; 149652196Smckusick else 149752196Smckusick netmsk.had_nam = msk; 149852196Smckusick while (np) { 149952196Smckusick if (nfs_netaddr_match(np->neth_family, &np->neth_haddr, 150052196Smckusick &np->neth_hmask, nam) || 150152196Smckusick nfs_netaddr_match(np->neth_family, &np->neth_haddr, 150252196Smckusick &netmsk, nam)) { 150352196Smckusick m_freem(nam); 150452196Smckusick m_freem(msk); 150552196Smckusick return (EPERM); 150640117Smckusick } 150752196Smckusick np = np->neth_next; 150852196Smckusick } 150952196Smckusick for (i = 0; i < NETHASHSZ; i++) { 151052196Smckusick np = ump->um_netaddr[i]; 151152196Smckusick while (np) { 151252196Smckusick if (nfs_netaddr_match(np->neth_family, &np->neth_haddr, 151352196Smckusick &netmsk, nam)) { 151452196Smckusick m_freem(nam); 151552196Smckusick m_freem(msk); 151652196Smckusick return (EPERM); 151752196Smckusick } 151852196Smckusick np = np->neth_next; 151952196Smckusick } 152052196Smckusick } 152152196Smckusick } else { 152252196Smckusick hnp = &ump->um_netaddr[NETADDRHASH(saddr)]; 152352196Smckusick np = ump->um_netaddr[NETMASK_HASH]; 152452196Smckusick while (np) { 152552196Smckusick if (nfs_netaddr_match(np->neth_family, &np->neth_haddr, 152652196Smckusick &np->neth_hmask, nam)) { 152752196Smckusick m_freem(nam); 152852196Smckusick return (EPERM); 152952196Smckusick } 153052196Smckusick np = np->neth_next; 153152196Smckusick } 153252196Smckusick np = *hnp; 153352196Smckusick while (np) { 153452196Smckusick if (nfs_netaddr_match(np->neth_family, &np->neth_haddr, 153552196Smckusick (union nethostaddr *)0, nam)) { 153652196Smckusick m_freem(nam); 153752196Smckusick return (EPERM); 153852196Smckusick } 153952196Smckusick np = np->neth_next; 154052196Smckusick } 154140117Smckusick } 154252196Smckusick np = (struct netaddrhash *) malloc(sizeof(struct netaddrhash), M_NETADDR, 154352196Smckusick M_WAITOK); 154452196Smckusick np->neth_family = saddr->sa_family; 154552196Smckusick if (saddr->sa_family == AF_INET) { 154652196Smckusick np->neth_inetaddr = ((struct sockaddr_in *)saddr)->sin_addr.s_addr; 154752196Smckusick m_freem(nam); 154852196Smckusick if (msk) { 154952196Smckusick np->neth_inetmask = netmsk.had_inetaddr; 155052196Smckusick m_freem(msk); 155152196Smckusick if (np->neth_inetaddr &~ np->neth_inetmask) 155252196Smckusick return (EPERM); 155352196Smckusick } else 155452196Smckusick np->neth_inetmask = 0xffffffff; 155552196Smckusick } else { 155652196Smckusick np->neth_nam = nam; 155752196Smckusick np->neth_msk = msk; 155852196Smckusick } 155952196Smckusick np->neth_exflags = argp->exflags; 156052196Smckusick np->neth_anon = argp->anon; 156152196Smckusick np->neth_anon.cr_ref = 1; 156252196Smckusick np->neth_next = *hnp; 156352196Smckusick *hnp = np; 156452196Smckusick return (0); 156540117Smckusick } 156640117Smckusick 156752196Smckusick /* 156852196Smckusick * Free the net address hash lists that are hanging off the mount points. 156952196Smckusick */ 157052196Smckusick free_addrlist(ump) 157152196Smckusick struct ufsmount *ump; 157240117Smckusick { 157352196Smckusick register struct netaddrhash *np, *onp; 157452196Smckusick register int i; 157540117Smckusick 157652196Smckusick for (i = 0; i <= NETHASHSZ; i++) { 157752196Smckusick np = ump->um_netaddr[i]; 157852196Smckusick ump->um_netaddr[i] = (struct netaddrhash *)0; 157952196Smckusick while (np) { 158052196Smckusick onp = np; 158152196Smckusick np = np->neth_next; 158252196Smckusick if (onp->neth_family != AF_INET) { 158352196Smckusick m_freem(onp->neth_nam); 158452196Smckusick m_freem(onp->neth_msk); 158538414Smckusick } 158652196Smckusick free((caddr_t)onp, M_NETADDR); 158738414Smckusick } 158838414Smckusick } 158938414Smckusick } 159038414Smckusick 159138414Smckusick /* 159252196Smckusick * Generate a hash code for an iso host address. Used by NETADDRHASH() for 159352196Smckusick * iso addresses. 159438414Smckusick */ 159552196Smckusick iso_addrhash(saddr) 159652196Smckusick struct sockaddr *saddr; 159741900Smckusick { 159852196Smckusick #ifdef ISO 159952196Smckusick register struct sockaddr_iso *siso; 160052196Smckusick register int i, sum; 160152196Smckusick 160252196Smckusick sum = 0; 160352196Smckusick for (i = 0; i < siso->siso_nlen; i++) 160452196Smckusick sum += siso->siso_data[i]; 160552196Smckusick return (sum & (NETHASHSZ - 1)); 160652196Smckusick #else 160752196Smckusick return (0); 160852196Smckusick #endif /* ISO */ 160941900Smckusick } 161040117Smckusick 161152196Smckusick /* 161252196Smckusick * Check for badly aligned mbuf data areas and 161352196Smckusick * realign data in an mbuf list by copying the data areas up, as required. 161452196Smckusick */ 161552196Smckusick void 161652196Smckusick nfs_realign(m, hsiz) 161752196Smckusick register struct mbuf *m; 161852196Smckusick int hsiz; 161947737Skarels { 162052196Smckusick register struct mbuf *m2; 162152196Smckusick register int siz, mlen, olen; 162252196Smckusick register caddr_t tcp, fcp; 162352196Smckusick struct mbuf *mnew; 162447737Skarels 162552196Smckusick while (m) { 162652196Smckusick /* 162752196Smckusick * This never happens for UDP, rarely happens for TCP 162852196Smckusick * but frequently happens for iso transport. 162952196Smckusick */ 163052196Smckusick if ((m->m_len & 0x3) || (mtod(m, int) & 0x3)) { 163152196Smckusick olen = m->m_len; 163252196Smckusick fcp = mtod(m, caddr_t); 163352196Smckusick m->m_flags &= ~M_PKTHDR; 163452196Smckusick if (m->m_flags & M_EXT) 163552196Smckusick m->m_data = m->m_ext.ext_buf; 163652196Smckusick else 163752196Smckusick m->m_data = m->m_dat; 163852196Smckusick m->m_len = 0; 163952196Smckusick tcp = mtod(m, caddr_t); 164052196Smckusick mnew = m; 164152196Smckusick m2 = m->m_next; 164252196Smckusick 164352196Smckusick /* 164452196Smckusick * If possible, only put the first invariant part 164552196Smckusick * of the RPC header in the first mbuf. 164652196Smckusick */ 164752196Smckusick if (olen <= hsiz) 164852196Smckusick mlen = hsiz; 164952196Smckusick else 165052196Smckusick mlen = M_TRAILINGSPACE(m); 165152196Smckusick 165252196Smckusick /* 165352196Smckusick * Loop through the mbuf list consolidating data. 165452196Smckusick */ 165552196Smckusick while (m) { 165652196Smckusick while (olen > 0) { 165752196Smckusick if (mlen == 0) { 165852196Smckusick m2->m_flags &= ~M_PKTHDR; 165952196Smckusick if (m2->m_flags & M_EXT) 166052196Smckusick m2->m_data = m2->m_ext.ext_buf; 166152196Smckusick else 166252196Smckusick m2->m_data = m2->m_dat; 166352196Smckusick m2->m_len = 0; 166452196Smckusick mlen = M_TRAILINGSPACE(m2); 166552196Smckusick tcp = mtod(m2, caddr_t); 166652196Smckusick mnew = m2; 166752196Smckusick m2 = m2->m_next; 166852196Smckusick } 166952196Smckusick siz = MIN(mlen, olen); 167052196Smckusick if (tcp != fcp) 167152196Smckusick bcopy(fcp, tcp, siz); 167252196Smckusick mnew->m_len += siz; 167352196Smckusick mlen -= siz; 167452196Smckusick olen -= siz; 167552196Smckusick tcp += siz; 167652196Smckusick fcp += siz; 167752196Smckusick } 167852196Smckusick m = m->m_next; 167952196Smckusick if (m) { 168052196Smckusick olen = m->m_len; 168152196Smckusick fcp = mtod(m, caddr_t); 168252196Smckusick } 168352196Smckusick } 168452196Smckusick 168552196Smckusick /* 168652196Smckusick * Finally, set m_len == 0 for any trailing mbufs that have 168752196Smckusick * been copied out of. 168852196Smckusick */ 168952196Smckusick while (m2) { 169052196Smckusick m2->m_len = 0; 169152196Smckusick m2 = m2->m_next; 169252196Smckusick } 169352196Smckusick return; 169452196Smckusick } 169552196Smckusick m = m->m_next; 169652196Smckusick } 169747737Skarels } 169847737Skarels 169941900Smckusick /* 170052196Smckusick * Socket upcall routine for the nfsd sockets. 170152196Smckusick * The caddr_t arg is a pointer to the "struct nfssvc_sock". 170252196Smckusick * Essentially do as much as possible non-blocking, else punt and it will 170352196Smckusick * be called with M_WAIT from an nfsd. 170441900Smckusick */ 170552196Smckusick void 170652196Smckusick nfsrv_rcv(so, arg, waitflag) 170752196Smckusick struct socket *so; 170852196Smckusick caddr_t arg; 170952196Smckusick int waitflag; 171038414Smckusick { 171152196Smckusick register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg; 171252196Smckusick register struct mbuf *m; 171352196Smckusick struct mbuf *mp, *nam; 171452196Smckusick struct uio auio; 171552196Smckusick int flags, error; 171640117Smckusick 171752903Smckusick if ((slp->ns_flag & SLP_VALID) == 0) 171852903Smckusick return; 171952903Smckusick #ifdef notdef 172052903Smckusick /* 172152903Smckusick * Define this to test for nfsds handling this under heavy load. 172252903Smckusick */ 172352903Smckusick if (waitflag == M_DONTWAIT) { 172452903Smckusick slp->ns_flag |= SLP_NEEDQ; goto dorecs; 172552903Smckusick } 172652903Smckusick #endif 172752932Smckusick auio.uio_procp = NULL; 172852196Smckusick if (so->so_type == SOCK_STREAM) { 172952196Smckusick /* 173052196Smckusick * If there are already records on the queue, defer soreceive() 173152196Smckusick * to an nfsd so that there is feedback to the TCP layer that 173252196Smckusick * the nfs servers are heavily loaded. 173352196Smckusick */ 173452196Smckusick if (slp->ns_rec && waitflag == M_DONTWAIT) { 173552196Smckusick slp->ns_flag |= SLP_NEEDQ; 173652903Smckusick goto dorecs; 173752196Smckusick } 173852196Smckusick 173952196Smckusick /* 174052196Smckusick * Do soreceive(). 174152196Smckusick */ 174252196Smckusick auio.uio_resid = 1000000000; 174352196Smckusick flags = MSG_DONTWAIT; 174452196Smckusick error = soreceive(so, &nam, &auio, &mp, (struct mbuf **)0, &flags); 174552196Smckusick if (error || mp == (struct mbuf *)0) { 174652903Smckusick if (error == EWOULDBLOCK) 174752903Smckusick slp->ns_flag |= SLP_NEEDQ; 174852903Smckusick else 174952196Smckusick slp->ns_flag |= SLP_DISCONN; 175052196Smckusick goto dorecs; 175152196Smckusick } 175252196Smckusick m = mp; 175352196Smckusick if (slp->ns_rawend) { 175452196Smckusick slp->ns_rawend->m_next = m; 175552196Smckusick slp->ns_cc += 1000000000 - auio.uio_resid; 175652196Smckusick } else { 175752196Smckusick slp->ns_raw = m; 175852196Smckusick slp->ns_cc = 1000000000 - auio.uio_resid; 175952196Smckusick } 176052196Smckusick while (m->m_next) 176152196Smckusick m = m->m_next; 176252196Smckusick slp->ns_rawend = m; 176352196Smckusick 176452196Smckusick /* 176552196Smckusick * Now try and parse record(s) out of the raw stream data. 176652196Smckusick */ 176752196Smckusick if (error = nfsrv_getstream(slp, waitflag)) { 176852196Smckusick if (error == EPERM) 176952196Smckusick slp->ns_flag |= SLP_DISCONN; 177052903Smckusick else 177152196Smckusick slp->ns_flag |= SLP_NEEDQ; 177252196Smckusick } 177352196Smckusick } else { 177452196Smckusick do { 177552196Smckusick auio.uio_resid = 1000000000; 177652196Smckusick flags = MSG_DONTWAIT; 177752196Smckusick error = soreceive(so, &nam, &auio, &mp, 177852196Smckusick (struct mbuf **)0, &flags); 177952196Smckusick if (mp) { 178052196Smckusick nfs_realign(mp, 10 * NFSX_UNSIGNED); 178152196Smckusick if (nam) { 178252196Smckusick m = nam; 178352196Smckusick m->m_next = mp; 178452196Smckusick } else 178552196Smckusick m = mp; 178652196Smckusick if (slp->ns_recend) 178752196Smckusick slp->ns_recend->m_nextpkt = m; 178852196Smckusick else 178952196Smckusick slp->ns_rec = m; 179052196Smckusick slp->ns_recend = m; 179152196Smckusick m->m_nextpkt = (struct mbuf *)0; 179252196Smckusick } 179352196Smckusick if (error) { 179452196Smckusick if ((so->so_proto->pr_flags & PR_CONNREQUIRED) 179552196Smckusick && error != EWOULDBLOCK) { 179652196Smckusick slp->ns_flag |= SLP_DISCONN; 179752903Smckusick goto dorecs; 179852196Smckusick } 179952196Smckusick } 180052196Smckusick } while (mp); 180140117Smckusick } 180252196Smckusick 180352196Smckusick /* 180452196Smckusick * Now try and process the request records, non-blocking. 180552196Smckusick */ 180652196Smckusick dorecs: 180752903Smckusick if (waitflag == M_DONTWAIT && 180852903Smckusick (slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN)))) 180952196Smckusick nfsrv_wakenfsd(slp); 181041900Smckusick } 181140117Smckusick 181241900Smckusick /* 181352196Smckusick * Try and extract an RPC request from the mbuf data list received on a 181452196Smckusick * stream socket. The "waitflag" argument indicates whether or not it 181552196Smckusick * can sleep. 181641900Smckusick */ 181752196Smckusick nfsrv_getstream(slp, waitflag) 181852196Smckusick register struct nfssvc_sock *slp; 181952196Smckusick int waitflag; 182041900Smckusick { 182152196Smckusick register struct mbuf *m; 182252196Smckusick register char *cp1, *cp2; 182352196Smckusick register int len; 182452196Smckusick struct mbuf *om, *m2, *recm; 182552196Smckusick u_long recmark; 182641900Smckusick 182752196Smckusick if (slp->ns_flag & SLP_GETSTREAM) 182852196Smckusick panic("nfs getstream"); 182952196Smckusick slp->ns_flag |= SLP_GETSTREAM; 183052196Smckusick for (;;) { 183152196Smckusick if (slp->ns_reclen == 0) { 183252196Smckusick if (slp->ns_cc < NFSX_UNSIGNED) { 183352196Smckusick slp->ns_flag &= ~SLP_GETSTREAM; 183452196Smckusick return (0); 183552196Smckusick } 183652196Smckusick m = slp->ns_raw; 183752196Smckusick if (m->m_len >= NFSX_UNSIGNED) { 183852196Smckusick bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED); 183952196Smckusick m->m_data += NFSX_UNSIGNED; 184052196Smckusick m->m_len -= NFSX_UNSIGNED; 184152196Smckusick } else { 184252196Smckusick cp1 = (caddr_t)&recmark; 184352196Smckusick cp2 = mtod(m, caddr_t); 184452196Smckusick while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) { 184552196Smckusick while (m->m_len == 0) { 184652196Smckusick m = m->m_next; 184752196Smckusick cp2 = mtod(m, caddr_t); 184852196Smckusick } 184952196Smckusick *cp1++ = *cp2++; 185052196Smckusick m->m_data++; 185152196Smckusick m->m_len--; 185252196Smckusick } 185352196Smckusick } 185452196Smckusick slp->ns_cc -= NFSX_UNSIGNED; 185552196Smckusick slp->ns_reclen = ntohl(recmark) & ~0x80000000; 185652196Smckusick if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) { 185752196Smckusick slp->ns_flag &= ~SLP_GETSTREAM; 185852196Smckusick return (EPERM); 185952196Smckusick } 186052196Smckusick } 186152196Smckusick 186252196Smckusick /* 186352196Smckusick * Now get the record part. 186452196Smckusick */ 186552196Smckusick if (slp->ns_cc == slp->ns_reclen) { 186652196Smckusick recm = slp->ns_raw; 186752196Smckusick slp->ns_raw = slp->ns_rawend = (struct mbuf *)0; 186852196Smckusick slp->ns_cc = slp->ns_reclen = 0; 186952196Smckusick } else if (slp->ns_cc > slp->ns_reclen) { 187052196Smckusick len = 0; 187152196Smckusick m = slp->ns_raw; 187252196Smckusick om = (struct mbuf *)0; 187352196Smckusick while (len < slp->ns_reclen) { 187452196Smckusick if ((len + m->m_len) > slp->ns_reclen) { 187552196Smckusick m2 = m_copym(m, 0, slp->ns_reclen - len, 187652196Smckusick waitflag); 187752196Smckusick if (m2) { 187852196Smckusick if (om) { 187952196Smckusick om->m_next = m2; 188052196Smckusick recm = slp->ns_raw; 188152196Smckusick } else 188252196Smckusick recm = m2; 188352196Smckusick m->m_data += slp->ns_reclen - len; 188452196Smckusick m->m_len -= slp->ns_reclen - len; 188552196Smckusick len = slp->ns_reclen; 188652196Smckusick } else { 188752196Smckusick slp->ns_flag &= ~SLP_GETSTREAM; 188852196Smckusick return (EWOULDBLOCK); 188952196Smckusick } 189052196Smckusick } else if ((len + m->m_len) == slp->ns_reclen) { 189152196Smckusick om = m; 189252196Smckusick len += m->m_len; 189352196Smckusick m = m->m_next; 189452196Smckusick recm = slp->ns_raw; 189552196Smckusick om->m_next = (struct mbuf *)0; 189652196Smckusick } else { 189752196Smckusick om = m; 189852196Smckusick len += m->m_len; 189952196Smckusick m = m->m_next; 190052196Smckusick } 190152196Smckusick } 190252196Smckusick slp->ns_raw = m; 190352196Smckusick slp->ns_cc -= len; 190452196Smckusick slp->ns_reclen = 0; 190552196Smckusick } else { 190652196Smckusick slp->ns_flag &= ~SLP_GETSTREAM; 190752196Smckusick return (0); 190852196Smckusick } 190952196Smckusick nfs_realign(recm, 10 * NFSX_UNSIGNED); 191052196Smckusick if (slp->ns_recend) 191152196Smckusick slp->ns_recend->m_nextpkt = recm; 191252196Smckusick else 191352196Smckusick slp->ns_rec = recm; 191452196Smckusick slp->ns_recend = recm; 191540117Smckusick } 191638414Smckusick } 191741900Smckusick 191841900Smckusick /* 191952196Smckusick * Parse an RPC header. 192041900Smckusick */ 192152196Smckusick nfsrv_dorec(slp, nd) 192252196Smckusick register struct nfssvc_sock *slp; 192352196Smckusick register struct nfsd *nd; 192441900Smckusick { 192552196Smckusick register struct mbuf *m; 192652196Smckusick int error; 192741900Smckusick 192852903Smckusick if ((slp->ns_flag & SLP_VALID) == 0 || 192952196Smckusick (m = slp->ns_rec) == (struct mbuf *)0) 193052196Smckusick return (ENOBUFS); 193152196Smckusick if (slp->ns_rec = m->m_nextpkt) 193252196Smckusick m->m_nextpkt = (struct mbuf *)0; 193352196Smckusick else 193452196Smckusick slp->ns_recend = (struct mbuf *)0; 193552196Smckusick if (m->m_type == MT_SONAME) { 193652196Smckusick nd->nd_nam = m; 193752196Smckusick nd->nd_md = nd->nd_mrep = m->m_next; 193852196Smckusick m->m_next = (struct mbuf *)0; 193952196Smckusick } else { 194052196Smckusick nd->nd_nam = (struct mbuf *)0; 194152196Smckusick nd->nd_md = nd->nd_mrep = m; 194252196Smckusick } 194352196Smckusick nd->nd_dpos = mtod(nd->nd_md, caddr_t); 194452196Smckusick if (error = nfs_getreq(nd, TRUE)) { 194552196Smckusick m_freem(nd->nd_nam); 194652196Smckusick return (error); 194752196Smckusick } 194852196Smckusick return (0); 194952196Smckusick } 195052196Smckusick 195152196Smckusick /* 195252196Smckusick * Parse an RPC request 195352196Smckusick * - verify it 195452196Smckusick * - fill in the cred struct. 195552196Smckusick */ 195652196Smckusick nfs_getreq(nd, has_header) 195752196Smckusick register struct nfsd *nd; 195852196Smckusick int has_header; 195952196Smckusick { 196052196Smckusick register int len, i; 196152196Smckusick register u_long *tl; 196252196Smckusick register long t1; 196352196Smckusick struct uio uio; 196452196Smckusick struct iovec iov; 196552196Smckusick caddr_t dpos, cp2; 196652196Smckusick u_long nfsvers, auth_type; 196752196Smckusick int error = 0, nqnfs = 0; 196852196Smckusick struct mbuf *mrep, *md; 196952196Smckusick 197052196Smckusick mrep = nd->nd_mrep; 197152196Smckusick md = nd->nd_md; 197252196Smckusick dpos = nd->nd_dpos; 197352196Smckusick if (has_header) { 197452196Smckusick nfsm_dissect(tl, u_long *, 10*NFSX_UNSIGNED); 197552196Smckusick nd->nd_retxid = *tl++; 197652196Smckusick if (*tl++ != rpc_call) { 197752196Smckusick m_freem(mrep); 197852196Smckusick return (EBADRPC); 197952196Smckusick } 198052196Smckusick } else { 198152196Smckusick nfsm_dissect(tl, u_long *, 8*NFSX_UNSIGNED); 198252196Smckusick } 198352196Smckusick nd->nd_repstat = 0; 198452196Smckusick if (*tl++ != rpc_vers) { 198552196Smckusick nd->nd_repstat = ERPCMISMATCH; 198652196Smckusick nd->nd_procnum = NFSPROC_NOOP; 198741900Smckusick return (0); 198852196Smckusick } 198952196Smckusick nfsvers = nfs_vers; 199052196Smckusick if (*tl != nfs_prog) { 199152196Smckusick if (*tl == nqnfs_prog) { 199252196Smckusick nqnfs++; 199352196Smckusick nfsvers = nqnfs_vers; 199452196Smckusick } else { 199552196Smckusick nd->nd_repstat = EPROGUNAVAIL; 199652196Smckusick nd->nd_procnum = NFSPROC_NOOP; 199752196Smckusick return (0); 199852196Smckusick } 199952196Smckusick } 200052196Smckusick tl++; 200152196Smckusick if (*tl++ != nfsvers) { 200252196Smckusick nd->nd_repstat = EPROGMISMATCH; 200352196Smckusick nd->nd_procnum = NFSPROC_NOOP; 200452196Smckusick return (0); 200552196Smckusick } 200652196Smckusick nd->nd_procnum = fxdr_unsigned(u_long, *tl++); 200752196Smckusick if (nd->nd_procnum == NFSPROC_NULL) 200852196Smckusick return (0); 200952196Smckusick if (nd->nd_procnum >= NFS_NPROCS || 201052196Smckusick (!nqnfs && nd->nd_procnum > NFSPROC_STATFS) || 201152196Smckusick (*tl != rpc_auth_unix && *tl != rpc_auth_kerb)) { 201252196Smckusick nd->nd_repstat = EPROCUNAVAIL; 201352196Smckusick nd->nd_procnum = NFSPROC_NOOP; 201452196Smckusick return (0); 201552196Smckusick } 201652196Smckusick auth_type = *tl++; 201752196Smckusick len = fxdr_unsigned(int, *tl++); 201852196Smckusick if (len < 0 || len > RPCAUTH_MAXSIZ) { 201952196Smckusick m_freem(mrep); 202052196Smckusick return (EBADRPC); 202152196Smckusick } 202241900Smckusick 202341900Smckusick /* 202452196Smckusick * Handle auth_unix or auth_kerb. 202541900Smckusick */ 202652196Smckusick if (auth_type == rpc_auth_unix) { 202752196Smckusick len = fxdr_unsigned(int, *++tl); 202852196Smckusick if (len < 0 || len > NFS_MAXNAMLEN) { 202952196Smckusick m_freem(mrep); 203052196Smckusick return (EBADRPC); 203152196Smckusick } 203252196Smckusick nfsm_adv(nfsm_rndup(len)); 203352196Smckusick nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED); 203452196Smckusick nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++); 203552196Smckusick nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++); 203652196Smckusick len = fxdr_unsigned(int, *tl); 203752196Smckusick if (len < 0 || len > RPCAUTH_UNIXGIDS) { 203852196Smckusick m_freem(mrep); 203952196Smckusick return (EBADRPC); 204052196Smckusick } 204152196Smckusick nfsm_dissect(tl, u_long *, (len + 2)*NFSX_UNSIGNED); 204252196Smckusick for (i = 1; i <= len; i++) 204352196Smckusick if (i < NGROUPS) 204452196Smckusick nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++); 204552196Smckusick else 204652196Smckusick tl++; 204752196Smckusick nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1); 204852196Smckusick } else if (auth_type == rpc_auth_kerb) { 204952196Smckusick nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++); 205052196Smckusick nd->nd_authlen = fxdr_unsigned(int, *tl); 205152196Smckusick iov.iov_len = uio.uio_resid = nfsm_rndup(nd->nd_authlen); 205252196Smckusick if (uio.uio_resid > (len - 2*NFSX_UNSIGNED)) { 205352196Smckusick m_freem(mrep); 205452196Smckusick return (EBADRPC); 205552196Smckusick } 205652196Smckusick uio.uio_offset = 0; 205752196Smckusick uio.uio_iov = &iov; 205852196Smckusick uio.uio_iovcnt = 1; 205952196Smckusick uio.uio_segflg = UIO_SYSSPACE; 206052196Smckusick iov.iov_base = (caddr_t)nd->nd_authstr; 206152196Smckusick nfsm_mtouio(&uio, uio.uio_resid); 206252196Smckusick nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED); 206352196Smckusick nd->nd_flag |= NFSD_NEEDAUTH; 206452196Smckusick } 206552196Smckusick 206652196Smckusick /* 206752196Smckusick * Do we have any use for the verifier. 206852196Smckusick * According to the "Remote Procedure Call Protocol Spec." it 206952196Smckusick * should be AUTH_NULL, but some clients make it AUTH_UNIX? 207052196Smckusick * For now, just skip over it 207152196Smckusick */ 207252196Smckusick len = fxdr_unsigned(int, *++tl); 207352196Smckusick if (len < 0 || len > RPCAUTH_MAXSIZ) { 207452196Smckusick m_freem(mrep); 207552196Smckusick return (EBADRPC); 207652196Smckusick } 207752196Smckusick if (len > 0) { 207852196Smckusick nfsm_adv(nfsm_rndup(len)); 207952196Smckusick } 208052196Smckusick 208152196Smckusick /* 208252196Smckusick * For nqnfs, get piggybacked lease request. 208352196Smckusick */ 208452196Smckusick if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) { 208552196Smckusick nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); 208652196Smckusick nd->nd_nqlflag = fxdr_unsigned(int, *tl); 208752196Smckusick if (nd->nd_nqlflag) { 208852196Smckusick nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); 208952196Smckusick nd->nd_duration = fxdr_unsigned(int, *tl); 209052196Smckusick } else 209152196Smckusick nd->nd_duration = NQ_MINLEASE; 209252196Smckusick } else { 209352196Smckusick nd->nd_nqlflag = NQL_NOVAL; 209452196Smckusick nd->nd_duration = NQ_MINLEASE; 209552196Smckusick } 209652196Smckusick nd->nd_md = md; 209752196Smckusick nd->nd_dpos = dpos; 209841900Smckusick return (0); 209952196Smckusick nfsmout: 210052196Smckusick return (error); 210141900Smckusick } 210241900Smckusick 210341900Smckusick /* 210452196Smckusick * Search for a sleeping nfsd and wake it up. 210552196Smckusick * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the 210652196Smckusick * running nfsds will go look for the work in the nfssvc_sock list. 210741900Smckusick */ 210852196Smckusick void 210952196Smckusick nfsrv_wakenfsd(slp) 211052196Smckusick struct nfssvc_sock *slp; 211141900Smckusick { 211252196Smckusick register struct nfsd *nd = nfsd_head.nd_next; 211352196Smckusick 211452903Smckusick if ((slp->ns_flag & SLP_VALID) == 0) 211552903Smckusick return; 211652196Smckusick while (nd != (struct nfsd *)&nfsd_head) { 211752196Smckusick if (nd->nd_flag & NFSD_WAITING) { 211852196Smckusick nd->nd_flag &= ~NFSD_WAITING; 211952196Smckusick if (nd->nd_slp) 212052196Smckusick panic("nfsd wakeup"); 212152196Smckusick nd->nd_slp = slp; 212252196Smckusick wakeup((caddr_t)nd); 212352196Smckusick return; 212452196Smckusick } 212552196Smckusick nd = nd->nd_next; 212652196Smckusick } 212752903Smckusick slp->ns_flag |= SLP_DOREC; 212852196Smckusick nfsd_head.nd_flag |= NFSD_CHECKSLP; 212941900Smckusick } 213052196Smckusick 213152196Smckusick nfs_msg(p, server, msg) 213252196Smckusick struct proc *p; 213352196Smckusick char *server, *msg; 213452196Smckusick { 213552196Smckusick tpr_t tpr; 213652196Smckusick 213752196Smckusick if (p) 213852196Smckusick tpr = tprintf_open(p); 213952196Smckusick else 214052196Smckusick tpr = NULL; 214152196Smckusick tprintf(tpr, "nfs server %s: %s\n", server, msg); 214252196Smckusick tprintf_close(tpr); 214352196Smckusick } 2144