138414Smckusick /* 247574Skarels * Copyright (c) 1989, 1991 The Regents of the University of California. 338414Smckusick * All rights reserved. 438414Smckusick * 538414Smckusick * This code is derived from software contributed to Berkeley by 638414Smckusick * Rick Macklem at The University of Guelph. 738414Smckusick * 844511Sbostic * %sccs.include.redist.c% 938414Smckusick * 10*48048Smckusick * @(#)nfs_socket.c 7.22 (Berkeley) 04/16/91 1138414Smckusick */ 1238414Smckusick 1338414Smckusick /* 1441900Smckusick * Socket operations for use by nfs 1538414Smckusick */ 1638414Smckusick 1738414Smckusick #include "param.h" 1840117Smckusick #include "proc.h" 1938414Smckusick #include "mount.h" 2038414Smckusick #include "kernel.h" 2138414Smckusick #include "malloc.h" 2238414Smckusick #include "mbuf.h" 23*48048Smckusick #include "namei.h" 2438414Smckusick #include "vnode.h" 2538414Smckusick #include "domain.h" 2638414Smckusick #include "protosw.h" 2738414Smckusick #include "socket.h" 2838414Smckusick #include "socketvar.h" 2947574Skarels #include "syslog.h" 3047737Skarels #include "tprintf.h" 3142877Smckusick #include "../netinet/in.h" 3242877Smckusick #include "../netinet/tcp.h" 3347574Skarels 3438414Smckusick #include "rpcv2.h" 3538414Smckusick #include "nfsv2.h" 3638414Smckusick #include "nfs.h" 3738414Smckusick #include "xdr_subs.h" 3838414Smckusick #include "nfsm_subs.h" 3938414Smckusick #include "nfsmount.h" 4038414Smckusick 4138414Smckusick #define TRUE 1 4243351Smckusick #define FALSE 0 4338414Smckusick 4440117Smckusick /* 4538414Smckusick * External data, mostly RPC constants in XDR form 4638414Smckusick */ 4738414Smckusick extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix, 4838414Smckusick rpc_msgaccepted, rpc_call; 4938414Smckusick extern u_long nfs_prog, nfs_vers; 5043351Smckusick /* Maybe these should be bits in a u_long ?? 
*/ 5141900Smckusick extern int nonidempotent[NFS_NPROCS]; 5245281Smckusick static int compressrequest[NFS_NPROCS] = { 5345281Smckusick FALSE, 5445281Smckusick TRUE, 5545281Smckusick TRUE, 5645281Smckusick FALSE, 5745281Smckusick TRUE, 5845281Smckusick TRUE, 5945281Smckusick TRUE, 6045281Smckusick FALSE, 6145281Smckusick FALSE, 6245281Smckusick TRUE, 6345281Smckusick TRUE, 6445281Smckusick TRUE, 6545281Smckusick TRUE, 6645281Smckusick TRUE, 6745281Smckusick TRUE, 6845281Smckusick TRUE, 6945281Smckusick TRUE, 7045281Smckusick TRUE, 7145281Smckusick }; 7241900Smckusick int nfs_sbwait(); 7341900Smckusick void nfs_disconnect(); 7445281Smckusick struct mbuf *nfs_compress(), *nfs_uncompress(); 7541900Smckusick 7638414Smckusick int nfsrv_null(), 7738414Smckusick nfsrv_getattr(), 7838414Smckusick nfsrv_setattr(), 7938414Smckusick nfsrv_lookup(), 8038414Smckusick nfsrv_readlink(), 8138414Smckusick nfsrv_read(), 8238414Smckusick nfsrv_write(), 8338414Smckusick nfsrv_create(), 8438414Smckusick nfsrv_remove(), 8538414Smckusick nfsrv_rename(), 8638414Smckusick nfsrv_link(), 8738414Smckusick nfsrv_symlink(), 8838414Smckusick nfsrv_mkdir(), 8938414Smckusick nfsrv_rmdir(), 9038414Smckusick nfsrv_readdir(), 9138414Smckusick nfsrv_statfs(), 9238414Smckusick nfsrv_noop(); 9338414Smckusick 9438414Smckusick int (*nfsrv_procs[NFS_NPROCS])() = { 9538414Smckusick nfsrv_null, 9638414Smckusick nfsrv_getattr, 9738414Smckusick nfsrv_setattr, 9838414Smckusick nfsrv_noop, 9938414Smckusick nfsrv_lookup, 10038414Smckusick nfsrv_readlink, 10138414Smckusick nfsrv_read, 10238414Smckusick nfsrv_noop, 10338414Smckusick nfsrv_write, 10438414Smckusick nfsrv_create, 10538414Smckusick nfsrv_remove, 10638414Smckusick nfsrv_rename, 10738414Smckusick nfsrv_link, 10838414Smckusick nfsrv_symlink, 10938414Smckusick nfsrv_mkdir, 11038414Smckusick nfsrv_rmdir, 11138414Smckusick nfsrv_readdir, 11238414Smckusick nfsrv_statfs, 11338414Smckusick }; 11438414Smckusick 11540117Smckusick struct nfsreq nfsreqh; 
11640117Smckusick int nfsrexmtthresh = NFS_FISHY; 11741900Smckusick int nfs_tcpnodelay = 1; 11838414Smckusick 11938414Smckusick /* 12041900Smckusick * Initialize sockets and congestion for a new NFS connection. 12140117Smckusick * We do not free the sockaddr if error. 12238414Smckusick */ 12341900Smckusick nfs_connect(nmp) 12440117Smckusick register struct nfsmount *nmp; 12540117Smckusick { 12641900Smckusick register struct socket *so; 12741900Smckusick int s, error; 12840117Smckusick struct mbuf *m; 12940117Smckusick 13041900Smckusick nmp->nm_so = (struct socket *)0; 13141900Smckusick if (error = socreate(mtod(nmp->nm_nam, struct sockaddr *)->sa_family, 13241900Smckusick &nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto)) 13340117Smckusick goto bad; 13441900Smckusick so = nmp->nm_so; 13541900Smckusick nmp->nm_soflags = so->so_proto->pr_flags; 13640117Smckusick 13741900Smckusick /* 13841900Smckusick * Protocols that do not require connections may be optionally left 13941900Smckusick * unconnected for servers that reply from a port other than NFS_PORT. 14041900Smckusick */ 14141900Smckusick if (nmp->nm_flag & NFSMNT_NOCONN) { 14241900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) { 14341900Smckusick error = ENOTCONN; 14440117Smckusick goto bad; 14540117Smckusick } 14641900Smckusick } else { 14741900Smckusick if (error = soconnect(so, nmp->nm_nam)) 14840117Smckusick goto bad; 14941900Smckusick 15041900Smckusick /* 15141900Smckusick * Wait for the connection to complete. Cribbed from the 15241900Smckusick * connect system call but with the wait at negative prio. 
15341900Smckusick */ 15441900Smckusick s = splnet(); 15541900Smckusick while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) 15643351Smckusick (void) tsleep((caddr_t)&so->so_timeo, PSOCK, "nfscon", 0); 15741900Smckusick splx(s); 15841900Smckusick if (so->so_error) { 15941900Smckusick error = so->so_error; 16041900Smckusick goto bad; 16141900Smckusick } 16240117Smckusick } 16341900Smckusick if (nmp->nm_sotype == SOCK_DGRAM) { 16443351Smckusick if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) { 16541900Smckusick so->so_rcv.sb_timeo = (5 * hz); 16641900Smckusick so->so_snd.sb_timeo = (5 * hz); 16741900Smckusick } else { 16841900Smckusick so->so_rcv.sb_timeo = 0; 16941900Smckusick so->so_snd.sb_timeo = 0; 17041900Smckusick } 17147574Skarels if (error = soreserve(so, 17247574Skarels min(4 * (nmp->nm_wsize + NFS_MAXPKTHDR), NFS_MAXPACKET), 17347574Skarels min(4 * (nmp->nm_rsize + NFS_MAXPKTHDR), NFS_MAXPACKET))) 17441900Smckusick goto bad; 17541900Smckusick } else { 17643351Smckusick if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) { 17741900Smckusick so->so_rcv.sb_timeo = (5 * hz); 17841900Smckusick so->so_snd.sb_timeo = (5 * hz); 17941900Smckusick } else { 18041900Smckusick so->so_rcv.sb_timeo = 0; 18141900Smckusick so->so_snd.sb_timeo = 0; 18241900Smckusick } 18341900Smckusick if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 18441900Smckusick MGET(m, M_WAIT, MT_SOOPTS); 18541900Smckusick *mtod(m, int *) = 1; 18641900Smckusick m->m_len = sizeof(int); 18741900Smckusick sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m); 18841900Smckusick } 18941900Smckusick if (so->so_proto->pr_domain->dom_family == AF_INET && 19041900Smckusick so->so_proto->pr_protocol == IPPROTO_TCP && 19141900Smckusick nfs_tcpnodelay) { 19241900Smckusick MGET(m, M_WAIT, MT_SOOPTS); 19341900Smckusick *mtod(m, int *) = 1; 19441900Smckusick m->m_len = sizeof(int); 19541900Smckusick sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m); 19641900Smckusick } 19741900Smckusick if (error = 
soreserve(so, 19847574Skarels min(4 * (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof(u_long)), 19947574Skarels NFS_MAXPACKET + sizeof(u_long)), 20047574Skarels min(4 * (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof(u_long)), 20147574Skarels NFS_MAXPACKET + sizeof(u_long)))) 20241900Smckusick goto bad; 20341900Smckusick } 20441900Smckusick so->so_rcv.sb_flags |= SB_NOINTR; 20541900Smckusick so->so_snd.sb_flags |= SB_NOINTR; 20640117Smckusick 20741900Smckusick /* Initialize other non-zero congestion variables */ 20841900Smckusick nmp->nm_rto = NFS_TIMEO; 20941900Smckusick nmp->nm_window = 2; /* Initial send window */ 21041900Smckusick nmp->nm_ssthresh = NFS_MAXWINDOW; /* Slowstart threshold */ 21141900Smckusick nmp->nm_rttvar = nmp->nm_rto << 1; 21241900Smckusick nmp->nm_sent = 0; 21341900Smckusick nmp->nm_currexmit = 0; 21441900Smckusick return (0); 21540117Smckusick 21641900Smckusick bad: 21741900Smckusick nfs_disconnect(nmp); 21841900Smckusick return (error); 21941900Smckusick } 22040117Smckusick 22141900Smckusick /* 22241900Smckusick * Reconnect routine: 22341900Smckusick * Called when a connection is broken on a reliable protocol. 22441900Smckusick * - clean up the old socket 22541900Smckusick * - nfs_connect() again 22641900Smckusick * - set R_MUSTRESEND for all outstanding requests on mount point 22741900Smckusick * If this fails the mount point is DEAD! 22841900Smckusick * nb: Must be called with the nfs_solock() set on the mount point. 
22941900Smckusick */ 23041900Smckusick nfs_reconnect(rep, nmp) 23141900Smckusick register struct nfsreq *rep; 23241900Smckusick register struct nfsmount *nmp; 23341900Smckusick { 23441900Smckusick register struct nfsreq *rp; 23541900Smckusick int error; 23640117Smckusick 23747737Skarels nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname, 23847737Skarels "trying reconnect"); 23941900Smckusick while (error = nfs_connect(nmp)) { 24042243Smckusick #ifdef lint 24142243Smckusick error = error; 24242243Smckusick #endif /* lint */ 24341900Smckusick if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp)) 24441900Smckusick return (EINTR); 24543351Smckusick (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0); 24640117Smckusick } 24747737Skarels nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname, 24847737Skarels "reconnected"); 24941900Smckusick 25041900Smckusick /* 25141900Smckusick * Loop through outstanding request list and fix up all requests 25241900Smckusick * on old socket. 25341900Smckusick */ 25441900Smckusick rp = nfsreqh.r_next; 25541900Smckusick while (rp != &nfsreqh) { 25641900Smckusick if (rp->r_nmp == nmp) 25741900Smckusick rp->r_flags |= R_MUSTRESEND; 25841900Smckusick rp = rp->r_next; 25940117Smckusick } 26040117Smckusick return (0); 26140117Smckusick } 26240117Smckusick 26340117Smckusick /* 26440117Smckusick * NFS disconnect. Clean up and unlink. 26540117Smckusick */ 26641900Smckusick void 26740117Smckusick nfs_disconnect(nmp) 26840117Smckusick register struct nfsmount *nmp; 26940117Smckusick { 27041900Smckusick register struct socket *so; 27140117Smckusick 27241900Smckusick if (nmp->nm_so) { 27341900Smckusick so = nmp->nm_so; 27441900Smckusick nmp->nm_so = (struct socket *)0; 27541900Smckusick soshutdown(so, 2); 27641900Smckusick soclose(so); 27740117Smckusick } 27840117Smckusick } 27940117Smckusick 28040117Smckusick /* 28141900Smckusick * This is the nfs send routine. 
For connection based socket types, it 28241900Smckusick * must be called with an nfs_solock() on the socket. 28341900Smckusick * "rep == NULL" indicates that it has been called from a server. 28440117Smckusick */ 28541900Smckusick nfs_send(so, nam, top, rep) 28638414Smckusick register struct socket *so; 28738414Smckusick struct mbuf *nam; 28841900Smckusick register struct mbuf *top; 28941900Smckusick struct nfsreq *rep; 29038414Smckusick { 29141900Smckusick struct mbuf *sendnam; 29241900Smckusick int error, soflags; 29338414Smckusick 29441900Smckusick if (rep) { 29541900Smckusick if (rep->r_flags & R_SOFTTERM) { 29640117Smckusick m_freem(top); 29741900Smckusick return (EINTR); 29840117Smckusick } 29943062Smckusick if (rep->r_nmp->nm_so == NULL && 30041900Smckusick (error = nfs_reconnect(rep, rep->r_nmp))) 30141900Smckusick return (error); 30241900Smckusick rep->r_flags &= ~R_MUSTRESEND; 30343062Smckusick so = rep->r_nmp->nm_so; 30441900Smckusick soflags = rep->r_nmp->nm_soflags; 30541900Smckusick } else 30641900Smckusick soflags = so->so_proto->pr_flags; 30741900Smckusick if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED)) 30841900Smckusick sendnam = (struct mbuf *)0; 30941900Smckusick else 31041900Smckusick sendnam = nam; 31141900Smckusick 31241900Smckusick error = sosend(so, sendnam, (struct uio *)0, top, 31341900Smckusick (struct mbuf *)0, 0); 31441900Smckusick if (error == EWOULDBLOCK && rep) { 31541900Smckusick if (rep->r_flags & R_SOFTTERM) 31641900Smckusick error = EINTR; 31741900Smckusick else { 31841900Smckusick rep->r_flags |= R_MUSTRESEND; 31941900Smckusick error = 0; 32040117Smckusick } 32138414Smckusick } 32241900Smckusick /* 32341900Smckusick * Ignore socket errors?? 32441900Smckusick */ 32541900Smckusick if (error && error != EINTR && error != ERESTART) 32641900Smckusick error = 0; 32738414Smckusick return (error); 32838414Smckusick } 32938414Smckusick 33038414Smckusick /* 33141900Smckusick * Receive a Sun RPC Request/Reply. 
For SOCK_DGRAM, the work is all 33241900Smckusick * done by soreceive(), but for SOCK_STREAM we must deal with the Record 33341900Smckusick * Mark and consolidate the data into a new mbuf list. 33441900Smckusick * nb: Sometimes TCP passes the data up to soreceive() in long lists of 33541900Smckusick * small mbufs. 33641900Smckusick * For SOCK_STREAM we must be very careful to read an entire record once 33741900Smckusick * we have read any of it, even if the system call has been interrupted. 33838414Smckusick */ 33941900Smckusick nfs_receive(so, aname, mp, rep) 34038414Smckusick register struct socket *so; 34138414Smckusick struct mbuf **aname; 34238414Smckusick struct mbuf **mp; 34341900Smckusick register struct nfsreq *rep; 34438414Smckusick { 34541900Smckusick struct uio auio; 34641900Smckusick struct iovec aio; 34738414Smckusick register struct mbuf *m; 34845281Smckusick struct mbuf *m2, *mnew, **mbp; 34941900Smckusick caddr_t fcp, tcp; 35041900Smckusick u_long len; 35141900Smckusick struct mbuf **getnam; 35247737Skarels int error, siz, mlen, soflags, rcvflg; 35338414Smckusick 35441900Smckusick /* 35541900Smckusick * Set up arguments for soreceive() 35641900Smckusick */ 35741900Smckusick *mp = (struct mbuf *)0; 35841900Smckusick *aname = (struct mbuf *)0; 35941900Smckusick if (rep) 36041900Smckusick soflags = rep->r_nmp->nm_soflags; 36141900Smckusick else 36241900Smckusick soflags = so->so_proto->pr_flags; 36338414Smckusick 36441900Smckusick /* 36541900Smckusick * For reliable protocols, lock against other senders/receivers 36641900Smckusick * in case a reconnect is necessary. 36741900Smckusick * For SOCK_STREAM, first get the Record Mark to find out how much 36841900Smckusick * more there is to get. 36941900Smckusick * We must lock the socket against other receivers 37041900Smckusick * until we have an entire rpc request/reply. 
37141900Smckusick */ 37241900Smckusick if (soflags & PR_CONNREQUIRED) { 37341900Smckusick tryagain: 37441900Smckusick /* 37541900Smckusick * Check for fatal errors and resending request. 37641900Smckusick */ 37741900Smckusick if (rep) { 37841900Smckusick /* 37941900Smckusick * Ugh: If a reconnect attempt just happened, nm_so 38041900Smckusick * would have changed. NULL indicates a failed 38141900Smckusick * attempt that has essentially shut down this 38241900Smckusick * mount point. 38341900Smckusick */ 38441900Smckusick if (rep->r_mrep || (so = rep->r_nmp->nm_so) == NULL || 38541900Smckusick (rep->r_flags & R_SOFTTERM)) 38641900Smckusick return (EINTR); 38741900Smckusick while (rep->r_flags & R_MUSTRESEND) { 38841900Smckusick m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); 38941900Smckusick nfsstats.rpcretries++; 39041900Smckusick if (error = nfs_send(so, rep->r_nmp->nm_nam, m, 39141900Smckusick rep)) 39241900Smckusick goto errout; 39340117Smckusick } 39441900Smckusick } 39541900Smckusick if ((soflags & PR_ATOMIC) == 0) { 39641900Smckusick aio.iov_base = (caddr_t) &len; 39741900Smckusick aio.iov_len = sizeof(u_long); 39841900Smckusick auio.uio_iov = &aio; 39941900Smckusick auio.uio_iovcnt = 1; 40041900Smckusick auio.uio_segflg = UIO_SYSSPACE; 40141900Smckusick auio.uio_rw = UIO_READ; 402*48048Smckusick auio.uio_procp = (struct proc *)0; 40341900Smckusick auio.uio_offset = 0; 40441900Smckusick auio.uio_resid = sizeof(u_long); 40541900Smckusick do { 40647737Skarels rcvflg = MSG_WAITALL; 40747737Skarels error = soreceive(so, (struct mbuf **)0, &auio, 40841900Smckusick (struct mbuf **)0, (struct mbuf **)0, &rcvflg); 40947737Skarels if (error == EWOULDBLOCK && rep) { 41041900Smckusick if (rep->r_flags & R_SOFTTERM) 41141900Smckusick return (EINTR); 41241900Smckusick if (rep->r_flags & R_MUSTRESEND) 41341900Smckusick goto tryagain; 41447737Skarels } 41541900Smckusick } while (error == EWOULDBLOCK); 41647737Skarels if (!error && auio.uio_resid > 0) { 41747737Skarels if 
(rep) 41847737Skarels log(LOG_INFO, 41947737Skarels "short receive (%d/%d) from nfs server %s\n", 42047737Skarels sizeof(u_long) - auio.uio_resid, 42147737Skarels sizeof(u_long), 42247737Skarels rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 42347737Skarels error = EPIPE; 42447737Skarels } 42540761Skarels if (error) 42641900Smckusick goto errout; 42741900Smckusick len = ntohl(len) & ~0x80000000; 42841900Smckusick /* 42941900Smckusick * This is SERIOUS! We are out of sync with the sender 43041900Smckusick * and forcing a disconnect/reconnect is all I can do. 43141900Smckusick */ 43241900Smckusick if (len > NFS_MAXPACKET) { 43347737Skarels if (rep) 43447737Skarels log(LOG_ERR, "%s (%d) from nfs server %s\n", 43547737Skarels "impossible packet length", 43647737Skarels len, 43747737Skarels rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 43847737Skarels error = EFBIG; 43947737Skarels goto errout; 44041900Smckusick } 44141900Smckusick auio.uio_resid = len; 44241900Smckusick do { 44347737Skarels rcvflg = MSG_WAITALL; 44441900Smckusick error = soreceive(so, (struct mbuf **)0, 44541900Smckusick &auio, mp, (struct mbuf **)0, &rcvflg); 44641900Smckusick } while (error == EWOULDBLOCK || error == EINTR || 44741900Smckusick error == ERESTART); 44847737Skarels if (!error && auio.uio_resid > 0) { 44947737Skarels if (rep) 45047737Skarels log(LOG_INFO, 45147737Skarels "short receive (%d/%d) from nfs server %s\n", 45247737Skarels len - auio.uio_resid, len, 45347737Skarels rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 45447737Skarels error = EPIPE; 45547737Skarels } 45640117Smckusick } else { 45741900Smckusick auio.uio_resid = len = 1000000; /* Anything Big */ 45841900Smckusick do { 45947737Skarels rcvflg = 0; 46041900Smckusick error = soreceive(so, (struct mbuf **)0, 46141900Smckusick &auio, mp, (struct mbuf **)0, &rcvflg); 46241900Smckusick if (error == EWOULDBLOCK && rep) { 46341900Smckusick if (rep->r_flags & R_SOFTTERM) 46441900Smckusick return (EINTR); 46541900Smckusick if 
(rep->r_flags & R_MUSTRESEND) 46641900Smckusick goto tryagain; 46741900Smckusick } 46841900Smckusick } while (error == EWOULDBLOCK); 46941900Smckusick if (!error && *mp == NULL) 47041900Smckusick error = EPIPE; 47141900Smckusick len -= auio.uio_resid; 47240117Smckusick } 47341900Smckusick errout: 47441900Smckusick if (error && rep && error != EINTR && error != ERESTART) { 47541900Smckusick m_freem(*mp); 47641900Smckusick *mp = (struct mbuf *)0; 47747737Skarels if (error != EPIPE && rep) 47847737Skarels log(LOG_INFO, 47947737Skarels "receive error %d from nfs server %s\n", 48047737Skarels error, 48147737Skarels rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 48241900Smckusick nfs_disconnect(rep->r_nmp); 48341900Smckusick error = nfs_reconnect(rep, rep->r_nmp); 48441900Smckusick if (!error) 48541900Smckusick goto tryagain; 48640117Smckusick } 48741900Smckusick } else { 48841900Smckusick if (so->so_state & SS_ISCONNECTED) 48941900Smckusick getnam = (struct mbuf **)0; 49041900Smckusick else 49141900Smckusick getnam = aname; 49241900Smckusick auio.uio_resid = len = 1000000; 49341900Smckusick do { 49447737Skarels rcvflg = 0; 49541900Smckusick error = soreceive(so, getnam, &auio, mp, 49641900Smckusick (struct mbuf **)0, &rcvflg); 49741900Smckusick if (error == EWOULDBLOCK && rep && 49841900Smckusick (rep->r_flags & R_SOFTTERM)) 49941900Smckusick return (EINTR); 50041900Smckusick } while (error == EWOULDBLOCK); 50141900Smckusick len -= auio.uio_resid; 50241900Smckusick } 50341900Smckusick if (error) { 50441900Smckusick m_freem(*mp); 50541900Smckusick *mp = (struct mbuf *)0; 50641900Smckusick } 50741900Smckusick /* 50841900Smckusick * Search for any mbufs that are not a multiple of 4 bytes long. 50941900Smckusick * These could cause pointer alignment problems, so copy them to 51041900Smckusick * well aligned mbufs. 
51141900Smckusick */ 51241900Smckusick m = *mp; 51341900Smckusick mbp = mp; 51441900Smckusick while (m) { 51541900Smckusick /* 51641900Smckusick * All this for something that may never happen. 51741900Smckusick */ 51845281Smckusick if (m->m_next && (m->m_len & 0x3)) { 51941900Smckusick printf("nfs_rcv odd length!\n"); 52042243Smckusick mlen = 0; 52141900Smckusick while (m) { 52245281Smckusick fcp = mtod(m, caddr_t); 52345281Smckusick while (m->m_len > 0) { 52445281Smckusick if (mlen == 0) { 52545281Smckusick MGET(m2, M_WAIT, MT_DATA); 52645281Smckusick if (len >= MINCLSIZE) 52745281Smckusick MCLGET(m2, M_WAIT); 52845281Smckusick m2->m_len = 0; 52945281Smckusick mlen = M_TRAILINGSPACE(m2); 53045281Smckusick tcp = mtod(m2, caddr_t); 53145281Smckusick *mbp = m2; 53245281Smckusick mbp = &m2->m_next; 53345281Smckusick } 53445281Smckusick siz = MIN(mlen, m->m_len); 53545281Smckusick bcopy(fcp, tcp, siz); 53645281Smckusick m2->m_len += siz; 53745281Smckusick mlen -= siz; 53845281Smckusick len -= siz; 53945281Smckusick tcp += siz; 54045281Smckusick m->m_len -= siz; 54145281Smckusick fcp += siz; 54241900Smckusick } 54345281Smckusick MFREE(m, mnew); 54445281Smckusick m = mnew; 54541900Smckusick } 54641900Smckusick break; 54740117Smckusick } 54841900Smckusick len -= m->m_len; 54941900Smckusick mbp = &m->m_next; 55041900Smckusick m = m->m_next; 55138414Smckusick } 55238414Smckusick return (error); 55338414Smckusick } 55438414Smckusick 55538414Smckusick /* 55641900Smckusick * Implement receipt of reply on a socket. 55738414Smckusick * We must search through the list of received datagrams matching them 55838414Smckusick * with outstanding requests using the xid, until ours is found. 
55938414Smckusick */ 56041900Smckusick /* ARGSUSED */ 56141900Smckusick nfs_reply(nmp, myrep) 56241900Smckusick struct nfsmount *nmp; 56339344Smckusick struct nfsreq *myrep; 56438414Smckusick { 56538414Smckusick register struct mbuf *m; 56638414Smckusick register struct nfsreq *rep; 56741900Smckusick register int error = 0; 56845281Smckusick u_long rxid; 56941900Smckusick struct mbuf *mp, *nam; 57041900Smckusick char *cp; 57141900Smckusick int cnt, xfer; 57238414Smckusick 57341900Smckusick /* 57441900Smckusick * Loop around until we get our own reply 57541900Smckusick */ 57641900Smckusick for (;;) { 57741900Smckusick /* 57841900Smckusick * Lock against other receivers so that I don't get stuck in 57941900Smckusick * sbwait() after someone else has received my reply for me. 58041900Smckusick * Also necessary for connection based protocols to avoid 58141900Smckusick * race conditions during a reconnect. 58241900Smckusick */ 58343351Smckusick nfs_solock(&nmp->nm_flag); 58441900Smckusick /* Already received, bye bye */ 58541900Smckusick if (myrep->r_mrep != NULL) { 58641900Smckusick nfs_sounlock(&nmp->nm_flag); 58741900Smckusick return (0); 58840117Smckusick } 58941900Smckusick /* 59041900Smckusick * Get the next Rpc reply off the socket 59141900Smckusick */ 59241900Smckusick if (error = nfs_receive(nmp->nm_so, &nam, &mp, myrep)) { 59341900Smckusick nfs_sounlock(&nmp->nm_flag); 59438414Smckusick 59541900Smckusick /* 59641900Smckusick * Ignore routing errors on connectionless protocols?? 59741900Smckusick */ 59841900Smckusick if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) { 59941900Smckusick nmp->nm_so->so_error = 0; 60041900Smckusick continue; 60141900Smckusick } 60241900Smckusick 60341900Smckusick /* 60441900Smckusick * Otherwise cleanup and return a fatal error. 
60541900Smckusick */ 60641900Smckusick if (myrep->r_flags & R_TIMING) { 60741900Smckusick myrep->r_flags &= ~R_TIMING; 60841900Smckusick nmp->nm_rtt = -1; 60941900Smckusick } 61041900Smckusick if (myrep->r_flags & R_SENT) { 61141900Smckusick myrep->r_flags &= ~R_SENT; 61241900Smckusick nmp->nm_sent--; 61341900Smckusick } 61441900Smckusick return (error); 61538414Smckusick } 61641900Smckusick 61741900Smckusick /* 61841900Smckusick * Get the xid and check that it is an rpc reply 61941900Smckusick */ 62041900Smckusick m = mp; 62145281Smckusick while (m && m->m_len == 0) 62245281Smckusick m = m->m_next; 62345281Smckusick if (m == NULL) { 62440117Smckusick nfsstats.rpcinvalid++; 62541900Smckusick m_freem(mp); 62641900Smckusick nfs_sounlock(&nmp->nm_flag); 62741900Smckusick continue; 62838414Smckusick } 62945281Smckusick bcopy(mtod(m, caddr_t), (caddr_t)&rxid, NFSX_UNSIGNED); 63041900Smckusick /* 63141900Smckusick * Loop through the request list to match up the reply 63241900Smckusick * Iff no match, just drop the datagram 63341900Smckusick */ 63441900Smckusick m = mp; 63541900Smckusick rep = nfsreqh.r_next; 63641900Smckusick while (rep != &nfsreqh) { 63745281Smckusick if (rep->r_mrep == NULL && rxid == rep->r_xid) { 63841900Smckusick /* Found it.. 
*/ 63941900Smckusick rep->r_mrep = m; 64041900Smckusick /* 64141900Smckusick * Update timing 64241900Smckusick */ 64341900Smckusick if (rep->r_flags & R_TIMING) { 64441900Smckusick nfs_updatetimer(rep->r_nmp); 64541900Smckusick rep->r_flags &= ~R_TIMING; 64641900Smckusick rep->r_nmp->nm_rtt = -1; 64741900Smckusick } 64841900Smckusick if (rep->r_flags & R_SENT) { 64941900Smckusick rep->r_flags &= ~R_SENT; 65041900Smckusick rep->r_nmp->nm_sent--; 65141900Smckusick } 65240117Smckusick break; 65338414Smckusick } 65441900Smckusick rep = rep->r_next; 65538414Smckusick } 65641900Smckusick nfs_sounlock(&nmp->nm_flag); 65741900Smckusick if (nam) 65841900Smckusick m_freem(nam); 65941900Smckusick /* 66041900Smckusick * If not matched to a request, drop it. 66141900Smckusick * If it's mine, get out. 66241900Smckusick */ 66341900Smckusick if (rep == &nfsreqh) { 66441900Smckusick nfsstats.rpcunexpected++; 66541900Smckusick m_freem(m); 66641900Smckusick } else if (rep == myrep) 66741900Smckusick return (0); 66838414Smckusick } 66938414Smckusick } 67038414Smckusick 67138414Smckusick /* 67238414Smckusick * nfs_request - goes something like this 67338414Smckusick * - fill in request struct 67438414Smckusick * - links it into list 67541900Smckusick * - calls nfs_send() for first transmit 67641900Smckusick * - calls nfs_receive() to get reply 67738414Smckusick * - break down rpc header and return with nfs reply pointed to 67838414Smckusick * by mrep or error 67938414Smckusick * nb: always frees up mreq mbuf list 68038414Smckusick */ 68143351Smckusick nfs_request(vp, mreq, xid, procnum, procp, tryhard, mp, mrp, mdp, dposp) 68238414Smckusick struct vnode *vp; 68338414Smckusick struct mbuf *mreq; 68438414Smckusick u_long xid; 68541900Smckusick int procnum; 68641900Smckusick struct proc *procp; 68743351Smckusick int tryhard; 68838414Smckusick struct mount *mp; 68938414Smckusick struct mbuf **mrp; 69038414Smckusick struct mbuf **mdp; 69138414Smckusick caddr_t *dposp; 69238414Smckusick { 
69338414Smckusick register struct mbuf *m, *mrep; 69438414Smckusick register struct nfsreq *rep; 695*48048Smckusick register u_long *tl; 69638414Smckusick register int len; 69741900Smckusick struct nfsmount *nmp; 69838414Smckusick struct mbuf *md; 69939344Smckusick struct nfsreq *reph; 70038414Smckusick caddr_t dpos; 70138414Smckusick char *cp2; 70238414Smckusick int t1; 70345281Smckusick int s, compressed; 70441900Smckusick int error = 0; 70538414Smckusick 70641900Smckusick nmp = VFSTONFS(mp); 70738414Smckusick m = mreq; 70838414Smckusick MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK); 70938414Smckusick rep->r_xid = xid; 71041900Smckusick rep->r_nmp = nmp; 71138414Smckusick rep->r_vp = vp; 71241900Smckusick rep->r_procp = procp; 71343351Smckusick if ((nmp->nm_flag & NFSMNT_SOFT) || 71443351Smckusick ((nmp->nm_flag & NFSMNT_SPONGY) && !tryhard)) 71541900Smckusick rep->r_retry = nmp->nm_retry; 71638414Smckusick else 71740117Smckusick rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */ 71840117Smckusick rep->r_flags = rep->r_rexmit = 0; 71941900Smckusick /* 72041900Smckusick * Three cases: 72141900Smckusick * - non-idempotent requests on SOCK_DGRAM use NFS_MINIDEMTIMEO 72241900Smckusick * - idempotent requests on SOCK_DGRAM use 0 72341900Smckusick * - Reliable transports, NFS_RELIABLETIMEO 72441900Smckusick * Timeouts are still done on reliable transports to ensure detection 72543351Smckusick * of excessive connection delay. 
72641900Smckusick */ 72741900Smckusick if (nmp->nm_sotype != SOCK_DGRAM) 72841900Smckusick rep->r_timerinit = -NFS_RELIABLETIMEO; 72941900Smckusick else if (nonidempotent[procnum]) 73041900Smckusick rep->r_timerinit = -NFS_MINIDEMTIMEO; 73141900Smckusick else 73241900Smckusick rep->r_timerinit = 0; 73341900Smckusick rep->r_timer = rep->r_timerinit; 73438414Smckusick rep->r_mrep = NULL; 73538414Smckusick len = 0; 73638414Smckusick while (m) { 73738414Smckusick len += m->m_len; 73838414Smckusick m = m->m_next; 73938414Smckusick } 74041900Smckusick mreq->m_pkthdr.len = len; 74141900Smckusick mreq->m_pkthdr.rcvif = (struct ifnet *)0; 74245281Smckusick compressed = 0; 74345281Smckusick m = mreq; 74445281Smckusick if ((nmp->nm_flag & NFSMNT_COMPRESS) && compressrequest[procnum]) { 74545281Smckusick mreq = nfs_compress(mreq); 74645281Smckusick if (mreq != m) { 74745281Smckusick len = mreq->m_pkthdr.len; 74845281Smckusick compressed++; 74945281Smckusick } 75045281Smckusick } 75141900Smckusick /* 75241900Smckusick * For non-atomic protocols, insert a Sun RPC Record Mark. 75341900Smckusick */ 75441900Smckusick if ((nmp->nm_soflags & PR_ATOMIC) == 0) { 75541900Smckusick M_PREPEND(mreq, sizeof(u_long), M_WAIT); 75641900Smckusick *mtod(mreq, u_long *) = htonl(0x80000000 | len); 75741900Smckusick } 75841900Smckusick rep->r_mreq = mreq; 75938414Smckusick 76040117Smckusick /* 76140117Smckusick * Do the client side RPC. 76240117Smckusick */ 76340117Smckusick nfsstats.rpcrequests++; 76441900Smckusick /* 76541900Smckusick * Chain request into list of outstanding requests. Be sure 76641900Smckusick * to put it LAST so timer finds oldest requests first. 
76741900Smckusick */ 76840117Smckusick s = splnet(); 76939344Smckusick reph = &nfsreqh; 77041900Smckusick reph->r_prev->r_next = rep; 77141900Smckusick rep->r_prev = reph->r_prev; 77239344Smckusick reph->r_prev = rep; 77339344Smckusick rep->r_next = reph; 77440117Smckusick /* 77540117Smckusick * If backing off another request or avoiding congestion, don't 77640117Smckusick * send this one now but let timer do it. If not timing a request, 77740117Smckusick * do it now. 77840117Smckusick */ 77941900Smckusick if (nmp->nm_sent <= 0 || nmp->nm_sotype != SOCK_DGRAM || 78041900Smckusick (nmp->nm_currexmit == 0 && nmp->nm_sent < nmp->nm_window)) { 78141900Smckusick nmp->nm_sent++; 78241900Smckusick rep->r_flags |= R_SENT; 78341900Smckusick if (nmp->nm_rtt == -1) { 78441900Smckusick nmp->nm_rtt = 0; 78541900Smckusick rep->r_flags |= R_TIMING; 78641900Smckusick } 78740117Smckusick splx(s); 78841900Smckusick m = m_copym(mreq, 0, M_COPYALL, M_WAIT); 78941900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) 79043351Smckusick nfs_solock(&nmp->nm_flag); 79141900Smckusick error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep); 79241900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) 79341900Smckusick nfs_sounlock(&nmp->nm_flag); 79441900Smckusick if (error && NFSIGNORE_SOERROR(nmp->nm_soflags, error)) 79541900Smckusick nmp->nm_so->so_error = error = 0; 79641900Smckusick } else 79741900Smckusick splx(s); 79838414Smckusick 79938414Smckusick /* 80040117Smckusick * Wait for the reply from our send or the timer's. 80140117Smckusick */ 80241900Smckusick if (!error) 80341900Smckusick error = nfs_reply(nmp, rep); 80438414Smckusick 80540117Smckusick /* 80640117Smckusick * RPC done, unlink the request. 80740117Smckusick */ 80838414Smckusick s = splnet(); 80938414Smckusick rep->r_prev->r_next = rep->r_next; 81039344Smckusick rep->r_next->r_prev = rep->r_prev; 81138414Smckusick splx(s); 81241900Smckusick 81341900Smckusick /* 81441900Smckusick * If there was a successful reply and a tprintf msg. 
81541900Smckusick * tprintf a response. 81641900Smckusick */ 81747737Skarels if (!error && (rep->r_flags & R_TPRINTFMSG)) 81847737Skarels nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname, 81947737Skarels "is alive again"); 82038414Smckusick m_freem(rep->r_mreq); 82145281Smckusick mrep = rep->r_mrep; 82238414Smckusick FREE((caddr_t)rep, M_NFSREQ); 82338414Smckusick if (error) 82438414Smckusick return (error); 82538414Smckusick 82645281Smckusick if (compressed) 82745281Smckusick mrep = nfs_uncompress(mrep); 82845281Smckusick md = mrep; 82938414Smckusick /* 83038414Smckusick * break down the rpc header and check if ok 83138414Smckusick */ 83238414Smckusick dpos = mtod(md, caddr_t); 833*48048Smckusick nfsm_disect(tl, u_long *, 5*NFSX_UNSIGNED); 834*48048Smckusick tl += 2; 835*48048Smckusick if (*tl++ == rpc_msgdenied) { 836*48048Smckusick if (*tl == rpc_mismatch) 83738414Smckusick error = EOPNOTSUPP; 83838414Smckusick else 83938414Smckusick error = EACCES; 84038414Smckusick m_freem(mrep); 84138414Smckusick return (error); 84238414Smckusick } 84338414Smckusick /* 84438414Smckusick * skip over the auth_verf, someday we may want to cache auth_short's 84538414Smckusick * for nfs_reqhead(), but for now just dump it 84638414Smckusick */ 847*48048Smckusick if (*++tl != 0) { 848*48048Smckusick len = nfsm_rndup(fxdr_unsigned(long, *tl)); 84938414Smckusick nfsm_adv(len); 85038414Smckusick } 851*48048Smckusick nfsm_disect(tl, u_long *, NFSX_UNSIGNED); 85238414Smckusick /* 0 == ok */ 853*48048Smckusick if (*tl == 0) { 854*48048Smckusick nfsm_disect(tl, u_long *, NFSX_UNSIGNED); 855*48048Smckusick if (*tl != 0) { 856*48048Smckusick error = fxdr_unsigned(int, *tl); 85738414Smckusick m_freem(mrep); 85838414Smckusick return (error); 85938414Smckusick } 86038414Smckusick *mrp = mrep; 86138414Smckusick *mdp = md; 86238414Smckusick *dposp = dpos; 86338414Smckusick return (0); 86438414Smckusick } 86538414Smckusick m_freem(mrep); 86638414Smckusick return (EPROTONOSUPPORT); 
86738414Smckusick nfsmout: 86838414Smckusick return (error); 86938414Smckusick } 87038414Smckusick 87138414Smckusick /* 87238414Smckusick * Get a request for the server main loop 87338414Smckusick * - receive a request via. nfs_soreceive() 87438414Smckusick * - verify it 87538414Smckusick * - fill in the cred struct. 87638414Smckusick */ 87742243Smckusick nfs_getreq(so, prog, vers, maxproc, nam, mrp, mdp, dposp, retxid, procnum, cr, 87845281Smckusick msk, mtch, wascomp) 87938414Smckusick struct socket *so; 88038414Smckusick u_long prog; 88138414Smckusick u_long vers; 88238414Smckusick int maxproc; 88338414Smckusick struct mbuf **nam; 88438414Smckusick struct mbuf **mrp; 88538414Smckusick struct mbuf **mdp; 88638414Smckusick caddr_t *dposp; 88738414Smckusick u_long *retxid; 88842243Smckusick u_long *procnum; 88938414Smckusick register struct ucred *cr; 89041900Smckusick struct mbuf *msk, *mtch; 89145281Smckusick int *wascomp; 89238414Smckusick { 89338414Smckusick register int i; 894*48048Smckusick register u_long *tl; 89539494Smckusick register long t1; 89639494Smckusick caddr_t dpos, cp2; 89739494Smckusick int error = 0; 89839494Smckusick struct mbuf *mrep, *md; 89939494Smckusick int len; 90038414Smckusick 90141900Smckusick if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 90241900Smckusick error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0); 90341900Smckusick } else { 90441900Smckusick mrep = (struct mbuf *)0; 90541900Smckusick do { 90641900Smckusick if (mrep) { 90741900Smckusick m_freem(*nam); 90841900Smckusick m_freem(mrep); 90941900Smckusick } 91041900Smckusick error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0); 91141900Smckusick } while (!error && nfs_badnam(*nam, msk, mtch)); 91241900Smckusick } 91341900Smckusick if (error) 91438414Smckusick return (error); 91538414Smckusick md = mrep; 91645281Smckusick mrep = nfs_uncompress(mrep); 91745281Smckusick if (mrep != md) { 91845281Smckusick *wascomp = 1; 91945281Smckusick md = mrep; 92045281Smckusick } else 
92145281Smckusick *wascomp = 0; 92238414Smckusick dpos = mtod(mrep, caddr_t); 923*48048Smckusick nfsm_disect(tl, u_long *, 10*NFSX_UNSIGNED); 924*48048Smckusick *retxid = *tl++; 925*48048Smckusick if (*tl++ != rpc_call) { 92638414Smckusick m_freem(mrep); 92738414Smckusick return (ERPCMISMATCH); 92838414Smckusick } 929*48048Smckusick if (*tl++ != rpc_vers) { 93038414Smckusick m_freem(mrep); 93138414Smckusick return (ERPCMISMATCH); 93238414Smckusick } 933*48048Smckusick if (*tl++ != prog) { 93438414Smckusick m_freem(mrep); 93538414Smckusick return (EPROGUNAVAIL); 93638414Smckusick } 937*48048Smckusick if (*tl++ != vers) { 93838414Smckusick m_freem(mrep); 93938414Smckusick return (EPROGMISMATCH); 94038414Smckusick } 941*48048Smckusick *procnum = fxdr_unsigned(u_long, *tl++); 94242243Smckusick if (*procnum == NFSPROC_NULL) { 94338414Smckusick *mrp = mrep; 94438414Smckusick return (0); 94538414Smckusick } 946*48048Smckusick if (*procnum > maxproc || *tl++ != rpc_auth_unix) { 94738414Smckusick m_freem(mrep); 94838414Smckusick return (EPROCUNAVAIL); 94938414Smckusick } 950*48048Smckusick len = fxdr_unsigned(int, *tl++); 95141900Smckusick if (len < 0 || len > RPCAUTH_MAXSIZ) { 95241900Smckusick m_freem(mrep); 95341900Smckusick return (EBADRPC); 95441900Smckusick } 955*48048Smckusick len = fxdr_unsigned(int, *++tl); 95641900Smckusick if (len < 0 || len > NFS_MAXNAMLEN) { 95741900Smckusick m_freem(mrep); 95841900Smckusick return (EBADRPC); 95941900Smckusick } 96039494Smckusick nfsm_adv(nfsm_rndup(len)); 961*48048Smckusick nfsm_disect(tl, u_long *, 3*NFSX_UNSIGNED); 962*48048Smckusick cr->cr_uid = fxdr_unsigned(uid_t, *tl++); 963*48048Smckusick cr->cr_gid = fxdr_unsigned(gid_t, *tl++); 964*48048Smckusick len = fxdr_unsigned(int, *tl); 96541900Smckusick if (len < 0 || len > RPCAUTH_UNIXGIDS) { 96638414Smckusick m_freem(mrep); 96738414Smckusick return (EBADRPC); 96838414Smckusick } 969*48048Smckusick nfsm_disect(tl, u_long *, (len + 2)*NFSX_UNSIGNED); 97039494Smckusick for (i = 
1; i <= len; i++) 97141900Smckusick if (i < NGROUPS) 972*48048Smckusick cr->cr_groups[i] = fxdr_unsigned(gid_t, *tl++); 97341900Smckusick else 974*48048Smckusick tl++; 97541900Smckusick cr->cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1); 97638414Smckusick /* 97738414Smckusick * Do we have any use for the verifier. 97838414Smckusick * According to the "Remote Procedure Call Protocol Spec." it 97938414Smckusick * should be AUTH_NULL, but some clients make it AUTH_UNIX? 98038414Smckusick * For now, just skip over it 98138414Smckusick */ 982*48048Smckusick len = fxdr_unsigned(int, *++tl); 98341900Smckusick if (len < 0 || len > RPCAUTH_MAXSIZ) { 98441900Smckusick m_freem(mrep); 98541900Smckusick return (EBADRPC); 98641900Smckusick } 98739494Smckusick if (len > 0) 98839494Smckusick nfsm_adv(nfsm_rndup(len)); 98938414Smckusick *mrp = mrep; 99038414Smckusick *mdp = md; 99138414Smckusick *dposp = dpos; 99238414Smckusick return (0); 99338414Smckusick nfsmout: 99438414Smckusick return (error); 99538414Smckusick } 99638414Smckusick 99738414Smckusick /* 99838414Smckusick * Generate the rpc reply header 99938414Smckusick * siz arg. 
is used to decide if adding a cluster is worthwhile 100038414Smckusick */ 100138414Smckusick nfs_rephead(siz, retxid, err, mrq, mbp, bposp) 100238414Smckusick int siz; 100338414Smckusick u_long retxid; 100438414Smckusick int err; 100538414Smckusick struct mbuf **mrq; 100638414Smckusick struct mbuf **mbp; 100738414Smckusick caddr_t *bposp; 100838414Smckusick { 1009*48048Smckusick register u_long *tl; 101039494Smckusick register long t1; 101139494Smckusick caddr_t bpos; 101239494Smckusick struct mbuf *mreq, *mb, *mb2; 101338414Smckusick 101438414Smckusick NFSMGETHDR(mreq); 101538414Smckusick mb = mreq; 101638414Smckusick if ((siz+RPC_REPLYSIZ) > MHLEN) 101741900Smckusick MCLGET(mreq, M_WAIT); 1018*48048Smckusick tl = mtod(mreq, u_long *); 101938414Smckusick mreq->m_len = 6*NFSX_UNSIGNED; 1020*48048Smckusick bpos = ((caddr_t)tl)+mreq->m_len; 1021*48048Smckusick *tl++ = retxid; 1022*48048Smckusick *tl++ = rpc_reply; 102338414Smckusick if (err == ERPCMISMATCH) { 1024*48048Smckusick *tl++ = rpc_msgdenied; 1025*48048Smckusick *tl++ = rpc_mismatch; 1026*48048Smckusick *tl++ = txdr_unsigned(2); 1027*48048Smckusick *tl = txdr_unsigned(2); 102838414Smckusick } else { 1029*48048Smckusick *tl++ = rpc_msgaccepted; 1030*48048Smckusick *tl++ = 0; 1031*48048Smckusick *tl++ = 0; 103238414Smckusick switch (err) { 103338414Smckusick case EPROGUNAVAIL: 1034*48048Smckusick *tl = txdr_unsigned(RPC_PROGUNAVAIL); 103538414Smckusick break; 103638414Smckusick case EPROGMISMATCH: 1037*48048Smckusick *tl = txdr_unsigned(RPC_PROGMISMATCH); 1038*48048Smckusick nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED); 1039*48048Smckusick *tl++ = txdr_unsigned(2); 1040*48048Smckusick *tl = txdr_unsigned(2); /* someday 3 */ 104138414Smckusick break; 104238414Smckusick case EPROCUNAVAIL: 1043*48048Smckusick *tl = txdr_unsigned(RPC_PROCUNAVAIL); 104438414Smckusick break; 104538414Smckusick default: 1046*48048Smckusick *tl = 0; 104738414Smckusick if (err != VNOVAL) { 1048*48048Smckusick nfsm_build(tl, u_long *, 
NFSX_UNSIGNED); 1049*48048Smckusick *tl = txdr_unsigned(err); 105038414Smckusick } 105138414Smckusick break; 105238414Smckusick }; 105338414Smckusick } 105438414Smckusick *mrq = mreq; 105538414Smckusick *mbp = mb; 105638414Smckusick *bposp = bpos; 105738414Smckusick if (err != 0 && err != VNOVAL) 105838414Smckusick nfsstats.srvrpc_errs++; 105938414Smckusick return (0); 106038414Smckusick } 106138414Smckusick 106238414Smckusick /* 106338414Smckusick * Nfs timer routine 106438414Smckusick * Scan the nfsreq list and retranmit any requests that have timed out 106538414Smckusick * To avoid retransmission attempts on STREAM sockets (in the future) make 106640117Smckusick * sure to set the r_retry field to 0 (implies nm_retry == 0). 106738414Smckusick */ 106838414Smckusick nfs_timer() 106938414Smckusick { 107038414Smckusick register struct nfsreq *rep; 107138414Smckusick register struct mbuf *m; 107238414Smckusick register struct socket *so; 107341900Smckusick register struct nfsmount *nmp; 107440117Smckusick int s, error; 107538414Smckusick 107638414Smckusick s = splnet(); 107741900Smckusick for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) { 107841900Smckusick nmp = rep->r_nmp; 107941900Smckusick if (rep->r_mrep || (rep->r_flags & R_SOFTTERM) || 108041900Smckusick (so = nmp->nm_so) == NULL) 108141900Smckusick continue; 108241900Smckusick if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp)) { 108341900Smckusick rep->r_flags |= R_SOFTTERM; 108441900Smckusick continue; 108541900Smckusick } 108640117Smckusick if (rep->r_flags & R_TIMING) /* update rtt in mount */ 108741900Smckusick nmp->nm_rtt++; 108841900Smckusick /* If not timed out */ 108941900Smckusick if (++rep->r_timer < nmp->nm_rto) 109041900Smckusick continue; 109140117Smckusick /* Do backoff and save new timeout in mount */ 109240117Smckusick if (rep->r_flags & R_TIMING) { 109341900Smckusick nfs_backofftimer(nmp); 109440117Smckusick rep->r_flags &= ~R_TIMING; 109541900Smckusick nmp->nm_rtt = 
-1; 109640117Smckusick } 109740117Smckusick if (rep->r_flags & R_SENT) { 109840117Smckusick rep->r_flags &= ~R_SENT; 109941900Smckusick nmp->nm_sent--; 110040117Smckusick } 110141900Smckusick 110241900Smckusick /* 110341900Smckusick * Check for too many retries on soft mount. 110441900Smckusick * nb: For hard mounts, r_retry == NFS_MAXREXMIT+1 110541900Smckusick */ 110641900Smckusick if (++rep->r_rexmit > NFS_MAXREXMIT) 110740117Smckusick rep->r_rexmit = NFS_MAXREXMIT; 110840117Smckusick 110941900Smckusick /* 111041900Smckusick * Check for server not responding 111141900Smckusick */ 111241900Smckusick if ((rep->r_flags & R_TPRINTFMSG) == 0 && 111343351Smckusick rep->r_rexmit > NFS_FISHY) { 111447737Skarels nfs_msg(rep->r_procp, 111547737Skarels nmp->nm_mountp->mnt_stat.f_mntfromname, 111647737Skarels "not responding"); 111741900Smckusick rep->r_flags |= R_TPRINTFMSG; 111841900Smckusick } 111943351Smckusick if (rep->r_rexmit >= rep->r_retry) { /* too many */ 112041900Smckusick nfsstats.rpctimeouts++; 112141900Smckusick rep->r_flags |= R_SOFTTERM; 112241900Smckusick continue; 112341900Smckusick } 112443351Smckusick if (nmp->nm_sotype != SOCK_DGRAM) 112543351Smckusick continue; 112641900Smckusick 112741900Smckusick /* 112841900Smckusick * If there is enough space and the window allows.. 
112941900Smckusick * Resend it 113041900Smckusick */ 113141900Smckusick if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len && 113241900Smckusick nmp->nm_sent < nmp->nm_window && 113341900Smckusick (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){ 113441900Smckusick nfsstats.rpcretries++; 113541900Smckusick if ((nmp->nm_flag & NFSMNT_NOCONN) == 0) 113641900Smckusick error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, 113741900Smckusick (caddr_t)0, (struct mbuf *)0, (struct mbuf *)0); 113841900Smckusick else 113941900Smckusick error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, 114041900Smckusick nmp->nm_nam, (struct mbuf *)0, (struct mbuf *)0); 114141900Smckusick if (error) { 114241900Smckusick if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) 114341900Smckusick so->so_error = 0; 114441900Smckusick } else { 114541900Smckusick /* 114641900Smckusick * We need to time the request even though we 114741900Smckusick * are retransmitting. 114841900Smckusick */ 114941900Smckusick nmp->nm_rtt = 0; 115041900Smckusick nmp->nm_sent++; 115141900Smckusick rep->r_flags |= (R_SENT|R_TIMING); 115241900Smckusick rep->r_timer = rep->r_timerinit; 115341900Smckusick } 115441900Smckusick } 115540117Smckusick } 115640117Smckusick splx(s); 115740117Smckusick timeout(nfs_timer, (caddr_t)0, hz/NFS_HZ); 115840117Smckusick } 115940117Smckusick 116040117Smckusick /* 116140117Smckusick * NFS timer update and backoff. The "Jacobson/Karels/Karn" scheme is 116240117Smckusick * used here. The timer state is held in the nfsmount structure and 116340117Smckusick * a single request is used to clock the response. When successful 116440117Smckusick * the rtt smoothing in nfs_updatetimer is used, when failed the backoff 116540117Smckusick * is done by nfs_backofftimer. We also log failure messages in these 116640117Smckusick * routines. 
*
 * Congestion variables are held in the nfshost structure which
 * is referenced by nfsmounts and shared per-server. This separation
 * makes it possible to do per-mount timing which allows varying disk
 * access times to be dealt with, while preserving a network oriented
 * congestion control scheme.
 *
 * The windowing implements the Jacobson/Karels slowstart algorithm
 * with adjusted scaling factors. We start with one request, then send
 * 4 more after each success until the ssthresh limit is reached, then
 * we increment at a rate proportional to the window. On failure, we
 * remember 3/4 the current window and clamp the send limit to 1. Note
 * ICMP source quench is not reflected in so->so_error so we ignore that
 * for now.
 *
 * NFS behaves much more like a transport protocol with these changes,
 * shedding the teenage pedal-to-the-metal tendencies of "other"
 * implementations.
 *
 * Timers and congestion avoidance by Tom Talpey, Open Software Foundation.
 */

/*
 * The TCP algorithm was not forgiving enough. Because the NFS server
 * responds only after performing lookups/diskio/etc, we have to be
 * more prepared to accept a spiky variance.
The TCP algorithm is: 119341900Smckusick * TCP_RTO(nmp) ((((nmp)->nm_srtt >> 2) + (nmp)->nm_rttvar) >> 1) 119440117Smckusick */ 119541900Smckusick #define NFS_RTO(nmp) (((nmp)->nm_srtt >> 3) + (nmp)->nm_rttvar) 119640117Smckusick 119741900Smckusick nfs_updatetimer(nmp) 119841900Smckusick register struct nfsmount *nmp; 119940117Smckusick { 120040117Smckusick 120140117Smckusick /* If retransmitted, clear and return */ 120241900Smckusick if (nmp->nm_rexmit || nmp->nm_currexmit) { 120341900Smckusick nmp->nm_rexmit = nmp->nm_currexmit = 0; 120440117Smckusick return; 120540117Smckusick } 120640117Smckusick /* If have a measurement, do smoothing */ 120741900Smckusick if (nmp->nm_srtt) { 120840117Smckusick register short delta; 120941900Smckusick delta = nmp->nm_rtt - (nmp->nm_srtt >> 3); 121041900Smckusick if ((nmp->nm_srtt += delta) <= 0) 121141900Smckusick nmp->nm_srtt = 1; 121240117Smckusick if (delta < 0) 121340117Smckusick delta = -delta; 121441900Smckusick delta -= (nmp->nm_rttvar >> 2); 121541900Smckusick if ((nmp->nm_rttvar += delta) <= 0) 121641900Smckusick nmp->nm_rttvar = 1; 121740117Smckusick /* Else initialize */ 121840117Smckusick } else { 121941900Smckusick nmp->nm_rttvar = nmp->nm_rtt << 1; 122041900Smckusick if (nmp->nm_rttvar == 0) nmp->nm_rttvar = 2; 122141900Smckusick nmp->nm_srtt = nmp->nm_rttvar << 2; 122240117Smckusick } 122340117Smckusick /* Compute new Retransmission TimeOut and clip */ 122441900Smckusick nmp->nm_rto = NFS_RTO(nmp); 122541900Smckusick if (nmp->nm_rto < NFS_MINTIMEO) 122641900Smckusick nmp->nm_rto = NFS_MINTIMEO; 122741900Smckusick else if (nmp->nm_rto > NFS_MAXTIMEO) 122841900Smckusick nmp->nm_rto = NFS_MAXTIMEO; 122940117Smckusick 123040117Smckusick /* Update window estimate */ 123141900Smckusick if (nmp->nm_window < nmp->nm_ssthresh) /* quickly */ 123241900Smckusick nmp->nm_window += 4; 123340117Smckusick else { /* slowly */ 123441900Smckusick register long incr = ++nmp->nm_winext; 123541900Smckusick incr = (incr * incr) / 
nmp->nm_window; 123640117Smckusick if (incr > 0) { 123741900Smckusick nmp->nm_winext = 0; 123841900Smckusick ++nmp->nm_window; 123940117Smckusick } 124040117Smckusick } 124141900Smckusick if (nmp->nm_window > NFS_MAXWINDOW) 124241900Smckusick nmp->nm_window = NFS_MAXWINDOW; 124340117Smckusick } 124440117Smckusick 124541900Smckusick nfs_backofftimer(nmp) 124641900Smckusick register struct nfsmount *nmp; 124740117Smckusick { 124840117Smckusick register unsigned long newrto; 124940117Smckusick 125040117Smckusick /* Clip shift count */ 125141900Smckusick if (++nmp->nm_rexmit > 8 * sizeof nmp->nm_rto) 125241900Smckusick nmp->nm_rexmit = 8 * sizeof nmp->nm_rto; 125340117Smckusick /* Back off RTO exponentially */ 125441900Smckusick newrto = NFS_RTO(nmp); 125541900Smckusick newrto <<= (nmp->nm_rexmit - 1); 125640117Smckusick if (newrto == 0 || newrto > NFS_MAXTIMEO) 125740117Smckusick newrto = NFS_MAXTIMEO; 125841900Smckusick nmp->nm_rto = newrto; 125940117Smckusick 126040117Smckusick /* If too many retries, message, assume a bogus RTT and re-measure */ 126141900Smckusick if (nmp->nm_currexmit < nmp->nm_rexmit) { 126241900Smckusick nmp->nm_currexmit = nmp->nm_rexmit; 126341900Smckusick if (nmp->nm_currexmit >= nfsrexmtthresh) { 126441900Smckusick if (nmp->nm_currexmit == nfsrexmtthresh) { 126541900Smckusick nmp->nm_rttvar += (nmp->nm_srtt >> 2); 126641900Smckusick nmp->nm_srtt = 0; 126738414Smckusick } 126838414Smckusick } 126938414Smckusick } 127040117Smckusick /* Close down window but remember this point (3/4 current) for later */ 127141900Smckusick nmp->nm_ssthresh = ((nmp->nm_window << 1) + nmp->nm_window) >> 2; 127241900Smckusick nmp->nm_window = 1; 127341900Smckusick nmp->nm_winext = 0; 127438414Smckusick } 127538414Smckusick 127638414Smckusick /* 127741900Smckusick * Test for a termination signal pending on procp. 127841900Smckusick * This is used for NFSMNT_INT mounts. 
127938414Smckusick */ 128041900Smckusick nfs_sigintr(p) 128141900Smckusick register struct proc *p; 128241900Smckusick { 128341900Smckusick if (p && p->p_sig && (((p->p_sig &~ p->p_sigmask) &~ p->p_sigignore) & 128441900Smckusick NFSINT_SIGMASK)) 128541900Smckusick return (1); 128641900Smckusick else 128741900Smckusick return (0); 128841900Smckusick } 128940117Smckusick 129047737Skarels nfs_msg(p, server, msg) 129147737Skarels struct proc *p; 129247737Skarels char *server, *msg; 129347737Skarels { 129447737Skarels tpr_t tpr; 129547737Skarels 129647737Skarels if (p) 129747737Skarels tpr = tprintf_open(p); 129847737Skarels else 129947737Skarels tpr = NULL; 130047737Skarels tprintf(tpr, "nfs server %s: %s\n", server, msg); 130147737Skarels tprintf_close(tpr); 130247737Skarels } 130347737Skarels 130441900Smckusick /* 130541900Smckusick * Lock a socket against others. 130641900Smckusick * Necessary for STREAM sockets to ensure you get an entire rpc request/reply 130741900Smckusick * and also to avoid race conditions between the processes with nfs requests 130841900Smckusick * in progress when a reconnect is necessary. 130941900Smckusick */ 131043351Smckusick nfs_solock(flagp) 131143351Smckusick register int *flagp; 131238414Smckusick { 131340117Smckusick 131441900Smckusick while (*flagp & NFSMNT_SCKLOCK) { 131541900Smckusick *flagp |= NFSMNT_WANTSCK; 131643351Smckusick (void) tsleep((caddr_t)flagp, PZERO-1, "nfsolck", 0); 131740117Smckusick } 131841900Smckusick *flagp |= NFSMNT_SCKLOCK; 131941900Smckusick } 132040117Smckusick 132141900Smckusick /* 132241900Smckusick * Unlock the stream socket for others. 
132341900Smckusick */ 132441900Smckusick nfs_sounlock(flagp) 132543351Smckusick register int *flagp; 132641900Smckusick { 132741900Smckusick 132841900Smckusick if ((*flagp & NFSMNT_SCKLOCK) == 0) 132941900Smckusick panic("nfs sounlock"); 133041900Smckusick *flagp &= ~NFSMNT_SCKLOCK; 133141900Smckusick if (*flagp & NFSMNT_WANTSCK) { 133241900Smckusick *flagp &= ~NFSMNT_WANTSCK; 133341900Smckusick wakeup((caddr_t)flagp); 133440117Smckusick } 133538414Smckusick } 133641900Smckusick 133741900Smckusick /* 133841900Smckusick * This function compares two net addresses by family and returns TRUE 133941900Smckusick * if they are the same. 134041900Smckusick * If there is any doubt, return FALSE. 134141900Smckusick */ 134241900Smckusick nfs_netaddr_match(nam1, nam2) 134341900Smckusick struct mbuf *nam1, *nam2; 134441900Smckusick { 134541900Smckusick register struct sockaddr *saddr1, *saddr2; 134641900Smckusick 134741900Smckusick saddr1 = mtod(nam1, struct sockaddr *); 134841900Smckusick saddr2 = mtod(nam2, struct sockaddr *); 134941900Smckusick if (saddr1->sa_family != saddr2->sa_family) 135041900Smckusick return (0); 135141900Smckusick 135241900Smckusick /* 135341900Smckusick * Must do each address family separately since unused fields 135441900Smckusick * are undefined values and not always zeroed. 135541900Smckusick */ 135641900Smckusick switch (saddr1->sa_family) { 135741900Smckusick case AF_INET: 135841900Smckusick if (((struct sockaddr_in *)saddr1)->sin_addr.s_addr == 135941900Smckusick ((struct sockaddr_in *)saddr2)->sin_addr.s_addr) 136041900Smckusick return (1); 136141900Smckusick break; 136241900Smckusick default: 136341900Smckusick break; 136441900Smckusick }; 136541900Smckusick return (0); 136641900Smckusick } 136741900Smckusick 136841900Smckusick /* 136941900Smckusick * Check the hostname fields for nfsd's mask and match fields. 
137041900Smckusick * By address family: 137141900Smckusick * - Bitwise AND the mask with the host address field 137241900Smckusick * - Compare for == with match 137341900Smckusick * return TRUE if not equal 137441900Smckusick */ 137541900Smckusick nfs_badnam(nam, msk, mtch) 137641900Smckusick register struct mbuf *nam, *msk, *mtch; 137741900Smckusick { 137841900Smckusick switch (mtod(nam, struct sockaddr *)->sa_family) { 137941900Smckusick case AF_INET: 138041900Smckusick return ((mtod(nam, struct sockaddr_in *)->sin_addr.s_addr & 138141900Smckusick mtod(msk, struct sockaddr_in *)->sin_addr.s_addr) != 138241900Smckusick mtod(mtch, struct sockaddr_in *)->sin_addr.s_addr); 138341900Smckusick default: 138441900Smckusick printf("nfs_badmatch, unknown sa_family\n"); 138541900Smckusick return (0); 138641900Smckusick }; 138741900Smckusick } 1388