138414Smckusick /* 238414Smckusick * Copyright (c) 1989 The Regents of the University of California. 338414Smckusick * All rights reserved. 438414Smckusick * 538414Smckusick * This code is derived from software contributed to Berkeley by 638414Smckusick * Rick Macklem at The University of Guelph. 738414Smckusick * 8*44511Sbostic * %sccs.include.redist.c% 938414Smckusick * 10*44511Sbostic * @(#)nfs_socket.c 7.18 (Berkeley) 06/28/90 1138414Smckusick */ 1238414Smckusick 1338414Smckusick /* 1441900Smckusick * Socket operations for use by nfs 1538414Smckusick */ 1638414Smckusick 1738414Smckusick #include "types.h" 1838414Smckusick #include "param.h" 1938414Smckusick #include "uio.h" 2038414Smckusick #include "user.h" 2140117Smckusick #include "proc.h" 2240117Smckusick #include "signal.h" 2338414Smckusick #include "mount.h" 2438414Smckusick #include "kernel.h" 2538414Smckusick #include "malloc.h" 2638414Smckusick #include "mbuf.h" 2738414Smckusick #include "vnode.h" 2838414Smckusick #include "domain.h" 2938414Smckusick #include "protosw.h" 3038414Smckusick #include "socket.h" 3138414Smckusick #include "socketvar.h" 3242877Smckusick #include "../netinet/in.h" 3342877Smckusick #include "../netinet/tcp.h" 3438414Smckusick #include "rpcv2.h" 3538414Smckusick #include "nfsv2.h" 3638414Smckusick #include "nfs.h" 3738414Smckusick #include "xdr_subs.h" 3838414Smckusick #include "nfsm_subs.h" 3938414Smckusick #include "nfsmount.h" 4038414Smckusick 4140117Smckusick #include "syslog.h" 4240117Smckusick 4338414Smckusick #define TRUE 1 4443351Smckusick #define FALSE 0 4538414Smckusick 4640117Smckusick /* 4738414Smckusick * External data, mostly RPC constants in XDR form 4838414Smckusick */ 4938414Smckusick extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix, 5038414Smckusick rpc_msgaccepted, rpc_call; 5138414Smckusick extern u_long nfs_prog, nfs_vers; 5243351Smckusick /* Maybe these should be bits in a u_long ?? */ 5341900Smckusick extern int nonidempotent[NFS_NPROCS]; 5441900Smckusick int nfs_sbwait(); 5541900Smckusick void nfs_disconnect(); 5641900Smckusick 5738414Smckusick int nfsrv_null(), 5838414Smckusick nfsrv_getattr(), 5938414Smckusick nfsrv_setattr(), 6038414Smckusick nfsrv_lookup(), 6138414Smckusick nfsrv_readlink(), 6238414Smckusick nfsrv_read(), 6338414Smckusick nfsrv_write(), 6438414Smckusick nfsrv_create(), 6538414Smckusick nfsrv_remove(), 6638414Smckusick nfsrv_rename(), 6738414Smckusick nfsrv_link(), 6838414Smckusick nfsrv_symlink(), 6938414Smckusick nfsrv_mkdir(), 7038414Smckusick nfsrv_rmdir(), 7138414Smckusick nfsrv_readdir(), 7238414Smckusick nfsrv_statfs(), 7338414Smckusick nfsrv_noop(); 7438414Smckusick 7538414Smckusick int (*nfsrv_procs[NFS_NPROCS])() = { 7638414Smckusick nfsrv_null, 7738414Smckusick nfsrv_getattr, 7838414Smckusick nfsrv_setattr, 7938414Smckusick nfsrv_noop, 8038414Smckusick nfsrv_lookup, 8138414Smckusick nfsrv_readlink, 8238414Smckusick nfsrv_read, 8338414Smckusick nfsrv_noop, 8438414Smckusick nfsrv_write, 8538414Smckusick nfsrv_create, 8638414Smckusick nfsrv_remove, 8738414Smckusick nfsrv_rename, 8838414Smckusick nfsrv_link, 8938414Smckusick nfsrv_symlink, 9038414Smckusick nfsrv_mkdir, 9138414Smckusick nfsrv_rmdir, 9238414Smckusick nfsrv_readdir, 9338414Smckusick nfsrv_statfs, 9438414Smckusick }; 9538414Smckusick 9640117Smckusick struct nfsreq nfsreqh; 9740117Smckusick int nfsrexmtthresh = NFS_FISHY; 9841900Smckusick int nfs_tcpnodelay = 1; 9938414Smckusick 10038414Smckusick /* 10141900Smckusick * Initialize sockets and congestion for a new NFS connection. 10240117Smckusick * We do not free the sockaddr if error. 10338414Smckusick */ 10441900Smckusick nfs_connect(nmp) 10540117Smckusick register struct nfsmount *nmp; 10640117Smckusick { 10741900Smckusick register struct socket *so; 10841900Smckusick int s, error; 10940117Smckusick struct mbuf *m; 11040117Smckusick 11141900Smckusick nmp->nm_so = (struct socket *)0; 11241900Smckusick if (error = socreate(mtod(nmp->nm_nam, struct sockaddr *)->sa_family, 11341900Smckusick &nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto)) 11440117Smckusick goto bad; 11541900Smckusick so = nmp->nm_so; 11641900Smckusick nmp->nm_soflags = so->so_proto->pr_flags; 11740117Smckusick 11841900Smckusick /* 11941900Smckusick * Protocols that do not require connections may be optionally left 12041900Smckusick * unconnected for servers that reply from a port other than NFS_PORT. 12141900Smckusick */ 12241900Smckusick if (nmp->nm_flag & NFSMNT_NOCONN) { 12341900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) { 12441900Smckusick error = ENOTCONN; 12540117Smckusick goto bad; 12640117Smckusick } 12741900Smckusick } else { 12841900Smckusick if (error = soconnect(so, nmp->nm_nam)) 12940117Smckusick goto bad; 13041900Smckusick 13141900Smckusick /* 13241900Smckusick * Wait for the connection to complete. Cribbed from the 13341900Smckusick * connect system call but with the wait at negative prio. 13441900Smckusick */ 13541900Smckusick s = splnet(); 13641900Smckusick while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) 13743351Smckusick (void) tsleep((caddr_t)&so->so_timeo, PSOCK, "nfscon", 0); 13841900Smckusick splx(s); 13941900Smckusick if (so->so_error) { 14041900Smckusick error = so->so_error; 14141900Smckusick goto bad; 14241900Smckusick } 14340117Smckusick } 14441900Smckusick if (nmp->nm_sotype == SOCK_DGRAM) { 14543351Smckusick if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) { 14641900Smckusick so->so_rcv.sb_timeo = (5 * hz); 14741900Smckusick so->so_snd.sb_timeo = (5 * hz); 14841900Smckusick } else { 14941900Smckusick so->so_rcv.sb_timeo = 0; 15041900Smckusick so->so_snd.sb_timeo = 0; 15141900Smckusick } 15241900Smckusick if (error = soreserve(so, nmp->nm_wsize + NFS_MAXPKTHDR, 15343351Smckusick nmp->nm_rsize + NFS_MAXPKTHDR)) 15441900Smckusick goto bad; 15541900Smckusick } else { 15643351Smckusick if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) { 15741900Smckusick so->so_rcv.sb_timeo = (5 * hz); 15841900Smckusick so->so_snd.sb_timeo = (5 * hz); 15941900Smckusick } else { 16041900Smckusick so->so_rcv.sb_timeo = 0; 16141900Smckusick so->so_snd.sb_timeo = 0; 16241900Smckusick } 16341900Smckusick if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 16441900Smckusick MGET(m, M_WAIT, MT_SOOPTS); 16541900Smckusick *mtod(m, int *) = 1; 16641900Smckusick m->m_len = sizeof(int); 16741900Smckusick sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m); 16841900Smckusick } 16941900Smckusick if (so->so_proto->pr_domain->dom_family == AF_INET && 17041900Smckusick so->so_proto->pr_protocol == IPPROTO_TCP && 17141900Smckusick nfs_tcpnodelay) { 17241900Smckusick MGET(m, M_WAIT, MT_SOOPTS); 17341900Smckusick *mtod(m, int *) = 1; 17441900Smckusick m->m_len = sizeof(int); 17541900Smckusick sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m); 17641900Smckusick } 17741900Smckusick if (error = soreserve(so, 17843351Smckusick nmp->nm_wsize + NFS_MAXPKTHDR + sizeof(u_long), 17941900Smckusick nmp->nm_rsize + NFS_MAXPKTHDR + sizeof(u_long))) 18041900Smckusick goto bad; 18141900Smckusick } 18241900Smckusick so->so_rcv.sb_flags |= SB_NOINTR; 18341900Smckusick so->so_snd.sb_flags |= SB_NOINTR; 18440117Smckusick 18541900Smckusick /* Initialize other non-zero congestion variables */ 18641900Smckusick nmp->nm_rto = NFS_TIMEO; 18741900Smckusick nmp->nm_window = 2; /* Initial send window */ 18841900Smckusick nmp->nm_ssthresh = NFS_MAXWINDOW; /* Slowstart threshold */ 18941900Smckusick nmp->nm_rttvar = nmp->nm_rto << 1; 19041900Smckusick nmp->nm_sent = 0; 19141900Smckusick nmp->nm_currexmit = 0; 19241900Smckusick return (0); 19340117Smckusick 19441900Smckusick bad: 19541900Smckusick nfs_disconnect(nmp); 19641900Smckusick return (error); 19741900Smckusick } 19840117Smckusick 19941900Smckusick /* 20041900Smckusick * Reconnect routine: 20141900Smckusick * Called when a connection is broken on a reliable protocol. 20241900Smckusick * - clean up the old socket 20341900Smckusick * - nfs_connect() again 20441900Smckusick * - set R_MUSTRESEND for all outstanding requests on mount point 20541900Smckusick * If this fails the mount point is DEAD! 20641900Smckusick * nb: Must be called with the nfs_solock() set on the mount point. 20741900Smckusick */ 20841900Smckusick nfs_reconnect(rep, nmp) 20941900Smckusick register struct nfsreq *rep; 21041900Smckusick register struct nfsmount *nmp; 21141900Smckusick { 21241900Smckusick register struct nfsreq *rp; 21341900Smckusick int error; 21440117Smckusick 21541900Smckusick if (rep->r_procp) 21643061Smarc tprintf(rep->r_procp->p_session, 21741900Smckusick "Nfs server %s, trying reconnect\n", 21841900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 21941900Smckusick else 22043061Smarc tprintf(NULL, "Nfs server %s, trying a reconnect\n", 22141900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 22241900Smckusick while (error = nfs_connect(nmp)) { 22342243Smckusick #ifdef lint 22442243Smckusick error = error; 22542243Smckusick #endif /* lint */ 22641900Smckusick if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp)) 22741900Smckusick return (EINTR); 22843351Smckusick (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0); 22940117Smckusick } 23041900Smckusick if (rep->r_procp) 23143061Smarc tprintf(rep->r_procp->p_session, 23241900Smckusick "Nfs server %s, reconnected\n", 23341900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 23441900Smckusick else 23543061Smarc tprintf(NULL, "Nfs server %s, reconnected\n", 23641900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 23741900Smckusick 23841900Smckusick /* 23941900Smckusick * Loop through outstanding request list and fix up all requests 24041900Smckusick * on old socket. 24141900Smckusick */ 24241900Smckusick rp = nfsreqh.r_next; 24341900Smckusick while (rp != &nfsreqh) { 24441900Smckusick if (rp->r_nmp == nmp) 24541900Smckusick rp->r_flags |= R_MUSTRESEND; 24641900Smckusick rp = rp->r_next; 24740117Smckusick } 24840117Smckusick return (0); 24940117Smckusick } 25040117Smckusick 25140117Smckusick /* 25240117Smckusick * NFS disconnect. Clean up and unlink. 25340117Smckusick */ 25441900Smckusick void 25540117Smckusick nfs_disconnect(nmp) 25640117Smckusick register struct nfsmount *nmp; 25740117Smckusick { 25841900Smckusick register struct socket *so; 25940117Smckusick 26041900Smckusick if (nmp->nm_so) { 26141900Smckusick so = nmp->nm_so; 26241900Smckusick nmp->nm_so = (struct socket *)0; 26341900Smckusick soshutdown(so, 2); 26441900Smckusick soclose(so); 26540117Smckusick } 26640117Smckusick } 26740117Smckusick 26840117Smckusick /* 26941900Smckusick * This is the nfs send routine. For connection based socket types, it 27041900Smckusick * must be called with an nfs_solock() on the socket. 27141900Smckusick * "rep == NULL" indicates that it has been called from a server. 27240117Smckusick */ 27341900Smckusick nfs_send(so, nam, top, rep) 27438414Smckusick register struct socket *so; 27538414Smckusick struct mbuf *nam; 27641900Smckusick register struct mbuf *top; 27741900Smckusick struct nfsreq *rep; 27838414Smckusick { 27941900Smckusick struct mbuf *sendnam; 28041900Smckusick int error, soflags; 28138414Smckusick 28241900Smckusick if (rep) { 28341900Smckusick if (rep->r_flags & R_SOFTTERM) { 28440117Smckusick m_freem(top); 28541900Smckusick return (EINTR); 28640117Smckusick } 28743062Smckusick if (rep->r_nmp->nm_so == NULL && 28841900Smckusick (error = nfs_reconnect(rep, rep->r_nmp))) 28941900Smckusick return (error); 29041900Smckusick rep->r_flags &= ~R_MUSTRESEND; 29143062Smckusick so = rep->r_nmp->nm_so; 29241900Smckusick soflags = rep->r_nmp->nm_soflags; 29341900Smckusick } else 29441900Smckusick soflags = so->so_proto->pr_flags; 29541900Smckusick if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED)) 29641900Smckusick sendnam = (struct mbuf *)0; 29741900Smckusick else 29841900Smckusick sendnam = nam; 29941900Smckusick 30041900Smckusick error = sosend(so, sendnam, (struct uio *)0, top, 30141900Smckusick (struct mbuf *)0, 0); 30241900Smckusick if (error == EWOULDBLOCK && rep) { 30341900Smckusick if (rep->r_flags & R_SOFTTERM) 30441900Smckusick error = EINTR; 30541900Smckusick else { 30641900Smckusick rep->r_flags |= R_MUSTRESEND; 30741900Smckusick error = 0; 30840117Smckusick } 30938414Smckusick } 31041900Smckusick /* 31141900Smckusick * Ignore socket errors?? 31241900Smckusick */ 31341900Smckusick if (error && error != EINTR && error != ERESTART) 31441900Smckusick error = 0; 31538414Smckusick return (error); 31638414Smckusick } 31738414Smckusick 31838414Smckusick /* 31941900Smckusick * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all 32041900Smckusick * done by soreceive(), but for SOCK_STREAM we must deal with the Record 32141900Smckusick * Mark and consolidate the data into a new mbuf list. 32241900Smckusick * nb: Sometimes TCP passes the data up to soreceive() in long lists of 32341900Smckusick * small mbufs. 32441900Smckusick * For SOCK_STREAM we must be very careful to read an entire record once 32541900Smckusick * we have read any of it, even if the system call has been interrupted. 32638414Smckusick */ 32741900Smckusick nfs_receive(so, aname, mp, rep) 32838414Smckusick register struct socket *so; 32938414Smckusick struct mbuf **aname; 33038414Smckusick struct mbuf **mp; 33141900Smckusick register struct nfsreq *rep; 33238414Smckusick { 33341900Smckusick struct uio auio; 33441900Smckusick struct iovec aio; 33538414Smckusick register struct mbuf *m; 33641900Smckusick struct mbuf *m2, *m3, *mnew, **mbp; 33741900Smckusick caddr_t fcp, tcp; 33841900Smckusick u_long len; 33941900Smckusick struct mbuf **getnam; 34041900Smckusick int error, siz, mlen, soflags, rcvflg = MSG_WAITALL; 34138414Smckusick 34241900Smckusick /* 34341900Smckusick * Set up arguments for soreceive() 34441900Smckusick */ 34541900Smckusick *mp = (struct mbuf *)0; 34641900Smckusick *aname = (struct mbuf *)0; 34741900Smckusick if (rep) 34841900Smckusick soflags = rep->r_nmp->nm_soflags; 34941900Smckusick else 35041900Smckusick soflags = so->so_proto->pr_flags; 35138414Smckusick 35241900Smckusick /* 35341900Smckusick * For reliable protocols, lock against other senders/receivers 35441900Smckusick * in case a reconnect is necessary. 35541900Smckusick * For SOCK_STREAM, first get the Record Mark to find out how much 35641900Smckusick * more there is to get. 35741900Smckusick * We must lock the socket against other receivers 35841900Smckusick * until we have an entire rpc request/reply. 35941900Smckusick */ 36041900Smckusick if (soflags & PR_CONNREQUIRED) { 36141900Smckusick tryagain: 36241900Smckusick /* 36341900Smckusick * Check for fatal errors and resending request. 36441900Smckusick */ 36541900Smckusick if (rep) { 36641900Smckusick /* 36741900Smckusick * Ugh: If a reconnect attempt just happened, nm_so 36841900Smckusick * would have changed. NULL indicates a failed 36941900Smckusick * attempt that has essentially shut down this 37041900Smckusick * mount point. 37141900Smckusick */ 37241900Smckusick if (rep->r_mrep || (so = rep->r_nmp->nm_so) == NULL || 37341900Smckusick (rep->r_flags & R_SOFTTERM)) 37441900Smckusick return (EINTR); 37541900Smckusick while (rep->r_flags & R_MUSTRESEND) { 37641900Smckusick m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); 37741900Smckusick nfsstats.rpcretries++; 37841900Smckusick if (error = nfs_send(so, rep->r_nmp->nm_nam, m, 37941900Smckusick rep)) 38041900Smckusick goto errout; 38140117Smckusick } 38241900Smckusick } 38341900Smckusick if ((soflags & PR_ATOMIC) == 0) { 38441900Smckusick aio.iov_base = (caddr_t) &len; 38541900Smckusick aio.iov_len = sizeof(u_long); 38641900Smckusick auio.uio_iov = &aio; 38741900Smckusick auio.uio_iovcnt = 1; 38841900Smckusick auio.uio_segflg = UIO_SYSSPACE; 38941900Smckusick auio.uio_rw = UIO_READ; 39041900Smckusick auio.uio_offset = 0; 39141900Smckusick auio.uio_resid = sizeof(u_long); 39241900Smckusick do { 39341900Smckusick error = soreceive(so, (struct mbuf **)0, &auio, 39441900Smckusick (struct mbuf **)0, (struct mbuf **)0, &rcvflg); 39541900Smckusick if (error == EWOULDBLOCK && rep) { 39641900Smckusick if (rep->r_flags & R_SOFTTERM) 39741900Smckusick return (EINTR); 39841900Smckusick if (rep->r_flags & R_MUSTRESEND) 39941900Smckusick goto tryagain; 40041900Smckusick } 40141900Smckusick } while (error == EWOULDBLOCK); 40241900Smckusick if (!error && auio.uio_resid > 0) 40341900Smckusick error = EPIPE; 40440761Skarels if (error) 40541900Smckusick goto errout; 40641900Smckusick len = ntohl(len) & ~0x80000000; 40741900Smckusick /* 40841900Smckusick * This is SERIOUS! We are out of sync with the sender 40941900Smckusick * and forcing a disconnect/reconnect is all I can do. 41041900Smckusick */ 41141900Smckusick if (len > NFS_MAXPACKET) { 41241900Smckusick error = EFBIG; 41341900Smckusick goto errout; 41441900Smckusick } 41541900Smckusick auio.uio_resid = len; 41641900Smckusick do { 41741900Smckusick error = soreceive(so, (struct mbuf **)0, 41841900Smckusick &auio, mp, (struct mbuf **)0, &rcvflg); 41941900Smckusick } while (error == EWOULDBLOCK || error == EINTR || 42041900Smckusick error == ERESTART); 42141900Smckusick if (!error && auio.uio_resid > 0) 42241900Smckusick error = EPIPE; 42340117Smckusick } else { 42441900Smckusick auio.uio_resid = len = 1000000; /* Anything Big */ 42541900Smckusick do { 42641900Smckusick error = soreceive(so, (struct mbuf **)0, 42741900Smckusick &auio, mp, (struct mbuf **)0, &rcvflg); 42841900Smckusick if (error == EWOULDBLOCK && rep) { 42941900Smckusick if (rep->r_flags & R_SOFTTERM) 43041900Smckusick return (EINTR); 43141900Smckusick if (rep->r_flags & R_MUSTRESEND) 43241900Smckusick goto tryagain; 43341900Smckusick } 43441900Smckusick } while (error == EWOULDBLOCK); 43541900Smckusick if (!error && *mp == NULL) 43641900Smckusick error = EPIPE; 43741900Smckusick len -= auio.uio_resid; 43840117Smckusick } 43941900Smckusick errout: 44041900Smckusick if (error && rep && error != EINTR && error != ERESTART) { 44141900Smckusick m_freem(*mp); 44241900Smckusick *mp = (struct mbuf *)0; 44341900Smckusick nfs_disconnect(rep->r_nmp); 44441900Smckusick error = nfs_reconnect(rep, rep->r_nmp); 44541900Smckusick if (!error) 44641900Smckusick goto tryagain; 44740117Smckusick } 44841900Smckusick } else { 44941900Smckusick if (so->so_state & SS_ISCONNECTED) 45041900Smckusick getnam = (struct mbuf **)0; 45141900Smckusick else 45241900Smckusick getnam = aname; 45341900Smckusick auio.uio_resid = len = 1000000; 45441900Smckusick do { 45541900Smckusick error = soreceive(so, getnam, &auio, mp, 45641900Smckusick (struct mbuf **)0, &rcvflg); 45741900Smckusick if (error == EWOULDBLOCK && rep && 45841900Smckusick (rep->r_flags & R_SOFTTERM)) 45941900Smckusick return (EINTR); 46041900Smckusick } while (error == EWOULDBLOCK); 46141900Smckusick len -= auio.uio_resid; 46241900Smckusick } 46341900Smckusick if (error) { 46441900Smckusick m_freem(*mp); 46541900Smckusick *mp = (struct mbuf *)0; 46641900Smckusick } 46741900Smckusick /* 46841900Smckusick * Search for any mbufs that are not a multiple of 4 bytes long. 46941900Smckusick * These could cause pointer alignment problems, so copy them to 47041900Smckusick * well aligned mbufs. 47141900Smckusick */ 47241900Smckusick m = *mp; 47341900Smckusick mbp = mp; 47441900Smckusick while (m) { 47541900Smckusick /* 47641900Smckusick * All this for something that may never happen. 47741900Smckusick */ 47841900Smckusick if (m->m_len & 0x3) { 47941900Smckusick printf("nfs_rcv odd length!\n"); 48041900Smckusick fcp = mtod(m, caddr_t); 48141900Smckusick mnew = m2 = (struct mbuf *)0; 48242243Smckusick #ifdef lint 48342243Smckusick m3 = (struct mbuf *)0; 48442243Smckusick mlen = 0; 48542243Smckusick #endif /* lint */ 48641900Smckusick while (m) { 48741900Smckusick if (m2 == NULL || mlen == 0) { 48841900Smckusick MGET(m2, M_WAIT, MT_DATA); 48941900Smckusick if (len > MINCLSIZE) 49041900Smckusick MCLGET(m2, M_WAIT); 49141900Smckusick m2->m_len = 0; 49241900Smckusick mlen = M_TRAILINGSPACE(m2); 49341900Smckusick tcp = mtod(m2, caddr_t); 49441900Smckusick if (mnew) { 49541900Smckusick m3->m_next = m2; 49641900Smckusick m3 = m2; 49741900Smckusick } else 49841900Smckusick mnew = m3 = m2; 49941900Smckusick } 50041900Smckusick siz = (mlen > m->m_len) ? m->m_len : mlen; 50141900Smckusick bcopy(fcp, tcp, siz); 50241900Smckusick m2->m_len += siz; 50341900Smckusick mlen -= siz; 50441900Smckusick len -= siz; 50541900Smckusick tcp += siz; 50641900Smckusick m->m_len -= siz; 50741900Smckusick fcp += siz; 50841900Smckusick if (m->m_len == 0) { 50941900Smckusick do { 51041900Smckusick m = m->m_next; 51141900Smckusick } while (m && m->m_len == 0); 51241900Smckusick if (m) 51341900Smckusick fcp = mtod(m, caddr_t); 51441900Smckusick } 51541900Smckusick } 51641900Smckusick m = *mbp; 51741900Smckusick *mbp = mnew; 51841900Smckusick m_freem(m); 51941900Smckusick break; 52040117Smckusick } 52141900Smckusick len -= m->m_len; 52241900Smckusick mbp = &m->m_next; 52341900Smckusick m = m->m_next; 52438414Smckusick } 52538414Smckusick return (error); 52638414Smckusick } 52738414Smckusick 52838414Smckusick struct rpc_replyhead { 52938414Smckusick u_long r_xid; 53038414Smckusick u_long r_rep; 53138414Smckusick }; 53238414Smckusick 53338414Smckusick /* 53441900Smckusick * Implement receipt of reply on a socket. 53538414Smckusick * We must search through the list of received datagrams matching them 53638414Smckusick * with outstanding requests using the xid, until ours is found. 53738414Smckusick */ 53841900Smckusick /* ARGSUSED */ 53941900Smckusick nfs_reply(nmp, myrep) 54041900Smckusick struct nfsmount *nmp; 54139344Smckusick struct nfsreq *myrep; 54238414Smckusick { 54338414Smckusick register struct mbuf *m; 54438414Smckusick register struct nfsreq *rep; 54541900Smckusick register int error = 0; 54638414Smckusick struct rpc_replyhead replyh; 54741900Smckusick struct mbuf *mp, *nam; 54841900Smckusick char *cp; 54941900Smckusick int cnt, xfer; 55038414Smckusick 55141900Smckusick /* 55241900Smckusick * Loop around until we get our own reply 55341900Smckusick */ 55441900Smckusick for (;;) { 55541900Smckusick /* 55641900Smckusick * Lock against other receivers so that I don't get stuck in 55741900Smckusick * sbwait() after someone else has received my reply for me. 55841900Smckusick * Also necessary for connection based protocols to avoid 55941900Smckusick * race conditions during a reconnect. 56041900Smckusick */ 56143351Smckusick nfs_solock(&nmp->nm_flag); 56241900Smckusick /* Already received, bye bye */ 56341900Smckusick if (myrep->r_mrep != NULL) { 56441900Smckusick nfs_sounlock(&nmp->nm_flag); 56541900Smckusick return (0); 56640117Smckusick } 56741900Smckusick /* 56841900Smckusick * Get the next Rpc reply off the socket 56941900Smckusick */ 57041900Smckusick if (error = nfs_receive(nmp->nm_so, &nam, &mp, myrep)) { 57141900Smckusick nfs_sounlock(&nmp->nm_flag); 57238414Smckusick 57341900Smckusick /* 57441900Smckusick * Ignore routing errors on connectionless protocols?? 57541900Smckusick */ 57641900Smckusick if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) { 57741900Smckusick nmp->nm_so->so_error = 0; 57841900Smckusick continue; 57941900Smckusick } 58041900Smckusick 58141900Smckusick /* 58241900Smckusick * Otherwise cleanup and return a fatal error. 58341900Smckusick */ 58441900Smckusick if (myrep->r_flags & R_TIMING) { 58541900Smckusick myrep->r_flags &= ~R_TIMING; 58641900Smckusick nmp->nm_rtt = -1; 58741900Smckusick } 58841900Smckusick if (myrep->r_flags & R_SENT) { 58941900Smckusick myrep->r_flags &= ~R_SENT; 59041900Smckusick nmp->nm_sent--; 59141900Smckusick } 59241900Smckusick return (error); 59338414Smckusick } 59441900Smckusick 59541900Smckusick /* 59641900Smckusick * Get the xid and check that it is an rpc reply 59741900Smckusick */ 59841900Smckusick m = mp; 59941900Smckusick if (m->m_len >= 2*NFSX_UNSIGNED) 60041900Smckusick bcopy(mtod(m, caddr_t), (caddr_t)&replyh, 60141900Smckusick 2*NFSX_UNSIGNED); 60241900Smckusick else { 60341900Smckusick cnt = 2*NFSX_UNSIGNED; 60441900Smckusick cp = (caddr_t)&replyh; 60541900Smckusick while (m && cnt > 0) { 60641900Smckusick if (m->m_len > 0) { 60741900Smckusick xfer = (m->m_len >= cnt) ? cnt : 60841900Smckusick m->m_len; 60941900Smckusick bcopy(mtod(m, caddr_t), cp, xfer); 61041900Smckusick cnt -= xfer; 61141900Smckusick cp += xfer; 61241900Smckusick } 61341900Smckusick if (cnt > 0) 61441900Smckusick m = m->m_next; 61541900Smckusick } 61640117Smckusick } 61741900Smckusick if (replyh.r_rep != rpc_reply || m == NULL) { 61840117Smckusick nfsstats.rpcinvalid++; 61941900Smckusick m_freem(mp); 62041900Smckusick nfs_sounlock(&nmp->nm_flag); 62141900Smckusick continue; 62238414Smckusick } 62341900Smckusick /* 62441900Smckusick * Loop through the request list to match up the reply 62541900Smckusick * Iff no match, just drop the datagram 62641900Smckusick */ 62741900Smckusick m = mp; 62841900Smckusick rep = nfsreqh.r_next; 62941900Smckusick while (rep != &nfsreqh) { 63041900Smckusick if (rep->r_mrep == NULL && replyh.r_xid == rep->r_xid) { 63141900Smckusick /* Found it.. */ 63241900Smckusick rep->r_mrep = m; 63341900Smckusick /* 63441900Smckusick * Update timing 63541900Smckusick */ 63641900Smckusick if (rep->r_flags & R_TIMING) { 63741900Smckusick nfs_updatetimer(rep->r_nmp); 63841900Smckusick rep->r_flags &= ~R_TIMING; 63941900Smckusick rep->r_nmp->nm_rtt = -1; 64041900Smckusick } 64141900Smckusick if (rep->r_flags & R_SENT) { 64241900Smckusick rep->r_flags &= ~R_SENT; 64341900Smckusick rep->r_nmp->nm_sent--; 64441900Smckusick } 64540117Smckusick break; 64638414Smckusick } 64741900Smckusick rep = rep->r_next; 64838414Smckusick } 64941900Smckusick nfs_sounlock(&nmp->nm_flag); 65041900Smckusick if (nam) 65141900Smckusick m_freem(nam); 65241900Smckusick /* 65341900Smckusick * If not matched to a request, drop it. 65441900Smckusick * If it's mine, get out. 65541900Smckusick */ 65641900Smckusick if (rep == &nfsreqh) { 65741900Smckusick nfsstats.rpcunexpected++; 65841900Smckusick m_freem(m); 65941900Smckusick } else if (rep == myrep) 66041900Smckusick return (0); 66138414Smckusick } 66238414Smckusick } 66338414Smckusick 66438414Smckusick /* 66538414Smckusick * nfs_request - goes something like this 66638414Smckusick * - fill in request struct 66738414Smckusick * - links it into list 66841900Smckusick * - calls nfs_send() for first transmit 66941900Smckusick * - calls nfs_receive() to get reply 67038414Smckusick * - break down rpc header and return with nfs reply pointed to 67138414Smckusick * by mrep or error 67238414Smckusick * nb: always frees up mreq mbuf list 67338414Smckusick */ 67443351Smckusick nfs_request(vp, mreq, xid, procnum, procp, tryhard, mp, mrp, mdp, dposp) 67538414Smckusick struct vnode *vp; 67638414Smckusick struct mbuf *mreq; 67738414Smckusick u_long xid; 67841900Smckusick int procnum; 67941900Smckusick struct proc *procp; 68043351Smckusick int tryhard; 68138414Smckusick struct mount *mp; 68238414Smckusick struct mbuf **mrp; 68338414Smckusick struct mbuf **mdp; 68438414Smckusick caddr_t *dposp; 68538414Smckusick { 68638414Smckusick register struct mbuf *m, *mrep; 68738414Smckusick register struct nfsreq *rep; 68838414Smckusick register u_long *p; 68938414Smckusick register int len; 69041900Smckusick struct nfsmount *nmp; 69138414Smckusick struct mbuf *md; 69239344Smckusick struct nfsreq *reph; 69338414Smckusick caddr_t dpos; 69438414Smckusick char *cp2; 69538414Smckusick int t1; 69638414Smckusick int s; 69741900Smckusick int error = 0; 69838414Smckusick 69941900Smckusick nmp = VFSTONFS(mp); 70038414Smckusick m = mreq; 70138414Smckusick MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK); 70238414Smckusick rep->r_xid = xid; 70341900Smckusick rep->r_nmp = nmp; 70438414Smckusick rep->r_vp = vp; 70541900Smckusick rep->r_procp = procp; 70643351Smckusick if ((nmp->nm_flag & NFSMNT_SOFT) || 70743351Smckusick ((nmp->nm_flag & NFSMNT_SPONGY) && !tryhard)) 70841900Smckusick rep->r_retry = nmp->nm_retry; 70938414Smckusick else 71040117Smckusick rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */ 71140117Smckusick rep->r_flags = rep->r_rexmit = 0; 71241900Smckusick /* 71341900Smckusick * Three cases: 71441900Smckusick * - non-idempotent requests on SOCK_DGRAM use NFS_MINIDEMTIMEO 71541900Smckusick * - idempotent requests on SOCK_DGRAM use 0 71641900Smckusick * - Reliable transports, NFS_RELIABLETIMEO 71741900Smckusick * Timeouts are still done on reliable transports to ensure detection 71843351Smckusick * of excessive connection delay. 71941900Smckusick */ 72041900Smckusick if (nmp->nm_sotype != SOCK_DGRAM) 72141900Smckusick rep->r_timerinit = -NFS_RELIABLETIMEO; 72241900Smckusick else if (nonidempotent[procnum]) 72341900Smckusick rep->r_timerinit = -NFS_MINIDEMTIMEO; 72441900Smckusick else 72541900Smckusick rep->r_timerinit = 0; 72641900Smckusick rep->r_timer = rep->r_timerinit; 72738414Smckusick rep->r_mrep = NULL; 72838414Smckusick len = 0; 72938414Smckusick while (m) { 73038414Smckusick len += m->m_len; 73138414Smckusick m = m->m_next; 73238414Smckusick } 73341900Smckusick mreq->m_pkthdr.len = len; 73441900Smckusick mreq->m_pkthdr.rcvif = (struct ifnet *)0; 73541900Smckusick /* 73641900Smckusick * For non-atomic protocols, insert a Sun RPC Record Mark. 73741900Smckusick */ 73841900Smckusick if ((nmp->nm_soflags & PR_ATOMIC) == 0) { 73941900Smckusick M_PREPEND(mreq, sizeof(u_long), M_WAIT); 74041900Smckusick *mtod(mreq, u_long *) = htonl(0x80000000 | len); 74141900Smckusick } 74241900Smckusick rep->r_mreq = mreq; 74338414Smckusick 74440117Smckusick /* 74540117Smckusick * Do the client side RPC. 74640117Smckusick */ 74740117Smckusick nfsstats.rpcrequests++; 74841900Smckusick /* 74941900Smckusick * Chain request into list of outstanding requests. Be sure 75041900Smckusick * to put it LAST so timer finds oldest requests first. 75141900Smckusick */ 75240117Smckusick s = splnet(); 75339344Smckusick reph = &nfsreqh; 75441900Smckusick reph->r_prev->r_next = rep; 75541900Smckusick rep->r_prev = reph->r_prev; 75639344Smckusick reph->r_prev = rep; 75739344Smckusick rep->r_next = reph; 75840117Smckusick /* 75940117Smckusick * If backing off another request or avoiding congestion, don't 76040117Smckusick * send this one now but let timer do it. If not timing a request, 76140117Smckusick * do it now. 76240117Smckusick */ 76341900Smckusick if (nmp->nm_sent <= 0 || nmp->nm_sotype != SOCK_DGRAM || 76441900Smckusick (nmp->nm_currexmit == 0 && nmp->nm_sent < nmp->nm_window)) { 76541900Smckusick nmp->nm_sent++; 76641900Smckusick rep->r_flags |= R_SENT; 76741900Smckusick if (nmp->nm_rtt == -1) { 76841900Smckusick nmp->nm_rtt = 0; 76941900Smckusick rep->r_flags |= R_TIMING; 77041900Smckusick } 77140117Smckusick splx(s); 77241900Smckusick m = m_copym(mreq, 0, M_COPYALL, M_WAIT); 77341900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) 77443351Smckusick nfs_solock(&nmp->nm_flag); 77541900Smckusick error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep); 77641900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) 77741900Smckusick nfs_sounlock(&nmp->nm_flag); 77841900Smckusick if (error && NFSIGNORE_SOERROR(nmp->nm_soflags, error)) 77941900Smckusick nmp->nm_so->so_error = error = 0; 78041900Smckusick } else 78141900Smckusick splx(s); 78238414Smckusick 78338414Smckusick /* 78440117Smckusick * Wait for the reply from our send or the timer's. 78540117Smckusick */ 78641900Smckusick if (!error) 78741900Smckusick error = nfs_reply(nmp, rep); 78838414Smckusick 78940117Smckusick /* 79040117Smckusick * RPC done, unlink the request. 79140117Smckusick */ 79238414Smckusick s = splnet(); 79338414Smckusick rep->r_prev->r_next = rep->r_next; 79439344Smckusick rep->r_next->r_prev = rep->r_prev; 79538414Smckusick splx(s); 79641900Smckusick 79741900Smckusick /* 79841900Smckusick * If there was a successful reply and a tprintf msg. 79941900Smckusick * tprintf a response. 80041900Smckusick */ 80141900Smckusick if (!error && (rep->r_flags & R_TPRINTFMSG)) { 80241900Smckusick if (rep->r_procp) 80343061Smarc tprintf(rep->r_procp->p_session, 80441900Smckusick "Nfs server %s, is alive again\n", 80541900Smckusick rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 80641900Smckusick else 80743061Smarc tprintf(NULL, "Nfs server %s, is alive again\n", 80841900Smckusick rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 80941900Smckusick } 81038414Smckusick m_freem(rep->r_mreq); 81138414Smckusick mrep = md = rep->r_mrep; 81238414Smckusick FREE((caddr_t)rep, M_NFSREQ); 81338414Smckusick if (error) 81438414Smckusick return (error); 81538414Smckusick 81638414Smckusick /* 81738414Smckusick * break down the rpc header and check if ok 81838414Smckusick */ 81938414Smckusick dpos = mtod(md, caddr_t); 82038414Smckusick nfsm_disect(p, u_long *, 5*NFSX_UNSIGNED); 82138414Smckusick p += 2; 82238414Smckusick if (*p++ == rpc_msgdenied) { 82338414Smckusick if (*p == rpc_mismatch) 82438414Smckusick error = EOPNOTSUPP; 82538414Smckusick else 82638414Smckusick error = EACCES; 82738414Smckusick m_freem(mrep); 82838414Smckusick return (error); 82938414Smckusick } 83038414Smckusick /* 83138414Smckusick * skip over the auth_verf, someday we may want to cache auth_short's 83238414Smckusick * for nfs_reqhead(), but for now just dump it 83338414Smckusick */ 83438414Smckusick if (*++p != 0) { 83538414Smckusick len = nfsm_rndup(fxdr_unsigned(long, *p)); 83638414Smckusick nfsm_adv(len); 83738414Smckusick } 83838414Smckusick nfsm_disect(p, u_long *, NFSX_UNSIGNED); 83938414Smckusick /* 0 == ok */ 84038414Smckusick if (*p == 0) { 84138414Smckusick nfsm_disect(p, u_long *, NFSX_UNSIGNED); 84238414Smckusick if (*p != 0) { 84338414Smckusick error = fxdr_unsigned(int, *p); 84438414Smckusick m_freem(mrep); 84538414Smckusick return (error); 84638414Smckusick } 84738414Smckusick *mrp = mrep; 84838414Smckusick *mdp = md; 84938414Smckusick *dposp = dpos; 85038414Smckusick return (0); 85138414Smckusick } 85238414Smckusick m_freem(mrep); 85338414Smckusick return (EPROTONOSUPPORT); 85438414Smckusick nfsmout: 85538414Smckusick return (error); 85638414Smckusick } 85738414Smckusick 85838414Smckusick /* 85938414Smckusick * Get a request for the server main loop 86038414Smckusick * - receive a request via. nfs_soreceive() 86138414Smckusick * - verify it 86238414Smckusick * - fill in the cred struct. 86338414Smckusick */ 86442243Smckusick nfs_getreq(so, prog, vers, maxproc, nam, mrp, mdp, dposp, retxid, procnum, cr, 86543351Smckusick msk, mtch) 86638414Smckusick struct socket *so; 86738414Smckusick u_long prog; 86838414Smckusick u_long vers; 86938414Smckusick int maxproc; 87038414Smckusick struct mbuf **nam; 87138414Smckusick struct mbuf **mrp; 87238414Smckusick struct mbuf **mdp; 87338414Smckusick caddr_t *dposp; 87438414Smckusick u_long *retxid; 87542243Smckusick u_long *procnum; 87638414Smckusick register struct ucred *cr; 87741900Smckusick struct mbuf *msk, *mtch; 87838414Smckusick { 87938414Smckusick register int i; 88039494Smckusick register u_long *p; 88139494Smckusick register long t1; 88239494Smckusick caddr_t dpos, cp2; 88339494Smckusick int error = 0; 88439494Smckusick struct mbuf *mrep, *md; 88539494Smckusick int len; 88638414Smckusick 88741900Smckusick if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 88841900Smckusick error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0); 88941900Smckusick } else { 89041900Smckusick mrep = (struct mbuf *)0; 89141900Smckusick do { 89241900Smckusick if (mrep) { 89341900Smckusick m_freem(*nam); 89441900Smckusick m_freem(mrep); 89541900Smckusick } 89641900Smckusick error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0); 89741900Smckusick } while (!error && nfs_badnam(*nam, msk, mtch)); 89841900Smckusick } 89941900Smckusick if (error) 90038414Smckusick return (error); 90138414Smckusick md = mrep; 90238414Smckusick dpos = mtod(mrep, caddr_t); 90338414Smckusick nfsm_disect(p, u_long *, 10*NFSX_UNSIGNED); 90438414Smckusick *retxid = *p++; 90538414Smckusick if (*p++ != rpc_call) { 90638414Smckusick m_freem(mrep); 90738414Smckusick return (ERPCMISMATCH); 90838414Smckusick } 90938414Smckusick if (*p++ != rpc_vers) { 91038414Smckusick m_freem(mrep); 91138414Smckusick return (ERPCMISMATCH); 91238414Smckusick } 91338414Smckusick if (*p++ != prog) { 91438414Smckusick m_freem(mrep); 91538414Smckusick return (EPROGUNAVAIL); 91638414Smckusick } 91738414Smckusick if (*p++ != vers) { 91838414Smckusick m_freem(mrep); 91938414Smckusick return (EPROGMISMATCH); 92038414Smckusick } 92142243Smckusick *procnum = fxdr_unsigned(u_long, *p++); 92242243Smckusick if (*procnum == NFSPROC_NULL) { 92338414Smckusick *mrp = mrep; 92438414Smckusick return (0); 92538414Smckusick } 92642243Smckusick if (*procnum > maxproc || *p++ != rpc_auth_unix) { 92738414Smckusick m_freem(mrep); 92838414Smckusick return (EPROCUNAVAIL); 92938414Smckusick } 93041900Smckusick len = fxdr_unsigned(int, *p++); 93141900Smckusick if (len < 0 || len > RPCAUTH_MAXSIZ) { 93241900Smckusick m_freem(mrep); 93341900Smckusick return (EBADRPC); 93441900Smckusick } 93539494Smckusick len = fxdr_unsigned(int, *++p); 93641900Smckusick if (len < 0 || len > NFS_MAXNAMLEN) { 93741900Smckusick m_freem(mrep); 93841900Smckusick return (EBADRPC); 93941900Smckusick } 94039494Smckusick nfsm_adv(nfsm_rndup(len)); 94138414Smckusick nfsm_disect(p, u_long *, 3*NFSX_UNSIGNED); 94238414Smckusick cr->cr_uid = fxdr_unsigned(uid_t, *p++); 94338414Smckusick cr->cr_gid = fxdr_unsigned(gid_t, *p++); 94439494Smckusick len = fxdr_unsigned(int, *p); 94541900Smckusick if (len < 0 || len > RPCAUTH_UNIXGIDS) { 94638414Smckusick m_freem(mrep); 94738414Smckusick return (EBADRPC); 94838414Smckusick } 94939494Smckusick nfsm_disect(p, u_long *, (len + 2)*NFSX_UNSIGNED); 95039494Smckusick for (i = 1; i <= len; i++) 95141900Smckusick if (i < NGROUPS) 95241900Smckusick cr->cr_groups[i] = fxdr_unsigned(gid_t, *p++); 95341900Smckusick else 95441900Smckusick p++; 95541900Smckusick cr->cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1); 95638414Smckusick /* 95738414Smckusick * Do we have any use for the verifier. 95838414Smckusick * According to the "Remote Procedure Call Protocol Spec." it 95938414Smckusick * should be AUTH_NULL, but some clients make it AUTH_UNIX? 96038414Smckusick * For now, just skip over it 96138414Smckusick */ 96239494Smckusick len = fxdr_unsigned(int, *++p); 96341900Smckusick if (len < 0 || len > RPCAUTH_MAXSIZ) { 96441900Smckusick m_freem(mrep); 96541900Smckusick return (EBADRPC); 96641900Smckusick } 96739494Smckusick if (len > 0) 96839494Smckusick nfsm_adv(nfsm_rndup(len)); 96938414Smckusick *mrp = mrep; 97038414Smckusick *mdp = md; 97138414Smckusick *dposp = dpos; 97238414Smckusick return (0); 97338414Smckusick nfsmout: 97438414Smckusick return (error); 97538414Smckusick } 97638414Smckusick 97738414Smckusick /* 97838414Smckusick * Generate the rpc reply header 97938414Smckusick * siz arg. is used to decide if adding a cluster is worthwhile 98038414Smckusick */ 98138414Smckusick nfs_rephead(siz, retxid, err, mrq, mbp, bposp) 98238414Smckusick int siz; 98338414Smckusick u_long retxid; 98438414Smckusick int err; 98538414Smckusick struct mbuf **mrq; 98638414Smckusick struct mbuf **mbp; 98738414Smckusick caddr_t *bposp; 98838414Smckusick { 98939494Smckusick register u_long *p; 99039494Smckusick register long t1; 99139494Smckusick caddr_t bpos; 99239494Smckusick struct mbuf *mreq, *mb, *mb2; 99338414Smckusick 99438414Smckusick NFSMGETHDR(mreq); 99538414Smckusick mb = mreq; 99638414Smckusick if ((siz+RPC_REPLYSIZ) > MHLEN) 99741900Smckusick MCLGET(mreq, M_WAIT); 99838414Smckusick p = mtod(mreq, u_long *); 99938414Smckusick mreq->m_len = 6*NFSX_UNSIGNED; 100038414Smckusick bpos = ((caddr_t)p)+mreq->m_len; 100138414Smckusick *p++ = retxid; 100238414Smckusick *p++ = rpc_reply; 100338414Smckusick if (err == ERPCMISMATCH) { 100438414Smckusick *p++ = rpc_msgdenied; 100538414Smckusick *p++ = rpc_mismatch; 100638414Smckusick *p++ = txdr_unsigned(2); 100738414Smckusick *p = txdr_unsigned(2); 100838414Smckusick } else { 100938414Smckusick *p++ = rpc_msgaccepted; 101038414Smckusick *p++ = 0; 101138414Smckusick *p++ = 0; 101238414Smckusick switch (err) { 101338414Smckusick case EPROGUNAVAIL: 101438414Smckusick *p = txdr_unsigned(RPC_PROGUNAVAIL); 101538414Smckusick break; 101638414Smckusick case EPROGMISMATCH: 101738414Smckusick *p = txdr_unsigned(RPC_PROGMISMATCH); 101838414Smckusick nfsm_build(p, u_long *, 2*NFSX_UNSIGNED); 101938414Smckusick *p++ = txdr_unsigned(2); 102038414Smckusick *p = txdr_unsigned(2); /* someday 3 */ 102138414Smckusick break; 102238414Smckusick case EPROCUNAVAIL: 102338414Smckusick *p = txdr_unsigned(RPC_PROCUNAVAIL); 102438414Smckusick break; 102538414Smckusick default: 102638414Smckusick *p = 0; 102738414Smckusick if (err != VNOVAL) { 102838414Smckusick nfsm_build(p, u_long *, NFSX_UNSIGNED); 102938414Smckusick *p = txdr_unsigned(err); 103038414Smckusick } 103138414Smckusick break; 103238414Smckusick }; 103338414Smckusick } 103438414Smckusick *mrq = mreq; 103538414Smckusick *mbp = mb; 103638414Smckusick *bposp = bpos; 103738414Smckusick if (err != 0 && err != VNOVAL) 103838414Smckusick nfsstats.srvrpc_errs++; 103938414Smckusick return (0); 104038414Smckusick } 104138414Smckusick 104238414Smckusick /* 104338414Smckusick * Nfs timer routine 104438414Smckusick * Scan the nfsreq list and retranmit any requests that have timed out 104538414Smckusick * To avoid retransmission attempts on STREAM sockets (in the future) make 104640117Smckusick * sure to set the r_retry field to 0 (implies nm_retry == 0). 104738414Smckusick */ 104838414Smckusick nfs_timer() 104938414Smckusick { 105038414Smckusick register struct nfsreq *rep; 105138414Smckusick register struct mbuf *m; 105238414Smckusick register struct socket *so; 105341900Smckusick register struct nfsmount *nmp; 105440117Smckusick int s, error; 105538414Smckusick 105638414Smckusick s = splnet(); 105741900Smckusick for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) { 105841900Smckusick nmp = rep->r_nmp; 105941900Smckusick if (rep->r_mrep || (rep->r_flags & R_SOFTTERM) || 106041900Smckusick (so = nmp->nm_so) == NULL) 106141900Smckusick continue; 106241900Smckusick if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp)) { 106341900Smckusick rep->r_flags |= R_SOFTTERM; 106441900Smckusick continue; 106541900Smckusick } 106640117Smckusick if (rep->r_flags & R_TIMING) /* update rtt in mount */ 106741900Smckusick nmp->nm_rtt++; 106841900Smckusick /* If not timed out */ 106941900Smckusick if (++rep->r_timer < nmp->nm_rto) 107041900Smckusick continue; 107140117Smckusick /* Do backoff and save new timeout in mount */ 107240117Smckusick if (rep->r_flags & R_TIMING) { 107341900Smckusick nfs_backofftimer(nmp); 107440117Smckusick rep->r_flags &= ~R_TIMING; 107541900Smckusick nmp->nm_rtt = -1; 107640117Smckusick } 107740117Smckusick if (rep->r_flags & R_SENT) { 107840117Smckusick rep->r_flags &= ~R_SENT; 107941900Smckusick nmp->nm_sent--; 108040117Smckusick } 108141900Smckusick 108241900Smckusick /* 108341900Smckusick * Check for too many retries on soft mount. 108441900Smckusick * nb: For hard mounts, r_retry == NFS_MAXREXMIT+1 108541900Smckusick */ 108641900Smckusick if (++rep->r_rexmit > NFS_MAXREXMIT) 108740117Smckusick rep->r_rexmit = NFS_MAXREXMIT; 108840117Smckusick 108941900Smckusick /* 109041900Smckusick * Check for server not responding 109141900Smckusick */ 109241900Smckusick if ((rep->r_flags & R_TPRINTFMSG) == 0 && 109343351Smckusick rep->r_rexmit > NFS_FISHY) { 109441900Smckusick if (rep->r_procp && rep->r_procp->p_session) 109543061Smarc tprintf(rep->r_procp->p_session, 109641900Smckusick "Nfs server %s, not responding\n", 109741900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 109841900Smckusick else 109943061Smarc tprintf(NULL, 110041900Smckusick "Nfs server %s, not responding\n", 110141900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 110241900Smckusick rep->r_flags |= R_TPRINTFMSG; 110341900Smckusick } 110443351Smckusick if (rep->r_rexmit >= rep->r_retry) { /* too many */ 110541900Smckusick nfsstats.rpctimeouts++; 110641900Smckusick rep->r_flags |= R_SOFTTERM; 110741900Smckusick continue; 110841900Smckusick } 110943351Smckusick if (nmp->nm_sotype != SOCK_DGRAM) 111043351Smckusick continue; 111141900Smckusick 111241900Smckusick /* 111341900Smckusick * If there is enough space and the window allows.. 111441900Smckusick * Resend it 111541900Smckusick */ 111641900Smckusick if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len && 111741900Smckusick nmp->nm_sent < nmp->nm_window && 111841900Smckusick (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){ 111941900Smckusick nfsstats.rpcretries++; 112041900Smckusick if ((nmp->nm_flag & NFSMNT_NOCONN) == 0) 112141900Smckusick error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, 112241900Smckusick (caddr_t)0, (struct mbuf *)0, (struct mbuf *)0); 112341900Smckusick else 112441900Smckusick error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, 112541900Smckusick nmp->nm_nam, (struct mbuf *)0, (struct mbuf *)0); 112641900Smckusick if (error) { 112741900Smckusick if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) 112841900Smckusick so->so_error = 0; 112941900Smckusick } else { 113041900Smckusick /* 113141900Smckusick * We need to time the request even though we 113241900Smckusick * are retransmitting. 113341900Smckusick */ 113441900Smckusick nmp->nm_rtt = 0; 113541900Smckusick nmp->nm_sent++; 113641900Smckusick rep->r_flags |= (R_SENT|R_TIMING); 113741900Smckusick rep->r_timer = rep->r_timerinit; 113841900Smckusick } 113941900Smckusick } 114040117Smckusick } 114140117Smckusick splx(s); 114240117Smckusick timeout(nfs_timer, (caddr_t)0, hz/NFS_HZ); 114340117Smckusick } 114440117Smckusick 114540117Smckusick /* 114640117Smckusick * NFS timer update and backoff. The "Jacobson/Karels/Karn" scheme is 114740117Smckusick * used here. The timer state is held in the nfsmount structure and 114840117Smckusick * a single request is used to clock the response. When successful 114940117Smckusick * the rtt smoothing in nfs_updatetimer is used, when failed the backoff 115040117Smckusick * is done by nfs_backofftimer. We also log failure messages in these 115140117Smckusick * routines. 115240117Smckusick * 115340117Smckusick * Congestion variables are held in the nfshost structure which 115440117Smckusick * is referenced by nfsmounts and shared per-server. This separation 115540117Smckusick * makes it possible to do per-mount timing which allows varying disk 115640117Smckusick * access times to be dealt with, while preserving a network oriented 115740117Smckusick * congestion control scheme. 115840117Smckusick * 115940117Smckusick * The windowing implements the Jacobson/Karels slowstart algorithm 116040117Smckusick * with adjusted scaling factors. We start with one request, then send 116140117Smckusick * 4 more after each success until the ssthresh limit is reached, then 116240117Smckusick * we increment at a rate proportional to the window. On failure, we 116340117Smckusick * remember 3/4 the current window and clamp the send limit to 1. Note 116440117Smckusick * ICMP source quench is not reflected in so->so_error so we ignore that 116540117Smckusick * for now. 116640117Smckusick * 116740117Smckusick * NFS behaves much more like a transport protocol with these changes, 116840117Smckusick * shedding the teenage pedal-to-the-metal tendencies of "other" 116940117Smckusick * implementations. 117040117Smckusick * 117140117Smckusick * Timers and congestion avoidance by Tom Talpey, Open Software Foundation. 117240117Smckusick */ 117340117Smckusick 117440117Smckusick /* 117540117Smckusick * The TCP algorithm was not forgiving enough. Because the NFS server 117640117Smckusick * responds only after performing lookups/diskio/etc, we have to be 117740117Smckusick * more prepared to accept a spiky variance. The TCP algorithm is: 117841900Smckusick * TCP_RTO(nmp) ((((nmp)->nm_srtt >> 2) + (nmp)->nm_rttvar) >> 1) 117940117Smckusick */ 118041900Smckusick #define NFS_RTO(nmp) (((nmp)->nm_srtt >> 3) + (nmp)->nm_rttvar) 118140117Smckusick 118241900Smckusick nfs_updatetimer(nmp) 118341900Smckusick register struct nfsmount *nmp; 118440117Smckusick { 118540117Smckusick 118640117Smckusick /* If retransmitted, clear and return */ 118741900Smckusick if (nmp->nm_rexmit || nmp->nm_currexmit) { 118841900Smckusick nmp->nm_rexmit = nmp->nm_currexmit = 0; 118940117Smckusick return; 119040117Smckusick } 119140117Smckusick /* If have a measurement, do smoothing */ 119241900Smckusick if (nmp->nm_srtt) { 119340117Smckusick register short delta; 119441900Smckusick delta = nmp->nm_rtt - (nmp->nm_srtt >> 3); 119541900Smckusick if ((nmp->nm_srtt += delta) <= 0) 119641900Smckusick nmp->nm_srtt = 1; 119740117Smckusick if (delta < 0) 119840117Smckusick delta = -delta; 119941900Smckusick delta -= (nmp->nm_rttvar >> 2); 120041900Smckusick if ((nmp->nm_rttvar += delta) <= 0) 120141900Smckusick nmp->nm_rttvar = 1; 120240117Smckusick /* Else initialize */ 120340117Smckusick } else { 120441900Smckusick nmp->nm_rttvar = nmp->nm_rtt << 1; 120541900Smckusick if (nmp->nm_rttvar == 0) nmp->nm_rttvar = 2; 120641900Smckusick nmp->nm_srtt = nmp->nm_rttvar << 2; 120740117Smckusick } 120840117Smckusick /* Compute new Retransmission TimeOut and clip */ 120941900Smckusick nmp->nm_rto = NFS_RTO(nmp); 121041900Smckusick if (nmp->nm_rto < NFS_MINTIMEO) 121141900Smckusick nmp->nm_rto = NFS_MINTIMEO; 121241900Smckusick else if (nmp->nm_rto > NFS_MAXTIMEO) 121341900Smckusick nmp->nm_rto = NFS_MAXTIMEO; 121440117Smckusick 121540117Smckusick /* Update window estimate */ 121641900Smckusick if (nmp->nm_window < nmp->nm_ssthresh) /* quickly */ 121741900Smckusick nmp->nm_window += 4; 121840117Smckusick else { /* slowly */ 121941900Smckusick register long incr = ++nmp->nm_winext; 122041900Smckusick incr = (incr * incr) / nmp->nm_window; 122140117Smckusick if (incr > 0) { 122241900Smckusick nmp->nm_winext = 0; 122341900Smckusick ++nmp->nm_window; 122440117Smckusick } 122540117Smckusick } 122641900Smckusick if (nmp->nm_window > NFS_MAXWINDOW) 122741900Smckusick nmp->nm_window = NFS_MAXWINDOW; 122840117Smckusick } 122940117Smckusick 123041900Smckusick nfs_backofftimer(nmp) 123141900Smckusick register struct nfsmount *nmp; 123240117Smckusick { 123340117Smckusick register unsigned long newrto; 123440117Smckusick 123540117Smckusick /* Clip shift count */ 123641900Smckusick if (++nmp->nm_rexmit > 8 * sizeof nmp->nm_rto) 123741900Smckusick nmp->nm_rexmit = 8 * sizeof nmp->nm_rto; 123840117Smckusick /* Back off RTO exponentially */ 123941900Smckusick newrto = NFS_RTO(nmp); 124041900Smckusick newrto <<= (nmp->nm_rexmit - 1); 124140117Smckusick if (newrto == 0 || newrto > NFS_MAXTIMEO) 124240117Smckusick newrto = NFS_MAXTIMEO; 124341900Smckusick nmp->nm_rto = newrto; 124440117Smckusick 124540117Smckusick /* If too many retries, message, assume a bogus RTT and re-measure */ 124641900Smckusick if (nmp->nm_currexmit < nmp->nm_rexmit) { 124741900Smckusick nmp->nm_currexmit = nmp->nm_rexmit; 124841900Smckusick if (nmp->nm_currexmit >= nfsrexmtthresh) { 124941900Smckusick if (nmp->nm_currexmit == nfsrexmtthresh) { 125041900Smckusick nmp->nm_rttvar += (nmp->nm_srtt >> 2); 125141900Smckusick nmp->nm_srtt = 0; 125238414Smckusick } 125338414Smckusick } 125438414Smckusick } 125540117Smckusick /* Close down window but remember this point (3/4 current) for later */ 125641900Smckusick nmp->nm_ssthresh = ((nmp->nm_window << 1) + nmp->nm_window) >> 2; 125741900Smckusick nmp->nm_window = 1; 125841900Smckusick nmp->nm_winext = 0; 125938414Smckusick } 126038414Smckusick 126138414Smckusick /* 126241900Smckusick * Test for a termination signal pending on procp. 126341900Smckusick * This is used for NFSMNT_INT mounts. 126438414Smckusick */ 126541900Smckusick nfs_sigintr(p) 126641900Smckusick register struct proc *p; 126741900Smckusick { 126841900Smckusick if (p && p->p_sig && (((p->p_sig &~ p->p_sigmask) &~ p->p_sigignore) & 126941900Smckusick NFSINT_SIGMASK)) 127041900Smckusick return (1); 127141900Smckusick else 127241900Smckusick return (0); 127341900Smckusick } 127440117Smckusick 127541900Smckusick /* 127641900Smckusick * Lock a socket against others. 127741900Smckusick * Necessary for STREAM sockets to ensure you get an entire rpc request/reply 127841900Smckusick * and also to avoid race conditions between the processes with nfs requests 127941900Smckusick * in progress when a reconnect is necessary. 128041900Smckusick */ 128143351Smckusick nfs_solock(flagp) 128243351Smckusick register int *flagp; 128338414Smckusick { 128440117Smckusick 128541900Smckusick while (*flagp & NFSMNT_SCKLOCK) { 128641900Smckusick *flagp |= NFSMNT_WANTSCK; 128743351Smckusick (void) tsleep((caddr_t)flagp, PZERO-1, "nfsolck", 0); 128840117Smckusick } 128941900Smckusick *flagp |= NFSMNT_SCKLOCK; 129041900Smckusick } 129140117Smckusick 129241900Smckusick /* 129341900Smckusick * Unlock the stream socket for others. 129441900Smckusick */ 129541900Smckusick nfs_sounlock(flagp) 129643351Smckusick register int *flagp; 129741900Smckusick { 129841900Smckusick 129941900Smckusick if ((*flagp & NFSMNT_SCKLOCK) == 0) 130041900Smckusick panic("nfs sounlock"); 130141900Smckusick *flagp &= ~NFSMNT_SCKLOCK; 130241900Smckusick if (*flagp & NFSMNT_WANTSCK) { 130341900Smckusick *flagp &= ~NFSMNT_WANTSCK; 130441900Smckusick wakeup((caddr_t)flagp); 130540117Smckusick } 130638414Smckusick } 130741900Smckusick 130841900Smckusick /* 130941900Smckusick * This function compares two net addresses by family and returns TRUE 131041900Smckusick * if they are the same. 131141900Smckusick * If there is any doubt, return FALSE. 131241900Smckusick */ 131341900Smckusick nfs_netaddr_match(nam1, nam2) 131441900Smckusick struct mbuf *nam1, *nam2; 131541900Smckusick { 131641900Smckusick register struct sockaddr *saddr1, *saddr2; 131741900Smckusick 131841900Smckusick saddr1 = mtod(nam1, struct sockaddr *); 131941900Smckusick saddr2 = mtod(nam2, struct sockaddr *); 132041900Smckusick if (saddr1->sa_family != saddr2->sa_family) 132141900Smckusick return (0); 132241900Smckusick 132341900Smckusick /* 132441900Smckusick * Must do each address family separately since unused fields 132541900Smckusick * are undefined values and not always zeroed. 132641900Smckusick */ 132741900Smckusick switch (saddr1->sa_family) { 132841900Smckusick case AF_INET: 132941900Smckusick if (((struct sockaddr_in *)saddr1)->sin_addr.s_addr == 133041900Smckusick ((struct sockaddr_in *)saddr2)->sin_addr.s_addr) 133141900Smckusick return (1); 133241900Smckusick break; 133341900Smckusick default: 133441900Smckusick break; 133541900Smckusick }; 133641900Smckusick return (0); 133741900Smckusick } 133841900Smckusick 133941900Smckusick /* 134041900Smckusick * Check the hostname fields for nfsd's mask and match fields. 134141900Smckusick * By address family: 134241900Smckusick * - Bitwise AND the mask with the host address field 134341900Smckusick * - Compare for == with match 134441900Smckusick * return TRUE if not equal 134541900Smckusick */ 134641900Smckusick nfs_badnam(nam, msk, mtch) 134741900Smckusick register struct mbuf *nam, *msk, *mtch; 134841900Smckusick { 134941900Smckusick switch (mtod(nam, struct sockaddr *)->sa_family) { 135041900Smckusick case AF_INET: 135141900Smckusick return ((mtod(nam, struct sockaddr_in *)->sin_addr.s_addr & 135241900Smckusick mtod(msk, struct sockaddr_in *)->sin_addr.s_addr) != 135341900Smckusick mtod(mtch, struct sockaddr_in *)->sin_addr.s_addr); 135441900Smckusick default: 135541900Smckusick printf("nfs_badmatch, unknown sa_family\n"); 135641900Smckusick return (0); 135741900Smckusick }; 135841900Smckusick } 1359