138414Smckusick /* 247574Skarels * Copyright (c) 1989, 1991 The Regents of the University of California. 338414Smckusick * All rights reserved. 438414Smckusick * 538414Smckusick * This code is derived from software contributed to Berkeley by 638414Smckusick * Rick Macklem at The University of Guelph. 738414Smckusick * 844511Sbostic * %sccs.include.redist.c% 938414Smckusick * 10*47737Skarels * @(#)nfs_socket.c 7.21 (Berkeley) 04/02/91 1138414Smckusick */ 1238414Smckusick 1338414Smckusick /* 1441900Smckusick * Socket operations for use by nfs 1538414Smckusick */ 1638414Smckusick 1738414Smckusick #include "param.h" 1840117Smckusick #include "proc.h" 1938414Smckusick #include "mount.h" 2038414Smckusick #include "kernel.h" 2138414Smckusick #include "malloc.h" 2238414Smckusick #include "mbuf.h" 2338414Smckusick #include "vnode.h" 2438414Smckusick #include "domain.h" 2538414Smckusick #include "protosw.h" 2638414Smckusick #include "socket.h" 2738414Smckusick #include "socketvar.h" 2847574Skarels #include "syslog.h" 29*47737Skarels #include "tprintf.h" 3042877Smckusick #include "../netinet/in.h" 3142877Smckusick #include "../netinet/tcp.h" 3247574Skarels 3338414Smckusick #include "rpcv2.h" 3438414Smckusick #include "nfsv2.h" 3538414Smckusick #include "nfs.h" 3638414Smckusick #include "xdr_subs.h" 3738414Smckusick #include "nfsm_subs.h" 3838414Smckusick #include "nfsmount.h" 3938414Smckusick 4038414Smckusick #define TRUE 1 4143351Smckusick #define FALSE 0 4238414Smckusick 4340117Smckusick /* 4438414Smckusick * External data, mostly RPC constants in XDR form 4538414Smckusick */ 4638414Smckusick extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix, 4738414Smckusick rpc_msgaccepted, rpc_call; 4838414Smckusick extern u_long nfs_prog, nfs_vers; 4943351Smckusick /* Maybe these should be bits in a u_long ?? */ 5041900Smckusick extern int nonidempotent[NFS_NPROCS]; 5145281Smckusick static int compressrequest[NFS_NPROCS] = { 5245281Smckusick FALSE, 5345281Smckusick TRUE, 5445281Smckusick TRUE, 5545281Smckusick FALSE, 5645281Smckusick TRUE, 5745281Smckusick TRUE, 5845281Smckusick TRUE, 5945281Smckusick FALSE, 6045281Smckusick FALSE, 6145281Smckusick TRUE, 6245281Smckusick TRUE, 6345281Smckusick TRUE, 6445281Smckusick TRUE, 6545281Smckusick TRUE, 6645281Smckusick TRUE, 6745281Smckusick TRUE, 6845281Smckusick TRUE, 6945281Smckusick TRUE, 7045281Smckusick }; 7141900Smckusick int nfs_sbwait(); 7241900Smckusick void nfs_disconnect(); 7345281Smckusick struct mbuf *nfs_compress(), *nfs_uncompress(); 7441900Smckusick 7538414Smckusick int nfsrv_null(), 7638414Smckusick nfsrv_getattr(), 7738414Smckusick nfsrv_setattr(), 7838414Smckusick nfsrv_lookup(), 7938414Smckusick nfsrv_readlink(), 8038414Smckusick nfsrv_read(), 8138414Smckusick nfsrv_write(), 8238414Smckusick nfsrv_create(), 8338414Smckusick nfsrv_remove(), 8438414Smckusick nfsrv_rename(), 8538414Smckusick nfsrv_link(), 8638414Smckusick nfsrv_symlink(), 8738414Smckusick nfsrv_mkdir(), 8838414Smckusick nfsrv_rmdir(), 8938414Smckusick nfsrv_readdir(), 9038414Smckusick nfsrv_statfs(), 9138414Smckusick nfsrv_noop(); 9238414Smckusick 9338414Smckusick int (*nfsrv_procs[NFS_NPROCS])() = { 9438414Smckusick nfsrv_null, 9538414Smckusick nfsrv_getattr, 9638414Smckusick nfsrv_setattr, 9738414Smckusick nfsrv_noop, 9838414Smckusick nfsrv_lookup, 9938414Smckusick nfsrv_readlink, 10038414Smckusick nfsrv_read, 10138414Smckusick nfsrv_noop, 10238414Smckusick nfsrv_write, 10338414Smckusick nfsrv_create, 10438414Smckusick nfsrv_remove, 10538414Smckusick nfsrv_rename, 10638414Smckusick nfsrv_link, 10738414Smckusick nfsrv_symlink, 10838414Smckusick nfsrv_mkdir, 10938414Smckusick nfsrv_rmdir, 11038414Smckusick nfsrv_readdir, 11138414Smckusick nfsrv_statfs, 11238414Smckusick }; 11338414Smckusick 11440117Smckusick struct nfsreq nfsreqh; 11540117Smckusick int nfsrexmtthresh = NFS_FISHY; 11641900Smckusick int nfs_tcpnodelay = 1; 11738414Smckusick 11838414Smckusick /* 11941900Smckusick * Initialize sockets and congestion for a new NFS connection. 12040117Smckusick * We do not free the sockaddr if error. 12138414Smckusick */ 12241900Smckusick nfs_connect(nmp) 12340117Smckusick register struct nfsmount *nmp; 12440117Smckusick { 12541900Smckusick register struct socket *so; 12641900Smckusick int s, error; 12740117Smckusick struct mbuf *m; 12840117Smckusick 12941900Smckusick nmp->nm_so = (struct socket *)0; 13041900Smckusick if (error = socreate(mtod(nmp->nm_nam, struct sockaddr *)->sa_family, 13141900Smckusick &nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto)) 13240117Smckusick goto bad; 13341900Smckusick so = nmp->nm_so; 13441900Smckusick nmp->nm_soflags = so->so_proto->pr_flags; 13540117Smckusick 13641900Smckusick /* 13741900Smckusick * Protocols that do not require connections may be optionally left 13841900Smckusick * unconnected for servers that reply from a port other than NFS_PORT. 13941900Smckusick */ 14041900Smckusick if (nmp->nm_flag & NFSMNT_NOCONN) { 14141900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) { 14241900Smckusick error = ENOTCONN; 14340117Smckusick goto bad; 14440117Smckusick } 14541900Smckusick } else { 14641900Smckusick if (error = soconnect(so, nmp->nm_nam)) 14740117Smckusick goto bad; 14841900Smckusick 14941900Smckusick /* 15041900Smckusick * Wait for the connection to complete. Cribbed from the 15141900Smckusick * connect system call but with the wait at negative prio. 15241900Smckusick */ 15341900Smckusick s = splnet(); 15441900Smckusick while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) 15543351Smckusick (void) tsleep((caddr_t)&so->so_timeo, PSOCK, "nfscon", 0); 15641900Smckusick splx(s); 15741900Smckusick if (so->so_error) { 15841900Smckusick error = so->so_error; 15941900Smckusick goto bad; 16041900Smckusick } 16140117Smckusick } 16241900Smckusick if (nmp->nm_sotype == SOCK_DGRAM) { 16343351Smckusick if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) { 16441900Smckusick so->so_rcv.sb_timeo = (5 * hz); 16541900Smckusick so->so_snd.sb_timeo = (5 * hz); 16641900Smckusick } else { 16741900Smckusick so->so_rcv.sb_timeo = 0; 16841900Smckusick so->so_snd.sb_timeo = 0; 16941900Smckusick } 17047574Skarels if (error = soreserve(so, 17147574Skarels min(4 * (nmp->nm_wsize + NFS_MAXPKTHDR), NFS_MAXPACKET), 17247574Skarels min(4 * (nmp->nm_rsize + NFS_MAXPKTHDR), NFS_MAXPACKET))) 17341900Smckusick goto bad; 17441900Smckusick } else { 17543351Smckusick if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) { 17641900Smckusick so->so_rcv.sb_timeo = (5 * hz); 17741900Smckusick so->so_snd.sb_timeo = (5 * hz); 17841900Smckusick } else { 17941900Smckusick so->so_rcv.sb_timeo = 0; 18041900Smckusick so->so_snd.sb_timeo = 0; 18141900Smckusick } 18241900Smckusick if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 18341900Smckusick MGET(m, M_WAIT, MT_SOOPTS); 18441900Smckusick *mtod(m, int *) = 1; 18541900Smckusick m->m_len = sizeof(int); 18641900Smckusick sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m); 18741900Smckusick } 18841900Smckusick if (so->so_proto->pr_domain->dom_family == AF_INET && 18941900Smckusick so->so_proto->pr_protocol == IPPROTO_TCP && 19041900Smckusick nfs_tcpnodelay) { 19141900Smckusick MGET(m, M_WAIT, MT_SOOPTS); 19241900Smckusick *mtod(m, int *) = 1; 19341900Smckusick m->m_len = sizeof(int); 19441900Smckusick sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m); 19541900Smckusick } 19641900Smckusick if (error = soreserve(so, 19747574Skarels min(4 * (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof(u_long)), 19847574Skarels NFS_MAXPACKET + sizeof(u_long)), 19947574Skarels min(4 * (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof(u_long)), 20047574Skarels NFS_MAXPACKET + sizeof(u_long)))) 20141900Smckusick goto bad; 20241900Smckusick } 20341900Smckusick so->so_rcv.sb_flags |= SB_NOINTR; 20441900Smckusick so->so_snd.sb_flags |= SB_NOINTR; 20540117Smckusick 20641900Smckusick /* Initialize other non-zero congestion variables */ 20741900Smckusick nmp->nm_rto = NFS_TIMEO; 20841900Smckusick nmp->nm_window = 2; /* Initial send window */ 20941900Smckusick nmp->nm_ssthresh = NFS_MAXWINDOW; /* Slowstart threshold */ 21041900Smckusick nmp->nm_rttvar = nmp->nm_rto << 1; 21141900Smckusick nmp->nm_sent = 0; 21241900Smckusick nmp->nm_currexmit = 0; 21341900Smckusick return (0); 21440117Smckusick 21541900Smckusick bad: 21641900Smckusick nfs_disconnect(nmp); 21741900Smckusick return (error); 21841900Smckusick } 21940117Smckusick 22041900Smckusick /* 22141900Smckusick * Reconnect routine: 22241900Smckusick * Called when a connection is broken on a reliable protocol. 22341900Smckusick * - clean up the old socket 22441900Smckusick * - nfs_connect() again 22541900Smckusick * - set R_MUSTRESEND for all outstanding requests on mount point 22641900Smckusick * If this fails the mount point is DEAD! 22741900Smckusick * nb: Must be called with the nfs_solock() set on the mount point. 22841900Smckusick */ 22941900Smckusick nfs_reconnect(rep, nmp) 23041900Smckusick register struct nfsreq *rep; 23141900Smckusick register struct nfsmount *nmp; 23241900Smckusick { 23341900Smckusick register struct nfsreq *rp; 23441900Smckusick int error; 23540117Smckusick 236*47737Skarels nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname, 237*47737Skarels "trying reconnect"); 23841900Smckusick while (error = nfs_connect(nmp)) { 23942243Smckusick #ifdef lint 24042243Smckusick error = error; 24142243Smckusick #endif /* lint */ 24241900Smckusick if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp)) 24341900Smckusick return (EINTR); 24443351Smckusick (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0); 24540117Smckusick } 246*47737Skarels nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname, 247*47737Skarels "reconnected"); 24841900Smckusick 24941900Smckusick /* 25041900Smckusick * Loop through outstanding request list and fix up all requests 25141900Smckusick * on old socket. 25241900Smckusick */ 25341900Smckusick rp = nfsreqh.r_next; 25441900Smckusick while (rp != &nfsreqh) { 25541900Smckusick if (rp->r_nmp == nmp) 25641900Smckusick rp->r_flags |= R_MUSTRESEND; 25741900Smckusick rp = rp->r_next; 25840117Smckusick } 25940117Smckusick return (0); 26040117Smckusick } 26140117Smckusick 26240117Smckusick /* 26340117Smckusick * NFS disconnect. Clean up and unlink. 26440117Smckusick */ 26541900Smckusick void 26640117Smckusick nfs_disconnect(nmp) 26740117Smckusick register struct nfsmount *nmp; 26840117Smckusick { 26941900Smckusick register struct socket *so; 27040117Smckusick 27141900Smckusick if (nmp->nm_so) { 27241900Smckusick so = nmp->nm_so; 27341900Smckusick nmp->nm_so = (struct socket *)0; 27441900Smckusick soshutdown(so, 2); 27541900Smckusick soclose(so); 27640117Smckusick } 27740117Smckusick } 27840117Smckusick 27940117Smckusick /* 28041900Smckusick * This is the nfs send routine. For connection based socket types, it 28141900Smckusick * must be called with an nfs_solock() on the socket. 28241900Smckusick * "rep == NULL" indicates that it has been called from a server. 28340117Smckusick */ 28441900Smckusick nfs_send(so, nam, top, rep) 28538414Smckusick register struct socket *so; 28638414Smckusick struct mbuf *nam; 28741900Smckusick register struct mbuf *top; 28841900Smckusick struct nfsreq *rep; 28938414Smckusick { 29041900Smckusick struct mbuf *sendnam; 29141900Smckusick int error, soflags; 29238414Smckusick 29341900Smckusick if (rep) { 29441900Smckusick if (rep->r_flags & R_SOFTTERM) { 29540117Smckusick m_freem(top); 29641900Smckusick return (EINTR); 29740117Smckusick } 29843062Smckusick if (rep->r_nmp->nm_so == NULL && 29941900Smckusick (error = nfs_reconnect(rep, rep->r_nmp))) 30041900Smckusick return (error); 30141900Smckusick rep->r_flags &= ~R_MUSTRESEND; 30243062Smckusick so = rep->r_nmp->nm_so; 30341900Smckusick soflags = rep->r_nmp->nm_soflags; 30441900Smckusick } else 30541900Smckusick soflags = so->so_proto->pr_flags; 30641900Smckusick if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED)) 30741900Smckusick sendnam = (struct mbuf *)0; 30841900Smckusick else 30941900Smckusick sendnam = nam; 31041900Smckusick 31141900Smckusick error = sosend(so, sendnam, (struct uio *)0, top, 31241900Smckusick (struct mbuf *)0, 0); 31341900Smckusick if (error == EWOULDBLOCK && rep) { 31441900Smckusick if (rep->r_flags & R_SOFTTERM) 31541900Smckusick error = EINTR; 31641900Smckusick else { 31741900Smckusick rep->r_flags |= R_MUSTRESEND; 31841900Smckusick error = 0; 31940117Smckusick } 32038414Smckusick } 32141900Smckusick /* 32241900Smckusick * Ignore socket errors?? 32341900Smckusick */ 32441900Smckusick if (error && error != EINTR && error != ERESTART) 32541900Smckusick error = 0; 32638414Smckusick return (error); 32738414Smckusick } 32838414Smckusick 32938414Smckusick /* 33041900Smckusick * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all 33141900Smckusick * done by soreceive(), but for SOCK_STREAM we must deal with the Record 33241900Smckusick * Mark and consolidate the data into a new mbuf list. 33341900Smckusick * nb: Sometimes TCP passes the data up to soreceive() in long lists of 33441900Smckusick * small mbufs. 33541900Smckusick * For SOCK_STREAM we must be very careful to read an entire record once 33641900Smckusick * we have read any of it, even if the system call has been interrupted. 33738414Smckusick */ 33841900Smckusick nfs_receive(so, aname, mp, rep) 33938414Smckusick register struct socket *so; 34038414Smckusick struct mbuf **aname; 34138414Smckusick struct mbuf **mp; 34241900Smckusick register struct nfsreq *rep; 34338414Smckusick { 34441900Smckusick struct uio auio; 34541900Smckusick struct iovec aio; 34638414Smckusick register struct mbuf *m; 34745281Smckusick struct mbuf *m2, *mnew, **mbp; 34841900Smckusick caddr_t fcp, tcp; 34941900Smckusick u_long len; 35041900Smckusick struct mbuf **getnam; 351*47737Skarels int error, siz, mlen, soflags, rcvflg; 35238414Smckusick 35341900Smckusick /* 35441900Smckusick * Set up arguments for soreceive() 35541900Smckusick */ 35641900Smckusick *mp = (struct mbuf *)0; 35741900Smckusick *aname = (struct mbuf *)0; 35841900Smckusick if (rep) 35941900Smckusick soflags = rep->r_nmp->nm_soflags; 36041900Smckusick else 36141900Smckusick soflags = so->so_proto->pr_flags; 36238414Smckusick 36341900Smckusick /* 36441900Smckusick * For reliable protocols, lock against other senders/receivers 36541900Smckusick * in case a reconnect is necessary. 36641900Smckusick * For SOCK_STREAM, first get the Record Mark to find out how much 36741900Smckusick * more there is to get. 36841900Smckusick * We must lock the socket against other receivers 36941900Smckusick * until we have an entire rpc request/reply. 37041900Smckusick */ 37141900Smckusick if (soflags & PR_CONNREQUIRED) { 37241900Smckusick tryagain: 37341900Smckusick /* 37441900Smckusick * Check for fatal errors and resending request. 37541900Smckusick */ 37641900Smckusick if (rep) { 37741900Smckusick /* 37841900Smckusick * Ugh: If a reconnect attempt just happened, nm_so 37941900Smckusick * would have changed. NULL indicates a failed 38041900Smckusick * attempt that has essentially shut down this 38141900Smckusick * mount point. 38241900Smckusick */ 38341900Smckusick if (rep->r_mrep || (so = rep->r_nmp->nm_so) == NULL || 38441900Smckusick (rep->r_flags & R_SOFTTERM)) 38541900Smckusick return (EINTR); 38641900Smckusick while (rep->r_flags & R_MUSTRESEND) { 38741900Smckusick m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); 38841900Smckusick nfsstats.rpcretries++; 38941900Smckusick if (error = nfs_send(so, rep->r_nmp->nm_nam, m, 39041900Smckusick rep)) 39141900Smckusick goto errout; 39240117Smckusick } 39341900Smckusick } 39441900Smckusick if ((soflags & PR_ATOMIC) == 0) { 39541900Smckusick aio.iov_base = (caddr_t) &len; 39641900Smckusick aio.iov_len = sizeof(u_long); 39741900Smckusick auio.uio_iov = &aio; 39841900Smckusick auio.uio_iovcnt = 1; 39941900Smckusick auio.uio_segflg = UIO_SYSSPACE; 40041900Smckusick auio.uio_rw = UIO_READ; 40141900Smckusick auio.uio_offset = 0; 40241900Smckusick auio.uio_resid = sizeof(u_long); 40341900Smckusick do { 404*47737Skarels rcvflg = MSG_WAITALL; 405*47737Skarels error = soreceive(so, (struct mbuf **)0, &auio, 40641900Smckusick (struct mbuf **)0, (struct mbuf **)0, &rcvflg); 407*47737Skarels if (error == EWOULDBLOCK && rep) { 40841900Smckusick if (rep->r_flags & R_SOFTTERM) 40941900Smckusick return (EINTR); 41041900Smckusick if (rep->r_flags & R_MUSTRESEND) 41141900Smckusick goto tryagain; 412*47737Skarels } 41341900Smckusick } while (error == EWOULDBLOCK); 414*47737Skarels if (!error && auio.uio_resid > 0) { 415*47737Skarels if (rep) 416*47737Skarels log(LOG_INFO, 417*47737Skarels "short receive (%d/%d) from nfs server %s\n", 418*47737Skarels sizeof(u_long) - auio.uio_resid, 419*47737Skarels sizeof(u_long), 420*47737Skarels rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 421*47737Skarels error = EPIPE; 422*47737Skarels } 42340761Skarels if (error) 42441900Smckusick goto errout; 42541900Smckusick len = ntohl(len) & ~0x80000000; 42641900Smckusick /* 42741900Smckusick * This is SERIOUS! We are out of sync with the sender 42841900Smckusick * and forcing a disconnect/reconnect is all I can do. 42941900Smckusick */ 43041900Smckusick if (len > NFS_MAXPACKET) { 431*47737Skarels if (rep) 432*47737Skarels log(LOG_ERR, "%s (%d) from nfs server %s\n", 433*47737Skarels "impossible packet length", 434*47737Skarels len, 435*47737Skarels rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 436*47737Skarels error = EFBIG; 437*47737Skarels goto errout; 43841900Smckusick } 43941900Smckusick auio.uio_resid = len; 44041900Smckusick do { 441*47737Skarels rcvflg = MSG_WAITALL; 44241900Smckusick error = soreceive(so, (struct mbuf **)0, 44341900Smckusick &auio, mp, (struct mbuf **)0, &rcvflg); 44441900Smckusick } while (error == EWOULDBLOCK || error == EINTR || 44541900Smckusick error == ERESTART); 446*47737Skarels if (!error && auio.uio_resid > 0) { 447*47737Skarels if (rep) 448*47737Skarels log(LOG_INFO, 449*47737Skarels "short receive (%d/%d) from nfs server %s\n", 450*47737Skarels len - auio.uio_resid, len, 451*47737Skarels rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 452*47737Skarels error = EPIPE; 453*47737Skarels } 45440117Smckusick } else { 45541900Smckusick auio.uio_resid = len = 1000000; /* Anything Big */ 45641900Smckusick do { 457*47737Skarels rcvflg = 0; 45841900Smckusick error = soreceive(so, (struct mbuf **)0, 45941900Smckusick &auio, mp, (struct mbuf **)0, &rcvflg); 46041900Smckusick if (error == EWOULDBLOCK && rep) { 46141900Smckusick if (rep->r_flags & R_SOFTTERM) 46241900Smckusick return (EINTR); 46341900Smckusick if (rep->r_flags & R_MUSTRESEND) 46441900Smckusick goto tryagain; 46541900Smckusick } 46641900Smckusick } while (error == EWOULDBLOCK); 46741900Smckusick if (!error && *mp == NULL) 46841900Smckusick error = EPIPE; 46941900Smckusick len -= auio.uio_resid; 47040117Smckusick } 47141900Smckusick errout: 47241900Smckusick if (error && rep && error != EINTR && error != ERESTART) { 47341900Smckusick m_freem(*mp); 47441900Smckusick *mp = (struct mbuf *)0; 475*47737Skarels if (error != EPIPE && rep) 476*47737Skarels log(LOG_INFO, 477*47737Skarels "receive error %d from nfs server %s\n", 478*47737Skarels error, 479*47737Skarels rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 48041900Smckusick nfs_disconnect(rep->r_nmp); 48141900Smckusick error = nfs_reconnect(rep, rep->r_nmp); 48241900Smckusick if (!error) 48341900Smckusick goto tryagain; 48440117Smckusick } 48541900Smckusick } else { 48641900Smckusick if (so->so_state & SS_ISCONNECTED) 48741900Smckusick getnam = (struct mbuf **)0; 48841900Smckusick else 48941900Smckusick getnam = aname; 49041900Smckusick auio.uio_resid = len = 1000000; 49141900Smckusick do { 492*47737Skarels rcvflg = 0; 49341900Smckusick error = soreceive(so, getnam, &auio, mp, 49441900Smckusick (struct mbuf **)0, &rcvflg); 49541900Smckusick if (error == EWOULDBLOCK && rep && 49641900Smckusick (rep->r_flags & R_SOFTTERM)) 49741900Smckusick return (EINTR); 49841900Smckusick } while (error == EWOULDBLOCK); 49941900Smckusick len -= auio.uio_resid; 50041900Smckusick } 50141900Smckusick if (error) { 50241900Smckusick m_freem(*mp); 50341900Smckusick *mp = (struct mbuf *)0; 50441900Smckusick } 50541900Smckusick /* 50641900Smckusick * Search for any mbufs that are not a multiple of 4 bytes long. 50741900Smckusick * These could cause pointer alignment problems, so copy them to 50841900Smckusick * well aligned mbufs. 50941900Smckusick */ 51041900Smckusick m = *mp; 51141900Smckusick mbp = mp; 51241900Smckusick while (m) { 51341900Smckusick /* 51441900Smckusick * All this for something that may never happen. 51541900Smckusick */ 51645281Smckusick if (m->m_next && (m->m_len & 0x3)) { 51741900Smckusick printf("nfs_rcv odd length!\n"); 51842243Smckusick mlen = 0; 51941900Smckusick while (m) { 52045281Smckusick fcp = mtod(m, caddr_t); 52145281Smckusick while (m->m_len > 0) { 52245281Smckusick if (mlen == 0) { 52345281Smckusick MGET(m2, M_WAIT, MT_DATA); 52445281Smckusick if (len >= MINCLSIZE) 52545281Smckusick MCLGET(m2, M_WAIT); 52645281Smckusick m2->m_len = 0; 52745281Smckusick mlen = M_TRAILINGSPACE(m2); 52845281Smckusick tcp = mtod(m2, caddr_t); 52945281Smckusick *mbp = m2; 53045281Smckusick mbp = &m2->m_next; 53145281Smckusick } 53245281Smckusick siz = MIN(mlen, m->m_len); 53345281Smckusick bcopy(fcp, tcp, siz); 53445281Smckusick m2->m_len += siz; 53545281Smckusick mlen -= siz; 53645281Smckusick len -= siz; 53745281Smckusick tcp += siz; 53845281Smckusick m->m_len -= siz; 53945281Smckusick fcp += siz; 54041900Smckusick } 54145281Smckusick MFREE(m, mnew); 54245281Smckusick m = mnew; 54341900Smckusick } 54441900Smckusick break; 54540117Smckusick } 54641900Smckusick len -= m->m_len; 54741900Smckusick mbp = &m->m_next; 54841900Smckusick m = m->m_next; 54938414Smckusick } 55038414Smckusick return (error); 55138414Smckusick } 55238414Smckusick 55338414Smckusick /* 55441900Smckusick * Implement receipt of reply on a socket. 55538414Smckusick * We must search through the list of received datagrams matching them 55638414Smckusick * with outstanding requests using the xid, until ours is found. 55738414Smckusick */ 55841900Smckusick /* ARGSUSED */ 55941900Smckusick nfs_reply(nmp, myrep) 56041900Smckusick struct nfsmount *nmp; 56139344Smckusick struct nfsreq *myrep; 56238414Smckusick { 56338414Smckusick register struct mbuf *m; 56438414Smckusick register struct nfsreq *rep; 56541900Smckusick register int error = 0; 56645281Smckusick u_long rxid; 56741900Smckusick struct mbuf *mp, *nam; 56841900Smckusick char *cp; 56941900Smckusick int cnt, xfer; 57038414Smckusick 57141900Smckusick /* 57241900Smckusick * Loop around until we get our own reply 57341900Smckusick */ 57441900Smckusick for (;;) { 57541900Smckusick /* 57641900Smckusick * Lock against other receivers so that I don't get stuck in 57741900Smckusick * sbwait() after someone else has received my reply for me. 57841900Smckusick * Also necessary for connection based protocols to avoid 57941900Smckusick * race conditions during a reconnect. 58041900Smckusick */ 58143351Smckusick nfs_solock(&nmp->nm_flag); 58241900Smckusick /* Already received, bye bye */ 58341900Smckusick if (myrep->r_mrep != NULL) { 58441900Smckusick nfs_sounlock(&nmp->nm_flag); 58541900Smckusick return (0); 58640117Smckusick } 58741900Smckusick /* 58841900Smckusick * Get the next Rpc reply off the socket 58941900Smckusick */ 59041900Smckusick if (error = nfs_receive(nmp->nm_so, &nam, &mp, myrep)) { 59141900Smckusick nfs_sounlock(&nmp->nm_flag); 59238414Smckusick 59341900Smckusick /* 59441900Smckusick * Ignore routing errors on connectionless protocols?? 59541900Smckusick */ 59641900Smckusick if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) { 59741900Smckusick nmp->nm_so->so_error = 0; 59841900Smckusick continue; 59941900Smckusick } 60041900Smckusick 60141900Smckusick /* 60241900Smckusick * Otherwise cleanup and return a fatal error. 60341900Smckusick */ 60441900Smckusick if (myrep->r_flags & R_TIMING) { 60541900Smckusick myrep->r_flags &= ~R_TIMING; 60641900Smckusick nmp->nm_rtt = -1; 60741900Smckusick } 60841900Smckusick if (myrep->r_flags & R_SENT) { 60941900Smckusick myrep->r_flags &= ~R_SENT; 61041900Smckusick nmp->nm_sent--; 61141900Smckusick } 61241900Smckusick return (error); 61338414Smckusick } 61441900Smckusick 61541900Smckusick /* 61641900Smckusick * Get the xid and check that it is an rpc reply 61741900Smckusick */ 61841900Smckusick m = mp; 61945281Smckusick while (m && m->m_len == 0) 62045281Smckusick m = m->m_next; 62145281Smckusick if (m == NULL) { 62240117Smckusick nfsstats.rpcinvalid++; 62341900Smckusick m_freem(mp); 62441900Smckusick nfs_sounlock(&nmp->nm_flag); 62541900Smckusick continue; 62638414Smckusick } 62745281Smckusick bcopy(mtod(m, caddr_t), (caddr_t)&rxid, NFSX_UNSIGNED); 62841900Smckusick /* 62941900Smckusick * Loop through the request list to match up the reply 63041900Smckusick * Iff no match, just drop the datagram 63141900Smckusick */ 63241900Smckusick m = mp; 63341900Smckusick rep = nfsreqh.r_next; 63441900Smckusick while (rep != &nfsreqh) { 63545281Smckusick if (rep->r_mrep == NULL && rxid == rep->r_xid) { 63641900Smckusick /* Found it.. */ 63741900Smckusick rep->r_mrep = m; 63841900Smckusick /* 63941900Smckusick * Update timing 64041900Smckusick */ 64141900Smckusick if (rep->r_flags & R_TIMING) { 64241900Smckusick nfs_updatetimer(rep->r_nmp); 64341900Smckusick rep->r_flags &= ~R_TIMING; 64441900Smckusick rep->r_nmp->nm_rtt = -1; 64541900Smckusick } 64641900Smckusick if (rep->r_flags & R_SENT) { 64741900Smckusick rep->r_flags &= ~R_SENT; 64841900Smckusick rep->r_nmp->nm_sent--; 64941900Smckusick } 65040117Smckusick break; 65138414Smckusick } 65241900Smckusick rep = rep->r_next; 65338414Smckusick } 65441900Smckusick nfs_sounlock(&nmp->nm_flag); 65541900Smckusick if (nam) 65641900Smckusick m_freem(nam); 65741900Smckusick /* 65841900Smckusick * If not matched to a request, drop it. 65941900Smckusick * If it's mine, get out. 66041900Smckusick */ 66141900Smckusick if (rep == &nfsreqh) { 66241900Smckusick nfsstats.rpcunexpected++; 66341900Smckusick m_freem(m); 66441900Smckusick } else if (rep == myrep) 66541900Smckusick return (0); 66638414Smckusick } 66738414Smckusick } 66838414Smckusick 66938414Smckusick /* 67038414Smckusick * nfs_request - goes something like this 67138414Smckusick * - fill in request struct 67238414Smckusick * - links it into list 67341900Smckusick * - calls nfs_send() for first transmit 67441900Smckusick * - calls nfs_receive() to get reply 67538414Smckusick * - break down rpc header and return with nfs reply pointed to 67638414Smckusick * by mrep or error 67738414Smckusick * nb: always frees up mreq mbuf list 67838414Smckusick */ 67943351Smckusick nfs_request(vp, mreq, xid, procnum, procp, tryhard, mp, mrp, mdp, dposp) 68038414Smckusick struct vnode *vp; 68138414Smckusick struct mbuf *mreq; 68238414Smckusick u_long xid; 68341900Smckusick int procnum; 68441900Smckusick struct proc *procp; 68543351Smckusick int tryhard; 68638414Smckusick struct mount *mp; 68738414Smckusick struct mbuf **mrp; 68838414Smckusick struct mbuf **mdp; 68938414Smckusick caddr_t *dposp; 69038414Smckusick { 69138414Smckusick register struct mbuf *m, *mrep; 69238414Smckusick register struct nfsreq *rep; 69338414Smckusick register u_long *p; 69438414Smckusick register int len; 69541900Smckusick struct nfsmount *nmp; 69638414Smckusick struct mbuf *md; 69739344Smckusick struct nfsreq *reph; 69838414Smckusick caddr_t dpos; 69938414Smckusick char *cp2; 70038414Smckusick int t1; 70145281Smckusick int s, compressed; 70241900Smckusick int error = 0; 70338414Smckusick 70441900Smckusick nmp = VFSTONFS(mp); 70538414Smckusick m = mreq; 70638414Smckusick MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK); 70738414Smckusick rep->r_xid = xid; 70841900Smckusick rep->r_nmp = nmp; 70938414Smckusick rep->r_vp = vp; 71041900Smckusick rep->r_procp = procp; 71143351Smckusick if ((nmp->nm_flag & NFSMNT_SOFT) || 71243351Smckusick ((nmp->nm_flag & NFSMNT_SPONGY) && !tryhard)) 71341900Smckusick rep->r_retry = nmp->nm_retry; 71438414Smckusick else 71540117Smckusick rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */ 71640117Smckusick rep->r_flags = rep->r_rexmit = 0; 71741900Smckusick /* 71841900Smckusick * Three cases: 71941900Smckusick * - non-idempotent requests on SOCK_DGRAM use NFS_MINIDEMTIMEO 72041900Smckusick * - idempotent requests on SOCK_DGRAM use 0 72141900Smckusick * - Reliable transports, NFS_RELIABLETIMEO 72241900Smckusick * Timeouts are still done on reliable transports to ensure detection 72343351Smckusick * of excessive connection delay. 72441900Smckusick */ 72541900Smckusick if (nmp->nm_sotype != SOCK_DGRAM) 72641900Smckusick rep->r_timerinit = -NFS_RELIABLETIMEO; 72741900Smckusick else if (nonidempotent[procnum]) 72841900Smckusick rep->r_timerinit = -NFS_MINIDEMTIMEO; 72941900Smckusick else 73041900Smckusick rep->r_timerinit = 0; 73141900Smckusick rep->r_timer = rep->r_timerinit; 73238414Smckusick rep->r_mrep = NULL; 73338414Smckusick len = 0; 73438414Smckusick while (m) { 73538414Smckusick len += m->m_len; 73638414Smckusick m = m->m_next; 73738414Smckusick } 73841900Smckusick mreq->m_pkthdr.len = len; 73941900Smckusick mreq->m_pkthdr.rcvif = (struct ifnet *)0; 74045281Smckusick compressed = 0; 74145281Smckusick m = mreq; 74245281Smckusick if ((nmp->nm_flag & NFSMNT_COMPRESS) && compressrequest[procnum]) { 74345281Smckusick mreq = nfs_compress(mreq); 74445281Smckusick if (mreq != m) { 74545281Smckusick len = mreq->m_pkthdr.len; 74645281Smckusick compressed++; 74745281Smckusick } 74845281Smckusick } 74941900Smckusick /* 75041900Smckusick * For non-atomic protocols, insert a Sun RPC Record Mark. 75141900Smckusick */ 75241900Smckusick if ((nmp->nm_soflags & PR_ATOMIC) == 0) { 75341900Smckusick M_PREPEND(mreq, sizeof(u_long), M_WAIT); 75441900Smckusick *mtod(mreq, u_long *) = htonl(0x80000000 | len); 75541900Smckusick } 75641900Smckusick rep->r_mreq = mreq; 75738414Smckusick 75840117Smckusick /* 75940117Smckusick * Do the client side RPC. 76040117Smckusick */ 76140117Smckusick nfsstats.rpcrequests++; 76241900Smckusick /* 76341900Smckusick * Chain request into list of outstanding requests. Be sure 76441900Smckusick * to put it LAST so timer finds oldest requests first. 76541900Smckusick */ 76640117Smckusick s = splnet(); 76739344Smckusick reph = &nfsreqh; 76841900Smckusick reph->r_prev->r_next = rep; 76941900Smckusick rep->r_prev = reph->r_prev; 77039344Smckusick reph->r_prev = rep; 77139344Smckusick rep->r_next = reph; 77240117Smckusick /* 77340117Smckusick * If backing off another request or avoiding congestion, don't 77440117Smckusick * send this one now but let timer do it. If not timing a request, 77540117Smckusick * do it now. 77640117Smckusick */ 77741900Smckusick if (nmp->nm_sent <= 0 || nmp->nm_sotype != SOCK_DGRAM || 77841900Smckusick (nmp->nm_currexmit == 0 && nmp->nm_sent < nmp->nm_window)) { 77941900Smckusick nmp->nm_sent++; 78041900Smckusick rep->r_flags |= R_SENT; 78141900Smckusick if (nmp->nm_rtt == -1) { 78241900Smckusick nmp->nm_rtt = 0; 78341900Smckusick rep->r_flags |= R_TIMING; 78441900Smckusick } 78540117Smckusick splx(s); 78641900Smckusick m = m_copym(mreq, 0, M_COPYALL, M_WAIT); 78741900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) 78843351Smckusick nfs_solock(&nmp->nm_flag); 78941900Smckusick error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep); 79041900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) 79141900Smckusick nfs_sounlock(&nmp->nm_flag); 79241900Smckusick if (error && NFSIGNORE_SOERROR(nmp->nm_soflags, error)) 79341900Smckusick nmp->nm_so->so_error = error = 0; 79441900Smckusick } else 79541900Smckusick splx(s); 79638414Smckusick 79738414Smckusick /* 79840117Smckusick * Wait for the reply from our send or the timer's. 79940117Smckusick */ 80041900Smckusick if (!error) 80141900Smckusick error = nfs_reply(nmp, rep); 80238414Smckusick 80340117Smckusick /* 80440117Smckusick * RPC done, unlink the request. 80540117Smckusick */ 80638414Smckusick s = splnet(); 80738414Smckusick rep->r_prev->r_next = rep->r_next; 80839344Smckusick rep->r_next->r_prev = rep->r_prev; 80938414Smckusick splx(s); 81041900Smckusick 81141900Smckusick /* 81241900Smckusick * If there was a successful reply and a tprintf msg. 81341900Smckusick * tprintf a response. 81441900Smckusick */ 815*47737Skarels if (!error && (rep->r_flags & R_TPRINTFMSG)) 816*47737Skarels nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname, 817*47737Skarels "is alive again"); 81838414Smckusick m_freem(rep->r_mreq); 81945281Smckusick mrep = rep->r_mrep; 82038414Smckusick FREE((caddr_t)rep, M_NFSREQ); 82138414Smckusick if (error) 82238414Smckusick return (error); 82338414Smckusick 82445281Smckusick if (compressed) 82545281Smckusick mrep = nfs_uncompress(mrep); 82645281Smckusick md = mrep; 82738414Smckusick /* 82838414Smckusick * break down the rpc header and check if ok 82938414Smckusick */ 83038414Smckusick dpos = mtod(md, caddr_t); 83138414Smckusick nfsm_disect(p, u_long *, 5*NFSX_UNSIGNED); 83238414Smckusick p += 2; 83338414Smckusick if (*p++ == rpc_msgdenied) { 83438414Smckusick if (*p == rpc_mismatch) 83538414Smckusick error = EOPNOTSUPP; 83638414Smckusick else 83738414Smckusick error = EACCES; 83838414Smckusick m_freem(mrep); 83938414Smckusick return (error); 84038414Smckusick } 84138414Smckusick /* 84238414Smckusick * skip over the auth_verf, someday we may want to cache auth_short's 84338414Smckusick * for nfs_reqhead(), but for now just dump it 84438414Smckusick */ 84538414Smckusick if (*++p != 0) { 84638414Smckusick len = nfsm_rndup(fxdr_unsigned(long, *p)); 84738414Smckusick nfsm_adv(len); 84838414Smckusick } 84938414Smckusick nfsm_disect(p, u_long *, NFSX_UNSIGNED); 85038414Smckusick /* 0 == ok */ 85138414Smckusick if (*p == 0) { 85238414Smckusick nfsm_disect(p, u_long *, NFSX_UNSIGNED); 85338414Smckusick if (*p != 0) { 85438414Smckusick error = fxdr_unsigned(int, *p); 85538414Smckusick m_freem(mrep); 85638414Smckusick return (error); 85738414Smckusick } 85838414Smckusick *mrp = mrep; 85938414Smckusick *mdp = md; 86038414Smckusick *dposp = dpos; 86138414Smckusick return (0); 86238414Smckusick } 86338414Smckusick m_freem(mrep); 86438414Smckusick return (EPROTONOSUPPORT); 86538414Smckusick nfsmout: 86638414Smckusick return (error); 86738414Smckusick } 86838414Smckusick 86938414Smckusick /* 87038414Smckusick * Get a request for the server main loop 87138414Smckusick * - receive a request via. nfs_soreceive() 87238414Smckusick * - verify it 87338414Smckusick * - fill in the cred struct. 87438414Smckusick */ 87542243Smckusick nfs_getreq(so, prog, vers, maxproc, nam, mrp, mdp, dposp, retxid, procnum, cr, 87645281Smckusick msk, mtch, wascomp) 87738414Smckusick struct socket *so; 87838414Smckusick u_long prog; 87938414Smckusick u_long vers; 88038414Smckusick int maxproc; 88138414Smckusick struct mbuf **nam; 88238414Smckusick struct mbuf **mrp; 88338414Smckusick struct mbuf **mdp; 88438414Smckusick caddr_t *dposp; 88538414Smckusick u_long *retxid; 88642243Smckusick u_long *procnum; 88738414Smckusick register struct ucred *cr; 88841900Smckusick struct mbuf *msk, *mtch; 88945281Smckusick int *wascomp; 89038414Smckusick { 89138414Smckusick register int i; 89239494Smckusick register u_long *p; 89339494Smckusick register long t1; 89439494Smckusick caddr_t dpos, cp2; 89539494Smckusick int error = 0; 89639494Smckusick struct mbuf *mrep, *md; 89739494Smckusick int len; 89838414Smckusick 89941900Smckusick if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 90041900Smckusick error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0); 90141900Smckusick } else { 90241900Smckusick mrep = (struct mbuf *)0; 90341900Smckusick do { 90441900Smckusick if (mrep) { 90541900Smckusick m_freem(*nam); 90641900Smckusick m_freem(mrep); 90741900Smckusick } 90841900Smckusick error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0); 90941900Smckusick } while (!error && nfs_badnam(*nam, msk, mtch)); 91041900Smckusick } 91141900Smckusick if (error) 91238414Smckusick return (error); 91338414Smckusick md = mrep; 91445281Smckusick mrep = nfs_uncompress(mrep); 91545281Smckusick if (mrep != md) { 91645281Smckusick *wascomp = 1; 91745281Smckusick md = mrep; 91845281Smckusick } else 91945281Smckusick *wascomp = 0; 92038414Smckusick dpos = mtod(mrep, caddr_t); 92138414Smckusick nfsm_disect(p, u_long *, 10*NFSX_UNSIGNED); 92238414Smckusick *retxid = *p++; 92338414Smckusick if (*p++ != rpc_call) { 92438414Smckusick m_freem(mrep); 92538414Smckusick return (ERPCMISMATCH); 92638414Smckusick } 92738414Smckusick if (*p++ != rpc_vers) { 92838414Smckusick m_freem(mrep); 92938414Smckusick return (ERPCMISMATCH); 93038414Smckusick } 93138414Smckusick if (*p++ != prog) { 93238414Smckusick m_freem(mrep); 93338414Smckusick return (EPROGUNAVAIL); 93438414Smckusick } 93538414Smckusick if (*p++ != vers) { 93638414Smckusick m_freem(mrep); 93738414Smckusick return (EPROGMISMATCH); 93838414Smckusick } 93942243Smckusick *procnum = fxdr_unsigned(u_long, *p++); 94042243Smckusick if (*procnum == NFSPROC_NULL) { 94138414Smckusick *mrp = mrep; 94238414Smckusick return (0); 94338414Smckusick } 94442243Smckusick if (*procnum > maxproc || *p++ != rpc_auth_unix) { 94538414Smckusick m_freem(mrep); 94638414Smckusick return (EPROCUNAVAIL); 94738414Smckusick } 94841900Smckusick len = fxdr_unsigned(int, *p++); 94941900Smckusick if (len < 0 || len > RPCAUTH_MAXSIZ) { 95041900Smckusick m_freem(mrep); 95141900Smckusick return (EBADRPC); 95241900Smckusick } 95339494Smckusick len = fxdr_unsigned(int, *++p); 95441900Smckusick if (len < 0 || len > NFS_MAXNAMLEN) { 95541900Smckusick m_freem(mrep); 95641900Smckusick return (EBADRPC); 95741900Smckusick } 95839494Smckusick nfsm_adv(nfsm_rndup(len)); 95938414Smckusick nfsm_disect(p, u_long *, 3*NFSX_UNSIGNED); 96038414Smckusick cr->cr_uid = fxdr_unsigned(uid_t, *p++); 96138414Smckusick cr->cr_gid = fxdr_unsigned(gid_t, *p++); 96239494Smckusick len = fxdr_unsigned(int, *p); 96341900Smckusick if (len < 0 || len > RPCAUTH_UNIXGIDS) { 96438414Smckusick m_freem(mrep); 96538414Smckusick return (EBADRPC); 96638414Smckusick } 96739494Smckusick nfsm_disect(p, u_long *, (len + 2)*NFSX_UNSIGNED); 96839494Smckusick for (i = 1; i <= len; i++) 96941900Smckusick if (i < NGROUPS) 97041900Smckusick cr->cr_groups[i] = fxdr_unsigned(gid_t, *p++); 97141900Smckusick else 97241900Smckusick p++; 97341900Smckusick cr->cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1); 97438414Smckusick /* 97538414Smckusick * Do we have any use for the verifier. 97638414Smckusick * According to the "Remote Procedure Call Protocol Spec." it 97738414Smckusick * should be AUTH_NULL, but some clients make it AUTH_UNIX? 97838414Smckusick * For now, just skip over it 97938414Smckusick */ 98039494Smckusick len = fxdr_unsigned(int, *++p); 98141900Smckusick if (len < 0 || len > RPCAUTH_MAXSIZ) { 98241900Smckusick m_freem(mrep); 98341900Smckusick return (EBADRPC); 98441900Smckusick } 98539494Smckusick if (len > 0) 98639494Smckusick nfsm_adv(nfsm_rndup(len)); 98738414Smckusick *mrp = mrep; 98838414Smckusick *mdp = md; 98938414Smckusick *dposp = dpos; 99038414Smckusick return (0); 99138414Smckusick nfsmout: 99238414Smckusick return (error); 99338414Smckusick } 99438414Smckusick 99538414Smckusick /* 99638414Smckusick * Generate the rpc reply header 99738414Smckusick * siz arg. is used to decide if adding a cluster is worthwhile 99838414Smckusick */ 99938414Smckusick nfs_rephead(siz, retxid, err, mrq, mbp, bposp) 100038414Smckusick int siz; 100138414Smckusick u_long retxid; 100238414Smckusick int err; 100338414Smckusick struct mbuf **mrq; 100438414Smckusick struct mbuf **mbp; 100538414Smckusick caddr_t *bposp; 100638414Smckusick { 100739494Smckusick register u_long *p; 100839494Smckusick register long t1; 100939494Smckusick caddr_t bpos; 101039494Smckusick struct mbuf *mreq, *mb, *mb2; 101138414Smckusick 101238414Smckusick NFSMGETHDR(mreq); 101338414Smckusick mb = mreq; 101438414Smckusick if ((siz+RPC_REPLYSIZ) > MHLEN) 101541900Smckusick MCLGET(mreq, M_WAIT); 101638414Smckusick p = mtod(mreq, u_long *); 101738414Smckusick mreq->m_len = 6*NFSX_UNSIGNED; 101838414Smckusick bpos = ((caddr_t)p)+mreq->m_len; 101938414Smckusick *p++ = retxid; 102038414Smckusick *p++ = rpc_reply; 102138414Smckusick if (err == ERPCMISMATCH) { 102238414Smckusick *p++ = rpc_msgdenied; 102338414Smckusick *p++ = rpc_mismatch; 102438414Smckusick *p++ = txdr_unsigned(2); 102538414Smckusick *p = txdr_unsigned(2); 102638414Smckusick } else { 102738414Smckusick *p++ = rpc_msgaccepted; 102838414Smckusick *p++ = 0; 102938414Smckusick *p++ = 0; 103038414Smckusick switch (err) { 103138414Smckusick case EPROGUNAVAIL: 103238414Smckusick *p = txdr_unsigned(RPC_PROGUNAVAIL); 103338414Smckusick break; 103438414Smckusick case EPROGMISMATCH: 103538414Smckusick *p = txdr_unsigned(RPC_PROGMISMATCH); 103638414Smckusick nfsm_build(p, u_long *, 2*NFSX_UNSIGNED); 103738414Smckusick *p++ = txdr_unsigned(2); 103838414Smckusick *p = txdr_unsigned(2); /* someday 3 */ 103938414Smckusick break; 104038414Smckusick case EPROCUNAVAIL: 104138414Smckusick *p = txdr_unsigned(RPC_PROCUNAVAIL); 104238414Smckusick break; 104338414Smckusick default: 104438414Smckusick *p = 0; 104538414Smckusick if (err != VNOVAL) { 104638414Smckusick nfsm_build(p, u_long *, NFSX_UNSIGNED); 104738414Smckusick *p = txdr_unsigned(err); 104838414Smckusick } 104938414Smckusick break; 105038414Smckusick }; 105138414Smckusick } 105238414Smckusick *mrq = mreq; 105338414Smckusick *mbp = mb; 105438414Smckusick *bposp = bpos; 105538414Smckusick if (err != 0 && err != VNOVAL) 105638414Smckusick nfsstats.srvrpc_errs++; 105738414Smckusick return (0); 105838414Smckusick } 105938414Smckusick 106038414Smckusick /* 106138414Smckusick * Nfs timer routine 106238414Smckusick * Scan the nfsreq list and retranmit any requests that have timed out 106338414Smckusick * To avoid retransmission attempts on STREAM sockets (in the future) make 106440117Smckusick * sure to set the r_retry field to 0 (implies nm_retry == 0). 106538414Smckusick */ 106638414Smckusick nfs_timer() 106738414Smckusick { 106838414Smckusick register struct nfsreq *rep; 106938414Smckusick register struct mbuf *m; 107038414Smckusick register struct socket *so; 107141900Smckusick register struct nfsmount *nmp; 107240117Smckusick int s, error; 107338414Smckusick 107438414Smckusick s = splnet(); 107541900Smckusick for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) { 107641900Smckusick nmp = rep->r_nmp; 107741900Smckusick if (rep->r_mrep || (rep->r_flags & R_SOFTTERM) || 107841900Smckusick (so = nmp->nm_so) == NULL) 107941900Smckusick continue; 108041900Smckusick if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp)) { 108141900Smckusick rep->r_flags |= R_SOFTTERM; 108241900Smckusick continue; 108341900Smckusick } 108440117Smckusick if (rep->r_flags & R_TIMING) /* update rtt in mount */ 108541900Smckusick nmp->nm_rtt++; 108641900Smckusick /* If not timed out */ 108741900Smckusick if (++rep->r_timer < nmp->nm_rto) 108841900Smckusick continue; 108940117Smckusick /* Do backoff and save new timeout in mount */ 109040117Smckusick if (rep->r_flags & R_TIMING) { 109141900Smckusick nfs_backofftimer(nmp); 109240117Smckusick rep->r_flags &= ~R_TIMING; 109341900Smckusick nmp->nm_rtt = -1; 109440117Smckusick } 109540117Smckusick if (rep->r_flags & R_SENT) { 109640117Smckusick rep->r_flags &= ~R_SENT; 109741900Smckusick nmp->nm_sent--; 109840117Smckusick } 109941900Smckusick 110041900Smckusick /* 110141900Smckusick * Check for too many retries on soft mount. 110241900Smckusick * nb: For hard mounts, r_retry == NFS_MAXREXMIT+1 110341900Smckusick */ 110441900Smckusick if (++rep->r_rexmit > NFS_MAXREXMIT) 110540117Smckusick rep->r_rexmit = NFS_MAXREXMIT; 110640117Smckusick 110741900Smckusick /* 110841900Smckusick * Check for server not responding 110941900Smckusick */ 111041900Smckusick if ((rep->r_flags & R_TPRINTFMSG) == 0 && 111143351Smckusick rep->r_rexmit > NFS_FISHY) { 1112*47737Skarels nfs_msg(rep->r_procp, 1113*47737Skarels nmp->nm_mountp->mnt_stat.f_mntfromname, 1114*47737Skarels "not responding"); 111541900Smckusick rep->r_flags |= R_TPRINTFMSG; 111641900Smckusick } 111743351Smckusick if (rep->r_rexmit >= rep->r_retry) { /* too many */ 111841900Smckusick nfsstats.rpctimeouts++; 111941900Smckusick rep->r_flags |= R_SOFTTERM; 112041900Smckusick continue; 112141900Smckusick } 112243351Smckusick if (nmp->nm_sotype != SOCK_DGRAM) 112343351Smckusick continue; 112441900Smckusick 112541900Smckusick /* 112641900Smckusick * If there is enough space and the window allows.. 112741900Smckusick * Resend it 112841900Smckusick */ 112941900Smckusick if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len && 113041900Smckusick nmp->nm_sent < nmp->nm_window && 113141900Smckusick (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){ 113241900Smckusick nfsstats.rpcretries++; 113341900Smckusick if ((nmp->nm_flag & NFSMNT_NOCONN) == 0) 113441900Smckusick error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, 113541900Smckusick (caddr_t)0, (struct mbuf *)0, (struct mbuf *)0); 113641900Smckusick else 113741900Smckusick error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, 113841900Smckusick nmp->nm_nam, (struct mbuf *)0, (struct mbuf *)0); 113941900Smckusick if (error) { 114041900Smckusick if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) 114141900Smckusick so->so_error = 0; 114241900Smckusick } else { 114341900Smckusick /* 114441900Smckusick * We need to time the request even though we 114541900Smckusick * are retransmitting. 114641900Smckusick */ 114741900Smckusick nmp->nm_rtt = 0; 114841900Smckusick nmp->nm_sent++; 114941900Smckusick rep->r_flags |= (R_SENT|R_TIMING); 115041900Smckusick rep->r_timer = rep->r_timerinit; 115141900Smckusick } 115241900Smckusick } 115340117Smckusick } 115440117Smckusick splx(s); 115540117Smckusick timeout(nfs_timer, (caddr_t)0, hz/NFS_HZ); 115640117Smckusick } 115740117Smckusick 115840117Smckusick /* 115940117Smckusick * NFS timer update and backoff. The "Jacobson/Karels/Karn" scheme is 116040117Smckusick * used here. The timer state is held in the nfsmount structure and 116140117Smckusick * a single request is used to clock the response. When successful 116240117Smckusick * the rtt smoothing in nfs_updatetimer is used, when failed the backoff 116340117Smckusick * is done by nfs_backofftimer. We also log failure messages in these 116440117Smckusick * routines. 116540117Smckusick * 116640117Smckusick * Congestion variables are held in the nfshost structure which 116740117Smckusick * is referenced by nfsmounts and shared per-server. This separation 116840117Smckusick * makes it possible to do per-mount timing which allows varying disk 116940117Smckusick * access times to be dealt with, while preserving a network oriented 117040117Smckusick * congestion control scheme. 117140117Smckusick * 117240117Smckusick * The windowing implements the Jacobson/Karels slowstart algorithm 117340117Smckusick * with adjusted scaling factors. We start with one request, then send 117440117Smckusick * 4 more after each success until the ssthresh limit is reached, then 117540117Smckusick * we increment at a rate proportional to the window. On failure, we 117640117Smckusick * remember 3/4 the current window and clamp the send limit to 1. Note 117740117Smckusick * ICMP source quench is not reflected in so->so_error so we ignore that 117840117Smckusick * for now. 117940117Smckusick * 118040117Smckusick * NFS behaves much more like a transport protocol with these changes, 118140117Smckusick * shedding the teenage pedal-to-the-metal tendencies of "other" 118240117Smckusick * implementations. 118340117Smckusick * 118440117Smckusick * Timers and congestion avoidance by Tom Talpey, Open Software Foundation. 118540117Smckusick */ 118640117Smckusick 118740117Smckusick /* 118840117Smckusick * The TCP algorithm was not forgiving enough. Because the NFS server 118940117Smckusick * responds only after performing lookups/diskio/etc, we have to be 119040117Smckusick * more prepared to accept a spiky variance. The TCP algorithm is: 119141900Smckusick * TCP_RTO(nmp) ((((nmp)->nm_srtt >> 2) + (nmp)->nm_rttvar) >> 1) 119240117Smckusick */ 119341900Smckusick #define NFS_RTO(nmp) (((nmp)->nm_srtt >> 3) + (nmp)->nm_rttvar) 119440117Smckusick 119541900Smckusick nfs_updatetimer(nmp) 119641900Smckusick register struct nfsmount *nmp; 119740117Smckusick { 119840117Smckusick 119940117Smckusick /* If retransmitted, clear and return */ 120041900Smckusick if (nmp->nm_rexmit || nmp->nm_currexmit) { 120141900Smckusick nmp->nm_rexmit = nmp->nm_currexmit = 0; 120240117Smckusick return; 120340117Smckusick } 120440117Smckusick /* If have a measurement, do smoothing */ 120541900Smckusick if (nmp->nm_srtt) { 120640117Smckusick register short delta; 120741900Smckusick delta = nmp->nm_rtt - (nmp->nm_srtt >> 3); 120841900Smckusick if ((nmp->nm_srtt += delta) <= 0) 120941900Smckusick nmp->nm_srtt = 1; 121040117Smckusick if (delta < 0) 121140117Smckusick delta = -delta; 121241900Smckusick delta -= (nmp->nm_rttvar >> 2); 121341900Smckusick if ((nmp->nm_rttvar += delta) <= 0) 121441900Smckusick nmp->nm_rttvar = 1; 121540117Smckusick /* Else initialize */ 121640117Smckusick } else { 121741900Smckusick nmp->nm_rttvar = nmp->nm_rtt << 1; 121841900Smckusick if (nmp->nm_rttvar == 0) nmp->nm_rttvar = 2; 121941900Smckusick nmp->nm_srtt = nmp->nm_rttvar << 2; 122040117Smckusick } 122140117Smckusick /* Compute new Retransmission TimeOut and clip */ 122241900Smckusick nmp->nm_rto = NFS_RTO(nmp); 122341900Smckusick if (nmp->nm_rto < NFS_MINTIMEO) 122441900Smckusick nmp->nm_rto = NFS_MINTIMEO; 122541900Smckusick else if (nmp->nm_rto > NFS_MAXTIMEO) 122641900Smckusick nmp->nm_rto = NFS_MAXTIMEO; 122740117Smckusick 122840117Smckusick /* Update window estimate */ 122941900Smckusick if (nmp->nm_window < nmp->nm_ssthresh) /* quickly */ 123041900Smckusick nmp->nm_window += 4; 123140117Smckusick else { /* slowly */ 123241900Smckusick register long incr = ++nmp->nm_winext; 123341900Smckusick incr = (incr * incr) / nmp->nm_window; 123440117Smckusick if (incr > 0) { 123541900Smckusick nmp->nm_winext = 0; 123641900Smckusick ++nmp->nm_window; 123740117Smckusick } 123840117Smckusick } 123941900Smckusick if (nmp->nm_window > NFS_MAXWINDOW) 124041900Smckusick nmp->nm_window = NFS_MAXWINDOW; 124140117Smckusick } 124240117Smckusick 124341900Smckusick nfs_backofftimer(nmp) 124441900Smckusick register struct nfsmount *nmp; 124540117Smckusick { 124640117Smckusick register unsigned long newrto; 124740117Smckusick 124840117Smckusick /* Clip shift count */ 124941900Smckusick if (++nmp->nm_rexmit > 8 * sizeof nmp->nm_rto) 125041900Smckusick nmp->nm_rexmit = 8 * sizeof nmp->nm_rto; 125140117Smckusick /* Back off RTO exponentially */ 125241900Smckusick newrto = NFS_RTO(nmp); 125341900Smckusick newrto <<= (nmp->nm_rexmit - 1); 125440117Smckusick if (newrto == 0 || newrto > NFS_MAXTIMEO) 125540117Smckusick newrto = NFS_MAXTIMEO; 125641900Smckusick nmp->nm_rto = newrto; 125740117Smckusick 125840117Smckusick /* If too many retries, message, assume a bogus RTT and re-measure */ 125941900Smckusick if (nmp->nm_currexmit < nmp->nm_rexmit) { 126041900Smckusick nmp->nm_currexmit = nmp->nm_rexmit; 126141900Smckusick if (nmp->nm_currexmit >= nfsrexmtthresh) { 126241900Smckusick if (nmp->nm_currexmit == nfsrexmtthresh) { 126341900Smckusick nmp->nm_rttvar += (nmp->nm_srtt >> 2); 126441900Smckusick nmp->nm_srtt = 0; 126538414Smckusick } 126638414Smckusick } 126738414Smckusick } 126840117Smckusick /* Close down window but remember this point (3/4 current) for later */ 126941900Smckusick nmp->nm_ssthresh = ((nmp->nm_window << 1) + nmp->nm_window) >> 2; 127041900Smckusick nmp->nm_window = 1; 127141900Smckusick nmp->nm_winext = 0; 127238414Smckusick } 127338414Smckusick 127438414Smckusick /* 127541900Smckusick * Test for a termination signal pending on procp. 127641900Smckusick * This is used for NFSMNT_INT mounts. 127738414Smckusick */ 127841900Smckusick nfs_sigintr(p) 127941900Smckusick register struct proc *p; 128041900Smckusick { 128141900Smckusick if (p && p->p_sig && (((p->p_sig &~ p->p_sigmask) &~ p->p_sigignore) & 128241900Smckusick NFSINT_SIGMASK)) 128341900Smckusick return (1); 128441900Smckusick else 128541900Smckusick return (0); 128641900Smckusick } 128740117Smckusick 1288*47737Skarels nfs_msg(p, server, msg) 1289*47737Skarels struct proc *p; 1290*47737Skarels char *server, *msg; 1291*47737Skarels { 1292*47737Skarels tpr_t tpr; 1293*47737Skarels 1294*47737Skarels if (p) 1295*47737Skarels tpr = tprintf_open(p); 1296*47737Skarels else 1297*47737Skarels tpr = NULL; 1298*47737Skarels tprintf(tpr, "nfs server %s: %s\n", server, msg); 1299*47737Skarels tprintf_close(tpr); 1300*47737Skarels } 1301*47737Skarels 130241900Smckusick /* 130341900Smckusick * Lock a socket against others. 130441900Smckusick * Necessary for STREAM sockets to ensure you get an entire rpc request/reply 130541900Smckusick * and also to avoid race conditions between the processes with nfs requests 130641900Smckusick * in progress when a reconnect is necessary. 130741900Smckusick */ 130843351Smckusick nfs_solock(flagp) 130943351Smckusick register int *flagp; 131038414Smckusick { 131140117Smckusick 131241900Smckusick while (*flagp & NFSMNT_SCKLOCK) { 131341900Smckusick *flagp |= NFSMNT_WANTSCK; 131443351Smckusick (void) tsleep((caddr_t)flagp, PZERO-1, "nfsolck", 0); 131540117Smckusick } 131641900Smckusick *flagp |= NFSMNT_SCKLOCK; 131741900Smckusick } 131840117Smckusick 131941900Smckusick /* 132041900Smckusick * Unlock the stream socket for others. 132141900Smckusick */ 132241900Smckusick nfs_sounlock(flagp) 132343351Smckusick register int *flagp; 132441900Smckusick { 132541900Smckusick 132641900Smckusick if ((*flagp & NFSMNT_SCKLOCK) == 0) 132741900Smckusick panic("nfs sounlock"); 132841900Smckusick *flagp &= ~NFSMNT_SCKLOCK; 132941900Smckusick if (*flagp & NFSMNT_WANTSCK) { 133041900Smckusick *flagp &= ~NFSMNT_WANTSCK; 133141900Smckusick wakeup((caddr_t)flagp); 133240117Smckusick } 133338414Smckusick } 133441900Smckusick 133541900Smckusick /* 133641900Smckusick * This function compares two net addresses by family and returns TRUE 133741900Smckusick * if they are the same. 133841900Smckusick * If there is any doubt, return FALSE. 133941900Smckusick */ 134041900Smckusick nfs_netaddr_match(nam1, nam2) 134141900Smckusick struct mbuf *nam1, *nam2; 134241900Smckusick { 134341900Smckusick register struct sockaddr *saddr1, *saddr2; 134441900Smckusick 134541900Smckusick saddr1 = mtod(nam1, struct sockaddr *); 134641900Smckusick saddr2 = mtod(nam2, struct sockaddr *); 134741900Smckusick if (saddr1->sa_family != saddr2->sa_family) 134841900Smckusick return (0); 134941900Smckusick 135041900Smckusick /* 135141900Smckusick * Must do each address family separately since unused fields 135241900Smckusick * are undefined values and not always zeroed. 135341900Smckusick */ 135441900Smckusick switch (saddr1->sa_family) { 135541900Smckusick case AF_INET: 135641900Smckusick if (((struct sockaddr_in *)saddr1)->sin_addr.s_addr == 135741900Smckusick ((struct sockaddr_in *)saddr2)->sin_addr.s_addr) 135841900Smckusick return (1); 135941900Smckusick break; 136041900Smckusick default: 136141900Smckusick break; 136241900Smckusick }; 136341900Smckusick return (0); 136441900Smckusick } 136541900Smckusick 136641900Smckusick /* 136741900Smckusick * Check the hostname fields for nfsd's mask and match fields. 136841900Smckusick * By address family: 136941900Smckusick * - Bitwise AND the mask with the host address field 137041900Smckusick * - Compare for == with match 137141900Smckusick * return TRUE if not equal 137241900Smckusick */ 137341900Smckusick nfs_badnam(nam, msk, mtch) 137441900Smckusick register struct mbuf *nam, *msk, *mtch; 137541900Smckusick { 137641900Smckusick switch (mtod(nam, struct sockaddr *)->sa_family) { 137741900Smckusick case AF_INET: 137841900Smckusick return ((mtod(nam, struct sockaddr_in *)->sin_addr.s_addr & 137941900Smckusick mtod(msk, struct sockaddr_in *)->sin_addr.s_addr) != 138041900Smckusick mtod(mtch, struct sockaddr_in *)->sin_addr.s_addr); 138141900Smckusick default: 138241900Smckusick printf("nfs_badmatch, unknown sa_family\n"); 138341900Smckusick return (0); 138441900Smckusick }; 138541900Smckusick } 1386