138414Smckusick /* 238414Smckusick * Copyright (c) 1989 The Regents of the University of California. 338414Smckusick * All rights reserved. 438414Smckusick * 538414Smckusick * This code is derived from software contributed to Berkeley by 638414Smckusick * Rick Macklem at The University of Guelph. 738414Smckusick * 844511Sbostic * %sccs.include.redist.c% 938414Smckusick * 10*45281Smckusick * @(#)nfs_socket.c 7.19 (Berkeley) 10/01/90 1138414Smckusick */ 1238414Smckusick 1338414Smckusick /* 1441900Smckusick * Socket operations for use by nfs 1538414Smckusick */ 1638414Smckusick 1738414Smckusick #include "types.h" 1838414Smckusick #include "param.h" 1938414Smckusick #include "uio.h" 2038414Smckusick #include "user.h" 2140117Smckusick #include "proc.h" 2240117Smckusick #include "signal.h" 2338414Smckusick #include "mount.h" 2438414Smckusick #include "kernel.h" 2538414Smckusick #include "malloc.h" 2638414Smckusick #include "mbuf.h" 2738414Smckusick #include "vnode.h" 2838414Smckusick #include "domain.h" 2938414Smckusick #include "protosw.h" 3038414Smckusick #include "socket.h" 3138414Smckusick #include "socketvar.h" 3242877Smckusick #include "../netinet/in.h" 3342877Smckusick #include "../netinet/tcp.h" 3438414Smckusick #include "rpcv2.h" 3538414Smckusick #include "nfsv2.h" 3638414Smckusick #include "nfs.h" 3738414Smckusick #include "xdr_subs.h" 3838414Smckusick #include "nfsm_subs.h" 3938414Smckusick #include "nfsmount.h" 4038414Smckusick 4140117Smckusick #include "syslog.h" 4240117Smckusick 4338414Smckusick #define TRUE 1 4443351Smckusick #define FALSE 0 4538414Smckusick 4640117Smckusick /* 4738414Smckusick * External data, mostly RPC constants in XDR form 4838414Smckusick */ 4938414Smckusick extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix, 5038414Smckusick rpc_msgaccepted, rpc_call; 5138414Smckusick extern u_long nfs_prog, nfs_vers; 5243351Smckusick /* Maybe these should be bits in a u_long ?? */ 5341900Smckusick extern int nonidempotent[NFS_NPROCS]; 54*45281Smckusick static int compressrequest[NFS_NPROCS] = { 55*45281Smckusick FALSE, 56*45281Smckusick TRUE, 57*45281Smckusick TRUE, 58*45281Smckusick FALSE, 59*45281Smckusick TRUE, 60*45281Smckusick TRUE, 61*45281Smckusick TRUE, 62*45281Smckusick FALSE, 63*45281Smckusick FALSE, 64*45281Smckusick TRUE, 65*45281Smckusick TRUE, 66*45281Smckusick TRUE, 67*45281Smckusick TRUE, 68*45281Smckusick TRUE, 69*45281Smckusick TRUE, 70*45281Smckusick TRUE, 71*45281Smckusick TRUE, 72*45281Smckusick TRUE, 73*45281Smckusick }; 7441900Smckusick int nfs_sbwait(); 7541900Smckusick void nfs_disconnect(); 76*45281Smckusick struct mbuf *nfs_compress(), *nfs_uncompress(); 7741900Smckusick 7838414Smckusick int nfsrv_null(), 7938414Smckusick nfsrv_getattr(), 8038414Smckusick nfsrv_setattr(), 8138414Smckusick nfsrv_lookup(), 8238414Smckusick nfsrv_readlink(), 8338414Smckusick nfsrv_read(), 8438414Smckusick nfsrv_write(), 8538414Smckusick nfsrv_create(), 8638414Smckusick nfsrv_remove(), 8738414Smckusick nfsrv_rename(), 8838414Smckusick nfsrv_link(), 8938414Smckusick nfsrv_symlink(), 9038414Smckusick nfsrv_mkdir(), 9138414Smckusick nfsrv_rmdir(), 9238414Smckusick nfsrv_readdir(), 9338414Smckusick nfsrv_statfs(), 9438414Smckusick nfsrv_noop(); 9538414Smckusick 9638414Smckusick int (*nfsrv_procs[NFS_NPROCS])() = { 9738414Smckusick nfsrv_null, 9838414Smckusick nfsrv_getattr, 9938414Smckusick nfsrv_setattr, 10038414Smckusick nfsrv_noop, 10138414Smckusick nfsrv_lookup, 10238414Smckusick nfsrv_readlink, 10338414Smckusick nfsrv_read, 10438414Smckusick nfsrv_noop, 10538414Smckusick nfsrv_write, 10638414Smckusick nfsrv_create, 10738414Smckusick nfsrv_remove, 10838414Smckusick nfsrv_rename, 10938414Smckusick nfsrv_link, 11038414Smckusick nfsrv_symlink, 11138414Smckusick nfsrv_mkdir, 11238414Smckusick nfsrv_rmdir, 11338414Smckusick nfsrv_readdir, 11438414Smckusick nfsrv_statfs, 11538414Smckusick }; 11638414Smckusick 11740117Smckusick struct nfsreq nfsreqh; 11840117Smckusick int nfsrexmtthresh = NFS_FISHY; 11941900Smckusick int nfs_tcpnodelay = 1; 12038414Smckusick 12138414Smckusick /* 12241900Smckusick * Initialize sockets and congestion for a new NFS connection. 12340117Smckusick * We do not free the sockaddr if error. 12438414Smckusick */ 12541900Smckusick nfs_connect(nmp) 12640117Smckusick register struct nfsmount *nmp; 12740117Smckusick { 12841900Smckusick register struct socket *so; 12941900Smckusick int s, error; 13040117Smckusick struct mbuf *m; 13140117Smckusick 13241900Smckusick nmp->nm_so = (struct socket *)0; 13341900Smckusick if (error = socreate(mtod(nmp->nm_nam, struct sockaddr *)->sa_family, 13441900Smckusick &nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto)) 13540117Smckusick goto bad; 13641900Smckusick so = nmp->nm_so; 13741900Smckusick nmp->nm_soflags = so->so_proto->pr_flags; 13840117Smckusick 13941900Smckusick /* 14041900Smckusick * Protocols that do not require connections may be optionally left 14141900Smckusick * unconnected for servers that reply from a port other than NFS_PORT. 14241900Smckusick */ 14341900Smckusick if (nmp->nm_flag & NFSMNT_NOCONN) { 14441900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) { 14541900Smckusick error = ENOTCONN; 14640117Smckusick goto bad; 14740117Smckusick } 14841900Smckusick } else { 14941900Smckusick if (error = soconnect(so, nmp->nm_nam)) 15040117Smckusick goto bad; 15141900Smckusick 15241900Smckusick /* 15341900Smckusick * Wait for the connection to complete. Cribbed from the 15441900Smckusick * connect system call but with the wait at negative prio. 15541900Smckusick */ 15641900Smckusick s = splnet(); 15741900Smckusick while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) 15843351Smckusick (void) tsleep((caddr_t)&so->so_timeo, PSOCK, "nfscon", 0); 15941900Smckusick splx(s); 16041900Smckusick if (so->so_error) { 16141900Smckusick error = so->so_error; 16241900Smckusick goto bad; 16341900Smckusick } 16440117Smckusick } 16541900Smckusick if (nmp->nm_sotype == SOCK_DGRAM) { 16643351Smckusick if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) { 16741900Smckusick so->so_rcv.sb_timeo = (5 * hz); 16841900Smckusick so->so_snd.sb_timeo = (5 * hz); 16941900Smckusick } else { 17041900Smckusick so->so_rcv.sb_timeo = 0; 17141900Smckusick so->so_snd.sb_timeo = 0; 17241900Smckusick } 17341900Smckusick if (error = soreserve(so, nmp->nm_wsize + NFS_MAXPKTHDR, 17443351Smckusick nmp->nm_rsize + NFS_MAXPKTHDR)) 17541900Smckusick goto bad; 17641900Smckusick } else { 17743351Smckusick if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) { 17841900Smckusick so->so_rcv.sb_timeo = (5 * hz); 17941900Smckusick so->so_snd.sb_timeo = (5 * hz); 18041900Smckusick } else { 18141900Smckusick so->so_rcv.sb_timeo = 0; 18241900Smckusick so->so_snd.sb_timeo = 0; 18341900Smckusick } 18441900Smckusick if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 18541900Smckusick MGET(m, M_WAIT, MT_SOOPTS); 18641900Smckusick *mtod(m, int *) = 1; 18741900Smckusick m->m_len = sizeof(int); 18841900Smckusick sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m); 18941900Smckusick } 19041900Smckusick if (so->so_proto->pr_domain->dom_family == AF_INET && 19141900Smckusick so->so_proto->pr_protocol == IPPROTO_TCP && 19241900Smckusick nfs_tcpnodelay) { 19341900Smckusick MGET(m, M_WAIT, MT_SOOPTS); 19441900Smckusick *mtod(m, int *) = 1; 19541900Smckusick m->m_len = sizeof(int); 19641900Smckusick sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m); 19741900Smckusick } 19841900Smckusick if (error = soreserve(so, 19943351Smckusick nmp->nm_wsize + NFS_MAXPKTHDR + sizeof(u_long), 20041900Smckusick nmp->nm_rsize + NFS_MAXPKTHDR + sizeof(u_long))) 20141900Smckusick goto bad; 20241900Smckusick } 20341900Smckusick so->so_rcv.sb_flags |= SB_NOINTR; 20441900Smckusick so->so_snd.sb_flags |= SB_NOINTR; 20540117Smckusick 20641900Smckusick /* Initialize other non-zero congestion variables */ 20741900Smckusick nmp->nm_rto = NFS_TIMEO; 20841900Smckusick nmp->nm_window = 2; /* Initial send window */ 20941900Smckusick nmp->nm_ssthresh = NFS_MAXWINDOW; /* Slowstart threshold */ 21041900Smckusick nmp->nm_rttvar = nmp->nm_rto << 1; 21141900Smckusick nmp->nm_sent = 0; 21241900Smckusick nmp->nm_currexmit = 0; 21341900Smckusick return (0); 21440117Smckusick 21541900Smckusick bad: 21641900Smckusick nfs_disconnect(nmp); 21741900Smckusick return (error); 21841900Smckusick } 21940117Smckusick 22041900Smckusick /* 22141900Smckusick * Reconnect routine: 22241900Smckusick * Called when a connection is broken on a reliable protocol. 22341900Smckusick * - clean up the old socket 22441900Smckusick * - nfs_connect() again 22541900Smckusick * - set R_MUSTRESEND for all outstanding requests on mount point 22641900Smckusick * If this fails the mount point is DEAD! 22741900Smckusick * nb: Must be called with the nfs_solock() set on the mount point. 22841900Smckusick */ 22941900Smckusick nfs_reconnect(rep, nmp) 23041900Smckusick register struct nfsreq *rep; 23141900Smckusick register struct nfsmount *nmp; 23241900Smckusick { 23341900Smckusick register struct nfsreq *rp; 23441900Smckusick int error; 23540117Smckusick 23641900Smckusick if (rep->r_procp) 23743061Smarc tprintf(rep->r_procp->p_session, 23841900Smckusick "Nfs server %s, trying reconnect\n", 23941900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 24041900Smckusick else 24143061Smarc tprintf(NULL, "Nfs server %s, trying a reconnect\n", 24241900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 24341900Smckusick while (error = nfs_connect(nmp)) { 24442243Smckusick #ifdef lint 24542243Smckusick error = error; 24642243Smckusick #endif /* lint */ 24741900Smckusick if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp)) 24841900Smckusick return (EINTR); 24943351Smckusick (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0); 25040117Smckusick } 25141900Smckusick if (rep->r_procp) 25243061Smarc tprintf(rep->r_procp->p_session, 25341900Smckusick "Nfs server %s, reconnected\n", 25441900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 25541900Smckusick else 25643061Smarc tprintf(NULL, "Nfs server %s, reconnected\n", 25741900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 25841900Smckusick 25941900Smckusick /* 26041900Smckusick * Loop through outstanding request list and fix up all requests 26141900Smckusick * on old socket. 26241900Smckusick */ 26341900Smckusick rp = nfsreqh.r_next; 26441900Smckusick while (rp != &nfsreqh) { 26541900Smckusick if (rp->r_nmp == nmp) 26641900Smckusick rp->r_flags |= R_MUSTRESEND; 26741900Smckusick rp = rp->r_next; 26840117Smckusick } 26940117Smckusick return (0); 27040117Smckusick } 27140117Smckusick 27240117Smckusick /* 27340117Smckusick * NFS disconnect. Clean up and unlink. 27440117Smckusick */ 27541900Smckusick void 27640117Smckusick nfs_disconnect(nmp) 27740117Smckusick register struct nfsmount *nmp; 27840117Smckusick { 27941900Smckusick register struct socket *so; 28040117Smckusick 28141900Smckusick if (nmp->nm_so) { 28241900Smckusick so = nmp->nm_so; 28341900Smckusick nmp->nm_so = (struct socket *)0; 28441900Smckusick soshutdown(so, 2); 28541900Smckusick soclose(so); 28640117Smckusick } 28740117Smckusick } 28840117Smckusick 28940117Smckusick /* 29041900Smckusick * This is the nfs send routine. For connection based socket types, it 29141900Smckusick * must be called with an nfs_solock() on the socket. 29241900Smckusick * "rep == NULL" indicates that it has been called from a server. 29340117Smckusick */ 29441900Smckusick nfs_send(so, nam, top, rep) 29538414Smckusick register struct socket *so; 29638414Smckusick struct mbuf *nam; 29741900Smckusick register struct mbuf *top; 29841900Smckusick struct nfsreq *rep; 29938414Smckusick { 30041900Smckusick struct mbuf *sendnam; 30141900Smckusick int error, soflags; 30238414Smckusick 30341900Smckusick if (rep) { 30441900Smckusick if (rep->r_flags & R_SOFTTERM) { 30540117Smckusick m_freem(top); 30641900Smckusick return (EINTR); 30740117Smckusick } 30843062Smckusick if (rep->r_nmp->nm_so == NULL && 30941900Smckusick (error = nfs_reconnect(rep, rep->r_nmp))) 31041900Smckusick return (error); 31141900Smckusick rep->r_flags &= ~R_MUSTRESEND; 31243062Smckusick so = rep->r_nmp->nm_so; 31341900Smckusick soflags = rep->r_nmp->nm_soflags; 31441900Smckusick } else 31541900Smckusick soflags = so->so_proto->pr_flags; 31641900Smckusick if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED)) 31741900Smckusick sendnam = (struct mbuf *)0; 31841900Smckusick else 31941900Smckusick sendnam = nam; 32041900Smckusick 32141900Smckusick error = sosend(so, sendnam, (struct uio *)0, top, 32241900Smckusick (struct mbuf *)0, 0); 32341900Smckusick if (error == EWOULDBLOCK && rep) { 32441900Smckusick if (rep->r_flags & R_SOFTTERM) 32541900Smckusick error = EINTR; 32641900Smckusick else { 32741900Smckusick rep->r_flags |= R_MUSTRESEND; 32841900Smckusick error = 0; 32940117Smckusick } 33038414Smckusick } 33141900Smckusick /* 33241900Smckusick * Ignore socket errors?? 33341900Smckusick */ 33441900Smckusick if (error && error != EINTR && error != ERESTART) 33541900Smckusick error = 0; 33638414Smckusick return (error); 33738414Smckusick } 33838414Smckusick 33938414Smckusick /* 34041900Smckusick * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all 34141900Smckusick * done by soreceive(), but for SOCK_STREAM we must deal with the Record 34241900Smckusick * Mark and consolidate the data into a new mbuf list. 34341900Smckusick * nb: Sometimes TCP passes the data up to soreceive() in long lists of 34441900Smckusick * small mbufs. 34541900Smckusick * For SOCK_STREAM we must be very careful to read an entire record once 34641900Smckusick * we have read any of it, even if the system call has been interrupted. 34738414Smckusick */ 34841900Smckusick nfs_receive(so, aname, mp, rep) 34938414Smckusick register struct socket *so; 35038414Smckusick struct mbuf **aname; 35138414Smckusick struct mbuf **mp; 35241900Smckusick register struct nfsreq *rep; 35338414Smckusick { 35441900Smckusick struct uio auio; 35541900Smckusick struct iovec aio; 35638414Smckusick register struct mbuf *m; 357*45281Smckusick struct mbuf *m2, *mnew, **mbp; 35841900Smckusick caddr_t fcp, tcp; 35941900Smckusick u_long len; 36041900Smckusick struct mbuf **getnam; 36141900Smckusick int error, siz, mlen, soflags, rcvflg = MSG_WAITALL; 36238414Smckusick 36341900Smckusick /* 36441900Smckusick * Set up arguments for soreceive() 36541900Smckusick */ 36641900Smckusick *mp = (struct mbuf *)0; 36741900Smckusick *aname = (struct mbuf *)0; 36841900Smckusick if (rep) 36941900Smckusick soflags = rep->r_nmp->nm_soflags; 37041900Smckusick else 37141900Smckusick soflags = so->so_proto->pr_flags; 37238414Smckusick 37341900Smckusick /* 37441900Smckusick * For reliable protocols, lock against other senders/receivers 37541900Smckusick * in case a reconnect is necessary. 37641900Smckusick * For SOCK_STREAM, first get the Record Mark to find out how much 37741900Smckusick * more there is to get. 37841900Smckusick * We must lock the socket against other receivers 37941900Smckusick * until we have an entire rpc request/reply. 38041900Smckusick */ 38141900Smckusick if (soflags & PR_CONNREQUIRED) { 38241900Smckusick tryagain: 38341900Smckusick /* 38441900Smckusick * Check for fatal errors and resending request. 38541900Smckusick */ 38641900Smckusick if (rep) { 38741900Smckusick /* 38841900Smckusick * Ugh: If a reconnect attempt just happened, nm_so 38941900Smckusick * would have changed. NULL indicates a failed 39041900Smckusick * attempt that has essentially shut down this 39141900Smckusick * mount point. 39241900Smckusick */ 39341900Smckusick if (rep->r_mrep || (so = rep->r_nmp->nm_so) == NULL || 39441900Smckusick (rep->r_flags & R_SOFTTERM)) 39541900Smckusick return (EINTR); 39641900Smckusick while (rep->r_flags & R_MUSTRESEND) { 39741900Smckusick m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); 39841900Smckusick nfsstats.rpcretries++; 39941900Smckusick if (error = nfs_send(so, rep->r_nmp->nm_nam, m, 40041900Smckusick rep)) 40141900Smckusick goto errout; 40240117Smckusick } 40341900Smckusick } 40441900Smckusick if ((soflags & PR_ATOMIC) == 0) { 40541900Smckusick aio.iov_base = (caddr_t) &len; 40641900Smckusick aio.iov_len = sizeof(u_long); 40741900Smckusick auio.uio_iov = &aio; 40841900Smckusick auio.uio_iovcnt = 1; 40941900Smckusick auio.uio_segflg = UIO_SYSSPACE; 41041900Smckusick auio.uio_rw = UIO_READ; 41141900Smckusick auio.uio_offset = 0; 41241900Smckusick auio.uio_resid = sizeof(u_long); 41341900Smckusick do { 41441900Smckusick error = soreceive(so, (struct mbuf **)0, &auio, 41541900Smckusick (struct mbuf **)0, (struct mbuf **)0, &rcvflg); 41641900Smckusick if (error == EWOULDBLOCK && rep) { 41741900Smckusick if (rep->r_flags & R_SOFTTERM) 41841900Smckusick return (EINTR); 41941900Smckusick if (rep->r_flags & R_MUSTRESEND) 42041900Smckusick goto tryagain; 42141900Smckusick } 42241900Smckusick } while (error == EWOULDBLOCK); 42341900Smckusick if (!error && auio.uio_resid > 0) 42441900Smckusick error = EPIPE; 42540761Skarels if (error) 42641900Smckusick goto errout; 42741900Smckusick len = ntohl(len) & ~0x80000000; 42841900Smckusick /* 42941900Smckusick * This is SERIOUS! We are out of sync with the sender 43041900Smckusick * and forcing a disconnect/reconnect is all I can do. 43141900Smckusick */ 43241900Smckusick if (len > NFS_MAXPACKET) { 43341900Smckusick error = EFBIG; 43441900Smckusick goto errout; 43541900Smckusick } 43641900Smckusick auio.uio_resid = len; 43741900Smckusick do { 43841900Smckusick error = soreceive(so, (struct mbuf **)0, 43941900Smckusick &auio, mp, (struct mbuf **)0, &rcvflg); 44041900Smckusick } while (error == EWOULDBLOCK || error == EINTR || 44141900Smckusick error == ERESTART); 44241900Smckusick if (!error && auio.uio_resid > 0) 44341900Smckusick error = EPIPE; 44440117Smckusick } else { 44541900Smckusick auio.uio_resid = len = 1000000; /* Anything Big */ 44641900Smckusick do { 44741900Smckusick error = soreceive(so, (struct mbuf **)0, 44841900Smckusick &auio, mp, (struct mbuf **)0, &rcvflg); 44941900Smckusick if (error == EWOULDBLOCK && rep) { 45041900Smckusick if (rep->r_flags & R_SOFTTERM) 45141900Smckusick return (EINTR); 45241900Smckusick if (rep->r_flags & R_MUSTRESEND) 45341900Smckusick goto tryagain; 45441900Smckusick } 45541900Smckusick } while (error == EWOULDBLOCK); 45641900Smckusick if (!error && *mp == NULL) 45741900Smckusick error = EPIPE; 45841900Smckusick len -= auio.uio_resid; 45940117Smckusick } 46041900Smckusick errout: 46141900Smckusick if (error && rep && error != EINTR && error != ERESTART) { 46241900Smckusick m_freem(*mp); 46341900Smckusick *mp = (struct mbuf *)0; 46441900Smckusick nfs_disconnect(rep->r_nmp); 46541900Smckusick error = nfs_reconnect(rep, rep->r_nmp); 46641900Smckusick if (!error) 46741900Smckusick goto tryagain; 46840117Smckusick } 46941900Smckusick } else { 47041900Smckusick if (so->so_state & SS_ISCONNECTED) 47141900Smckusick getnam = (struct mbuf **)0; 47241900Smckusick else 47341900Smckusick getnam = aname; 47441900Smckusick auio.uio_resid = len = 1000000; 47541900Smckusick do { 47641900Smckusick error = soreceive(so, getnam, &auio, mp, 47741900Smckusick (struct mbuf **)0, &rcvflg); 47841900Smckusick if (error == EWOULDBLOCK && rep && 47941900Smckusick (rep->r_flags & R_SOFTTERM)) 48041900Smckusick return (EINTR); 48141900Smckusick } while (error == EWOULDBLOCK); 48241900Smckusick len -= auio.uio_resid; 48341900Smckusick } 48441900Smckusick if (error) { 48541900Smckusick m_freem(*mp); 48641900Smckusick *mp = (struct mbuf *)0; 48741900Smckusick } 48841900Smckusick /* 48941900Smckusick * Search for any mbufs that are not a multiple of 4 bytes long. 49041900Smckusick * These could cause pointer alignment problems, so copy them to 49141900Smckusick * well aligned mbufs. 49241900Smckusick */ 49341900Smckusick m = *mp; 49441900Smckusick mbp = mp; 49541900Smckusick while (m) { 49641900Smckusick /* 49741900Smckusick * All this for something that may never happen. 49841900Smckusick */ 499*45281Smckusick if (m->m_next && (m->m_len & 0x3)) { 50041900Smckusick printf("nfs_rcv odd length!\n"); 50142243Smckusick mlen = 0; 50241900Smckusick while (m) { 503*45281Smckusick fcp = mtod(m, caddr_t); 504*45281Smckusick while (m->m_len > 0) { 505*45281Smckusick if (mlen == 0) { 506*45281Smckusick MGET(m2, M_WAIT, MT_DATA); 507*45281Smckusick if (len >= MINCLSIZE) 508*45281Smckusick MCLGET(m2, M_WAIT); 509*45281Smckusick m2->m_len = 0; 510*45281Smckusick mlen = M_TRAILINGSPACE(m2); 511*45281Smckusick tcp = mtod(m2, caddr_t); 512*45281Smckusick *mbp = m2; 513*45281Smckusick mbp = &m2->m_next; 514*45281Smckusick } 515*45281Smckusick siz = MIN(mlen, m->m_len); 516*45281Smckusick bcopy(fcp, tcp, siz); 517*45281Smckusick m2->m_len += siz; 518*45281Smckusick mlen -= siz; 519*45281Smckusick len -= siz; 520*45281Smckusick tcp += siz; 521*45281Smckusick m->m_len -= siz; 522*45281Smckusick fcp += siz; 52341900Smckusick } 524*45281Smckusick MFREE(m, mnew); 525*45281Smckusick m = mnew; 52641900Smckusick } 52741900Smckusick break; 52840117Smckusick } 52941900Smckusick len -= m->m_len; 53041900Smckusick mbp = &m->m_next; 53141900Smckusick m = m->m_next; 53238414Smckusick } 53338414Smckusick return (error); 53438414Smckusick } 53538414Smckusick 53638414Smckusick /* 53741900Smckusick * Implement receipt of reply on a socket. 53838414Smckusick * We must search through the list of received datagrams matching them 53938414Smckusick * with outstanding requests using the xid, until ours is found. 54038414Smckusick */ 54141900Smckusick /* ARGSUSED */ 54241900Smckusick nfs_reply(nmp, myrep) 54341900Smckusick struct nfsmount *nmp; 54439344Smckusick struct nfsreq *myrep; 54538414Smckusick { 54638414Smckusick register struct mbuf *m; 54738414Smckusick register struct nfsreq *rep; 54841900Smckusick register int error = 0; 549*45281Smckusick u_long rxid; 55041900Smckusick struct mbuf *mp, *nam; 55141900Smckusick char *cp; 55241900Smckusick int cnt, xfer; 55338414Smckusick 55441900Smckusick /* 55541900Smckusick * Loop around until we get our own reply 55641900Smckusick */ 55741900Smckusick for (;;) { 55841900Smckusick /* 55941900Smckusick * Lock against other receivers so that I don't get stuck in 56041900Smckusick * sbwait() after someone else has received my reply for me. 56141900Smckusick * Also necessary for connection based protocols to avoid 56241900Smckusick * race conditions during a reconnect. 56341900Smckusick */ 56443351Smckusick nfs_solock(&nmp->nm_flag); 56541900Smckusick /* Already received, bye bye */ 56641900Smckusick if (myrep->r_mrep != NULL) { 56741900Smckusick nfs_sounlock(&nmp->nm_flag); 56841900Smckusick return (0); 56940117Smckusick } 57041900Smckusick /* 57141900Smckusick * Get the next Rpc reply off the socket 57241900Smckusick */ 57341900Smckusick if (error = nfs_receive(nmp->nm_so, &nam, &mp, myrep)) { 57441900Smckusick nfs_sounlock(&nmp->nm_flag); 57538414Smckusick 57641900Smckusick /* 57741900Smckusick * Ignore routing errors on connectionless protocols?? 57841900Smckusick */ 57941900Smckusick if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) { 58041900Smckusick nmp->nm_so->so_error = 0; 58141900Smckusick continue; 58241900Smckusick } 58341900Smckusick 58441900Smckusick /* 58541900Smckusick * Otherwise cleanup and return a fatal error. 58641900Smckusick */ 58741900Smckusick if (myrep->r_flags & R_TIMING) { 58841900Smckusick myrep->r_flags &= ~R_TIMING; 58941900Smckusick nmp->nm_rtt = -1; 59041900Smckusick } 59141900Smckusick if (myrep->r_flags & R_SENT) { 59241900Smckusick myrep->r_flags &= ~R_SENT; 59341900Smckusick nmp->nm_sent--; 59441900Smckusick } 59541900Smckusick return (error); 59638414Smckusick } 59741900Smckusick 59841900Smckusick /* 59941900Smckusick * Get the xid and check that it is an rpc reply 60041900Smckusick */ 60141900Smckusick m = mp; 602*45281Smckusick while (m && m->m_len == 0) 603*45281Smckusick m = m->m_next; 604*45281Smckusick if (m == NULL) { 60540117Smckusick nfsstats.rpcinvalid++; 60641900Smckusick m_freem(mp); 60741900Smckusick nfs_sounlock(&nmp->nm_flag); 60841900Smckusick continue; 60938414Smckusick } 610*45281Smckusick bcopy(mtod(m, caddr_t), (caddr_t)&rxid, NFSX_UNSIGNED); 61141900Smckusick /* 61241900Smckusick * Loop through the request list to match up the reply 61341900Smckusick * Iff no match, just drop the datagram 61441900Smckusick */ 61541900Smckusick m = mp; 61641900Smckusick rep = nfsreqh.r_next; 61741900Smckusick while (rep != &nfsreqh) { 618*45281Smckusick if (rep->r_mrep == NULL && rxid == rep->r_xid) { 61941900Smckusick /* Found it.. */ 62041900Smckusick rep->r_mrep = m; 62141900Smckusick /* 62241900Smckusick * Update timing 62341900Smckusick */ 62441900Smckusick if (rep->r_flags & R_TIMING) { 62541900Smckusick nfs_updatetimer(rep->r_nmp); 62641900Smckusick rep->r_flags &= ~R_TIMING; 62741900Smckusick rep->r_nmp->nm_rtt = -1; 62841900Smckusick } 62941900Smckusick if (rep->r_flags & R_SENT) { 63041900Smckusick rep->r_flags &= ~R_SENT; 63141900Smckusick rep->r_nmp->nm_sent--; 63241900Smckusick } 63340117Smckusick break; 63438414Smckusick } 63541900Smckusick rep = rep->r_next; 63638414Smckusick } 63741900Smckusick nfs_sounlock(&nmp->nm_flag); 63841900Smckusick if (nam) 63941900Smckusick m_freem(nam); 64041900Smckusick /* 64141900Smckusick * If not matched to a request, drop it. 64241900Smckusick * If it's mine, get out. 64341900Smckusick */ 64441900Smckusick if (rep == &nfsreqh) { 64541900Smckusick nfsstats.rpcunexpected++; 64641900Smckusick m_freem(m); 64741900Smckusick } else if (rep == myrep) 64841900Smckusick return (0); 64938414Smckusick } 65038414Smckusick } 65138414Smckusick 65238414Smckusick /* 65338414Smckusick * nfs_request - goes something like this 65438414Smckusick * - fill in request struct 65538414Smckusick * - links it into list 65641900Smckusick * - calls nfs_send() for first transmit 65741900Smckusick * - calls nfs_receive() to get reply 65838414Smckusick * - break down rpc header and return with nfs reply pointed to 65938414Smckusick * by mrep or error 66038414Smckusick * nb: always frees up mreq mbuf list 66138414Smckusick */ 66243351Smckusick nfs_request(vp, mreq, xid, procnum, procp, tryhard, mp, mrp, mdp, dposp) 66338414Smckusick struct vnode *vp; 66438414Smckusick struct mbuf *mreq; 66538414Smckusick u_long xid; 66641900Smckusick int procnum; 66741900Smckusick struct proc *procp; 66843351Smckusick int tryhard; 66938414Smckusick struct mount *mp; 67038414Smckusick struct mbuf **mrp; 67138414Smckusick struct mbuf **mdp; 67238414Smckusick caddr_t *dposp; 67338414Smckusick { 67438414Smckusick register struct mbuf *m, *mrep; 67538414Smckusick register struct nfsreq *rep; 67638414Smckusick register u_long *p; 67738414Smckusick register int len; 67841900Smckusick struct nfsmount *nmp; 67938414Smckusick struct mbuf *md; 68039344Smckusick struct nfsreq *reph; 68138414Smckusick caddr_t dpos; 68238414Smckusick char *cp2; 68338414Smckusick int t1; 684*45281Smckusick int s, compressed; 68541900Smckusick int error = 0; 68638414Smckusick 68741900Smckusick nmp = VFSTONFS(mp); 68838414Smckusick m = mreq; 68938414Smckusick MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK); 69038414Smckusick rep->r_xid = xid; 69141900Smckusick rep->r_nmp = nmp; 69238414Smckusick rep->r_vp = vp; 69341900Smckusick rep->r_procp = procp; 69443351Smckusick if ((nmp->nm_flag & NFSMNT_SOFT) || 69543351Smckusick ((nmp->nm_flag & NFSMNT_SPONGY) && !tryhard)) 69641900Smckusick rep->r_retry = nmp->nm_retry; 69738414Smckusick else 69840117Smckusick rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */ 69940117Smckusick rep->r_flags = rep->r_rexmit = 0; 70041900Smckusick /* 70141900Smckusick * Three cases: 70241900Smckusick * - non-idempotent requests on SOCK_DGRAM use NFS_MINIDEMTIMEO 70341900Smckusick * - idempotent requests on SOCK_DGRAM use 0 70441900Smckusick * - Reliable transports, NFS_RELIABLETIMEO 70541900Smckusick * Timeouts are still done on reliable transports to ensure detection 70643351Smckusick * of excessive connection delay. 70741900Smckusick */ 70841900Smckusick if (nmp->nm_sotype != SOCK_DGRAM) 70941900Smckusick rep->r_timerinit = -NFS_RELIABLETIMEO; 71041900Smckusick else if (nonidempotent[procnum]) 71141900Smckusick rep->r_timerinit = -NFS_MINIDEMTIMEO; 71241900Smckusick else 71341900Smckusick rep->r_timerinit = 0; 71441900Smckusick rep->r_timer = rep->r_timerinit; 71538414Smckusick rep->r_mrep = NULL; 71638414Smckusick len = 0; 71738414Smckusick while (m) { 71838414Smckusick len += m->m_len; 71938414Smckusick m = m->m_next; 72038414Smckusick } 72141900Smckusick mreq->m_pkthdr.len = len; 72241900Smckusick mreq->m_pkthdr.rcvif = (struct ifnet *)0; 723*45281Smckusick compressed = 0; 724*45281Smckusick m = mreq; 725*45281Smckusick if ((nmp->nm_flag & NFSMNT_COMPRESS) && compressrequest[procnum]) { 726*45281Smckusick mreq = nfs_compress(mreq); 727*45281Smckusick if (mreq != m) { 728*45281Smckusick len = mreq->m_pkthdr.len; 729*45281Smckusick compressed++; 730*45281Smckusick } 731*45281Smckusick } 73241900Smckusick /* 73341900Smckusick * For non-atomic protocols, insert a Sun RPC Record Mark. 73441900Smckusick */ 73541900Smckusick if ((nmp->nm_soflags & PR_ATOMIC) == 0) { 73641900Smckusick M_PREPEND(mreq, sizeof(u_long), M_WAIT); 73741900Smckusick *mtod(mreq, u_long *) = htonl(0x80000000 | len); 73841900Smckusick } 73941900Smckusick rep->r_mreq = mreq; 74038414Smckusick 74140117Smckusick /* 74240117Smckusick * Do the client side RPC. 74340117Smckusick */ 74440117Smckusick nfsstats.rpcrequests++; 74541900Smckusick /* 74641900Smckusick * Chain request into list of outstanding requests. Be sure 74741900Smckusick * to put it LAST so timer finds oldest requests first. 74841900Smckusick */ 74940117Smckusick s = splnet(); 75039344Smckusick reph = &nfsreqh; 75141900Smckusick reph->r_prev->r_next = rep; 75241900Smckusick rep->r_prev = reph->r_prev; 75339344Smckusick reph->r_prev = rep; 75439344Smckusick rep->r_next = reph; 75540117Smckusick /* 75640117Smckusick * If backing off another request or avoiding congestion, don't 75740117Smckusick * send this one now but let timer do it. If not timing a request, 75840117Smckusick * do it now. 75940117Smckusick */ 76041900Smckusick if (nmp->nm_sent <= 0 || nmp->nm_sotype != SOCK_DGRAM || 76141900Smckusick (nmp->nm_currexmit == 0 && nmp->nm_sent < nmp->nm_window)) { 76241900Smckusick nmp->nm_sent++; 76341900Smckusick rep->r_flags |= R_SENT; 76441900Smckusick if (nmp->nm_rtt == -1) { 76541900Smckusick nmp->nm_rtt = 0; 76641900Smckusick rep->r_flags |= R_TIMING; 76741900Smckusick } 76840117Smckusick splx(s); 76941900Smckusick m = m_copym(mreq, 0, M_COPYALL, M_WAIT); 77041900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) 77143351Smckusick nfs_solock(&nmp->nm_flag); 77241900Smckusick error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep); 77341900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) 77441900Smckusick nfs_sounlock(&nmp->nm_flag); 77541900Smckusick if (error && NFSIGNORE_SOERROR(nmp->nm_soflags, error)) 77641900Smckusick nmp->nm_so->so_error = error = 0; 77741900Smckusick } else 77841900Smckusick splx(s); 77938414Smckusick 78038414Smckusick /* 78140117Smckusick * Wait for the reply from our send or the timer's. 78240117Smckusick */ 78341900Smckusick if (!error) 78441900Smckusick error = nfs_reply(nmp, rep); 78538414Smckusick 78640117Smckusick /* 78740117Smckusick * RPC done, unlink the request. 78840117Smckusick */ 78938414Smckusick s = splnet(); 79038414Smckusick rep->r_prev->r_next = rep->r_next; 79139344Smckusick rep->r_next->r_prev = rep->r_prev; 79238414Smckusick splx(s); 79341900Smckusick 79441900Smckusick /* 79541900Smckusick * If there was a successful reply and a tprintf msg. 79641900Smckusick * tprintf a response. 79741900Smckusick */ 79841900Smckusick if (!error && (rep->r_flags & R_TPRINTFMSG)) { 79941900Smckusick if (rep->r_procp) 80043061Smarc tprintf(rep->r_procp->p_session, 80141900Smckusick "Nfs server %s, is alive again\n", 80241900Smckusick rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 80341900Smckusick else 80443061Smarc tprintf(NULL, "Nfs server %s, is alive again\n", 80541900Smckusick rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 80641900Smckusick } 80738414Smckusick m_freem(rep->r_mreq); 808*45281Smckusick mrep = rep->r_mrep; 80938414Smckusick FREE((caddr_t)rep, M_NFSREQ); 81038414Smckusick if (error) 81138414Smckusick return (error); 81238414Smckusick 813*45281Smckusick if (compressed) 814*45281Smckusick mrep = nfs_uncompress(mrep); 815*45281Smckusick md = mrep; 81638414Smckusick /* 81738414Smckusick * break down the rpc header and check if ok 81838414Smckusick */ 81938414Smckusick dpos = mtod(md, caddr_t); 82038414Smckusick nfsm_disect(p, u_long *, 5*NFSX_UNSIGNED); 82138414Smckusick p += 2; 82238414Smckusick if (*p++ == rpc_msgdenied) { 82338414Smckusick if (*p == rpc_mismatch) 82438414Smckusick error = EOPNOTSUPP; 82538414Smckusick else 82638414Smckusick error = EACCES; 82738414Smckusick m_freem(mrep); 82838414Smckusick return (error); 82938414Smckusick } 83038414Smckusick /* 83138414Smckusick * skip over the auth_verf, someday we may want to cache auth_short's 83238414Smckusick * for nfs_reqhead(), but for now just dump it 83338414Smckusick */ 83438414Smckusick if (*++p != 0) { 83538414Smckusick len = nfsm_rndup(fxdr_unsigned(long, *p)); 83638414Smckusick nfsm_adv(len); 83738414Smckusick } 83838414Smckusick nfsm_disect(p, u_long *, NFSX_UNSIGNED); 83938414Smckusick /* 0 == ok */ 84038414Smckusick if (*p == 0) { 84138414Smckusick nfsm_disect(p, u_long *, NFSX_UNSIGNED); 84238414Smckusick if (*p != 0) { 84338414Smckusick error = fxdr_unsigned(int, *p); 84438414Smckusick m_freem(mrep); 84538414Smckusick return (error); 84638414Smckusick } 84738414Smckusick *mrp = mrep; 84838414Smckusick *mdp = md; 84938414Smckusick *dposp = dpos; 85038414Smckusick return (0); 85138414Smckusick } 85238414Smckusick m_freem(mrep); 85338414Smckusick return (EPROTONOSUPPORT); 85438414Smckusick nfsmout: 85538414Smckusick return (error); 85638414Smckusick } 85738414Smckusick 85838414Smckusick /* 85938414Smckusick * Get a request for the server main loop 86038414Smckusick * - receive a request via. nfs_soreceive() 86138414Smckusick * - verify it 86238414Smckusick * - fill in the cred struct. 86338414Smckusick */ 86442243Smckusick nfs_getreq(so, prog, vers, maxproc, nam, mrp, mdp, dposp, retxid, procnum, cr, 865*45281Smckusick msk, mtch, wascomp) 86638414Smckusick struct socket *so; 86738414Smckusick u_long prog; 86838414Smckusick u_long vers; 86938414Smckusick int maxproc; 87038414Smckusick struct mbuf **nam; 87138414Smckusick struct mbuf **mrp; 87238414Smckusick struct mbuf **mdp; 87338414Smckusick caddr_t *dposp; 87438414Smckusick u_long *retxid; 87542243Smckusick u_long *procnum; 87638414Smckusick register struct ucred *cr; 87741900Smckusick struct mbuf *msk, *mtch; 878*45281Smckusick int *wascomp; 87938414Smckusick { 88038414Smckusick register int i; 88139494Smckusick register u_long *p; 88239494Smckusick register long t1; 88339494Smckusick caddr_t dpos, cp2; 88439494Smckusick int error = 0; 88539494Smckusick struct mbuf *mrep, *md; 88639494Smckusick int len; 88738414Smckusick 88841900Smckusick if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 88941900Smckusick error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0); 89041900Smckusick } else { 89141900Smckusick mrep = (struct mbuf *)0; 89241900Smckusick do { 89341900Smckusick if (mrep) { 89441900Smckusick m_freem(*nam); 89541900Smckusick m_freem(mrep); 89641900Smckusick } 89741900Smckusick error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0); 89841900Smckusick } while (!error && nfs_badnam(*nam, msk, mtch)); 89941900Smckusick } 90041900Smckusick if (error) 90138414Smckusick return (error); 90238414Smckusick md = mrep; 903*45281Smckusick mrep = nfs_uncompress(mrep); 904*45281Smckusick if (mrep != md) { 905*45281Smckusick *wascomp = 1; 906*45281Smckusick md = mrep; 907*45281Smckusick } else 908*45281Smckusick *wascomp = 0; 90938414Smckusick dpos = mtod(mrep, caddr_t); 91038414Smckusick nfsm_disect(p, u_long *, 10*NFSX_UNSIGNED); 91138414Smckusick *retxid = *p++; 91238414Smckusick if (*p++ != rpc_call) { 91338414Smckusick m_freem(mrep); 91438414Smckusick return (ERPCMISMATCH); 91538414Smckusick } 91638414Smckusick if (*p++ != rpc_vers) { 91738414Smckusick m_freem(mrep); 91838414Smckusick return (ERPCMISMATCH); 91938414Smckusick } 92038414Smckusick if (*p++ != prog) { 92138414Smckusick m_freem(mrep); 92238414Smckusick return (EPROGUNAVAIL); 92338414Smckusick } 92438414Smckusick if (*p++ != vers) { 92538414Smckusick m_freem(mrep); 92638414Smckusick return (EPROGMISMATCH); 92738414Smckusick } 92842243Smckusick *procnum = fxdr_unsigned(u_long, *p++); 92942243Smckusick if (*procnum == NFSPROC_NULL) { 93038414Smckusick *mrp = mrep; 93138414Smckusick return (0); 93238414Smckusick } 93342243Smckusick if (*procnum > maxproc || *p++ != rpc_auth_unix) { 93438414Smckusick m_freem(mrep); 93538414Smckusick return (EPROCUNAVAIL); 93638414Smckusick } 93741900Smckusick len = fxdr_unsigned(int, *p++); 93841900Smckusick if (len < 0 || len > RPCAUTH_MAXSIZ) { 93941900Smckusick m_freem(mrep); 94041900Smckusick return (EBADRPC); 94141900Smckusick } 94239494Smckusick len = fxdr_unsigned(int, *++p); 94341900Smckusick if (len < 0 || len > NFS_MAXNAMLEN) { 94441900Smckusick m_freem(mrep); 94541900Smckusick return (EBADRPC); 94641900Smckusick } 94739494Smckusick nfsm_adv(nfsm_rndup(len)); 94838414Smckusick nfsm_disect(p, u_long *, 3*NFSX_UNSIGNED); 94938414Smckusick cr->cr_uid = fxdr_unsigned(uid_t, *p++); 95038414Smckusick cr->cr_gid = fxdr_unsigned(gid_t, *p++); 95139494Smckusick len = fxdr_unsigned(int, *p); 95241900Smckusick if (len < 0 || len > RPCAUTH_UNIXGIDS) { 95338414Smckusick m_freem(mrep); 95438414Smckusick return (EBADRPC); 95538414Smckusick } 95639494Smckusick nfsm_disect(p, u_long *, (len + 2)*NFSX_UNSIGNED); 95739494Smckusick for (i = 1; i <= len; i++) 95841900Smckusick if (i < NGROUPS) 95941900Smckusick cr->cr_groups[i] = fxdr_unsigned(gid_t, *p++); 96041900Smckusick else 96141900Smckusick p++; 96241900Smckusick cr->cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1); 96338414Smckusick /* 96438414Smckusick * Do we have any use for the verifier. 96538414Smckusick * According to the "Remote Procedure Call Protocol Spec." it 96638414Smckusick * should be AUTH_NULL, but some clients make it AUTH_UNIX? 96738414Smckusick * For now, just skip over it 96838414Smckusick */ 96939494Smckusick len = fxdr_unsigned(int, *++p); 97041900Smckusick if (len < 0 || len > RPCAUTH_MAXSIZ) { 97141900Smckusick m_freem(mrep); 97241900Smckusick return (EBADRPC); 97341900Smckusick } 97439494Smckusick if (len > 0) 97539494Smckusick nfsm_adv(nfsm_rndup(len)); 97638414Smckusick *mrp = mrep; 97738414Smckusick *mdp = md; 97838414Smckusick *dposp = dpos; 97938414Smckusick return (0); 98038414Smckusick nfsmout: 98138414Smckusick return (error); 98238414Smckusick } 98338414Smckusick 98438414Smckusick /* 98538414Smckusick * Generate the rpc reply header 98638414Smckusick * siz arg. is used to decide if adding a cluster is worthwhile 98738414Smckusick */ 98838414Smckusick nfs_rephead(siz, retxid, err, mrq, mbp, bposp) 98938414Smckusick int siz; 99038414Smckusick u_long retxid; 99138414Smckusick int err; 99238414Smckusick struct mbuf **mrq; 99338414Smckusick struct mbuf **mbp; 99438414Smckusick caddr_t *bposp; 99538414Smckusick { 99639494Smckusick register u_long *p; 99739494Smckusick register long t1; 99839494Smckusick caddr_t bpos; 99939494Smckusick struct mbuf *mreq, *mb, *mb2; 100038414Smckusick 100138414Smckusick NFSMGETHDR(mreq); 100238414Smckusick mb = mreq; 100338414Smckusick if ((siz+RPC_REPLYSIZ) > MHLEN) 100441900Smckusick MCLGET(mreq, M_WAIT); 100538414Smckusick p = mtod(mreq, u_long *); 100638414Smckusick mreq->m_len = 6*NFSX_UNSIGNED; 100738414Smckusick bpos = ((caddr_t)p)+mreq->m_len; 100838414Smckusick *p++ = retxid; 100938414Smckusick *p++ = rpc_reply; 101038414Smckusick if (err == ERPCMISMATCH) { 101138414Smckusick *p++ = rpc_msgdenied; 101238414Smckusick *p++ = rpc_mismatch; 101338414Smckusick *p++ = txdr_unsigned(2); 101438414Smckusick *p = txdr_unsigned(2); 101538414Smckusick } else { 101638414Smckusick *p++ = rpc_msgaccepted; 101738414Smckusick *p++ = 0; 101838414Smckusick *p++ = 0; 101938414Smckusick switch (err) { 102038414Smckusick case EPROGUNAVAIL: 102138414Smckusick *p = txdr_unsigned(RPC_PROGUNAVAIL); 102238414Smckusick break; 102338414Smckusick case EPROGMISMATCH: 102438414Smckusick *p = txdr_unsigned(RPC_PROGMISMATCH); 102538414Smckusick nfsm_build(p, u_long *, 2*NFSX_UNSIGNED); 102638414Smckusick *p++ = txdr_unsigned(2); 102738414Smckusick *p = txdr_unsigned(2); /* someday 3 */ 102838414Smckusick break; 102938414Smckusick case EPROCUNAVAIL: 103038414Smckusick *p = txdr_unsigned(RPC_PROCUNAVAIL); 103138414Smckusick break; 103238414Smckusick default: 103338414Smckusick *p = 0; 103438414Smckusick if (err != VNOVAL) { 103538414Smckusick nfsm_build(p, u_long *, NFSX_UNSIGNED); 103638414Smckusick *p = txdr_unsigned(err); 103738414Smckusick } 103838414Smckusick break; 103938414Smckusick }; 104038414Smckusick } 104138414Smckusick *mrq = mreq; 104238414Smckusick *mbp = mb; 104338414Smckusick *bposp = bpos; 104438414Smckusick if (err != 0 && err != VNOVAL) 104538414Smckusick nfsstats.srvrpc_errs++; 104638414Smckusick return (0); 104738414Smckusick } 104838414Smckusick 104938414Smckusick /* 105038414Smckusick * Nfs timer routine 105138414Smckusick * Scan the nfsreq list and retranmit any requests that have timed out 105238414Smckusick * To avoid retransmission attempts on STREAM sockets (in the future) make 105340117Smckusick * sure to set the r_retry field to 0 (implies nm_retry == 0). 105438414Smckusick */ 105538414Smckusick nfs_timer() 105638414Smckusick { 105738414Smckusick register struct nfsreq *rep; 105838414Smckusick register struct mbuf *m; 105938414Smckusick register struct socket *so; 106041900Smckusick register struct nfsmount *nmp; 106140117Smckusick int s, error; 106238414Smckusick 106338414Smckusick s = splnet(); 106441900Smckusick for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) { 106541900Smckusick nmp = rep->r_nmp; 106641900Smckusick if (rep->r_mrep || (rep->r_flags & R_SOFTTERM) || 106741900Smckusick (so = nmp->nm_so) == NULL) 106841900Smckusick continue; 106941900Smckusick if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp)) { 107041900Smckusick rep->r_flags |= R_SOFTTERM; 107141900Smckusick continue; 107241900Smckusick } 107340117Smckusick if (rep->r_flags & R_TIMING) /* update rtt in mount */ 107441900Smckusick nmp->nm_rtt++; 107541900Smckusick /* If not timed out */ 107641900Smckusick if (++rep->r_timer < nmp->nm_rto) 107741900Smckusick continue; 107840117Smckusick /* Do backoff and save new timeout in mount */ 107940117Smckusick if (rep->r_flags & R_TIMING) { 108041900Smckusick nfs_backofftimer(nmp); 108140117Smckusick rep->r_flags &= ~R_TIMING; 108241900Smckusick nmp->nm_rtt = -1; 108340117Smckusick } 108440117Smckusick if (rep->r_flags & R_SENT) { 108540117Smckusick rep->r_flags &= ~R_SENT; 108641900Smckusick nmp->nm_sent--; 108740117Smckusick } 108841900Smckusick 108941900Smckusick /* 109041900Smckusick * Check for too many retries on soft mount. 109141900Smckusick * nb: For hard mounts, r_retry == NFS_MAXREXMIT+1 109241900Smckusick */ 109341900Smckusick if (++rep->r_rexmit > NFS_MAXREXMIT) 109440117Smckusick rep->r_rexmit = NFS_MAXREXMIT; 109540117Smckusick 109641900Smckusick /* 109741900Smckusick * Check for server not responding 109841900Smckusick */ 109941900Smckusick if ((rep->r_flags & R_TPRINTFMSG) == 0 && 110043351Smckusick rep->r_rexmit > NFS_FISHY) { 110141900Smckusick if (rep->r_procp && rep->r_procp->p_session) 110243061Smarc tprintf(rep->r_procp->p_session, 110341900Smckusick "Nfs server %s, not responding\n", 110441900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 110541900Smckusick else 110643061Smarc tprintf(NULL, 110741900Smckusick "Nfs server %s, not responding\n", 110841900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 110941900Smckusick rep->r_flags |= R_TPRINTFMSG; 111041900Smckusick } 111143351Smckusick if (rep->r_rexmit >= rep->r_retry) { /* too many */ 111241900Smckusick nfsstats.rpctimeouts++; 111341900Smckusick rep->r_flags |= R_SOFTTERM; 111441900Smckusick continue; 111541900Smckusick } 111643351Smckusick if (nmp->nm_sotype != SOCK_DGRAM) 111743351Smckusick continue; 111841900Smckusick 111941900Smckusick /* 112041900Smckusick * If there is enough space and the window allows.. 112141900Smckusick * Resend it 112241900Smckusick */ 112341900Smckusick if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len && 112441900Smckusick nmp->nm_sent < nmp->nm_window && 112541900Smckusick (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){ 112641900Smckusick nfsstats.rpcretries++; 112741900Smckusick if ((nmp->nm_flag & NFSMNT_NOCONN) == 0) 112841900Smckusick error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, 112941900Smckusick (caddr_t)0, (struct mbuf *)0, (struct mbuf *)0); 113041900Smckusick else 113141900Smckusick error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, 113241900Smckusick nmp->nm_nam, (struct mbuf *)0, (struct mbuf *)0); 113341900Smckusick if (error) { 113441900Smckusick if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) 113541900Smckusick so->so_error = 0; 113641900Smckusick } else { 113741900Smckusick /* 113841900Smckusick * We need to time the request even though we 113941900Smckusick * are retransmitting. 114041900Smckusick */ 114141900Smckusick nmp->nm_rtt = 0; 114241900Smckusick nmp->nm_sent++; 114341900Smckusick rep->r_flags |= (R_SENT|R_TIMING); 114441900Smckusick rep->r_timer = rep->r_timerinit; 114541900Smckusick } 114641900Smckusick } 114740117Smckusick } 114840117Smckusick splx(s); 114940117Smckusick timeout(nfs_timer, (caddr_t)0, hz/NFS_HZ); 115040117Smckusick } 115140117Smckusick 115240117Smckusick /* 115340117Smckusick * NFS timer update and backoff. The "Jacobson/Karels/Karn" scheme is 115440117Smckusick * used here. The timer state is held in the nfsmount structure and 115540117Smckusick * a single request is used to clock the response. When successful 115640117Smckusick * the rtt smoothing in nfs_updatetimer is used, when failed the backoff 115740117Smckusick * is done by nfs_backofftimer. We also log failure messages in these 115840117Smckusick * routines. 115940117Smckusick * 116040117Smckusick * Congestion variables are held in the nfshost structure which 116140117Smckusick * is referenced by nfsmounts and shared per-server. This separation 116240117Smckusick * makes it possible to do per-mount timing which allows varying disk 116340117Smckusick * access times to be dealt with, while preserving a network oriented 116440117Smckusick * congestion control scheme. 116540117Smckusick * 116640117Smckusick * The windowing implements the Jacobson/Karels slowstart algorithm 116740117Smckusick * with adjusted scaling factors. We start with one request, then send 116840117Smckusick * 4 more after each success until the ssthresh limit is reached, then 116940117Smckusick * we increment at a rate proportional to the window. On failure, we 117040117Smckusick * remember 3/4 the current window and clamp the send limit to 1. Note 117140117Smckusick * ICMP source quench is not reflected in so->so_error so we ignore that 117240117Smckusick * for now. 117340117Smckusick * 117440117Smckusick * NFS behaves much more like a transport protocol with these changes, 117540117Smckusick * shedding the teenage pedal-to-the-metal tendencies of "other" 117640117Smckusick * implementations. 117740117Smckusick * 117840117Smckusick * Timers and congestion avoidance by Tom Talpey, Open Software Foundation. 117940117Smckusick */ 118040117Smckusick 118140117Smckusick /* 118240117Smckusick * The TCP algorithm was not forgiving enough. Because the NFS server 118340117Smckusick * responds only after performing lookups/diskio/etc, we have to be 118440117Smckusick * more prepared to accept a spiky variance. The TCP algorithm is: 118541900Smckusick * TCP_RTO(nmp) ((((nmp)->nm_srtt >> 2) + (nmp)->nm_rttvar) >> 1) 118640117Smckusick */ 118741900Smckusick #define NFS_RTO(nmp) (((nmp)->nm_srtt >> 3) + (nmp)->nm_rttvar) 118840117Smckusick 118941900Smckusick nfs_updatetimer(nmp) 119041900Smckusick register struct nfsmount *nmp; 119140117Smckusick { 119240117Smckusick 119340117Smckusick /* If retransmitted, clear and return */ 119441900Smckusick if (nmp->nm_rexmit || nmp->nm_currexmit) { 119541900Smckusick nmp->nm_rexmit = nmp->nm_currexmit = 0; 119640117Smckusick return; 119740117Smckusick } 119840117Smckusick /* If have a measurement, do smoothing */ 119941900Smckusick if (nmp->nm_srtt) { 120040117Smckusick register short delta; 120141900Smckusick delta = nmp->nm_rtt - (nmp->nm_srtt >> 3); 120241900Smckusick if ((nmp->nm_srtt += delta) <= 0) 120341900Smckusick nmp->nm_srtt = 1; 120440117Smckusick if (delta < 0) 120540117Smckusick delta = -delta; 120641900Smckusick delta -= (nmp->nm_rttvar >> 2); 120741900Smckusick if ((nmp->nm_rttvar += delta) <= 0) 120841900Smckusick nmp->nm_rttvar = 1; 120940117Smckusick /* Else initialize */ 121040117Smckusick } else { 121141900Smckusick nmp->nm_rttvar = nmp->nm_rtt << 1; 121241900Smckusick if (nmp->nm_rttvar == 0) nmp->nm_rttvar = 2; 121341900Smckusick nmp->nm_srtt = nmp->nm_rttvar << 2; 121440117Smckusick } 121540117Smckusick /* Compute new Retransmission TimeOut and clip */ 121641900Smckusick nmp->nm_rto = NFS_RTO(nmp); 121741900Smckusick if (nmp->nm_rto < NFS_MINTIMEO) 121841900Smckusick nmp->nm_rto = NFS_MINTIMEO; 121941900Smckusick else if (nmp->nm_rto > NFS_MAXTIMEO) 122041900Smckusick nmp->nm_rto = NFS_MAXTIMEO; 122140117Smckusick 122240117Smckusick /* Update window estimate */ 122341900Smckusick if (nmp->nm_window < nmp->nm_ssthresh) /* quickly */ 122441900Smckusick nmp->nm_window += 4; 122540117Smckusick else { /* slowly */ 122641900Smckusick register long incr = ++nmp->nm_winext; 122741900Smckusick incr = (incr * incr) / nmp->nm_window; 122840117Smckusick if (incr > 0) { 122941900Smckusick nmp->nm_winext = 0; 123041900Smckusick ++nmp->nm_window; 123140117Smckusick } 123240117Smckusick } 123341900Smckusick if (nmp->nm_window > NFS_MAXWINDOW) 123441900Smckusick nmp->nm_window = NFS_MAXWINDOW; 123540117Smckusick } 123640117Smckusick 123741900Smckusick nfs_backofftimer(nmp) 123841900Smckusick register struct nfsmount *nmp; 123940117Smckusick { 124040117Smckusick register unsigned long newrto; 124140117Smckusick 124240117Smckusick /* Clip shift count */ 124341900Smckusick if (++nmp->nm_rexmit > 8 * sizeof nmp->nm_rto) 124441900Smckusick nmp->nm_rexmit = 8 * sizeof nmp->nm_rto; 124540117Smckusick /* Back off RTO exponentially */ 124641900Smckusick newrto = NFS_RTO(nmp); 124741900Smckusick newrto <<= (nmp->nm_rexmit - 1); 124840117Smckusick if (newrto == 0 || newrto > NFS_MAXTIMEO) 124940117Smckusick newrto = NFS_MAXTIMEO; 125041900Smckusick nmp->nm_rto = newrto; 125140117Smckusick 125240117Smckusick /* If too many retries, message, assume a bogus RTT and re-measure */ 125341900Smckusick if (nmp->nm_currexmit < nmp->nm_rexmit) { 125441900Smckusick nmp->nm_currexmit = nmp->nm_rexmit; 125541900Smckusick if (nmp->nm_currexmit >= nfsrexmtthresh) { 125641900Smckusick if (nmp->nm_currexmit == nfsrexmtthresh) { 125741900Smckusick nmp->nm_rttvar += (nmp->nm_srtt >> 2); 125841900Smckusick nmp->nm_srtt = 0; 125938414Smckusick } 126038414Smckusick } 126138414Smckusick } 126240117Smckusick /* Close down window but remember this point (3/4 current) for later */ 126341900Smckusick nmp->nm_ssthresh = ((nmp->nm_window << 1) + nmp->nm_window) >> 2; 126441900Smckusick nmp->nm_window = 1; 126541900Smckusick nmp->nm_winext = 0; 126638414Smckusick } 126738414Smckusick 126838414Smckusick /* 126941900Smckusick * Test for a termination signal pending on procp. 127041900Smckusick * This is used for NFSMNT_INT mounts. 127138414Smckusick */ 127241900Smckusick nfs_sigintr(p) 127341900Smckusick register struct proc *p; 127441900Smckusick { 127541900Smckusick if (p && p->p_sig && (((p->p_sig &~ p->p_sigmask) &~ p->p_sigignore) & 127641900Smckusick NFSINT_SIGMASK)) 127741900Smckusick return (1); 127841900Smckusick else 127941900Smckusick return (0); 128041900Smckusick } 128140117Smckusick 128241900Smckusick /* 128341900Smckusick * Lock a socket against others. 128441900Smckusick * Necessary for STREAM sockets to ensure you get an entire rpc request/reply 128541900Smckusick * and also to avoid race conditions between the processes with nfs requests 128641900Smckusick * in progress when a reconnect is necessary. 128741900Smckusick */ 128843351Smckusick nfs_solock(flagp) 128943351Smckusick register int *flagp; 129038414Smckusick { 129140117Smckusick 129241900Smckusick while (*flagp & NFSMNT_SCKLOCK) { 129341900Smckusick *flagp |= NFSMNT_WANTSCK; 129443351Smckusick (void) tsleep((caddr_t)flagp, PZERO-1, "nfsolck", 0); 129540117Smckusick } 129641900Smckusick *flagp |= NFSMNT_SCKLOCK; 129741900Smckusick } 129840117Smckusick 129941900Smckusick /* 130041900Smckusick * Unlock the stream socket for others. 130141900Smckusick */ 130241900Smckusick nfs_sounlock(flagp) 130343351Smckusick register int *flagp; 130441900Smckusick { 130541900Smckusick 130641900Smckusick if ((*flagp & NFSMNT_SCKLOCK) == 0) 130741900Smckusick panic("nfs sounlock"); 130841900Smckusick *flagp &= ~NFSMNT_SCKLOCK; 130941900Smckusick if (*flagp & NFSMNT_WANTSCK) { 131041900Smckusick *flagp &= ~NFSMNT_WANTSCK; 131141900Smckusick wakeup((caddr_t)flagp); 131240117Smckusick } 131338414Smckusick } 131441900Smckusick 131541900Smckusick /* 131641900Smckusick * This function compares two net addresses by family and returns TRUE 131741900Smckusick * if they are the same. 131841900Smckusick * If there is any doubt, return FALSE. 131941900Smckusick */ 132041900Smckusick nfs_netaddr_match(nam1, nam2) 132141900Smckusick struct mbuf *nam1, *nam2; 132241900Smckusick { 132341900Smckusick register struct sockaddr *saddr1, *saddr2; 132441900Smckusick 132541900Smckusick saddr1 = mtod(nam1, struct sockaddr *); 132641900Smckusick saddr2 = mtod(nam2, struct sockaddr *); 132741900Smckusick if (saddr1->sa_family != saddr2->sa_family) 132841900Smckusick return (0); 132941900Smckusick 133041900Smckusick /* 133141900Smckusick * Must do each address family separately since unused fields 133241900Smckusick * are undefined values and not always zeroed. 133341900Smckusick */ 133441900Smckusick switch (saddr1->sa_family) { 133541900Smckusick case AF_INET: 133641900Smckusick if (((struct sockaddr_in *)saddr1)->sin_addr.s_addr == 133741900Smckusick ((struct sockaddr_in *)saddr2)->sin_addr.s_addr) 133841900Smckusick return (1); 133941900Smckusick break; 134041900Smckusick default: 134141900Smckusick break; 134241900Smckusick }; 134341900Smckusick return (0); 134441900Smckusick } 134541900Smckusick 134641900Smckusick /* 134741900Smckusick * Check the hostname fields for nfsd's mask and match fields. 134841900Smckusick * By address family: 134941900Smckusick * - Bitwise AND the mask with the host address field 135041900Smckusick * - Compare for == with match 135141900Smckusick * return TRUE if not equal 135241900Smckusick */ 135341900Smckusick nfs_badnam(nam, msk, mtch) 135441900Smckusick register struct mbuf *nam, *msk, *mtch; 135541900Smckusick { 135641900Smckusick switch (mtod(nam, struct sockaddr *)->sa_family) { 135741900Smckusick case AF_INET: 135841900Smckusick return ((mtod(nam, struct sockaddr_in *)->sin_addr.s_addr & 135941900Smckusick mtod(msk, struct sockaddr_in *)->sin_addr.s_addr) != 136041900Smckusick mtod(mtch, struct sockaddr_in *)->sin_addr.s_addr); 136141900Smckusick default: 136241900Smckusick printf("nfs_badmatch, unknown sa_family\n"); 136341900Smckusick return (0); 136441900Smckusick }; 136541900Smckusick } 1366