138414Smckusick /* 2*47574Skarels * Copyright (c) 1989, 1991 The Regents of the University of California. 338414Smckusick * All rights reserved. 438414Smckusick * 538414Smckusick * This code is derived from software contributed to Berkeley by 638414Smckusick * Rick Macklem at The University of Guelph. 738414Smckusick * 844511Sbostic * %sccs.include.redist.c% 938414Smckusick * 10*47574Skarels * @(#)nfs_socket.c 7.20 (Berkeley) 03/19/91 1138414Smckusick */ 1238414Smckusick 1338414Smckusick /* 1441900Smckusick * Socket operations for use by nfs 1538414Smckusick */ 1638414Smckusick 1738414Smckusick #include "param.h" 1840117Smckusick #include "proc.h" 1938414Smckusick #include "mount.h" 2038414Smckusick #include "kernel.h" 2138414Smckusick #include "malloc.h" 2238414Smckusick #include "mbuf.h" 2338414Smckusick #include "vnode.h" 2438414Smckusick #include "domain.h" 2538414Smckusick #include "protosw.h" 2638414Smckusick #include "socket.h" 2738414Smckusick #include "socketvar.h" 28*47574Skarels #include "syslog.h" 2942877Smckusick #include "../netinet/in.h" 3042877Smckusick #include "../netinet/tcp.h" 31*47574Skarels 3238414Smckusick #include "rpcv2.h" 3338414Smckusick #include "nfsv2.h" 3438414Smckusick #include "nfs.h" 3538414Smckusick #include "xdr_subs.h" 3638414Smckusick #include "nfsm_subs.h" 3738414Smckusick #include "nfsmount.h" 3838414Smckusick 3938414Smckusick #define TRUE 1 4043351Smckusick #define FALSE 0 4138414Smckusick 4240117Smckusick /* 4338414Smckusick * External data, mostly RPC constants in XDR form 4438414Smckusick */ 4538414Smckusick extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix, 4638414Smckusick rpc_msgaccepted, rpc_call; 4738414Smckusick extern u_long nfs_prog, nfs_vers; 4843351Smckusick /* Maybe these should be bits in a u_long ?? 
*/ 4941900Smckusick extern int nonidempotent[NFS_NPROCS]; 5045281Smckusick static int compressrequest[NFS_NPROCS] = { 5145281Smckusick FALSE, 5245281Smckusick TRUE, 5345281Smckusick TRUE, 5445281Smckusick FALSE, 5545281Smckusick TRUE, 5645281Smckusick TRUE, 5745281Smckusick TRUE, 5845281Smckusick FALSE, 5945281Smckusick FALSE, 6045281Smckusick TRUE, 6145281Smckusick TRUE, 6245281Smckusick TRUE, 6345281Smckusick TRUE, 6445281Smckusick TRUE, 6545281Smckusick TRUE, 6645281Smckusick TRUE, 6745281Smckusick TRUE, 6845281Smckusick TRUE, 6945281Smckusick }; 7041900Smckusick int nfs_sbwait(); 7141900Smckusick void nfs_disconnect(); 7245281Smckusick struct mbuf *nfs_compress(), *nfs_uncompress(); 7341900Smckusick 7438414Smckusick int nfsrv_null(), 7538414Smckusick nfsrv_getattr(), 7638414Smckusick nfsrv_setattr(), 7738414Smckusick nfsrv_lookup(), 7838414Smckusick nfsrv_readlink(), 7938414Smckusick nfsrv_read(), 8038414Smckusick nfsrv_write(), 8138414Smckusick nfsrv_create(), 8238414Smckusick nfsrv_remove(), 8338414Smckusick nfsrv_rename(), 8438414Smckusick nfsrv_link(), 8538414Smckusick nfsrv_symlink(), 8638414Smckusick nfsrv_mkdir(), 8738414Smckusick nfsrv_rmdir(), 8838414Smckusick nfsrv_readdir(), 8938414Smckusick nfsrv_statfs(), 9038414Smckusick nfsrv_noop(); 9138414Smckusick 9238414Smckusick int (*nfsrv_procs[NFS_NPROCS])() = { 9338414Smckusick nfsrv_null, 9438414Smckusick nfsrv_getattr, 9538414Smckusick nfsrv_setattr, 9638414Smckusick nfsrv_noop, 9738414Smckusick nfsrv_lookup, 9838414Smckusick nfsrv_readlink, 9938414Smckusick nfsrv_read, 10038414Smckusick nfsrv_noop, 10138414Smckusick nfsrv_write, 10238414Smckusick nfsrv_create, 10338414Smckusick nfsrv_remove, 10438414Smckusick nfsrv_rename, 10538414Smckusick nfsrv_link, 10638414Smckusick nfsrv_symlink, 10738414Smckusick nfsrv_mkdir, 10838414Smckusick nfsrv_rmdir, 10938414Smckusick nfsrv_readdir, 11038414Smckusick nfsrv_statfs, 11138414Smckusick }; 11238414Smckusick 11340117Smckusick struct nfsreq nfsreqh; 
11440117Smckusick int nfsrexmtthresh = NFS_FISHY; 11541900Smckusick int nfs_tcpnodelay = 1; 11638414Smckusick 11738414Smckusick /* 11841900Smckusick * Initialize sockets and congestion for a new NFS connection. 11940117Smckusick * We do not free the sockaddr if error. 12038414Smckusick */ 12141900Smckusick nfs_connect(nmp) 12240117Smckusick register struct nfsmount *nmp; 12340117Smckusick { 12441900Smckusick register struct socket *so; 12541900Smckusick int s, error; 12640117Smckusick struct mbuf *m; 12740117Smckusick 12841900Smckusick nmp->nm_so = (struct socket *)0; 12941900Smckusick if (error = socreate(mtod(nmp->nm_nam, struct sockaddr *)->sa_family, 13041900Smckusick &nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto)) 13140117Smckusick goto bad; 13241900Smckusick so = nmp->nm_so; 13341900Smckusick nmp->nm_soflags = so->so_proto->pr_flags; 13440117Smckusick 13541900Smckusick /* 13641900Smckusick * Protocols that do not require connections may be optionally left 13741900Smckusick * unconnected for servers that reply from a port other than NFS_PORT. 13841900Smckusick */ 13941900Smckusick if (nmp->nm_flag & NFSMNT_NOCONN) { 14041900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) { 14141900Smckusick error = ENOTCONN; 14240117Smckusick goto bad; 14340117Smckusick } 14441900Smckusick } else { 14541900Smckusick if (error = soconnect(so, nmp->nm_nam)) 14640117Smckusick goto bad; 14741900Smckusick 14841900Smckusick /* 14941900Smckusick * Wait for the connection to complete. Cribbed from the 15041900Smckusick * connect system call but with the wait at negative prio. 
15141900Smckusick */ 15241900Smckusick s = splnet(); 15341900Smckusick while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) 15443351Smckusick (void) tsleep((caddr_t)&so->so_timeo, PSOCK, "nfscon", 0); 15541900Smckusick splx(s); 15641900Smckusick if (so->so_error) { 15741900Smckusick error = so->so_error; 15841900Smckusick goto bad; 15941900Smckusick } 16040117Smckusick } 16141900Smckusick if (nmp->nm_sotype == SOCK_DGRAM) { 16243351Smckusick if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) { 16341900Smckusick so->so_rcv.sb_timeo = (5 * hz); 16441900Smckusick so->so_snd.sb_timeo = (5 * hz); 16541900Smckusick } else { 16641900Smckusick so->so_rcv.sb_timeo = 0; 16741900Smckusick so->so_snd.sb_timeo = 0; 16841900Smckusick } 169*47574Skarels if (error = soreserve(so, 170*47574Skarels min(4 * (nmp->nm_wsize + NFS_MAXPKTHDR), NFS_MAXPACKET), 171*47574Skarels min(4 * (nmp->nm_rsize + NFS_MAXPKTHDR), NFS_MAXPACKET))) 17241900Smckusick goto bad; 17341900Smckusick } else { 17443351Smckusick if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) { 17541900Smckusick so->so_rcv.sb_timeo = (5 * hz); 17641900Smckusick so->so_snd.sb_timeo = (5 * hz); 17741900Smckusick } else { 17841900Smckusick so->so_rcv.sb_timeo = 0; 17941900Smckusick so->so_snd.sb_timeo = 0; 18041900Smckusick } 18141900Smckusick if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 18241900Smckusick MGET(m, M_WAIT, MT_SOOPTS); 18341900Smckusick *mtod(m, int *) = 1; 18441900Smckusick m->m_len = sizeof(int); 18541900Smckusick sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m); 18641900Smckusick } 18741900Smckusick if (so->so_proto->pr_domain->dom_family == AF_INET && 18841900Smckusick so->so_proto->pr_protocol == IPPROTO_TCP && 18941900Smckusick nfs_tcpnodelay) { 19041900Smckusick MGET(m, M_WAIT, MT_SOOPTS); 19141900Smckusick *mtod(m, int *) = 1; 19241900Smckusick m->m_len = sizeof(int); 19341900Smckusick sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m); 19441900Smckusick } 19541900Smckusick if (error = 
soreserve(so, 196*47574Skarels min(4 * (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof(u_long)), 197*47574Skarels NFS_MAXPACKET + sizeof(u_long)), 198*47574Skarels min(4 * (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof(u_long)), 199*47574Skarels NFS_MAXPACKET + sizeof(u_long)))) 20041900Smckusick goto bad; 20141900Smckusick } 20241900Smckusick so->so_rcv.sb_flags |= SB_NOINTR; 20341900Smckusick so->so_snd.sb_flags |= SB_NOINTR; 20440117Smckusick 20541900Smckusick /* Initialize other non-zero congestion variables */ 20641900Smckusick nmp->nm_rto = NFS_TIMEO; 20741900Smckusick nmp->nm_window = 2; /* Initial send window */ 20841900Smckusick nmp->nm_ssthresh = NFS_MAXWINDOW; /* Slowstart threshold */ 20941900Smckusick nmp->nm_rttvar = nmp->nm_rto << 1; 21041900Smckusick nmp->nm_sent = 0; 21141900Smckusick nmp->nm_currexmit = 0; 21241900Smckusick return (0); 21340117Smckusick 21441900Smckusick bad: 21541900Smckusick nfs_disconnect(nmp); 21641900Smckusick return (error); 21741900Smckusick } 21840117Smckusick 21941900Smckusick /* 22041900Smckusick * Reconnect routine: 22141900Smckusick * Called when a connection is broken on a reliable protocol. 22241900Smckusick * - clean up the old socket 22341900Smckusick * - nfs_connect() again 22441900Smckusick * - set R_MUSTRESEND for all outstanding requests on mount point 22541900Smckusick * If this fails the mount point is DEAD! 22641900Smckusick * nb: Must be called with the nfs_solock() set on the mount point. 
22741900Smckusick */ 22841900Smckusick nfs_reconnect(rep, nmp) 22941900Smckusick register struct nfsreq *rep; 23041900Smckusick register struct nfsmount *nmp; 23141900Smckusick { 23241900Smckusick register struct nfsreq *rp; 23341900Smckusick int error; 23440117Smckusick 23541900Smckusick if (rep->r_procp) 23643061Smarc tprintf(rep->r_procp->p_session, 23741900Smckusick "Nfs server %s, trying reconnect\n", 23841900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 23941900Smckusick else 24043061Smarc tprintf(NULL, "Nfs server %s, trying a reconnect\n", 24141900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 24241900Smckusick while (error = nfs_connect(nmp)) { 24342243Smckusick #ifdef lint 24442243Smckusick error = error; 24542243Smckusick #endif /* lint */ 24641900Smckusick if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp)) 24741900Smckusick return (EINTR); 24843351Smckusick (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0); 24940117Smckusick } 25041900Smckusick if (rep->r_procp) 25143061Smarc tprintf(rep->r_procp->p_session, 25241900Smckusick "Nfs server %s, reconnected\n", 25341900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 25441900Smckusick else 25543061Smarc tprintf(NULL, "Nfs server %s, reconnected\n", 25641900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 25741900Smckusick 25841900Smckusick /* 25941900Smckusick * Loop through outstanding request list and fix up all requests 26041900Smckusick * on old socket. 26141900Smckusick */ 26241900Smckusick rp = nfsreqh.r_next; 26341900Smckusick while (rp != &nfsreqh) { 26441900Smckusick if (rp->r_nmp == nmp) 26541900Smckusick rp->r_flags |= R_MUSTRESEND; 26641900Smckusick rp = rp->r_next; 26740117Smckusick } 26840117Smckusick return (0); 26940117Smckusick } 27040117Smckusick 27140117Smckusick /* 27240117Smckusick * NFS disconnect. Clean up and unlink. 
27340117Smckusick */ 27441900Smckusick void 27540117Smckusick nfs_disconnect(nmp) 27640117Smckusick register struct nfsmount *nmp; 27740117Smckusick { 27841900Smckusick register struct socket *so; 27940117Smckusick 28041900Smckusick if (nmp->nm_so) { 28141900Smckusick so = nmp->nm_so; 28241900Smckusick nmp->nm_so = (struct socket *)0; 28341900Smckusick soshutdown(so, 2); 28441900Smckusick soclose(so); 28540117Smckusick } 28640117Smckusick } 28740117Smckusick 28840117Smckusick /* 28941900Smckusick * This is the nfs send routine. For connection based socket types, it 29041900Smckusick * must be called with an nfs_solock() on the socket. 29141900Smckusick * "rep == NULL" indicates that it has been called from a server. 29240117Smckusick */ 29341900Smckusick nfs_send(so, nam, top, rep) 29438414Smckusick register struct socket *so; 29538414Smckusick struct mbuf *nam; 29641900Smckusick register struct mbuf *top; 29741900Smckusick struct nfsreq *rep; 29838414Smckusick { 29941900Smckusick struct mbuf *sendnam; 30041900Smckusick int error, soflags; 30138414Smckusick 30241900Smckusick if (rep) { 30341900Smckusick if (rep->r_flags & R_SOFTTERM) { 30440117Smckusick m_freem(top); 30541900Smckusick return (EINTR); 30640117Smckusick } 30743062Smckusick if (rep->r_nmp->nm_so == NULL && 30841900Smckusick (error = nfs_reconnect(rep, rep->r_nmp))) 30941900Smckusick return (error); 31041900Smckusick rep->r_flags &= ~R_MUSTRESEND; 31143062Smckusick so = rep->r_nmp->nm_so; 31241900Smckusick soflags = rep->r_nmp->nm_soflags; 31341900Smckusick } else 31441900Smckusick soflags = so->so_proto->pr_flags; 31541900Smckusick if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED)) 31641900Smckusick sendnam = (struct mbuf *)0; 31741900Smckusick else 31841900Smckusick sendnam = nam; 31941900Smckusick 32041900Smckusick error = sosend(so, sendnam, (struct uio *)0, top, 32141900Smckusick (struct mbuf *)0, 0); 32241900Smckusick if (error == EWOULDBLOCK && rep) { 32341900Smckusick if 
(rep->r_flags & R_SOFTTERM) 32441900Smckusick error = EINTR; 32541900Smckusick else { 32641900Smckusick rep->r_flags |= R_MUSTRESEND; 32741900Smckusick error = 0; 32840117Smckusick } 32938414Smckusick } 33041900Smckusick /* 33141900Smckusick * Ignore socket errors?? 33241900Smckusick */ 33341900Smckusick if (error && error != EINTR && error != ERESTART) 33441900Smckusick error = 0; 33538414Smckusick return (error); 33638414Smckusick } 33738414Smckusick 33838414Smckusick /* 33941900Smckusick * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all 34041900Smckusick * done by soreceive(), but for SOCK_STREAM we must deal with the Record 34141900Smckusick * Mark and consolidate the data into a new mbuf list. 34241900Smckusick * nb: Sometimes TCP passes the data up to soreceive() in long lists of 34341900Smckusick * small mbufs. 34441900Smckusick * For SOCK_STREAM we must be very careful to read an entire record once 34541900Smckusick * we have read any of it, even if the system call has been interrupted. 
34638414Smckusick */ 34741900Smckusick nfs_receive(so, aname, mp, rep) 34838414Smckusick register struct socket *so; 34938414Smckusick struct mbuf **aname; 35038414Smckusick struct mbuf **mp; 35141900Smckusick register struct nfsreq *rep; 35238414Smckusick { 35341900Smckusick struct uio auio; 35441900Smckusick struct iovec aio; 35538414Smckusick register struct mbuf *m; 35645281Smckusick struct mbuf *m2, *mnew, **mbp; 35741900Smckusick caddr_t fcp, tcp; 35841900Smckusick u_long len; 35941900Smckusick struct mbuf **getnam; 36041900Smckusick int error, siz, mlen, soflags, rcvflg = MSG_WAITALL; 36138414Smckusick 36241900Smckusick /* 36341900Smckusick * Set up arguments for soreceive() 36441900Smckusick */ 36541900Smckusick *mp = (struct mbuf *)0; 36641900Smckusick *aname = (struct mbuf *)0; 36741900Smckusick if (rep) 36841900Smckusick soflags = rep->r_nmp->nm_soflags; 36941900Smckusick else 37041900Smckusick soflags = so->so_proto->pr_flags; 37138414Smckusick 37241900Smckusick /* 37341900Smckusick * For reliable protocols, lock against other senders/receivers 37441900Smckusick * in case a reconnect is necessary. 37541900Smckusick * For SOCK_STREAM, first get the Record Mark to find out how much 37641900Smckusick * more there is to get. 37741900Smckusick * We must lock the socket against other receivers 37841900Smckusick * until we have an entire rpc request/reply. 37941900Smckusick */ 38041900Smckusick if (soflags & PR_CONNREQUIRED) { 38141900Smckusick tryagain: 38241900Smckusick /* 38341900Smckusick * Check for fatal errors and resending request. 38441900Smckusick */ 38541900Smckusick if (rep) { 38641900Smckusick /* 38741900Smckusick * Ugh: If a reconnect attempt just happened, nm_so 38841900Smckusick * would have changed. NULL indicates a failed 38941900Smckusick * attempt that has essentially shut down this 39041900Smckusick * mount point. 
39141900Smckusick */ 39241900Smckusick if (rep->r_mrep || (so = rep->r_nmp->nm_so) == NULL || 39341900Smckusick (rep->r_flags & R_SOFTTERM)) 39441900Smckusick return (EINTR); 39541900Smckusick while (rep->r_flags & R_MUSTRESEND) { 39641900Smckusick m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); 39741900Smckusick nfsstats.rpcretries++; 39841900Smckusick if (error = nfs_send(so, rep->r_nmp->nm_nam, m, 39941900Smckusick rep)) 40041900Smckusick goto errout; 40140117Smckusick } 40241900Smckusick } 40341900Smckusick if ((soflags & PR_ATOMIC) == 0) { 40441900Smckusick aio.iov_base = (caddr_t) &len; 40541900Smckusick aio.iov_len = sizeof(u_long); 40641900Smckusick auio.uio_iov = &aio; 40741900Smckusick auio.uio_iovcnt = 1; 40841900Smckusick auio.uio_segflg = UIO_SYSSPACE; 40941900Smckusick auio.uio_rw = UIO_READ; 41041900Smckusick auio.uio_offset = 0; 41141900Smckusick auio.uio_resid = sizeof(u_long); 41241900Smckusick do { 41341900Smckusick error = soreceive(so, (struct mbuf **)0, &auio, 41441900Smckusick (struct mbuf **)0, (struct mbuf **)0, &rcvflg); 41541900Smckusick if (error == EWOULDBLOCK && rep) { 41641900Smckusick if (rep->r_flags & R_SOFTTERM) 41741900Smckusick return (EINTR); 41841900Smckusick if (rep->r_flags & R_MUSTRESEND) 41941900Smckusick goto tryagain; 42041900Smckusick } 42141900Smckusick } while (error == EWOULDBLOCK); 42241900Smckusick if (!error && auio.uio_resid > 0) 42341900Smckusick error = EPIPE; 42440761Skarels if (error) 42541900Smckusick goto errout; 42641900Smckusick len = ntohl(len) & ~0x80000000; 42741900Smckusick /* 42841900Smckusick * This is SERIOUS! We are out of sync with the sender 42941900Smckusick * and forcing a disconnect/reconnect is all I can do. 
43041900Smckusick */ 43141900Smckusick if (len > NFS_MAXPACKET) { 43241900Smckusick error = EFBIG; 43341900Smckusick goto errout; 43441900Smckusick } 43541900Smckusick auio.uio_resid = len; 43641900Smckusick do { 43741900Smckusick error = soreceive(so, (struct mbuf **)0, 43841900Smckusick &auio, mp, (struct mbuf **)0, &rcvflg); 43941900Smckusick } while (error == EWOULDBLOCK || error == EINTR || 44041900Smckusick error == ERESTART); 44141900Smckusick if (!error && auio.uio_resid > 0) 44241900Smckusick error = EPIPE; 44340117Smckusick } else { 44441900Smckusick auio.uio_resid = len = 1000000; /* Anything Big */ 44541900Smckusick do { 44641900Smckusick error = soreceive(so, (struct mbuf **)0, 44741900Smckusick &auio, mp, (struct mbuf **)0, &rcvflg); 44841900Smckusick if (error == EWOULDBLOCK && rep) { 44941900Smckusick if (rep->r_flags & R_SOFTTERM) 45041900Smckusick return (EINTR); 45141900Smckusick if (rep->r_flags & R_MUSTRESEND) 45241900Smckusick goto tryagain; 45341900Smckusick } 45441900Smckusick } while (error == EWOULDBLOCK); 45541900Smckusick if (!error && *mp == NULL) 45641900Smckusick error = EPIPE; 45741900Smckusick len -= auio.uio_resid; 45840117Smckusick } 45941900Smckusick errout: 46041900Smckusick if (error && rep && error != EINTR && error != ERESTART) { 46141900Smckusick m_freem(*mp); 46241900Smckusick *mp = (struct mbuf *)0; 46341900Smckusick nfs_disconnect(rep->r_nmp); 46441900Smckusick error = nfs_reconnect(rep, rep->r_nmp); 46541900Smckusick if (!error) 46641900Smckusick goto tryagain; 46740117Smckusick } 46841900Smckusick } else { 46941900Smckusick if (so->so_state & SS_ISCONNECTED) 47041900Smckusick getnam = (struct mbuf **)0; 47141900Smckusick else 47241900Smckusick getnam = aname; 47341900Smckusick auio.uio_resid = len = 1000000; 47441900Smckusick do { 47541900Smckusick error = soreceive(so, getnam, &auio, mp, 47641900Smckusick (struct mbuf **)0, &rcvflg); 47741900Smckusick if (error == EWOULDBLOCK && rep && 47841900Smckusick (rep->r_flags & 
R_SOFTTERM)) 47941900Smckusick return (EINTR); 48041900Smckusick } while (error == EWOULDBLOCK); 48141900Smckusick len -= auio.uio_resid; 48241900Smckusick } 48341900Smckusick if (error) { 48441900Smckusick m_freem(*mp); 48541900Smckusick *mp = (struct mbuf *)0; 48641900Smckusick } 48741900Smckusick /* 48841900Smckusick * Search for any mbufs that are not a multiple of 4 bytes long. 48941900Smckusick * These could cause pointer alignment problems, so copy them to 49041900Smckusick * well aligned mbufs. 49141900Smckusick */ 49241900Smckusick m = *mp; 49341900Smckusick mbp = mp; 49441900Smckusick while (m) { 49541900Smckusick /* 49641900Smckusick * All this for something that may never happen. 49741900Smckusick */ 49845281Smckusick if (m->m_next && (m->m_len & 0x3)) { 49941900Smckusick printf("nfs_rcv odd length!\n"); 50042243Smckusick mlen = 0; 50141900Smckusick while (m) { 50245281Smckusick fcp = mtod(m, caddr_t); 50345281Smckusick while (m->m_len > 0) { 50445281Smckusick if (mlen == 0) { 50545281Smckusick MGET(m2, M_WAIT, MT_DATA); 50645281Smckusick if (len >= MINCLSIZE) 50745281Smckusick MCLGET(m2, M_WAIT); 50845281Smckusick m2->m_len = 0; 50945281Smckusick mlen = M_TRAILINGSPACE(m2); 51045281Smckusick tcp = mtod(m2, caddr_t); 51145281Smckusick *mbp = m2; 51245281Smckusick mbp = &m2->m_next; 51345281Smckusick } 51445281Smckusick siz = MIN(mlen, m->m_len); 51545281Smckusick bcopy(fcp, tcp, siz); 51645281Smckusick m2->m_len += siz; 51745281Smckusick mlen -= siz; 51845281Smckusick len -= siz; 51945281Smckusick tcp += siz; 52045281Smckusick m->m_len -= siz; 52145281Smckusick fcp += siz; 52241900Smckusick } 52345281Smckusick MFREE(m, mnew); 52445281Smckusick m = mnew; 52541900Smckusick } 52641900Smckusick break; 52740117Smckusick } 52841900Smckusick len -= m->m_len; 52941900Smckusick mbp = &m->m_next; 53041900Smckusick m = m->m_next; 53138414Smckusick } 53238414Smckusick return (error); 53338414Smckusick } 53438414Smckusick 53538414Smckusick /* 53641900Smckusick * 
Implement receipt of reply on a socket. 53738414Smckusick * We must search through the list of received datagrams matching them 53838414Smckusick * with outstanding requests using the xid, until ours is found. 53938414Smckusick */ 54041900Smckusick /* ARGSUSED */ 54141900Smckusick nfs_reply(nmp, myrep) 54241900Smckusick struct nfsmount *nmp; 54339344Smckusick struct nfsreq *myrep; 54438414Smckusick { 54538414Smckusick register struct mbuf *m; 54638414Smckusick register struct nfsreq *rep; 54741900Smckusick register int error = 0; 54845281Smckusick u_long rxid; 54941900Smckusick struct mbuf *mp, *nam; 55041900Smckusick char *cp; 55141900Smckusick int cnt, xfer; 55238414Smckusick 55341900Smckusick /* 55441900Smckusick * Loop around until we get our own reply 55541900Smckusick */ 55641900Smckusick for (;;) { 55741900Smckusick /* 55841900Smckusick * Lock against other receivers so that I don't get stuck in 55941900Smckusick * sbwait() after someone else has received my reply for me. 56041900Smckusick * Also necessary for connection based protocols to avoid 56141900Smckusick * race conditions during a reconnect. 56241900Smckusick */ 56343351Smckusick nfs_solock(&nmp->nm_flag); 56441900Smckusick /* Already received, bye bye */ 56541900Smckusick if (myrep->r_mrep != NULL) { 56641900Smckusick nfs_sounlock(&nmp->nm_flag); 56741900Smckusick return (0); 56840117Smckusick } 56941900Smckusick /* 57041900Smckusick * Get the next Rpc reply off the socket 57141900Smckusick */ 57241900Smckusick if (error = nfs_receive(nmp->nm_so, &nam, &mp, myrep)) { 57341900Smckusick nfs_sounlock(&nmp->nm_flag); 57438414Smckusick 57541900Smckusick /* 57641900Smckusick * Ignore routing errors on connectionless protocols?? 
57741900Smckusick */ 57841900Smckusick if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) { 57941900Smckusick nmp->nm_so->so_error = 0; 58041900Smckusick continue; 58141900Smckusick } 58241900Smckusick 58341900Smckusick /* 58441900Smckusick * Otherwise cleanup and return a fatal error. 58541900Smckusick */ 58641900Smckusick if (myrep->r_flags & R_TIMING) { 58741900Smckusick myrep->r_flags &= ~R_TIMING; 58841900Smckusick nmp->nm_rtt = -1; 58941900Smckusick } 59041900Smckusick if (myrep->r_flags & R_SENT) { 59141900Smckusick myrep->r_flags &= ~R_SENT; 59241900Smckusick nmp->nm_sent--; 59341900Smckusick } 59441900Smckusick return (error); 59538414Smckusick } 59641900Smckusick 59741900Smckusick /* 59841900Smckusick * Get the xid and check that it is an rpc reply 59941900Smckusick */ 60041900Smckusick m = mp; 60145281Smckusick while (m && m->m_len == 0) 60245281Smckusick m = m->m_next; 60345281Smckusick if (m == NULL) { 60440117Smckusick nfsstats.rpcinvalid++; 60541900Smckusick m_freem(mp); 60641900Smckusick nfs_sounlock(&nmp->nm_flag); 60741900Smckusick continue; 60838414Smckusick } 60945281Smckusick bcopy(mtod(m, caddr_t), (caddr_t)&rxid, NFSX_UNSIGNED); 61041900Smckusick /* 61141900Smckusick * Loop through the request list to match up the reply 61241900Smckusick * Iff no match, just drop the datagram 61341900Smckusick */ 61441900Smckusick m = mp; 61541900Smckusick rep = nfsreqh.r_next; 61641900Smckusick while (rep != &nfsreqh) { 61745281Smckusick if (rep->r_mrep == NULL && rxid == rep->r_xid) { 61841900Smckusick /* Found it.. 
*/ 61941900Smckusick rep->r_mrep = m; 62041900Smckusick /* 62141900Smckusick * Update timing 62241900Smckusick */ 62341900Smckusick if (rep->r_flags & R_TIMING) { 62441900Smckusick nfs_updatetimer(rep->r_nmp); 62541900Smckusick rep->r_flags &= ~R_TIMING; 62641900Smckusick rep->r_nmp->nm_rtt = -1; 62741900Smckusick } 62841900Smckusick if (rep->r_flags & R_SENT) { 62941900Smckusick rep->r_flags &= ~R_SENT; 63041900Smckusick rep->r_nmp->nm_sent--; 63141900Smckusick } 63240117Smckusick break; 63338414Smckusick } 63441900Smckusick rep = rep->r_next; 63538414Smckusick } 63641900Smckusick nfs_sounlock(&nmp->nm_flag); 63741900Smckusick if (nam) 63841900Smckusick m_freem(nam); 63941900Smckusick /* 64041900Smckusick * If not matched to a request, drop it. 64141900Smckusick * If it's mine, get out. 64241900Smckusick */ 64341900Smckusick if (rep == &nfsreqh) { 64441900Smckusick nfsstats.rpcunexpected++; 64541900Smckusick m_freem(m); 64641900Smckusick } else if (rep == myrep) 64741900Smckusick return (0); 64838414Smckusick } 64938414Smckusick } 65038414Smckusick 65138414Smckusick /* 65238414Smckusick * nfs_request - goes something like this 65338414Smckusick * - fill in request struct 65438414Smckusick * - links it into list 65541900Smckusick * - calls nfs_send() for first transmit 65641900Smckusick * - calls nfs_receive() to get reply 65738414Smckusick * - break down rpc header and return with nfs reply pointed to 65838414Smckusick * by mrep or error 65938414Smckusick * nb: always frees up mreq mbuf list 66038414Smckusick */ 66143351Smckusick nfs_request(vp, mreq, xid, procnum, procp, tryhard, mp, mrp, mdp, dposp) 66238414Smckusick struct vnode *vp; 66338414Smckusick struct mbuf *mreq; 66438414Smckusick u_long xid; 66541900Smckusick int procnum; 66641900Smckusick struct proc *procp; 66743351Smckusick int tryhard; 66838414Smckusick struct mount *mp; 66938414Smckusick struct mbuf **mrp; 67038414Smckusick struct mbuf **mdp; 67138414Smckusick caddr_t *dposp; 67238414Smckusick { 
67338414Smckusick register struct mbuf *m, *mrep; 67438414Smckusick register struct nfsreq *rep; 67538414Smckusick register u_long *p; 67638414Smckusick register int len; 67741900Smckusick struct nfsmount *nmp; 67838414Smckusick struct mbuf *md; 67939344Smckusick struct nfsreq *reph; 68038414Smckusick caddr_t dpos; 68138414Smckusick char *cp2; 68238414Smckusick int t1; 68345281Smckusick int s, compressed; 68441900Smckusick int error = 0; 68538414Smckusick 68641900Smckusick nmp = VFSTONFS(mp); 68738414Smckusick m = mreq; 68838414Smckusick MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK); 68938414Smckusick rep->r_xid = xid; 69041900Smckusick rep->r_nmp = nmp; 69138414Smckusick rep->r_vp = vp; 69241900Smckusick rep->r_procp = procp; 69343351Smckusick if ((nmp->nm_flag & NFSMNT_SOFT) || 69443351Smckusick ((nmp->nm_flag & NFSMNT_SPONGY) && !tryhard)) 69541900Smckusick rep->r_retry = nmp->nm_retry; 69638414Smckusick else 69740117Smckusick rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */ 69840117Smckusick rep->r_flags = rep->r_rexmit = 0; 69941900Smckusick /* 70041900Smckusick * Three cases: 70141900Smckusick * - non-idempotent requests on SOCK_DGRAM use NFS_MINIDEMTIMEO 70241900Smckusick * - idempotent requests on SOCK_DGRAM use 0 70341900Smckusick * - Reliable transports, NFS_RELIABLETIMEO 70441900Smckusick * Timeouts are still done on reliable transports to ensure detection 70543351Smckusick * of excessive connection delay. 
70641900Smckusick */ 70741900Smckusick if (nmp->nm_sotype != SOCK_DGRAM) 70841900Smckusick rep->r_timerinit = -NFS_RELIABLETIMEO; 70941900Smckusick else if (nonidempotent[procnum]) 71041900Smckusick rep->r_timerinit = -NFS_MINIDEMTIMEO; 71141900Smckusick else 71241900Smckusick rep->r_timerinit = 0; 71341900Smckusick rep->r_timer = rep->r_timerinit; 71438414Smckusick rep->r_mrep = NULL; 71538414Smckusick len = 0; 71638414Smckusick while (m) { 71738414Smckusick len += m->m_len; 71838414Smckusick m = m->m_next; 71938414Smckusick } 72041900Smckusick mreq->m_pkthdr.len = len; 72141900Smckusick mreq->m_pkthdr.rcvif = (struct ifnet *)0; 72245281Smckusick compressed = 0; 72345281Smckusick m = mreq; 72445281Smckusick if ((nmp->nm_flag & NFSMNT_COMPRESS) && compressrequest[procnum]) { 72545281Smckusick mreq = nfs_compress(mreq); 72645281Smckusick if (mreq != m) { 72745281Smckusick len = mreq->m_pkthdr.len; 72845281Smckusick compressed++; 72945281Smckusick } 73045281Smckusick } 73141900Smckusick /* 73241900Smckusick * For non-atomic protocols, insert a Sun RPC Record Mark. 73341900Smckusick */ 73441900Smckusick if ((nmp->nm_soflags & PR_ATOMIC) == 0) { 73541900Smckusick M_PREPEND(mreq, sizeof(u_long), M_WAIT); 73641900Smckusick *mtod(mreq, u_long *) = htonl(0x80000000 | len); 73741900Smckusick } 73841900Smckusick rep->r_mreq = mreq; 73938414Smckusick 74040117Smckusick /* 74140117Smckusick * Do the client side RPC. 74240117Smckusick */ 74340117Smckusick nfsstats.rpcrequests++; 74441900Smckusick /* 74541900Smckusick * Chain request into list of outstanding requests. Be sure 74641900Smckusick * to put it LAST so timer finds oldest requests first. 
74741900Smckusick */ 74840117Smckusick s = splnet(); 74939344Smckusick reph = &nfsreqh; 75041900Smckusick reph->r_prev->r_next = rep; 75141900Smckusick rep->r_prev = reph->r_prev; 75239344Smckusick reph->r_prev = rep; 75339344Smckusick rep->r_next = reph; 75440117Smckusick /* 75540117Smckusick * If backing off another request or avoiding congestion, don't 75640117Smckusick * send this one now but let timer do it. If not timing a request, 75740117Smckusick * do it now. 75840117Smckusick */ 75941900Smckusick if (nmp->nm_sent <= 0 || nmp->nm_sotype != SOCK_DGRAM || 76041900Smckusick (nmp->nm_currexmit == 0 && nmp->nm_sent < nmp->nm_window)) { 76141900Smckusick nmp->nm_sent++; 76241900Smckusick rep->r_flags |= R_SENT; 76341900Smckusick if (nmp->nm_rtt == -1) { 76441900Smckusick nmp->nm_rtt = 0; 76541900Smckusick rep->r_flags |= R_TIMING; 76641900Smckusick } 76740117Smckusick splx(s); 76841900Smckusick m = m_copym(mreq, 0, M_COPYALL, M_WAIT); 76941900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) 77043351Smckusick nfs_solock(&nmp->nm_flag); 77141900Smckusick error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep); 77241900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) 77341900Smckusick nfs_sounlock(&nmp->nm_flag); 77441900Smckusick if (error && NFSIGNORE_SOERROR(nmp->nm_soflags, error)) 77541900Smckusick nmp->nm_so->so_error = error = 0; 77641900Smckusick } else 77741900Smckusick splx(s); 77838414Smckusick 77938414Smckusick /* 78040117Smckusick * Wait for the reply from our send or the timer's. 78140117Smckusick */ 78241900Smckusick if (!error) 78341900Smckusick error = nfs_reply(nmp, rep); 78438414Smckusick 78540117Smckusick /* 78640117Smckusick * RPC done, unlink the request. 78740117Smckusick */ 78838414Smckusick s = splnet(); 78938414Smckusick rep->r_prev->r_next = rep->r_next; 79039344Smckusick rep->r_next->r_prev = rep->r_prev; 79138414Smckusick splx(s); 79241900Smckusick 79341900Smckusick /* 79441900Smckusick * If there was a successful reply and a tprintf msg. 
79541900Smckusick * tprintf a response. 79641900Smckusick */ 79741900Smckusick if (!error && (rep->r_flags & R_TPRINTFMSG)) { 79841900Smckusick if (rep->r_procp) 79943061Smarc tprintf(rep->r_procp->p_session, 80041900Smckusick "Nfs server %s, is alive again\n", 80141900Smckusick rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 80241900Smckusick else 80343061Smarc tprintf(NULL, "Nfs server %s, is alive again\n", 80441900Smckusick rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 80541900Smckusick } 80638414Smckusick m_freem(rep->r_mreq); 80745281Smckusick mrep = rep->r_mrep; 80838414Smckusick FREE((caddr_t)rep, M_NFSREQ); 80938414Smckusick if (error) 81038414Smckusick return (error); 81138414Smckusick 81245281Smckusick if (compressed) 81345281Smckusick mrep = nfs_uncompress(mrep); 81445281Smckusick md = mrep; 81538414Smckusick /* 81638414Smckusick * break down the rpc header and check if ok 81738414Smckusick */ 81838414Smckusick dpos = mtod(md, caddr_t); 81938414Smckusick nfsm_disect(p, u_long *, 5*NFSX_UNSIGNED); 82038414Smckusick p += 2; 82138414Smckusick if (*p++ == rpc_msgdenied) { 82238414Smckusick if (*p == rpc_mismatch) 82338414Smckusick error = EOPNOTSUPP; 82438414Smckusick else 82538414Smckusick error = EACCES; 82638414Smckusick m_freem(mrep); 82738414Smckusick return (error); 82838414Smckusick } 82938414Smckusick /* 83038414Smckusick * skip over the auth_verf, someday we may want to cache auth_short's 83138414Smckusick * for nfs_reqhead(), but for now just dump it 83238414Smckusick */ 83338414Smckusick if (*++p != 0) { 83438414Smckusick len = nfsm_rndup(fxdr_unsigned(long, *p)); 83538414Smckusick nfsm_adv(len); 83638414Smckusick } 83738414Smckusick nfsm_disect(p, u_long *, NFSX_UNSIGNED); 83838414Smckusick /* 0 == ok */ 83938414Smckusick if (*p == 0) { 84038414Smckusick nfsm_disect(p, u_long *, NFSX_UNSIGNED); 84138414Smckusick if (*p != 0) { 84238414Smckusick error = fxdr_unsigned(int, *p); 84338414Smckusick m_freem(mrep); 84438414Smckusick return (error); 
84538414Smckusick } 84638414Smckusick *mrp = mrep; 84738414Smckusick *mdp = md; 84838414Smckusick *dposp = dpos; 84938414Smckusick return (0); 85038414Smckusick } 85138414Smckusick m_freem(mrep); 85238414Smckusick return (EPROTONOSUPPORT); 85338414Smckusick nfsmout: 85438414Smckusick return (error); 85538414Smckusick } 85638414Smckusick 85738414Smckusick /* 85838414Smckusick * Get a request for the server main loop 85938414Smckusick * - receive a request via. nfs_soreceive() 86038414Smckusick * - verify it 86138414Smckusick * - fill in the cred struct. 86238414Smckusick */ 86342243Smckusick nfs_getreq(so, prog, vers, maxproc, nam, mrp, mdp, dposp, retxid, procnum, cr, 86445281Smckusick msk, mtch, wascomp) 86538414Smckusick struct socket *so; 86638414Smckusick u_long prog; 86738414Smckusick u_long vers; 86838414Smckusick int maxproc; 86938414Smckusick struct mbuf **nam; 87038414Smckusick struct mbuf **mrp; 87138414Smckusick struct mbuf **mdp; 87238414Smckusick caddr_t *dposp; 87338414Smckusick u_long *retxid; 87442243Smckusick u_long *procnum; 87538414Smckusick register struct ucred *cr; 87641900Smckusick struct mbuf *msk, *mtch; 87745281Smckusick int *wascomp; 87838414Smckusick { 87938414Smckusick register int i; 88039494Smckusick register u_long *p; 88139494Smckusick register long t1; 88239494Smckusick caddr_t dpos, cp2; 88339494Smckusick int error = 0; 88439494Smckusick struct mbuf *mrep, *md; 88539494Smckusick int len; 88638414Smckusick 88741900Smckusick if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 88841900Smckusick error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0); 88941900Smckusick } else { 89041900Smckusick mrep = (struct mbuf *)0; 89141900Smckusick do { 89241900Smckusick if (mrep) { 89341900Smckusick m_freem(*nam); 89441900Smckusick m_freem(mrep); 89541900Smckusick } 89641900Smckusick error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0); 89741900Smckusick } while (!error && nfs_badnam(*nam, msk, mtch)); 89841900Smckusick } 89941900Smckusick if 
(error) 90038414Smckusick return (error); 90138414Smckusick md = mrep; 90245281Smckusick mrep = nfs_uncompress(mrep); 90345281Smckusick if (mrep != md) { 90445281Smckusick *wascomp = 1; 90545281Smckusick md = mrep; 90645281Smckusick } else 90745281Smckusick *wascomp = 0; 90838414Smckusick dpos = mtod(mrep, caddr_t); 90938414Smckusick nfsm_disect(p, u_long *, 10*NFSX_UNSIGNED); 91038414Smckusick *retxid = *p++; 91138414Smckusick if (*p++ != rpc_call) { 91238414Smckusick m_freem(mrep); 91338414Smckusick return (ERPCMISMATCH); 91438414Smckusick } 91538414Smckusick if (*p++ != rpc_vers) { 91638414Smckusick m_freem(mrep); 91738414Smckusick return (ERPCMISMATCH); 91838414Smckusick } 91938414Smckusick if (*p++ != prog) { 92038414Smckusick m_freem(mrep); 92138414Smckusick return (EPROGUNAVAIL); 92238414Smckusick } 92338414Smckusick if (*p++ != vers) { 92438414Smckusick m_freem(mrep); 92538414Smckusick return (EPROGMISMATCH); 92638414Smckusick } 92742243Smckusick *procnum = fxdr_unsigned(u_long, *p++); 92842243Smckusick if (*procnum == NFSPROC_NULL) { 92938414Smckusick *mrp = mrep; 93038414Smckusick return (0); 93138414Smckusick } 93242243Smckusick if (*procnum > maxproc || *p++ != rpc_auth_unix) { 93338414Smckusick m_freem(mrep); 93438414Smckusick return (EPROCUNAVAIL); 93538414Smckusick } 93641900Smckusick len = fxdr_unsigned(int, *p++); 93741900Smckusick if (len < 0 || len > RPCAUTH_MAXSIZ) { 93841900Smckusick m_freem(mrep); 93941900Smckusick return (EBADRPC); 94041900Smckusick } 94139494Smckusick len = fxdr_unsigned(int, *++p); 94241900Smckusick if (len < 0 || len > NFS_MAXNAMLEN) { 94341900Smckusick m_freem(mrep); 94441900Smckusick return (EBADRPC); 94541900Smckusick } 94639494Smckusick nfsm_adv(nfsm_rndup(len)); 94738414Smckusick nfsm_disect(p, u_long *, 3*NFSX_UNSIGNED); 94838414Smckusick cr->cr_uid = fxdr_unsigned(uid_t, *p++); 94938414Smckusick cr->cr_gid = fxdr_unsigned(gid_t, *p++); 95039494Smckusick len = fxdr_unsigned(int, *p); 95141900Smckusick if (len < 0 || 
len > RPCAUTH_UNIXGIDS) { 95238414Smckusick m_freem(mrep); 95338414Smckusick return (EBADRPC); 95438414Smckusick } 95539494Smckusick nfsm_disect(p, u_long *, (len + 2)*NFSX_UNSIGNED); 95639494Smckusick for (i = 1; i <= len; i++) 95741900Smckusick if (i < NGROUPS) 95841900Smckusick cr->cr_groups[i] = fxdr_unsigned(gid_t, *p++); 95941900Smckusick else 96041900Smckusick p++; 96141900Smckusick cr->cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1); 96238414Smckusick /* 96338414Smckusick * Do we have any use for the verifier. 96438414Smckusick * According to the "Remote Procedure Call Protocol Spec." it 96538414Smckusick * should be AUTH_NULL, but some clients make it AUTH_UNIX? 96638414Smckusick * For now, just skip over it 96738414Smckusick */ 96839494Smckusick len = fxdr_unsigned(int, *++p); 96941900Smckusick if (len < 0 || len > RPCAUTH_MAXSIZ) { 97041900Smckusick m_freem(mrep); 97141900Smckusick return (EBADRPC); 97241900Smckusick } 97339494Smckusick if (len > 0) 97439494Smckusick nfsm_adv(nfsm_rndup(len)); 97538414Smckusick *mrp = mrep; 97638414Smckusick *mdp = md; 97738414Smckusick *dposp = dpos; 97838414Smckusick return (0); 97938414Smckusick nfsmout: 98038414Smckusick return (error); 98138414Smckusick } 98238414Smckusick 98338414Smckusick /* 98438414Smckusick * Generate the rpc reply header 98538414Smckusick * siz arg. 
is used to decide if adding a cluster is worthwhile 98638414Smckusick */ 98738414Smckusick nfs_rephead(siz, retxid, err, mrq, mbp, bposp) 98838414Smckusick int siz; 98938414Smckusick u_long retxid; 99038414Smckusick int err; 99138414Smckusick struct mbuf **mrq; 99238414Smckusick struct mbuf **mbp; 99338414Smckusick caddr_t *bposp; 99438414Smckusick { 99539494Smckusick register u_long *p; 99639494Smckusick register long t1; 99739494Smckusick caddr_t bpos; 99839494Smckusick struct mbuf *mreq, *mb, *mb2; 99938414Smckusick 100038414Smckusick NFSMGETHDR(mreq); 100138414Smckusick mb = mreq; 100238414Smckusick if ((siz+RPC_REPLYSIZ) > MHLEN) 100341900Smckusick MCLGET(mreq, M_WAIT); 100438414Smckusick p = mtod(mreq, u_long *); 100538414Smckusick mreq->m_len = 6*NFSX_UNSIGNED; 100638414Smckusick bpos = ((caddr_t)p)+mreq->m_len; 100738414Smckusick *p++ = retxid; 100838414Smckusick *p++ = rpc_reply; 100938414Smckusick if (err == ERPCMISMATCH) { 101038414Smckusick *p++ = rpc_msgdenied; 101138414Smckusick *p++ = rpc_mismatch; 101238414Smckusick *p++ = txdr_unsigned(2); 101338414Smckusick *p = txdr_unsigned(2); 101438414Smckusick } else { 101538414Smckusick *p++ = rpc_msgaccepted; 101638414Smckusick *p++ = 0; 101738414Smckusick *p++ = 0; 101838414Smckusick switch (err) { 101938414Smckusick case EPROGUNAVAIL: 102038414Smckusick *p = txdr_unsigned(RPC_PROGUNAVAIL); 102138414Smckusick break; 102238414Smckusick case EPROGMISMATCH: 102338414Smckusick *p = txdr_unsigned(RPC_PROGMISMATCH); 102438414Smckusick nfsm_build(p, u_long *, 2*NFSX_UNSIGNED); 102538414Smckusick *p++ = txdr_unsigned(2); 102638414Smckusick *p = txdr_unsigned(2); /* someday 3 */ 102738414Smckusick break; 102838414Smckusick case EPROCUNAVAIL: 102938414Smckusick *p = txdr_unsigned(RPC_PROCUNAVAIL); 103038414Smckusick break; 103138414Smckusick default: 103238414Smckusick *p = 0; 103338414Smckusick if (err != VNOVAL) { 103438414Smckusick nfsm_build(p, u_long *, NFSX_UNSIGNED); 103538414Smckusick *p = 
txdr_unsigned(err); 103638414Smckusick } 103738414Smckusick break; 103838414Smckusick }; 103938414Smckusick } 104038414Smckusick *mrq = mreq; 104138414Smckusick *mbp = mb; 104238414Smckusick *bposp = bpos; 104338414Smckusick if (err != 0 && err != VNOVAL) 104438414Smckusick nfsstats.srvrpc_errs++; 104538414Smckusick return (0); 104638414Smckusick } 104738414Smckusick 104838414Smckusick /* 104938414Smckusick * Nfs timer routine 105038414Smckusick * Scan the nfsreq list and retranmit any requests that have timed out 105138414Smckusick * To avoid retransmission attempts on STREAM sockets (in the future) make 105240117Smckusick * sure to set the r_retry field to 0 (implies nm_retry == 0). 105338414Smckusick */ 105438414Smckusick nfs_timer() 105538414Smckusick { 105638414Smckusick register struct nfsreq *rep; 105738414Smckusick register struct mbuf *m; 105838414Smckusick register struct socket *so; 105941900Smckusick register struct nfsmount *nmp; 106040117Smckusick int s, error; 106138414Smckusick 106238414Smckusick s = splnet(); 106341900Smckusick for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) { 106441900Smckusick nmp = rep->r_nmp; 106541900Smckusick if (rep->r_mrep || (rep->r_flags & R_SOFTTERM) || 106641900Smckusick (so = nmp->nm_so) == NULL) 106741900Smckusick continue; 106841900Smckusick if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp)) { 106941900Smckusick rep->r_flags |= R_SOFTTERM; 107041900Smckusick continue; 107141900Smckusick } 107240117Smckusick if (rep->r_flags & R_TIMING) /* update rtt in mount */ 107341900Smckusick nmp->nm_rtt++; 107441900Smckusick /* If not timed out */ 107541900Smckusick if (++rep->r_timer < nmp->nm_rto) 107641900Smckusick continue; 107740117Smckusick /* Do backoff and save new timeout in mount */ 107840117Smckusick if (rep->r_flags & R_TIMING) { 107941900Smckusick nfs_backofftimer(nmp); 108040117Smckusick rep->r_flags &= ~R_TIMING; 108141900Smckusick nmp->nm_rtt = -1; 108240117Smckusick } 108340117Smckusick 
if (rep->r_flags & R_SENT) { 108440117Smckusick rep->r_flags &= ~R_SENT; 108541900Smckusick nmp->nm_sent--; 108640117Smckusick } 108741900Smckusick 108841900Smckusick /* 108941900Smckusick * Check for too many retries on soft mount. 109041900Smckusick * nb: For hard mounts, r_retry == NFS_MAXREXMIT+1 109141900Smckusick */ 109241900Smckusick if (++rep->r_rexmit > NFS_MAXREXMIT) 109340117Smckusick rep->r_rexmit = NFS_MAXREXMIT; 109440117Smckusick 109541900Smckusick /* 109641900Smckusick * Check for server not responding 109741900Smckusick */ 109841900Smckusick if ((rep->r_flags & R_TPRINTFMSG) == 0 && 109943351Smckusick rep->r_rexmit > NFS_FISHY) { 110041900Smckusick if (rep->r_procp && rep->r_procp->p_session) 110143061Smarc tprintf(rep->r_procp->p_session, 110241900Smckusick "Nfs server %s, not responding\n", 110341900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 110441900Smckusick else 110543061Smarc tprintf(NULL, 110641900Smckusick "Nfs server %s, not responding\n", 110741900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 110841900Smckusick rep->r_flags |= R_TPRINTFMSG; 110941900Smckusick } 111043351Smckusick if (rep->r_rexmit >= rep->r_retry) { /* too many */ 111141900Smckusick nfsstats.rpctimeouts++; 111241900Smckusick rep->r_flags |= R_SOFTTERM; 111341900Smckusick continue; 111441900Smckusick } 111543351Smckusick if (nmp->nm_sotype != SOCK_DGRAM) 111643351Smckusick continue; 111741900Smckusick 111841900Smckusick /* 111941900Smckusick * If there is enough space and the window allows.. 
112041900Smckusick * Resend it 112141900Smckusick */ 112241900Smckusick if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len && 112341900Smckusick nmp->nm_sent < nmp->nm_window && 112441900Smckusick (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){ 112541900Smckusick nfsstats.rpcretries++; 112641900Smckusick if ((nmp->nm_flag & NFSMNT_NOCONN) == 0) 112741900Smckusick error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, 112841900Smckusick (caddr_t)0, (struct mbuf *)0, (struct mbuf *)0); 112941900Smckusick else 113041900Smckusick error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, 113141900Smckusick nmp->nm_nam, (struct mbuf *)0, (struct mbuf *)0); 113241900Smckusick if (error) { 113341900Smckusick if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) 113441900Smckusick so->so_error = 0; 113541900Smckusick } else { 113641900Smckusick /* 113741900Smckusick * We need to time the request even though we 113841900Smckusick * are retransmitting. 113941900Smckusick */ 114041900Smckusick nmp->nm_rtt = 0; 114141900Smckusick nmp->nm_sent++; 114241900Smckusick rep->r_flags |= (R_SENT|R_TIMING); 114341900Smckusick rep->r_timer = rep->r_timerinit; 114441900Smckusick } 114541900Smckusick } 114640117Smckusick } 114740117Smckusick splx(s); 114840117Smckusick timeout(nfs_timer, (caddr_t)0, hz/NFS_HZ); 114940117Smckusick } 115040117Smckusick 115140117Smckusick /* 115240117Smckusick * NFS timer update and backoff. The "Jacobson/Karels/Karn" scheme is 115340117Smckusick * used here. The timer state is held in the nfsmount structure and 115440117Smckusick * a single request is used to clock the response. When successful 115540117Smckusick * the rtt smoothing in nfs_updatetimer is used, when failed the backoff 115640117Smckusick * is done by nfs_backofftimer. We also log failure messages in these 115740117Smckusick * routines. 
 *
 * Congestion variables are held in the nfshost structure which
 * is referenced by nfsmounts and shared per-server. This separation
 * makes it possible to do per-mount timing which allows varying disk
 * access times to be dealt with, while preserving a network-oriented
 * congestion control scheme.
 * (NOTE: the routines that follow read and update the window state
 * through the nfsmount fields nm_window, nm_ssthresh and nm_winext.)
 *
 * The windowing implements the Jacobson/Karels slow-start algorithm
 * with adjusted scaling factors. We start with one request, then send
 * 4 more after each success until the ssthresh limit is reached, then
 * we increment at a rate proportional to the window. On failure, we
 * remember 3/4 of the current window and clamp the send limit to 1. Note
 * that ICMP source quench is not reflected in so->so_error so we ignore
 * that for now.
 *
 * NFS behaves much more like a transport protocol with these changes,
 * shedding the teenage pedal-to-the-metal tendencies of "other"
 * implementations.
 *
 * Timers and congestion avoidance by Tom Talpey, Open Software Foundation.
 */

/*
 * The TCP algorithm was not forgiving enough. Because the NFS server
 * responds only after performing lookups/diskio/etc, we have to be
 * more prepared to accept a spiky variance.
The TCP algorithm is: 118441900Smckusick * TCP_RTO(nmp) ((((nmp)->nm_srtt >> 2) + (nmp)->nm_rttvar) >> 1) 118540117Smckusick */ 118641900Smckusick #define NFS_RTO(nmp) (((nmp)->nm_srtt >> 3) + (nmp)->nm_rttvar) 118740117Smckusick 118841900Smckusick nfs_updatetimer(nmp) 118941900Smckusick register struct nfsmount *nmp; 119040117Smckusick { 119140117Smckusick 119240117Smckusick /* If retransmitted, clear and return */ 119341900Smckusick if (nmp->nm_rexmit || nmp->nm_currexmit) { 119441900Smckusick nmp->nm_rexmit = nmp->nm_currexmit = 0; 119540117Smckusick return; 119640117Smckusick } 119740117Smckusick /* If have a measurement, do smoothing */ 119841900Smckusick if (nmp->nm_srtt) { 119940117Smckusick register short delta; 120041900Smckusick delta = nmp->nm_rtt - (nmp->nm_srtt >> 3); 120141900Smckusick if ((nmp->nm_srtt += delta) <= 0) 120241900Smckusick nmp->nm_srtt = 1; 120340117Smckusick if (delta < 0) 120440117Smckusick delta = -delta; 120541900Smckusick delta -= (nmp->nm_rttvar >> 2); 120641900Smckusick if ((nmp->nm_rttvar += delta) <= 0) 120741900Smckusick nmp->nm_rttvar = 1; 120840117Smckusick /* Else initialize */ 120940117Smckusick } else { 121041900Smckusick nmp->nm_rttvar = nmp->nm_rtt << 1; 121141900Smckusick if (nmp->nm_rttvar == 0) nmp->nm_rttvar = 2; 121241900Smckusick nmp->nm_srtt = nmp->nm_rttvar << 2; 121340117Smckusick } 121440117Smckusick /* Compute new Retransmission TimeOut and clip */ 121541900Smckusick nmp->nm_rto = NFS_RTO(nmp); 121641900Smckusick if (nmp->nm_rto < NFS_MINTIMEO) 121741900Smckusick nmp->nm_rto = NFS_MINTIMEO; 121841900Smckusick else if (nmp->nm_rto > NFS_MAXTIMEO) 121941900Smckusick nmp->nm_rto = NFS_MAXTIMEO; 122040117Smckusick 122140117Smckusick /* Update window estimate */ 122241900Smckusick if (nmp->nm_window < nmp->nm_ssthresh) /* quickly */ 122341900Smckusick nmp->nm_window += 4; 122440117Smckusick else { /* slowly */ 122541900Smckusick register long incr = ++nmp->nm_winext; 122641900Smckusick incr = (incr * incr) / 
nmp->nm_window; 122740117Smckusick if (incr > 0) { 122841900Smckusick nmp->nm_winext = 0; 122941900Smckusick ++nmp->nm_window; 123040117Smckusick } 123140117Smckusick } 123241900Smckusick if (nmp->nm_window > NFS_MAXWINDOW) 123341900Smckusick nmp->nm_window = NFS_MAXWINDOW; 123440117Smckusick } 123540117Smckusick 123641900Smckusick nfs_backofftimer(nmp) 123741900Smckusick register struct nfsmount *nmp; 123840117Smckusick { 123940117Smckusick register unsigned long newrto; 124040117Smckusick 124140117Smckusick /* Clip shift count */ 124241900Smckusick if (++nmp->nm_rexmit > 8 * sizeof nmp->nm_rto) 124341900Smckusick nmp->nm_rexmit = 8 * sizeof nmp->nm_rto; 124440117Smckusick /* Back off RTO exponentially */ 124541900Smckusick newrto = NFS_RTO(nmp); 124641900Smckusick newrto <<= (nmp->nm_rexmit - 1); 124740117Smckusick if (newrto == 0 || newrto > NFS_MAXTIMEO) 124840117Smckusick newrto = NFS_MAXTIMEO; 124941900Smckusick nmp->nm_rto = newrto; 125040117Smckusick 125140117Smckusick /* If too many retries, message, assume a bogus RTT and re-measure */ 125241900Smckusick if (nmp->nm_currexmit < nmp->nm_rexmit) { 125341900Smckusick nmp->nm_currexmit = nmp->nm_rexmit; 125441900Smckusick if (nmp->nm_currexmit >= nfsrexmtthresh) { 125541900Smckusick if (nmp->nm_currexmit == nfsrexmtthresh) { 125641900Smckusick nmp->nm_rttvar += (nmp->nm_srtt >> 2); 125741900Smckusick nmp->nm_srtt = 0; 125838414Smckusick } 125938414Smckusick } 126038414Smckusick } 126140117Smckusick /* Close down window but remember this point (3/4 current) for later */ 126241900Smckusick nmp->nm_ssthresh = ((nmp->nm_window << 1) + nmp->nm_window) >> 2; 126341900Smckusick nmp->nm_window = 1; 126441900Smckusick nmp->nm_winext = 0; 126538414Smckusick } 126638414Smckusick 126738414Smckusick /* 126841900Smckusick * Test for a termination signal pending on procp. 126941900Smckusick * This is used for NFSMNT_INT mounts. 
127038414Smckusick */ 127141900Smckusick nfs_sigintr(p) 127241900Smckusick register struct proc *p; 127341900Smckusick { 127441900Smckusick if (p && p->p_sig && (((p->p_sig &~ p->p_sigmask) &~ p->p_sigignore) & 127541900Smckusick NFSINT_SIGMASK)) 127641900Smckusick return (1); 127741900Smckusick else 127841900Smckusick return (0); 127941900Smckusick } 128040117Smckusick 128141900Smckusick /* 128241900Smckusick * Lock a socket against others. 128341900Smckusick * Necessary for STREAM sockets to ensure you get an entire rpc request/reply 128441900Smckusick * and also to avoid race conditions between the processes with nfs requests 128541900Smckusick * in progress when a reconnect is necessary. 128641900Smckusick */ 128743351Smckusick nfs_solock(flagp) 128843351Smckusick register int *flagp; 128938414Smckusick { 129040117Smckusick 129141900Smckusick while (*flagp & NFSMNT_SCKLOCK) { 129241900Smckusick *flagp |= NFSMNT_WANTSCK; 129343351Smckusick (void) tsleep((caddr_t)flagp, PZERO-1, "nfsolck", 0); 129440117Smckusick } 129541900Smckusick *flagp |= NFSMNT_SCKLOCK; 129641900Smckusick } 129740117Smckusick 129841900Smckusick /* 129941900Smckusick * Unlock the stream socket for others. 130041900Smckusick */ 130141900Smckusick nfs_sounlock(flagp) 130243351Smckusick register int *flagp; 130341900Smckusick { 130441900Smckusick 130541900Smckusick if ((*flagp & NFSMNT_SCKLOCK) == 0) 130641900Smckusick panic("nfs sounlock"); 130741900Smckusick *flagp &= ~NFSMNT_SCKLOCK; 130841900Smckusick if (*flagp & NFSMNT_WANTSCK) { 130941900Smckusick *flagp &= ~NFSMNT_WANTSCK; 131041900Smckusick wakeup((caddr_t)flagp); 131140117Smckusick } 131238414Smckusick } 131341900Smckusick 131441900Smckusick /* 131541900Smckusick * This function compares two net addresses by family and returns TRUE 131641900Smckusick * if they are the same. 131741900Smckusick * If there is any doubt, return FALSE. 
131841900Smckusick */ 131941900Smckusick nfs_netaddr_match(nam1, nam2) 132041900Smckusick struct mbuf *nam1, *nam2; 132141900Smckusick { 132241900Smckusick register struct sockaddr *saddr1, *saddr2; 132341900Smckusick 132441900Smckusick saddr1 = mtod(nam1, struct sockaddr *); 132541900Smckusick saddr2 = mtod(nam2, struct sockaddr *); 132641900Smckusick if (saddr1->sa_family != saddr2->sa_family) 132741900Smckusick return (0); 132841900Smckusick 132941900Smckusick /* 133041900Smckusick * Must do each address family separately since unused fields 133141900Smckusick * are undefined values and not always zeroed. 133241900Smckusick */ 133341900Smckusick switch (saddr1->sa_family) { 133441900Smckusick case AF_INET: 133541900Smckusick if (((struct sockaddr_in *)saddr1)->sin_addr.s_addr == 133641900Smckusick ((struct sockaddr_in *)saddr2)->sin_addr.s_addr) 133741900Smckusick return (1); 133841900Smckusick break; 133941900Smckusick default: 134041900Smckusick break; 134141900Smckusick }; 134241900Smckusick return (0); 134341900Smckusick } 134441900Smckusick 134541900Smckusick /* 134641900Smckusick * Check the hostname fields for nfsd's mask and match fields. 
134741900Smckusick * By address family: 134841900Smckusick * - Bitwise AND the mask with the host address field 134941900Smckusick * - Compare for == with match 135041900Smckusick * return TRUE if not equal 135141900Smckusick */ 135241900Smckusick nfs_badnam(nam, msk, mtch) 135341900Smckusick register struct mbuf *nam, *msk, *mtch; 135441900Smckusick { 135541900Smckusick switch (mtod(nam, struct sockaddr *)->sa_family) { 135641900Smckusick case AF_INET: 135741900Smckusick return ((mtod(nam, struct sockaddr_in *)->sin_addr.s_addr & 135841900Smckusick mtod(msk, struct sockaddr_in *)->sin_addr.s_addr) != 135941900Smckusick mtod(mtch, struct sockaddr_in *)->sin_addr.s_addr); 136041900Smckusick default: 136141900Smckusick printf("nfs_badmatch, unknown sa_family\n"); 136241900Smckusick return (0); 136341900Smckusick }; 136441900Smckusick } 1365