138414Smckusick /* 238414Smckusick * Copyright (c) 1989 The Regents of the University of California. 338414Smckusick * All rights reserved. 438414Smckusick * 538414Smckusick * This code is derived from software contributed to Berkeley by 638414Smckusick * Rick Macklem at The University of Guelph. 738414Smckusick * 838414Smckusick * Redistribution and use in source and binary forms are permitted 938414Smckusick * provided that the above copyright notice and this paragraph are 1038414Smckusick * duplicated in all such forms and that any documentation, 1138414Smckusick * advertising materials, and other materials related to such 1238414Smckusick * distribution and use acknowledge that the software was developed 1338414Smckusick * by the University of California, Berkeley. The name of the 1438414Smckusick * University may not be used to endorse or promote products derived 1538414Smckusick * from this software without specific prior written permission. 1638414Smckusick * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 1738414Smckusick * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 1838414Smckusick * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
1938414Smckusick * 20*43351Smckusick * @(#)nfs_socket.c 7.17 (Berkeley) 06/21/90 2138414Smckusick */ 2238414Smckusick 2338414Smckusick /* 2441900Smckusick * Socket operations for use by nfs 2538414Smckusick */ 2638414Smckusick 2738414Smckusick #include "types.h" 2838414Smckusick #include "param.h" 2938414Smckusick #include "uio.h" 3038414Smckusick #include "user.h" 3140117Smckusick #include "proc.h" 3240117Smckusick #include "signal.h" 3338414Smckusick #include "mount.h" 3438414Smckusick #include "kernel.h" 3538414Smckusick #include "malloc.h" 3638414Smckusick #include "mbuf.h" 3738414Smckusick #include "vnode.h" 3838414Smckusick #include "domain.h" 3938414Smckusick #include "protosw.h" 4038414Smckusick #include "socket.h" 4138414Smckusick #include "socketvar.h" 4242877Smckusick #include "../netinet/in.h" 4342877Smckusick #include "../netinet/tcp.h" 4438414Smckusick #include "rpcv2.h" 4538414Smckusick #include "nfsv2.h" 4638414Smckusick #include "nfs.h" 4738414Smckusick #include "xdr_subs.h" 4838414Smckusick #include "nfsm_subs.h" 4938414Smckusick #include "nfsmount.h" 5038414Smckusick 5140117Smckusick #include "syslog.h" 5240117Smckusick 5338414Smckusick #define TRUE 1 54*43351Smckusick #define FALSE 0 5538414Smckusick 5640117Smckusick /* 5738414Smckusick * External data, mostly RPC constants in XDR form 5838414Smckusick */ 5938414Smckusick extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix, 6038414Smckusick rpc_msgaccepted, rpc_call; 6138414Smckusick extern u_long nfs_prog, nfs_vers; 62*43351Smckusick /* Maybe these should be bits in a u_long ?? 
*/ 6341900Smckusick extern int nonidempotent[NFS_NPROCS]; 6441900Smckusick int nfs_sbwait(); 6541900Smckusick void nfs_disconnect(); 6641900Smckusick 6738414Smckusick int nfsrv_null(), 6838414Smckusick nfsrv_getattr(), 6938414Smckusick nfsrv_setattr(), 7038414Smckusick nfsrv_lookup(), 7138414Smckusick nfsrv_readlink(), 7238414Smckusick nfsrv_read(), 7338414Smckusick nfsrv_write(), 7438414Smckusick nfsrv_create(), 7538414Smckusick nfsrv_remove(), 7638414Smckusick nfsrv_rename(), 7738414Smckusick nfsrv_link(), 7838414Smckusick nfsrv_symlink(), 7938414Smckusick nfsrv_mkdir(), 8038414Smckusick nfsrv_rmdir(), 8138414Smckusick nfsrv_readdir(), 8238414Smckusick nfsrv_statfs(), 8338414Smckusick nfsrv_noop(); 8438414Smckusick 8538414Smckusick int (*nfsrv_procs[NFS_NPROCS])() = { 8638414Smckusick nfsrv_null, 8738414Smckusick nfsrv_getattr, 8838414Smckusick nfsrv_setattr, 8938414Smckusick nfsrv_noop, 9038414Smckusick nfsrv_lookup, 9138414Smckusick nfsrv_readlink, 9238414Smckusick nfsrv_read, 9338414Smckusick nfsrv_noop, 9438414Smckusick nfsrv_write, 9538414Smckusick nfsrv_create, 9638414Smckusick nfsrv_remove, 9738414Smckusick nfsrv_rename, 9838414Smckusick nfsrv_link, 9938414Smckusick nfsrv_symlink, 10038414Smckusick nfsrv_mkdir, 10138414Smckusick nfsrv_rmdir, 10238414Smckusick nfsrv_readdir, 10338414Smckusick nfsrv_statfs, 10438414Smckusick }; 10538414Smckusick 10640117Smckusick struct nfsreq nfsreqh; 10740117Smckusick int nfsrexmtthresh = NFS_FISHY; 10841900Smckusick int nfs_tcpnodelay = 1; 10938414Smckusick 11038414Smckusick /* 11141900Smckusick * Initialize sockets and congestion for a new NFS connection. 11240117Smckusick * We do not free the sockaddr if error. 
11338414Smckusick */ 11441900Smckusick nfs_connect(nmp) 11540117Smckusick register struct nfsmount *nmp; 11640117Smckusick { 11741900Smckusick register struct socket *so; 11841900Smckusick int s, error; 11940117Smckusick struct mbuf *m; 12040117Smckusick 12141900Smckusick nmp->nm_so = (struct socket *)0; 12241900Smckusick if (error = socreate(mtod(nmp->nm_nam, struct sockaddr *)->sa_family, 12341900Smckusick &nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto)) 12440117Smckusick goto bad; 12541900Smckusick so = nmp->nm_so; 12641900Smckusick nmp->nm_soflags = so->so_proto->pr_flags; 12740117Smckusick 12841900Smckusick /* 12941900Smckusick * Protocols that do not require connections may be optionally left 13041900Smckusick * unconnected for servers that reply from a port other than NFS_PORT. 13141900Smckusick */ 13241900Smckusick if (nmp->nm_flag & NFSMNT_NOCONN) { 13341900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) { 13441900Smckusick error = ENOTCONN; 13540117Smckusick goto bad; 13640117Smckusick } 13741900Smckusick } else { 13841900Smckusick if (error = soconnect(so, nmp->nm_nam)) 13940117Smckusick goto bad; 14041900Smckusick 14141900Smckusick /* 14241900Smckusick * Wait for the connection to complete. Cribbed from the 14341900Smckusick * connect system call but with the wait at negative prio. 
14441900Smckusick */ 14541900Smckusick s = splnet(); 14641900Smckusick while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) 147*43351Smckusick (void) tsleep((caddr_t)&so->so_timeo, PSOCK, "nfscon", 0); 14841900Smckusick splx(s); 14941900Smckusick if (so->so_error) { 15041900Smckusick error = so->so_error; 15141900Smckusick goto bad; 15241900Smckusick } 15340117Smckusick } 15441900Smckusick if (nmp->nm_sotype == SOCK_DGRAM) { 155*43351Smckusick if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) { 15641900Smckusick so->so_rcv.sb_timeo = (5 * hz); 15741900Smckusick so->so_snd.sb_timeo = (5 * hz); 15841900Smckusick } else { 15941900Smckusick so->so_rcv.sb_timeo = 0; 16041900Smckusick so->so_snd.sb_timeo = 0; 16141900Smckusick } 16241900Smckusick if (error = soreserve(so, nmp->nm_wsize + NFS_MAXPKTHDR, 163*43351Smckusick nmp->nm_rsize + NFS_MAXPKTHDR)) 16441900Smckusick goto bad; 16541900Smckusick } else { 166*43351Smckusick if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) { 16741900Smckusick so->so_rcv.sb_timeo = (5 * hz); 16841900Smckusick so->so_snd.sb_timeo = (5 * hz); 16941900Smckusick } else { 17041900Smckusick so->so_rcv.sb_timeo = 0; 17141900Smckusick so->so_snd.sb_timeo = 0; 17241900Smckusick } 17341900Smckusick if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 17441900Smckusick MGET(m, M_WAIT, MT_SOOPTS); 17541900Smckusick *mtod(m, int *) = 1; 17641900Smckusick m->m_len = sizeof(int); 17741900Smckusick sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m); 17841900Smckusick } 17941900Smckusick if (so->so_proto->pr_domain->dom_family == AF_INET && 18041900Smckusick so->so_proto->pr_protocol == IPPROTO_TCP && 18141900Smckusick nfs_tcpnodelay) { 18241900Smckusick MGET(m, M_WAIT, MT_SOOPTS); 18341900Smckusick *mtod(m, int *) = 1; 18441900Smckusick m->m_len = sizeof(int); 18541900Smckusick sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m); 18641900Smckusick } 18741900Smckusick if (error = soreserve(so, 188*43351Smckusick nmp->nm_wsize + NFS_MAXPKTHDR + 
sizeof(u_long), 18941900Smckusick nmp->nm_rsize + NFS_MAXPKTHDR + sizeof(u_long))) 19041900Smckusick goto bad; 19141900Smckusick } 19241900Smckusick so->so_rcv.sb_flags |= SB_NOINTR; 19341900Smckusick so->so_snd.sb_flags |= SB_NOINTR; 19440117Smckusick 19541900Smckusick /* Initialize other non-zero congestion variables */ 19641900Smckusick nmp->nm_rto = NFS_TIMEO; 19741900Smckusick nmp->nm_window = 2; /* Initial send window */ 19841900Smckusick nmp->nm_ssthresh = NFS_MAXWINDOW; /* Slowstart threshold */ 19941900Smckusick nmp->nm_rttvar = nmp->nm_rto << 1; 20041900Smckusick nmp->nm_sent = 0; 20141900Smckusick nmp->nm_currexmit = 0; 20241900Smckusick return (0); 20340117Smckusick 20441900Smckusick bad: 20541900Smckusick nfs_disconnect(nmp); 20641900Smckusick return (error); 20741900Smckusick } 20840117Smckusick 20941900Smckusick /* 21041900Smckusick * Reconnect routine: 21141900Smckusick * Called when a connection is broken on a reliable protocol. 21241900Smckusick * - clean up the old socket 21341900Smckusick * - nfs_connect() again 21441900Smckusick * - set R_MUSTRESEND for all outstanding requests on mount point 21541900Smckusick * If this fails the mount point is DEAD! 21641900Smckusick * nb: Must be called with the nfs_solock() set on the mount point. 
21741900Smckusick */ 21841900Smckusick nfs_reconnect(rep, nmp) 21941900Smckusick register struct nfsreq *rep; 22041900Smckusick register struct nfsmount *nmp; 22141900Smckusick { 22241900Smckusick register struct nfsreq *rp; 22341900Smckusick int error; 22440117Smckusick 22541900Smckusick if (rep->r_procp) 22643061Smarc tprintf(rep->r_procp->p_session, 22741900Smckusick "Nfs server %s, trying reconnect\n", 22841900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 22941900Smckusick else 23043061Smarc tprintf(NULL, "Nfs server %s, trying a reconnect\n", 23141900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 23241900Smckusick while (error = nfs_connect(nmp)) { 23342243Smckusick #ifdef lint 23442243Smckusick error = error; 23542243Smckusick #endif /* lint */ 23641900Smckusick if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp)) 23741900Smckusick return (EINTR); 238*43351Smckusick (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0); 23940117Smckusick } 24041900Smckusick if (rep->r_procp) 24143061Smarc tprintf(rep->r_procp->p_session, 24241900Smckusick "Nfs server %s, reconnected\n", 24341900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 24441900Smckusick else 24543061Smarc tprintf(NULL, "Nfs server %s, reconnected\n", 24641900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 24741900Smckusick 24841900Smckusick /* 24941900Smckusick * Loop through outstanding request list and fix up all requests 25041900Smckusick * on old socket. 25141900Smckusick */ 25241900Smckusick rp = nfsreqh.r_next; 25341900Smckusick while (rp != &nfsreqh) { 25441900Smckusick if (rp->r_nmp == nmp) 25541900Smckusick rp->r_flags |= R_MUSTRESEND; 25641900Smckusick rp = rp->r_next; 25740117Smckusick } 25840117Smckusick return (0); 25940117Smckusick } 26040117Smckusick 26140117Smckusick /* 26240117Smckusick * NFS disconnect. Clean up and unlink. 
26340117Smckusick */ 26441900Smckusick void 26540117Smckusick nfs_disconnect(nmp) 26640117Smckusick register struct nfsmount *nmp; 26740117Smckusick { 26841900Smckusick register struct socket *so; 26940117Smckusick 27041900Smckusick if (nmp->nm_so) { 27141900Smckusick so = nmp->nm_so; 27241900Smckusick nmp->nm_so = (struct socket *)0; 27341900Smckusick soshutdown(so, 2); 27441900Smckusick soclose(so); 27540117Smckusick } 27640117Smckusick } 27740117Smckusick 27840117Smckusick /* 27941900Smckusick * This is the nfs send routine. For connection based socket types, it 28041900Smckusick * must be called with an nfs_solock() on the socket. 28141900Smckusick * "rep == NULL" indicates that it has been called from a server. 28240117Smckusick */ 28341900Smckusick nfs_send(so, nam, top, rep) 28438414Smckusick register struct socket *so; 28538414Smckusick struct mbuf *nam; 28641900Smckusick register struct mbuf *top; 28741900Smckusick struct nfsreq *rep; 28838414Smckusick { 28941900Smckusick struct mbuf *sendnam; 29041900Smckusick int error, soflags; 29138414Smckusick 29241900Smckusick if (rep) { 29341900Smckusick if (rep->r_flags & R_SOFTTERM) { 29440117Smckusick m_freem(top); 29541900Smckusick return (EINTR); 29640117Smckusick } 29743062Smckusick if (rep->r_nmp->nm_so == NULL && 29841900Smckusick (error = nfs_reconnect(rep, rep->r_nmp))) 29941900Smckusick return (error); 30041900Smckusick rep->r_flags &= ~R_MUSTRESEND; 30143062Smckusick so = rep->r_nmp->nm_so; 30241900Smckusick soflags = rep->r_nmp->nm_soflags; 30341900Smckusick } else 30441900Smckusick soflags = so->so_proto->pr_flags; 30541900Smckusick if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED)) 30641900Smckusick sendnam = (struct mbuf *)0; 30741900Smckusick else 30841900Smckusick sendnam = nam; 30941900Smckusick 31041900Smckusick error = sosend(so, sendnam, (struct uio *)0, top, 31141900Smckusick (struct mbuf *)0, 0); 31241900Smckusick if (error == EWOULDBLOCK && rep) { 31341900Smckusick if 
(rep->r_flags & R_SOFTTERM) 31441900Smckusick error = EINTR; 31541900Smckusick else { 31641900Smckusick rep->r_flags |= R_MUSTRESEND; 31741900Smckusick error = 0; 31840117Smckusick } 31938414Smckusick } 32041900Smckusick /* 32141900Smckusick * Ignore socket errors?? 32241900Smckusick */ 32341900Smckusick if (error && error != EINTR && error != ERESTART) 32441900Smckusick error = 0; 32538414Smckusick return (error); 32638414Smckusick } 32738414Smckusick 32838414Smckusick /* 32941900Smckusick * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all 33041900Smckusick * done by soreceive(), but for SOCK_STREAM we must deal with the Record 33141900Smckusick * Mark and consolidate the data into a new mbuf list. 33241900Smckusick * nb: Sometimes TCP passes the data up to soreceive() in long lists of 33341900Smckusick * small mbufs. 33441900Smckusick * For SOCK_STREAM we must be very careful to read an entire record once 33541900Smckusick * we have read any of it, even if the system call has been interrupted. 
33638414Smckusick */ 33741900Smckusick nfs_receive(so, aname, mp, rep) 33838414Smckusick register struct socket *so; 33938414Smckusick struct mbuf **aname; 34038414Smckusick struct mbuf **mp; 34141900Smckusick register struct nfsreq *rep; 34238414Smckusick { 34341900Smckusick struct uio auio; 34441900Smckusick struct iovec aio; 34538414Smckusick register struct mbuf *m; 34641900Smckusick struct mbuf *m2, *m3, *mnew, **mbp; 34741900Smckusick caddr_t fcp, tcp; 34841900Smckusick u_long len; 34941900Smckusick struct mbuf **getnam; 35041900Smckusick int error, siz, mlen, soflags, rcvflg = MSG_WAITALL; 35138414Smckusick 35241900Smckusick /* 35341900Smckusick * Set up arguments for soreceive() 35441900Smckusick */ 35541900Smckusick *mp = (struct mbuf *)0; 35641900Smckusick *aname = (struct mbuf *)0; 35741900Smckusick if (rep) 35841900Smckusick soflags = rep->r_nmp->nm_soflags; 35941900Smckusick else 36041900Smckusick soflags = so->so_proto->pr_flags; 36138414Smckusick 36241900Smckusick /* 36341900Smckusick * For reliable protocols, lock against other senders/receivers 36441900Smckusick * in case a reconnect is necessary. 36541900Smckusick * For SOCK_STREAM, first get the Record Mark to find out how much 36641900Smckusick * more there is to get. 36741900Smckusick * We must lock the socket against other receivers 36841900Smckusick * until we have an entire rpc request/reply. 36941900Smckusick */ 37041900Smckusick if (soflags & PR_CONNREQUIRED) { 37141900Smckusick tryagain: 37241900Smckusick /* 37341900Smckusick * Check for fatal errors and resending request. 37441900Smckusick */ 37541900Smckusick if (rep) { 37641900Smckusick /* 37741900Smckusick * Ugh: If a reconnect attempt just happened, nm_so 37841900Smckusick * would have changed. NULL indicates a failed 37941900Smckusick * attempt that has essentially shut down this 38041900Smckusick * mount point. 
38141900Smckusick */ 38241900Smckusick if (rep->r_mrep || (so = rep->r_nmp->nm_so) == NULL || 38341900Smckusick (rep->r_flags & R_SOFTTERM)) 38441900Smckusick return (EINTR); 38541900Smckusick while (rep->r_flags & R_MUSTRESEND) { 38641900Smckusick m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); 38741900Smckusick nfsstats.rpcretries++; 38841900Smckusick if (error = nfs_send(so, rep->r_nmp->nm_nam, m, 38941900Smckusick rep)) 39041900Smckusick goto errout; 39140117Smckusick } 39241900Smckusick } 39341900Smckusick if ((soflags & PR_ATOMIC) == 0) { 39441900Smckusick aio.iov_base = (caddr_t) &len; 39541900Smckusick aio.iov_len = sizeof(u_long); 39641900Smckusick auio.uio_iov = &aio; 39741900Smckusick auio.uio_iovcnt = 1; 39841900Smckusick auio.uio_segflg = UIO_SYSSPACE; 39941900Smckusick auio.uio_rw = UIO_READ; 40041900Smckusick auio.uio_offset = 0; 40141900Smckusick auio.uio_resid = sizeof(u_long); 40241900Smckusick do { 40341900Smckusick error = soreceive(so, (struct mbuf **)0, &auio, 40441900Smckusick (struct mbuf **)0, (struct mbuf **)0, &rcvflg); 40541900Smckusick if (error == EWOULDBLOCK && rep) { 40641900Smckusick if (rep->r_flags & R_SOFTTERM) 40741900Smckusick return (EINTR); 40841900Smckusick if (rep->r_flags & R_MUSTRESEND) 40941900Smckusick goto tryagain; 41041900Smckusick } 41141900Smckusick } while (error == EWOULDBLOCK); 41241900Smckusick if (!error && auio.uio_resid > 0) 41341900Smckusick error = EPIPE; 41440761Skarels if (error) 41541900Smckusick goto errout; 41641900Smckusick len = ntohl(len) & ~0x80000000; 41741900Smckusick /* 41841900Smckusick * This is SERIOUS! We are out of sync with the sender 41941900Smckusick * and forcing a disconnect/reconnect is all I can do. 
42041900Smckusick */ 42141900Smckusick if (len > NFS_MAXPACKET) { 42241900Smckusick error = EFBIG; 42341900Smckusick goto errout; 42441900Smckusick } 42541900Smckusick auio.uio_resid = len; 42641900Smckusick do { 42741900Smckusick error = soreceive(so, (struct mbuf **)0, 42841900Smckusick &auio, mp, (struct mbuf **)0, &rcvflg); 42941900Smckusick } while (error == EWOULDBLOCK || error == EINTR || 43041900Smckusick error == ERESTART); 43141900Smckusick if (!error && auio.uio_resid > 0) 43241900Smckusick error = EPIPE; 43340117Smckusick } else { 43441900Smckusick auio.uio_resid = len = 1000000; /* Anything Big */ 43541900Smckusick do { 43641900Smckusick error = soreceive(so, (struct mbuf **)0, 43741900Smckusick &auio, mp, (struct mbuf **)0, &rcvflg); 43841900Smckusick if (error == EWOULDBLOCK && rep) { 43941900Smckusick if (rep->r_flags & R_SOFTTERM) 44041900Smckusick return (EINTR); 44141900Smckusick if (rep->r_flags & R_MUSTRESEND) 44241900Smckusick goto tryagain; 44341900Smckusick } 44441900Smckusick } while (error == EWOULDBLOCK); 44541900Smckusick if (!error && *mp == NULL) 44641900Smckusick error = EPIPE; 44741900Smckusick len -= auio.uio_resid; 44840117Smckusick } 44941900Smckusick errout: 45041900Smckusick if (error && rep && error != EINTR && error != ERESTART) { 45141900Smckusick m_freem(*mp); 45241900Smckusick *mp = (struct mbuf *)0; 45341900Smckusick nfs_disconnect(rep->r_nmp); 45441900Smckusick error = nfs_reconnect(rep, rep->r_nmp); 45541900Smckusick if (!error) 45641900Smckusick goto tryagain; 45740117Smckusick } 45841900Smckusick } else { 45941900Smckusick if (so->so_state & SS_ISCONNECTED) 46041900Smckusick getnam = (struct mbuf **)0; 46141900Smckusick else 46241900Smckusick getnam = aname; 46341900Smckusick auio.uio_resid = len = 1000000; 46441900Smckusick do { 46541900Smckusick error = soreceive(so, getnam, &auio, mp, 46641900Smckusick (struct mbuf **)0, &rcvflg); 46741900Smckusick if (error == EWOULDBLOCK && rep && 46841900Smckusick (rep->r_flags & 
R_SOFTTERM)) 46941900Smckusick return (EINTR); 47041900Smckusick } while (error == EWOULDBLOCK); 47141900Smckusick len -= auio.uio_resid; 47241900Smckusick } 47341900Smckusick if (error) { 47441900Smckusick m_freem(*mp); 47541900Smckusick *mp = (struct mbuf *)0; 47641900Smckusick } 47741900Smckusick /* 47841900Smckusick * Search for any mbufs that are not a multiple of 4 bytes long. 47941900Smckusick * These could cause pointer alignment problems, so copy them to 48041900Smckusick * well aligned mbufs. 48141900Smckusick */ 48241900Smckusick m = *mp; 48341900Smckusick mbp = mp; 48441900Smckusick while (m) { 48541900Smckusick /* 48641900Smckusick * All this for something that may never happen. 48741900Smckusick */ 48841900Smckusick if (m->m_len & 0x3) { 48941900Smckusick printf("nfs_rcv odd length!\n"); 49041900Smckusick fcp = mtod(m, caddr_t); 49141900Smckusick mnew = m2 = (struct mbuf *)0; 49242243Smckusick #ifdef lint 49342243Smckusick m3 = (struct mbuf *)0; 49442243Smckusick mlen = 0; 49542243Smckusick #endif /* lint */ 49641900Smckusick while (m) { 49741900Smckusick if (m2 == NULL || mlen == 0) { 49841900Smckusick MGET(m2, M_WAIT, MT_DATA); 49941900Smckusick if (len > MINCLSIZE) 50041900Smckusick MCLGET(m2, M_WAIT); 50141900Smckusick m2->m_len = 0; 50241900Smckusick mlen = M_TRAILINGSPACE(m2); 50341900Smckusick tcp = mtod(m2, caddr_t); 50441900Smckusick if (mnew) { 50541900Smckusick m3->m_next = m2; 50641900Smckusick m3 = m2; 50741900Smckusick } else 50841900Smckusick mnew = m3 = m2; 50941900Smckusick } 51041900Smckusick siz = (mlen > m->m_len) ? 
m->m_len : mlen; 51141900Smckusick bcopy(fcp, tcp, siz); 51241900Smckusick m2->m_len += siz; 51341900Smckusick mlen -= siz; 51441900Smckusick len -= siz; 51541900Smckusick tcp += siz; 51641900Smckusick m->m_len -= siz; 51741900Smckusick fcp += siz; 51841900Smckusick if (m->m_len == 0) { 51941900Smckusick do { 52041900Smckusick m = m->m_next; 52141900Smckusick } while (m && m->m_len == 0); 52241900Smckusick if (m) 52341900Smckusick fcp = mtod(m, caddr_t); 52441900Smckusick } 52541900Smckusick } 52641900Smckusick m = *mbp; 52741900Smckusick *mbp = mnew; 52841900Smckusick m_freem(m); 52941900Smckusick break; 53040117Smckusick } 53141900Smckusick len -= m->m_len; 53241900Smckusick mbp = &m->m_next; 53341900Smckusick m = m->m_next; 53438414Smckusick } 53538414Smckusick return (error); 53638414Smckusick } 53738414Smckusick 53838414Smckusick struct rpc_replyhead { 53938414Smckusick u_long r_xid; 54038414Smckusick u_long r_rep; 54138414Smckusick }; 54238414Smckusick 54338414Smckusick /* 54441900Smckusick * Implement receipt of reply on a socket. 54538414Smckusick * We must search through the list of received datagrams matching them 54638414Smckusick * with outstanding requests using the xid, until ours is found. 
54738414Smckusick */ 54841900Smckusick /* ARGSUSED */ 54941900Smckusick nfs_reply(nmp, myrep) 55041900Smckusick struct nfsmount *nmp; 55139344Smckusick struct nfsreq *myrep; 55238414Smckusick { 55338414Smckusick register struct mbuf *m; 55438414Smckusick register struct nfsreq *rep; 55541900Smckusick register int error = 0; 55638414Smckusick struct rpc_replyhead replyh; 55741900Smckusick struct mbuf *mp, *nam; 55841900Smckusick char *cp; 55941900Smckusick int cnt, xfer; 56038414Smckusick 56141900Smckusick /* 56241900Smckusick * Loop around until we get our own reply 56341900Smckusick */ 56441900Smckusick for (;;) { 56541900Smckusick /* 56641900Smckusick * Lock against other receivers so that I don't get stuck in 56741900Smckusick * sbwait() after someone else has received my reply for me. 56841900Smckusick * Also necessary for connection based protocols to avoid 56941900Smckusick * race conditions during a reconnect. 57041900Smckusick */ 571*43351Smckusick nfs_solock(&nmp->nm_flag); 57241900Smckusick /* Already received, bye bye */ 57341900Smckusick if (myrep->r_mrep != NULL) { 57441900Smckusick nfs_sounlock(&nmp->nm_flag); 57541900Smckusick return (0); 57640117Smckusick } 57741900Smckusick /* 57841900Smckusick * Get the next Rpc reply off the socket 57941900Smckusick */ 58041900Smckusick if (error = nfs_receive(nmp->nm_so, &nam, &mp, myrep)) { 58141900Smckusick nfs_sounlock(&nmp->nm_flag); 58238414Smckusick 58341900Smckusick /* 58441900Smckusick * Ignore routing errors on connectionless protocols?? 58541900Smckusick */ 58641900Smckusick if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) { 58741900Smckusick nmp->nm_so->so_error = 0; 58841900Smckusick continue; 58941900Smckusick } 59041900Smckusick 59141900Smckusick /* 59241900Smckusick * Otherwise cleanup and return a fatal error. 
59341900Smckusick */ 59441900Smckusick if (myrep->r_flags & R_TIMING) { 59541900Smckusick myrep->r_flags &= ~R_TIMING; 59641900Smckusick nmp->nm_rtt = -1; 59741900Smckusick } 59841900Smckusick if (myrep->r_flags & R_SENT) { 59941900Smckusick myrep->r_flags &= ~R_SENT; 60041900Smckusick nmp->nm_sent--; 60141900Smckusick } 60241900Smckusick return (error); 60338414Smckusick } 60441900Smckusick 60541900Smckusick /* 60641900Smckusick * Get the xid and check that it is an rpc reply 60741900Smckusick */ 60841900Smckusick m = mp; 60941900Smckusick if (m->m_len >= 2*NFSX_UNSIGNED) 61041900Smckusick bcopy(mtod(m, caddr_t), (caddr_t)&replyh, 61141900Smckusick 2*NFSX_UNSIGNED); 61241900Smckusick else { 61341900Smckusick cnt = 2*NFSX_UNSIGNED; 61441900Smckusick cp = (caddr_t)&replyh; 61541900Smckusick while (m && cnt > 0) { 61641900Smckusick if (m->m_len > 0) { 61741900Smckusick xfer = (m->m_len >= cnt) ? cnt : 61841900Smckusick m->m_len; 61941900Smckusick bcopy(mtod(m, caddr_t), cp, xfer); 62041900Smckusick cnt -= xfer; 62141900Smckusick cp += xfer; 62241900Smckusick } 62341900Smckusick if (cnt > 0) 62441900Smckusick m = m->m_next; 62541900Smckusick } 62640117Smckusick } 62741900Smckusick if (replyh.r_rep != rpc_reply || m == NULL) { 62840117Smckusick nfsstats.rpcinvalid++; 62941900Smckusick m_freem(mp); 63041900Smckusick nfs_sounlock(&nmp->nm_flag); 63141900Smckusick continue; 63238414Smckusick } 63341900Smckusick /* 63441900Smckusick * Loop through the request list to match up the reply 63541900Smckusick * Iff no match, just drop the datagram 63641900Smckusick */ 63741900Smckusick m = mp; 63841900Smckusick rep = nfsreqh.r_next; 63941900Smckusick while (rep != &nfsreqh) { 64041900Smckusick if (rep->r_mrep == NULL && replyh.r_xid == rep->r_xid) { 64141900Smckusick /* Found it.. 
*/ 64241900Smckusick rep->r_mrep = m; 64341900Smckusick /* 64441900Smckusick * Update timing 64541900Smckusick */ 64641900Smckusick if (rep->r_flags & R_TIMING) { 64741900Smckusick nfs_updatetimer(rep->r_nmp); 64841900Smckusick rep->r_flags &= ~R_TIMING; 64941900Smckusick rep->r_nmp->nm_rtt = -1; 65041900Smckusick } 65141900Smckusick if (rep->r_flags & R_SENT) { 65241900Smckusick rep->r_flags &= ~R_SENT; 65341900Smckusick rep->r_nmp->nm_sent--; 65441900Smckusick } 65540117Smckusick break; 65638414Smckusick } 65741900Smckusick rep = rep->r_next; 65838414Smckusick } 65941900Smckusick nfs_sounlock(&nmp->nm_flag); 66041900Smckusick if (nam) 66141900Smckusick m_freem(nam); 66241900Smckusick /* 66341900Smckusick * If not matched to a request, drop it. 66441900Smckusick * If it's mine, get out. 66541900Smckusick */ 66641900Smckusick if (rep == &nfsreqh) { 66741900Smckusick nfsstats.rpcunexpected++; 66841900Smckusick m_freem(m); 66941900Smckusick } else if (rep == myrep) 67041900Smckusick return (0); 67138414Smckusick } 67238414Smckusick } 67338414Smckusick 67438414Smckusick /* 67538414Smckusick * nfs_request - goes something like this 67638414Smckusick * - fill in request struct 67738414Smckusick * - links it into list 67841900Smckusick * - calls nfs_send() for first transmit 67941900Smckusick * - calls nfs_receive() to get reply 68038414Smckusick * - break down rpc header and return with nfs reply pointed to 68138414Smckusick * by mrep or error 68238414Smckusick * nb: always frees up mreq mbuf list 68338414Smckusick */ 684*43351Smckusick nfs_request(vp, mreq, xid, procnum, procp, tryhard, mp, mrp, mdp, dposp) 68538414Smckusick struct vnode *vp; 68638414Smckusick struct mbuf *mreq; 68738414Smckusick u_long xid; 68841900Smckusick int procnum; 68941900Smckusick struct proc *procp; 690*43351Smckusick int tryhard; 69138414Smckusick struct mount *mp; 69238414Smckusick struct mbuf **mrp; 69338414Smckusick struct mbuf **mdp; 69438414Smckusick caddr_t *dposp; 69538414Smckusick { 
69638414Smckusick register struct mbuf *m, *mrep; 69738414Smckusick register struct nfsreq *rep; 69838414Smckusick register u_long *p; 69938414Smckusick register int len; 70041900Smckusick struct nfsmount *nmp; 70138414Smckusick struct mbuf *md; 70239344Smckusick struct nfsreq *reph; 70338414Smckusick caddr_t dpos; 70438414Smckusick char *cp2; 70538414Smckusick int t1; 70638414Smckusick int s; 70741900Smckusick int error = 0; 70838414Smckusick 70941900Smckusick nmp = VFSTONFS(mp); 71038414Smckusick m = mreq; 71138414Smckusick MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK); 71238414Smckusick rep->r_xid = xid; 71341900Smckusick rep->r_nmp = nmp; 71438414Smckusick rep->r_vp = vp; 71541900Smckusick rep->r_procp = procp; 716*43351Smckusick if ((nmp->nm_flag & NFSMNT_SOFT) || 717*43351Smckusick ((nmp->nm_flag & NFSMNT_SPONGY) && !tryhard)) 71841900Smckusick rep->r_retry = nmp->nm_retry; 71938414Smckusick else 72040117Smckusick rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */ 72140117Smckusick rep->r_flags = rep->r_rexmit = 0; 72241900Smckusick /* 72341900Smckusick * Three cases: 72441900Smckusick * - non-idempotent requests on SOCK_DGRAM use NFS_MINIDEMTIMEO 72541900Smckusick * - idempotent requests on SOCK_DGRAM use 0 72641900Smckusick * - Reliable transports, NFS_RELIABLETIMEO 72741900Smckusick * Timeouts are still done on reliable transports to ensure detection 728*43351Smckusick * of excessive connection delay. 
72941900Smckusick */ 73041900Smckusick if (nmp->nm_sotype != SOCK_DGRAM) 73141900Smckusick rep->r_timerinit = -NFS_RELIABLETIMEO; 73241900Smckusick else if (nonidempotent[procnum]) 73341900Smckusick rep->r_timerinit = -NFS_MINIDEMTIMEO; 73441900Smckusick else 73541900Smckusick rep->r_timerinit = 0; 73641900Smckusick rep->r_timer = rep->r_timerinit; 73738414Smckusick rep->r_mrep = NULL; 73838414Smckusick len = 0; 73938414Smckusick while (m) { 74038414Smckusick len += m->m_len; 74138414Smckusick m = m->m_next; 74238414Smckusick } 74341900Smckusick mreq->m_pkthdr.len = len; 74441900Smckusick mreq->m_pkthdr.rcvif = (struct ifnet *)0; 74541900Smckusick /* 74641900Smckusick * For non-atomic protocols, insert a Sun RPC Record Mark. 74741900Smckusick */ 74841900Smckusick if ((nmp->nm_soflags & PR_ATOMIC) == 0) { 74941900Smckusick M_PREPEND(mreq, sizeof(u_long), M_WAIT); 75041900Smckusick *mtod(mreq, u_long *) = htonl(0x80000000 | len); 75141900Smckusick } 75241900Smckusick rep->r_mreq = mreq; 75338414Smckusick 75440117Smckusick /* 75540117Smckusick * Do the client side RPC. 75640117Smckusick */ 75740117Smckusick nfsstats.rpcrequests++; 75841900Smckusick /* 75941900Smckusick * Chain request into list of outstanding requests. Be sure 76041900Smckusick * to put it LAST so timer finds oldest requests first. 76141900Smckusick */ 76240117Smckusick s = splnet(); 76339344Smckusick reph = &nfsreqh; 76441900Smckusick reph->r_prev->r_next = rep; 76541900Smckusick rep->r_prev = reph->r_prev; 76639344Smckusick reph->r_prev = rep; 76739344Smckusick rep->r_next = reph; 76840117Smckusick /* 76940117Smckusick * If backing off another request or avoiding congestion, don't 77040117Smckusick * send this one now but let timer do it. If not timing a request, 77140117Smckusick * do it now. 
77240117Smckusick */ 77341900Smckusick if (nmp->nm_sent <= 0 || nmp->nm_sotype != SOCK_DGRAM || 77441900Smckusick (nmp->nm_currexmit == 0 && nmp->nm_sent < nmp->nm_window)) { 77541900Smckusick nmp->nm_sent++; 77641900Smckusick rep->r_flags |= R_SENT; 77741900Smckusick if (nmp->nm_rtt == -1) { 77841900Smckusick nmp->nm_rtt = 0; 77941900Smckusick rep->r_flags |= R_TIMING; 78041900Smckusick } 78140117Smckusick splx(s); 78241900Smckusick m = m_copym(mreq, 0, M_COPYALL, M_WAIT); 78341900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) 784*43351Smckusick nfs_solock(&nmp->nm_flag); 78541900Smckusick error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep); 78641900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) 78741900Smckusick nfs_sounlock(&nmp->nm_flag); 78841900Smckusick if (error && NFSIGNORE_SOERROR(nmp->nm_soflags, error)) 78941900Smckusick nmp->nm_so->so_error = error = 0; 79041900Smckusick } else 79141900Smckusick splx(s); 79238414Smckusick 79338414Smckusick /* 79440117Smckusick * Wait for the reply from our send or the timer's. 79540117Smckusick */ 79641900Smckusick if (!error) 79741900Smckusick error = nfs_reply(nmp, rep); 79838414Smckusick 79940117Smckusick /* 80040117Smckusick * RPC done, unlink the request. 80140117Smckusick */ 80238414Smckusick s = splnet(); 80338414Smckusick rep->r_prev->r_next = rep->r_next; 80439344Smckusick rep->r_next->r_prev = rep->r_prev; 80538414Smckusick splx(s); 80641900Smckusick 80741900Smckusick /* 80841900Smckusick * If there was a successful reply and a tprintf msg. 80941900Smckusick * tprintf a response. 
81041900Smckusick */ 81141900Smckusick if (!error && (rep->r_flags & R_TPRINTFMSG)) { 81241900Smckusick if (rep->r_procp) 81343061Smarc tprintf(rep->r_procp->p_session, 81441900Smckusick "Nfs server %s, is alive again\n", 81541900Smckusick rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 81641900Smckusick else 81743061Smarc tprintf(NULL, "Nfs server %s, is alive again\n", 81841900Smckusick rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 81941900Smckusick } 82038414Smckusick m_freem(rep->r_mreq); 82138414Smckusick mrep = md = rep->r_mrep; 82238414Smckusick FREE((caddr_t)rep, M_NFSREQ); 82338414Smckusick if (error) 82438414Smckusick return (error); 82538414Smckusick 82638414Smckusick /* 82738414Smckusick * break down the rpc header and check if ok 82838414Smckusick */ 82938414Smckusick dpos = mtod(md, caddr_t); 83038414Smckusick nfsm_disect(p, u_long *, 5*NFSX_UNSIGNED); 83138414Smckusick p += 2; 83238414Smckusick if (*p++ == rpc_msgdenied) { 83338414Smckusick if (*p == rpc_mismatch) 83438414Smckusick error = EOPNOTSUPP; 83538414Smckusick else 83638414Smckusick error = EACCES; 83738414Smckusick m_freem(mrep); 83838414Smckusick return (error); 83938414Smckusick } 84038414Smckusick /* 84138414Smckusick * skip over the auth_verf, someday we may want to cache auth_short's 84238414Smckusick * for nfs_reqhead(), but for now just dump it 84338414Smckusick */ 84438414Smckusick if (*++p != 0) { 84538414Smckusick len = nfsm_rndup(fxdr_unsigned(long, *p)); 84638414Smckusick nfsm_adv(len); 84738414Smckusick } 84838414Smckusick nfsm_disect(p, u_long *, NFSX_UNSIGNED); 84938414Smckusick /* 0 == ok */ 85038414Smckusick if (*p == 0) { 85138414Smckusick nfsm_disect(p, u_long *, NFSX_UNSIGNED); 85238414Smckusick if (*p != 0) { 85338414Smckusick error = fxdr_unsigned(int, *p); 85438414Smckusick m_freem(mrep); 85538414Smckusick return (error); 85638414Smckusick } 85738414Smckusick *mrp = mrep; 85838414Smckusick *mdp = md; 85938414Smckusick *dposp = dpos; 86038414Smckusick return (0); 
86138414Smckusick } 86238414Smckusick m_freem(mrep); 86338414Smckusick return (EPROTONOSUPPORT); 86438414Smckusick nfsmout: 86538414Smckusick return (error); 86638414Smckusick } 86738414Smckusick 86838414Smckusick /* 86938414Smckusick * Get a request for the server main loop 87038414Smckusick * - receive a request via. nfs_soreceive() 87138414Smckusick * - verify it 87238414Smckusick * - fill in the cred struct. 87338414Smckusick */ 87442243Smckusick nfs_getreq(so, prog, vers, maxproc, nam, mrp, mdp, dposp, retxid, procnum, cr, 875*43351Smckusick msk, mtch) 87638414Smckusick struct socket *so; 87738414Smckusick u_long prog; 87838414Smckusick u_long vers; 87938414Smckusick int maxproc; 88038414Smckusick struct mbuf **nam; 88138414Smckusick struct mbuf **mrp; 88238414Smckusick struct mbuf **mdp; 88338414Smckusick caddr_t *dposp; 88438414Smckusick u_long *retxid; 88542243Smckusick u_long *procnum; 88638414Smckusick register struct ucred *cr; 88741900Smckusick struct mbuf *msk, *mtch; 88838414Smckusick { 88938414Smckusick register int i; 89039494Smckusick register u_long *p; 89139494Smckusick register long t1; 89239494Smckusick caddr_t dpos, cp2; 89339494Smckusick int error = 0; 89439494Smckusick struct mbuf *mrep, *md; 89539494Smckusick int len; 89638414Smckusick 89741900Smckusick if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 89841900Smckusick error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0); 89941900Smckusick } else { 90041900Smckusick mrep = (struct mbuf *)0; 90141900Smckusick do { 90241900Smckusick if (mrep) { 90341900Smckusick m_freem(*nam); 90441900Smckusick m_freem(mrep); 90541900Smckusick } 90641900Smckusick error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0); 90741900Smckusick } while (!error && nfs_badnam(*nam, msk, mtch)); 90841900Smckusick } 90941900Smckusick if (error) 91038414Smckusick return (error); 91138414Smckusick md = mrep; 91238414Smckusick dpos = mtod(mrep, caddr_t); 91338414Smckusick nfsm_disect(p, u_long *, 10*NFSX_UNSIGNED); 
91438414Smckusick *retxid = *p++; 91538414Smckusick if (*p++ != rpc_call) { 91638414Smckusick m_freem(mrep); 91738414Smckusick return (ERPCMISMATCH); 91838414Smckusick } 91938414Smckusick if (*p++ != rpc_vers) { 92038414Smckusick m_freem(mrep); 92138414Smckusick return (ERPCMISMATCH); 92238414Smckusick } 92338414Smckusick if (*p++ != prog) { 92438414Smckusick m_freem(mrep); 92538414Smckusick return (EPROGUNAVAIL); 92638414Smckusick } 92738414Smckusick if (*p++ != vers) { 92838414Smckusick m_freem(mrep); 92938414Smckusick return (EPROGMISMATCH); 93038414Smckusick } 93142243Smckusick *procnum = fxdr_unsigned(u_long, *p++); 93242243Smckusick if (*procnum == NFSPROC_NULL) { 93338414Smckusick *mrp = mrep; 93438414Smckusick return (0); 93538414Smckusick } 93642243Smckusick if (*procnum > maxproc || *p++ != rpc_auth_unix) { 93738414Smckusick m_freem(mrep); 93838414Smckusick return (EPROCUNAVAIL); 93938414Smckusick } 94041900Smckusick len = fxdr_unsigned(int, *p++); 94141900Smckusick if (len < 0 || len > RPCAUTH_MAXSIZ) { 94241900Smckusick m_freem(mrep); 94341900Smckusick return (EBADRPC); 94441900Smckusick } 94539494Smckusick len = fxdr_unsigned(int, *++p); 94641900Smckusick if (len < 0 || len > NFS_MAXNAMLEN) { 94741900Smckusick m_freem(mrep); 94841900Smckusick return (EBADRPC); 94941900Smckusick } 95039494Smckusick nfsm_adv(nfsm_rndup(len)); 95138414Smckusick nfsm_disect(p, u_long *, 3*NFSX_UNSIGNED); 95238414Smckusick cr->cr_uid = fxdr_unsigned(uid_t, *p++); 95338414Smckusick cr->cr_gid = fxdr_unsigned(gid_t, *p++); 95439494Smckusick len = fxdr_unsigned(int, *p); 95541900Smckusick if (len < 0 || len > RPCAUTH_UNIXGIDS) { 95638414Smckusick m_freem(mrep); 95738414Smckusick return (EBADRPC); 95838414Smckusick } 95939494Smckusick nfsm_disect(p, u_long *, (len + 2)*NFSX_UNSIGNED); 96039494Smckusick for (i = 1; i <= len; i++) 96141900Smckusick if (i < NGROUPS) 96241900Smckusick cr->cr_groups[i] = fxdr_unsigned(gid_t, *p++); 96341900Smckusick else 96441900Smckusick p++; 
96541900Smckusick cr->cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1); 96638414Smckusick /* 96738414Smckusick * Do we have any use for the verifier. 96838414Smckusick * According to the "Remote Procedure Call Protocol Spec." it 96938414Smckusick * should be AUTH_NULL, but some clients make it AUTH_UNIX? 97038414Smckusick * For now, just skip over it 97138414Smckusick */ 97239494Smckusick len = fxdr_unsigned(int, *++p); 97341900Smckusick if (len < 0 || len > RPCAUTH_MAXSIZ) { 97441900Smckusick m_freem(mrep); 97541900Smckusick return (EBADRPC); 97641900Smckusick } 97739494Smckusick if (len > 0) 97839494Smckusick nfsm_adv(nfsm_rndup(len)); 97938414Smckusick *mrp = mrep; 98038414Smckusick *mdp = md; 98138414Smckusick *dposp = dpos; 98238414Smckusick return (0); 98338414Smckusick nfsmout: 98438414Smckusick return (error); 98538414Smckusick } 98638414Smckusick 98738414Smckusick /* 98838414Smckusick * Generate the rpc reply header 98938414Smckusick * siz arg. is used to decide if adding a cluster is worthwhile 99038414Smckusick */ 99138414Smckusick nfs_rephead(siz, retxid, err, mrq, mbp, bposp) 99238414Smckusick int siz; 99338414Smckusick u_long retxid; 99438414Smckusick int err; 99538414Smckusick struct mbuf **mrq; 99638414Smckusick struct mbuf **mbp; 99738414Smckusick caddr_t *bposp; 99838414Smckusick { 99939494Smckusick register u_long *p; 100039494Smckusick register long t1; 100139494Smckusick caddr_t bpos; 100239494Smckusick struct mbuf *mreq, *mb, *mb2; 100338414Smckusick 100438414Smckusick NFSMGETHDR(mreq); 100538414Smckusick mb = mreq; 100638414Smckusick if ((siz+RPC_REPLYSIZ) > MHLEN) 100741900Smckusick MCLGET(mreq, M_WAIT); 100838414Smckusick p = mtod(mreq, u_long *); 100938414Smckusick mreq->m_len = 6*NFSX_UNSIGNED; 101038414Smckusick bpos = ((caddr_t)p)+mreq->m_len; 101138414Smckusick *p++ = retxid; 101238414Smckusick *p++ = rpc_reply; 101338414Smckusick if (err == ERPCMISMATCH) { 101438414Smckusick *p++ = rpc_msgdenied; 101538414Smckusick *p++ = 
rpc_mismatch; 101638414Smckusick *p++ = txdr_unsigned(2); 101738414Smckusick *p = txdr_unsigned(2); 101838414Smckusick } else { 101938414Smckusick *p++ = rpc_msgaccepted; 102038414Smckusick *p++ = 0; 102138414Smckusick *p++ = 0; 102238414Smckusick switch (err) { 102338414Smckusick case EPROGUNAVAIL: 102438414Smckusick *p = txdr_unsigned(RPC_PROGUNAVAIL); 102538414Smckusick break; 102638414Smckusick case EPROGMISMATCH: 102738414Smckusick *p = txdr_unsigned(RPC_PROGMISMATCH); 102838414Smckusick nfsm_build(p, u_long *, 2*NFSX_UNSIGNED); 102938414Smckusick *p++ = txdr_unsigned(2); 103038414Smckusick *p = txdr_unsigned(2); /* someday 3 */ 103138414Smckusick break; 103238414Smckusick case EPROCUNAVAIL: 103338414Smckusick *p = txdr_unsigned(RPC_PROCUNAVAIL); 103438414Smckusick break; 103538414Smckusick default: 103638414Smckusick *p = 0; 103738414Smckusick if (err != VNOVAL) { 103838414Smckusick nfsm_build(p, u_long *, NFSX_UNSIGNED); 103938414Smckusick *p = txdr_unsigned(err); 104038414Smckusick } 104138414Smckusick break; 104238414Smckusick }; 104338414Smckusick } 104438414Smckusick *mrq = mreq; 104538414Smckusick *mbp = mb; 104638414Smckusick *bposp = bpos; 104738414Smckusick if (err != 0 && err != VNOVAL) 104838414Smckusick nfsstats.srvrpc_errs++; 104938414Smckusick return (0); 105038414Smckusick } 105138414Smckusick 105238414Smckusick /* 105338414Smckusick * Nfs timer routine 105438414Smckusick * Scan the nfsreq list and retranmit any requests that have timed out 105538414Smckusick * To avoid retransmission attempts on STREAM sockets (in the future) make 105640117Smckusick * sure to set the r_retry field to 0 (implies nm_retry == 0). 
105738414Smckusick */ 105838414Smckusick nfs_timer() 105938414Smckusick { 106038414Smckusick register struct nfsreq *rep; 106138414Smckusick register struct mbuf *m; 106238414Smckusick register struct socket *so; 106341900Smckusick register struct nfsmount *nmp; 106440117Smckusick int s, error; 106538414Smckusick 106638414Smckusick s = splnet(); 106741900Smckusick for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) { 106841900Smckusick nmp = rep->r_nmp; 106941900Smckusick if (rep->r_mrep || (rep->r_flags & R_SOFTTERM) || 107041900Smckusick (so = nmp->nm_so) == NULL) 107141900Smckusick continue; 107241900Smckusick if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp)) { 107341900Smckusick rep->r_flags |= R_SOFTTERM; 107441900Smckusick continue; 107541900Smckusick } 107640117Smckusick if (rep->r_flags & R_TIMING) /* update rtt in mount */ 107741900Smckusick nmp->nm_rtt++; 107841900Smckusick /* If not timed out */ 107941900Smckusick if (++rep->r_timer < nmp->nm_rto) 108041900Smckusick continue; 108140117Smckusick /* Do backoff and save new timeout in mount */ 108240117Smckusick if (rep->r_flags & R_TIMING) { 108341900Smckusick nfs_backofftimer(nmp); 108440117Smckusick rep->r_flags &= ~R_TIMING; 108541900Smckusick nmp->nm_rtt = -1; 108640117Smckusick } 108740117Smckusick if (rep->r_flags & R_SENT) { 108840117Smckusick rep->r_flags &= ~R_SENT; 108941900Smckusick nmp->nm_sent--; 109040117Smckusick } 109141900Smckusick 109241900Smckusick /* 109341900Smckusick * Check for too many retries on soft mount. 
109441900Smckusick * nb: For hard mounts, r_retry == NFS_MAXREXMIT+1 109541900Smckusick */ 109641900Smckusick if (++rep->r_rexmit > NFS_MAXREXMIT) 109740117Smckusick rep->r_rexmit = NFS_MAXREXMIT; 109840117Smckusick 109941900Smckusick /* 110041900Smckusick * Check for server not responding 110141900Smckusick */ 110241900Smckusick if ((rep->r_flags & R_TPRINTFMSG) == 0 && 1103*43351Smckusick rep->r_rexmit > NFS_FISHY) { 110441900Smckusick if (rep->r_procp && rep->r_procp->p_session) 110543061Smarc tprintf(rep->r_procp->p_session, 110641900Smckusick "Nfs server %s, not responding\n", 110741900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 110841900Smckusick else 110943061Smarc tprintf(NULL, 111041900Smckusick "Nfs server %s, not responding\n", 111141900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 111241900Smckusick rep->r_flags |= R_TPRINTFMSG; 111341900Smckusick } 1114*43351Smckusick if (rep->r_rexmit >= rep->r_retry) { /* too many */ 111541900Smckusick nfsstats.rpctimeouts++; 111641900Smckusick rep->r_flags |= R_SOFTTERM; 111741900Smckusick continue; 111841900Smckusick } 1119*43351Smckusick if (nmp->nm_sotype != SOCK_DGRAM) 1120*43351Smckusick continue; 112141900Smckusick 112241900Smckusick /* 112341900Smckusick * If there is enough space and the window allows.. 
112441900Smckusick * Resend it 112541900Smckusick */ 112641900Smckusick if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len && 112741900Smckusick nmp->nm_sent < nmp->nm_window && 112841900Smckusick (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){ 112941900Smckusick nfsstats.rpcretries++; 113041900Smckusick if ((nmp->nm_flag & NFSMNT_NOCONN) == 0) 113141900Smckusick error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, 113241900Smckusick (caddr_t)0, (struct mbuf *)0, (struct mbuf *)0); 113341900Smckusick else 113441900Smckusick error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, 113541900Smckusick nmp->nm_nam, (struct mbuf *)0, (struct mbuf *)0); 113641900Smckusick if (error) { 113741900Smckusick if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) 113841900Smckusick so->so_error = 0; 113941900Smckusick } else { 114041900Smckusick /* 114141900Smckusick * We need to time the request even though we 114241900Smckusick * are retransmitting. 114341900Smckusick */ 114441900Smckusick nmp->nm_rtt = 0; 114541900Smckusick nmp->nm_sent++; 114641900Smckusick rep->r_flags |= (R_SENT|R_TIMING); 114741900Smckusick rep->r_timer = rep->r_timerinit; 114841900Smckusick } 114941900Smckusick } 115040117Smckusick } 115140117Smckusick splx(s); 115240117Smckusick timeout(nfs_timer, (caddr_t)0, hz/NFS_HZ); 115340117Smckusick } 115440117Smckusick 115540117Smckusick /* 115640117Smckusick * NFS timer update and backoff. The "Jacobson/Karels/Karn" scheme is 115740117Smckusick * used here. The timer state is held in the nfsmount structure and 115840117Smckusick * a single request is used to clock the response. When successful 115940117Smckusick * the rtt smoothing in nfs_updatetimer is used, when failed the backoff 116040117Smckusick * is done by nfs_backofftimer. We also log failure messages in these 116140117Smckusick * routines. 
*
 * Congestion variables are held in the nfshost structure which
 * is referenced by nfsmounts and shared per-server. This separation
 * makes it possible to do per-mount timing, which allows varying disk
 * access times to be dealt with, while preserving a network-oriented
 * congestion control scheme.
 *
 * The windowing implements the Jacobson/Karels slow-start algorithm
 * with adjusted scaling factors. We start with one request, then send
 * 4 more after each success until the ssthresh limit is reached, then
 * we increment at a rate proportional to the window. On failure, we
 * remember 3/4 of the current window and clamp the send limit to 1. Note
 * that ICMP source quench is not reflected in so->so_error, so we ignore
 * that for now.
 *
 * NFS behaves much more like a transport protocol with these changes,
 * shedding the teenage pedal-to-the-metal tendencies of "other"
 * implementations.
 *
 * Timers and congestion avoidance by Tom Talpey, Open Software Foundation.
 */

/*
 * The TCP algorithm was not forgiving enough. Because the NFS server
 * responds only after performing lookups/diskio/etc, we have to be
 * more prepared to accept a spiky variance.
The TCP algorithm is: 118841900Smckusick * TCP_RTO(nmp) ((((nmp)->nm_srtt >> 2) + (nmp)->nm_rttvar) >> 1) 118940117Smckusick */ 119041900Smckusick #define NFS_RTO(nmp) (((nmp)->nm_srtt >> 3) + (nmp)->nm_rttvar) 119140117Smckusick 119241900Smckusick nfs_updatetimer(nmp) 119341900Smckusick register struct nfsmount *nmp; 119440117Smckusick { 119540117Smckusick 119640117Smckusick /* If retransmitted, clear and return */ 119741900Smckusick if (nmp->nm_rexmit || nmp->nm_currexmit) { 119841900Smckusick nmp->nm_rexmit = nmp->nm_currexmit = 0; 119940117Smckusick return; 120040117Smckusick } 120140117Smckusick /* If have a measurement, do smoothing */ 120241900Smckusick if (nmp->nm_srtt) { 120340117Smckusick register short delta; 120441900Smckusick delta = nmp->nm_rtt - (nmp->nm_srtt >> 3); 120541900Smckusick if ((nmp->nm_srtt += delta) <= 0) 120641900Smckusick nmp->nm_srtt = 1; 120740117Smckusick if (delta < 0) 120840117Smckusick delta = -delta; 120941900Smckusick delta -= (nmp->nm_rttvar >> 2); 121041900Smckusick if ((nmp->nm_rttvar += delta) <= 0) 121141900Smckusick nmp->nm_rttvar = 1; 121240117Smckusick /* Else initialize */ 121340117Smckusick } else { 121441900Smckusick nmp->nm_rttvar = nmp->nm_rtt << 1; 121541900Smckusick if (nmp->nm_rttvar == 0) nmp->nm_rttvar = 2; 121641900Smckusick nmp->nm_srtt = nmp->nm_rttvar << 2; 121740117Smckusick } 121840117Smckusick /* Compute new Retransmission TimeOut and clip */ 121941900Smckusick nmp->nm_rto = NFS_RTO(nmp); 122041900Smckusick if (nmp->nm_rto < NFS_MINTIMEO) 122141900Smckusick nmp->nm_rto = NFS_MINTIMEO; 122241900Smckusick else if (nmp->nm_rto > NFS_MAXTIMEO) 122341900Smckusick nmp->nm_rto = NFS_MAXTIMEO; 122440117Smckusick 122540117Smckusick /* Update window estimate */ 122641900Smckusick if (nmp->nm_window < nmp->nm_ssthresh) /* quickly */ 122741900Smckusick nmp->nm_window += 4; 122840117Smckusick else { /* slowly */ 122941900Smckusick register long incr = ++nmp->nm_winext; 123041900Smckusick incr = (incr * incr) / 
nmp->nm_window; 123140117Smckusick if (incr > 0) { 123241900Smckusick nmp->nm_winext = 0; 123341900Smckusick ++nmp->nm_window; 123440117Smckusick } 123540117Smckusick } 123641900Smckusick if (nmp->nm_window > NFS_MAXWINDOW) 123741900Smckusick nmp->nm_window = NFS_MAXWINDOW; 123840117Smckusick } 123940117Smckusick 124041900Smckusick nfs_backofftimer(nmp) 124141900Smckusick register struct nfsmount *nmp; 124240117Smckusick { 124340117Smckusick register unsigned long newrto; 124440117Smckusick 124540117Smckusick /* Clip shift count */ 124641900Smckusick if (++nmp->nm_rexmit > 8 * sizeof nmp->nm_rto) 124741900Smckusick nmp->nm_rexmit = 8 * sizeof nmp->nm_rto; 124840117Smckusick /* Back off RTO exponentially */ 124941900Smckusick newrto = NFS_RTO(nmp); 125041900Smckusick newrto <<= (nmp->nm_rexmit - 1); 125140117Smckusick if (newrto == 0 || newrto > NFS_MAXTIMEO) 125240117Smckusick newrto = NFS_MAXTIMEO; 125341900Smckusick nmp->nm_rto = newrto; 125440117Smckusick 125540117Smckusick /* If too many retries, message, assume a bogus RTT and re-measure */ 125641900Smckusick if (nmp->nm_currexmit < nmp->nm_rexmit) { 125741900Smckusick nmp->nm_currexmit = nmp->nm_rexmit; 125841900Smckusick if (nmp->nm_currexmit >= nfsrexmtthresh) { 125941900Smckusick if (nmp->nm_currexmit == nfsrexmtthresh) { 126041900Smckusick nmp->nm_rttvar += (nmp->nm_srtt >> 2); 126141900Smckusick nmp->nm_srtt = 0; 126238414Smckusick } 126338414Smckusick } 126438414Smckusick } 126540117Smckusick /* Close down window but remember this point (3/4 current) for later */ 126641900Smckusick nmp->nm_ssthresh = ((nmp->nm_window << 1) + nmp->nm_window) >> 2; 126741900Smckusick nmp->nm_window = 1; 126841900Smckusick nmp->nm_winext = 0; 126938414Smckusick } 127038414Smckusick 127138414Smckusick /* 127241900Smckusick * Test for a termination signal pending on procp. 127341900Smckusick * This is used for NFSMNT_INT mounts. 
127438414Smckusick */ 127541900Smckusick nfs_sigintr(p) 127641900Smckusick register struct proc *p; 127741900Smckusick { 127841900Smckusick if (p && p->p_sig && (((p->p_sig &~ p->p_sigmask) &~ p->p_sigignore) & 127941900Smckusick NFSINT_SIGMASK)) 128041900Smckusick return (1); 128141900Smckusick else 128241900Smckusick return (0); 128341900Smckusick } 128440117Smckusick 128541900Smckusick /* 128641900Smckusick * Lock a socket against others. 128741900Smckusick * Necessary for STREAM sockets to ensure you get an entire rpc request/reply 128841900Smckusick * and also to avoid race conditions between the processes with nfs requests 128941900Smckusick * in progress when a reconnect is necessary. 129041900Smckusick */ 1291*43351Smckusick nfs_solock(flagp) 1292*43351Smckusick register int *flagp; 129338414Smckusick { 129440117Smckusick 129541900Smckusick while (*flagp & NFSMNT_SCKLOCK) { 129641900Smckusick *flagp |= NFSMNT_WANTSCK; 1297*43351Smckusick (void) tsleep((caddr_t)flagp, PZERO-1, "nfsolck", 0); 129840117Smckusick } 129941900Smckusick *flagp |= NFSMNT_SCKLOCK; 130041900Smckusick } 130140117Smckusick 130241900Smckusick /* 130341900Smckusick * Unlock the stream socket for others. 130441900Smckusick */ 130541900Smckusick nfs_sounlock(flagp) 1306*43351Smckusick register int *flagp; 130741900Smckusick { 130841900Smckusick 130941900Smckusick if ((*flagp & NFSMNT_SCKLOCK) == 0) 131041900Smckusick panic("nfs sounlock"); 131141900Smckusick *flagp &= ~NFSMNT_SCKLOCK; 131241900Smckusick if (*flagp & NFSMNT_WANTSCK) { 131341900Smckusick *flagp &= ~NFSMNT_WANTSCK; 131441900Smckusick wakeup((caddr_t)flagp); 131540117Smckusick } 131638414Smckusick } 131741900Smckusick 131841900Smckusick /* 131941900Smckusick * This function compares two net addresses by family and returns TRUE 132041900Smckusick * if they are the same. 132141900Smckusick * If there is any doubt, return FALSE. 
132241900Smckusick */ 132341900Smckusick nfs_netaddr_match(nam1, nam2) 132441900Smckusick struct mbuf *nam1, *nam2; 132541900Smckusick { 132641900Smckusick register struct sockaddr *saddr1, *saddr2; 132741900Smckusick 132841900Smckusick saddr1 = mtod(nam1, struct sockaddr *); 132941900Smckusick saddr2 = mtod(nam2, struct sockaddr *); 133041900Smckusick if (saddr1->sa_family != saddr2->sa_family) 133141900Smckusick return (0); 133241900Smckusick 133341900Smckusick /* 133441900Smckusick * Must do each address family separately since unused fields 133541900Smckusick * are undefined values and not always zeroed. 133641900Smckusick */ 133741900Smckusick switch (saddr1->sa_family) { 133841900Smckusick case AF_INET: 133941900Smckusick if (((struct sockaddr_in *)saddr1)->sin_addr.s_addr == 134041900Smckusick ((struct sockaddr_in *)saddr2)->sin_addr.s_addr) 134141900Smckusick return (1); 134241900Smckusick break; 134341900Smckusick default: 134441900Smckusick break; 134541900Smckusick }; 134641900Smckusick return (0); 134741900Smckusick } 134841900Smckusick 134941900Smckusick /* 135041900Smckusick * Check the hostname fields for nfsd's mask and match fields. 
135141900Smckusick * By address family: 135241900Smckusick * - Bitwise AND the mask with the host address field 135341900Smckusick * - Compare for == with match 135441900Smckusick * return TRUE if not equal 135541900Smckusick */ 135641900Smckusick nfs_badnam(nam, msk, mtch) 135741900Smckusick register struct mbuf *nam, *msk, *mtch; 135841900Smckusick { 135941900Smckusick switch (mtod(nam, struct sockaddr *)->sa_family) { 136041900Smckusick case AF_INET: 136141900Smckusick return ((mtod(nam, struct sockaddr_in *)->sin_addr.s_addr & 136241900Smckusick mtod(msk, struct sockaddr_in *)->sin_addr.s_addr) != 136341900Smckusick mtod(mtch, struct sockaddr_in *)->sin_addr.s_addr); 136441900Smckusick default: 136541900Smckusick printf("nfs_badmatch, unknown sa_family\n"); 136641900Smckusick return (0); 136741900Smckusick }; 136841900Smckusick } 1369