138414Smckusick /* 238414Smckusick * Copyright (c) 1989 The Regents of the University of California. 338414Smckusick * All rights reserved. 438414Smckusick * 538414Smckusick * This code is derived from software contributed to Berkeley by 638414Smckusick * Rick Macklem at The University of Guelph. 738414Smckusick * 838414Smckusick * Redistribution and use in source and binary forms are permitted 938414Smckusick * provided that the above copyright notice and this paragraph are 1038414Smckusick * duplicated in all such forms and that any documentation, 1138414Smckusick * advertising materials, and other materials related to such 1238414Smckusick * distribution and use acknowledge that the software was developed 1338414Smckusick * by the University of California, Berkeley. The name of the 1438414Smckusick * University may not be used to endorse or promote products derived 1538414Smckusick * from this software without specific prior written permission. 1638414Smckusick * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 1738414Smckusick * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 1838414Smckusick * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 1938414Smckusick * 20*43062Smckusick * @(#)nfs_socket.c 7.16 (Berkeley) 06/08/90 2138414Smckusick */ 2238414Smckusick 2338414Smckusick /* 2441900Smckusick * Socket operations for use by nfs 2538414Smckusick */ 2638414Smckusick 2738414Smckusick #include "types.h" 2838414Smckusick #include "param.h" 2938414Smckusick #include "uio.h" 3038414Smckusick #include "user.h" 3140117Smckusick #include "proc.h" 3240117Smckusick #include "signal.h" 3338414Smckusick #include "mount.h" 3438414Smckusick #include "kernel.h" 3538414Smckusick #include "malloc.h" 3638414Smckusick #include "mbuf.h" 3738414Smckusick #include "vnode.h" 3838414Smckusick #include "domain.h" 3938414Smckusick #include "protosw.h" 4038414Smckusick #include "socket.h" 4138414Smckusick #include "socketvar.h" 4242877Smckusick #include "../netinet/in.h" 4342877Smckusick #include "../netinet/tcp.h" 4438414Smckusick #include "rpcv2.h" 4538414Smckusick #include "nfsv2.h" 4638414Smckusick #include "nfs.h" 4738414Smckusick #include "xdr_subs.h" 4838414Smckusick #include "nfsm_subs.h" 4938414Smckusick #include "nfsmount.h" 5038414Smckusick 5140117Smckusick #include "syslog.h" 5240117Smckusick 5338414Smckusick #define TRUE 1 5438414Smckusick 5540117Smckusick /* 5638414Smckusick * External data, mostly RPC constants in XDR form 5738414Smckusick */ 5838414Smckusick extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix, 5938414Smckusick rpc_msgaccepted, rpc_call; 6038414Smckusick extern u_long nfs_prog, nfs_vers; 6141900Smckusick extern int nonidempotent[NFS_NPROCS]; 6241900Smckusick int nfs_sbwait(); 6341900Smckusick void nfs_disconnect(); 6441900Smckusick 6538414Smckusick int nfsrv_null(), 6638414Smckusick nfsrv_getattr(), 6738414Smckusick nfsrv_setattr(), 6838414Smckusick nfsrv_lookup(), 6938414Smckusick nfsrv_readlink(), 7038414Smckusick nfsrv_read(), 7138414Smckusick nfsrv_write(), 7238414Smckusick nfsrv_create(), 7338414Smckusick nfsrv_remove(), 7438414Smckusick nfsrv_rename(), 7538414Smckusick nfsrv_link(), 7638414Smckusick nfsrv_symlink(), 7738414Smckusick nfsrv_mkdir(), 7838414Smckusick nfsrv_rmdir(), 7938414Smckusick nfsrv_readdir(), 8038414Smckusick nfsrv_statfs(), 8138414Smckusick nfsrv_noop(); 8238414Smckusick 8338414Smckusick int (*nfsrv_procs[NFS_NPROCS])() = { 8438414Smckusick nfsrv_null, 8538414Smckusick nfsrv_getattr, 8638414Smckusick nfsrv_setattr, 8738414Smckusick nfsrv_noop, 8838414Smckusick nfsrv_lookup, 8938414Smckusick nfsrv_readlink, 9038414Smckusick nfsrv_read, 9138414Smckusick nfsrv_noop, 9238414Smckusick nfsrv_write, 9338414Smckusick nfsrv_create, 9438414Smckusick nfsrv_remove, 9538414Smckusick nfsrv_rename, 9638414Smckusick nfsrv_link, 9738414Smckusick nfsrv_symlink, 9838414Smckusick nfsrv_mkdir, 9938414Smckusick nfsrv_rmdir, 10038414Smckusick nfsrv_readdir, 10138414Smckusick nfsrv_statfs, 10238414Smckusick }; 10338414Smckusick 10440117Smckusick struct nfsreq nfsreqh; 10540117Smckusick int nfsrexmtthresh = NFS_FISHY; 10641900Smckusick int nfs_tcpnodelay = 1; 10738414Smckusick 10838414Smckusick /* 10941900Smckusick * Initialize sockets and congestion for a new NFS connection. 11040117Smckusick * We do not free the sockaddr if error. 11138414Smckusick */ 11241900Smckusick nfs_connect(nmp) 11340117Smckusick register struct nfsmount *nmp; 11440117Smckusick { 11541900Smckusick register struct socket *so; 11641900Smckusick int s, error; 11740117Smckusick struct mbuf *m; 11840117Smckusick 11941900Smckusick nmp->nm_so = (struct socket *)0; 12041900Smckusick if (error = socreate(mtod(nmp->nm_nam, struct sockaddr *)->sa_family, 12141900Smckusick &nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto)) 12240117Smckusick goto bad; 12341900Smckusick so = nmp->nm_so; 12441900Smckusick nmp->nm_soflags = so->so_proto->pr_flags; 12540117Smckusick 12641900Smckusick /* 12741900Smckusick * Protocols that do not require connections may be optionally left 12841900Smckusick * unconnected for servers that reply from a port other than NFS_PORT. 12941900Smckusick */ 13041900Smckusick if (nmp->nm_flag & NFSMNT_NOCONN) { 13141900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) { 13241900Smckusick error = ENOTCONN; 13340117Smckusick goto bad; 13440117Smckusick } 13541900Smckusick } else { 13641900Smckusick if (error = soconnect(so, nmp->nm_nam)) 13740117Smckusick goto bad; 13841900Smckusick 13941900Smckusick /* 14041900Smckusick * Wait for the connection to complete. Cribbed from the 14141900Smckusick * connect system call but with the wait at negative prio. 14241900Smckusick */ 14341900Smckusick s = splnet(); 14441900Smckusick while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) 14541900Smckusick sleep((caddr_t)&so->so_timeo, PZERO-2); 14641900Smckusick splx(s); 14741900Smckusick if (so->so_error) { 14841900Smckusick error = so->so_error; 14941900Smckusick goto bad; 15041900Smckusick } 15140117Smckusick } 15241900Smckusick if (nmp->nm_sotype == SOCK_DGRAM) { 15341900Smckusick if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) { 15441900Smckusick so->so_rcv.sb_timeo = (5 * hz); 15541900Smckusick so->so_snd.sb_timeo = (5 * hz); 15641900Smckusick } else { 15741900Smckusick so->so_rcv.sb_timeo = 0; 15841900Smckusick so->so_snd.sb_timeo = 0; 15941900Smckusick } 16041900Smckusick if (error = soreserve(so, nmp->nm_wsize + NFS_MAXPKTHDR, 16141900Smckusick (nmp->nm_rsize + NFS_MAXPKTHDR) * 4)) 16241900Smckusick goto bad; 16341900Smckusick } else { 16441900Smckusick if (nmp->nm_flag & NFSMNT_INT) { 16541900Smckusick so->so_rcv.sb_timeo = (5 * hz); 16641900Smckusick so->so_snd.sb_timeo = (5 * hz); 16741900Smckusick } else { 16841900Smckusick so->so_rcv.sb_timeo = 0; 16941900Smckusick so->so_snd.sb_timeo = 0; 17041900Smckusick } 17141900Smckusick if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 17241900Smckusick MGET(m, M_WAIT, MT_SOOPTS); 17341900Smckusick *mtod(m, int *) = 1; 17441900Smckusick m->m_len = sizeof(int); 17541900Smckusick sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m); 17641900Smckusick } 17741900Smckusick if (so->so_proto->pr_domain->dom_family == AF_INET && 17841900Smckusick so->so_proto->pr_protocol == IPPROTO_TCP && 17941900Smckusick nfs_tcpnodelay) { 18041900Smckusick MGET(m, M_WAIT, MT_SOOPTS); 18141900Smckusick *mtod(m, int *) = 1; 18241900Smckusick m->m_len = sizeof(int); 18341900Smckusick sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m); 18441900Smckusick } 18541900Smckusick if (error = soreserve(so, 18641900Smckusick (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof(u_long)) * 2, 18741900Smckusick nmp->nm_rsize + NFS_MAXPKTHDR + sizeof(u_long))) 18841900Smckusick goto bad; 18941900Smckusick } 19041900Smckusick so->so_rcv.sb_flags |= SB_NOINTR; 19141900Smckusick so->so_snd.sb_flags |= SB_NOINTR; 19240117Smckusick 19341900Smckusick /* Initialize other non-zero congestion variables */ 19441900Smckusick nmp->nm_rto = NFS_TIMEO; 19541900Smckusick nmp->nm_window = 2; /* Initial send window */ 19641900Smckusick nmp->nm_ssthresh = NFS_MAXWINDOW; /* Slowstart threshold */ 19741900Smckusick nmp->nm_rttvar = nmp->nm_rto << 1; 19841900Smckusick nmp->nm_sent = 0; 19941900Smckusick nmp->nm_currexmit = 0; 20041900Smckusick return (0); 20140117Smckusick 20241900Smckusick bad: 20341900Smckusick nfs_disconnect(nmp); 20441900Smckusick return (error); 20541900Smckusick } 20640117Smckusick 20741900Smckusick /* 20841900Smckusick * Reconnect routine: 20941900Smckusick * Called when a connection is broken on a reliable protocol. 21041900Smckusick * - clean up the old socket 21141900Smckusick * - nfs_connect() again 21241900Smckusick * - set R_MUSTRESEND for all outstanding requests on mount point 21341900Smckusick * If this fails the mount point is DEAD! 21441900Smckusick * nb: Must be called with the nfs_solock() set on the mount point. 21541900Smckusick */ 21641900Smckusick nfs_reconnect(rep, nmp) 21741900Smckusick register struct nfsreq *rep; 21841900Smckusick register struct nfsmount *nmp; 21941900Smckusick { 22041900Smckusick register struct nfsreq *rp; 22141900Smckusick int error; 22240117Smckusick 22341900Smckusick if (rep->r_procp) 22443061Smarc tprintf(rep->r_procp->p_session, 22541900Smckusick "Nfs server %s, trying reconnect\n", 22641900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 22741900Smckusick else 22843061Smarc tprintf(NULL, "Nfs server %s, trying a reconnect\n", 22941900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 23041900Smckusick while (error = nfs_connect(nmp)) { 23142243Smckusick #ifdef lint 23242243Smckusick error = error; 23342243Smckusick #endif /* lint */ 23441900Smckusick if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp)) 23541900Smckusick return (EINTR); 23641900Smckusick tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0); 23740117Smckusick } 23841900Smckusick if (rep->r_procp) 23943061Smarc tprintf(rep->r_procp->p_session, 24041900Smckusick "Nfs server %s, reconnected\n", 24141900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 24241900Smckusick else 24343061Smarc tprintf(NULL, "Nfs server %s, reconnected\n", 24441900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 24541900Smckusick 24641900Smckusick /* 24741900Smckusick * Loop through outstanding request list and fix up all requests 24841900Smckusick * on old socket. 24941900Smckusick */ 25041900Smckusick rp = nfsreqh.r_next; 25141900Smckusick while (rp != &nfsreqh) { 25241900Smckusick if (rp->r_nmp == nmp) 25341900Smckusick rp->r_flags |= R_MUSTRESEND; 25441900Smckusick rp = rp->r_next; 25540117Smckusick } 25640117Smckusick return (0); 25740117Smckusick } 25840117Smckusick 25940117Smckusick /* 26040117Smckusick * NFS disconnect. Clean up and unlink. 26140117Smckusick */ 26241900Smckusick void 26340117Smckusick nfs_disconnect(nmp) 26440117Smckusick register struct nfsmount *nmp; 26540117Smckusick { 26641900Smckusick register struct socket *so; 26740117Smckusick 26841900Smckusick if (nmp->nm_so) { 26941900Smckusick so = nmp->nm_so; 27041900Smckusick nmp->nm_so = (struct socket *)0; 27141900Smckusick soshutdown(so, 2); 27241900Smckusick soclose(so); 27340117Smckusick } 27440117Smckusick } 27540117Smckusick 27640117Smckusick /* 27741900Smckusick * This is the nfs send routine. For connection based socket types, it 27841900Smckusick * must be called with an nfs_solock() on the socket. 27941900Smckusick * "rep == NULL" indicates that it has been called from a server. 28040117Smckusick */ 28141900Smckusick nfs_send(so, nam, top, rep) 28238414Smckusick register struct socket *so; 28338414Smckusick struct mbuf *nam; 28441900Smckusick register struct mbuf *top; 28541900Smckusick struct nfsreq *rep; 28638414Smckusick { 28741900Smckusick struct mbuf *sendnam; 28841900Smckusick int error, soflags; 28938414Smckusick 29041900Smckusick if (rep) { 29141900Smckusick if (rep->r_flags & R_SOFTTERM) { 29240117Smckusick m_freem(top); 29341900Smckusick return (EINTR); 29440117Smckusick } 295*43062Smckusick if (rep->r_nmp->nm_so == NULL && 29641900Smckusick (error = nfs_reconnect(rep, rep->r_nmp))) 29741900Smckusick return (error); 29841900Smckusick rep->r_flags &= ~R_MUSTRESEND; 299*43062Smckusick so = rep->r_nmp->nm_so; 30041900Smckusick soflags = rep->r_nmp->nm_soflags; 30141900Smckusick } else 30241900Smckusick soflags = so->so_proto->pr_flags; 30341900Smckusick if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED)) 30441900Smckusick sendnam = (struct mbuf *)0; 30541900Smckusick else 30641900Smckusick sendnam = nam; 30741900Smckusick 30841900Smckusick error = sosend(so, sendnam, (struct uio *)0, top, 30941900Smckusick (struct mbuf *)0, 0); 31041900Smckusick if (error == EWOULDBLOCK && rep) { 31141900Smckusick if (rep->r_flags & R_SOFTTERM) 31241900Smckusick error = EINTR; 31341900Smckusick else { 31441900Smckusick rep->r_flags |= R_MUSTRESEND; 31541900Smckusick error = 0; 31640117Smckusick } 31738414Smckusick } 31841900Smckusick /* 31941900Smckusick * Ignore socket errors?? 32041900Smckusick */ 32141900Smckusick if (error && error != EINTR && error != ERESTART) 32241900Smckusick error = 0; 32338414Smckusick return (error); 32438414Smckusick } 32538414Smckusick 32638414Smckusick /* 32741900Smckusick * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all 32841900Smckusick * done by soreceive(), but for SOCK_STREAM we must deal with the Record 32941900Smckusick * Mark and consolidate the data into a new mbuf list. 33041900Smckusick * nb: Sometimes TCP passes the data up to soreceive() in long lists of 33141900Smckusick * small mbufs. 33241900Smckusick * For SOCK_STREAM we must be very careful to read an entire record once 33341900Smckusick * we have read any of it, even if the system call has been interrupted. 33438414Smckusick */ 33541900Smckusick nfs_receive(so, aname, mp, rep) 33638414Smckusick register struct socket *so; 33738414Smckusick struct mbuf **aname; 33838414Smckusick struct mbuf **mp; 33941900Smckusick register struct nfsreq *rep; 34038414Smckusick { 34141900Smckusick struct uio auio; 34241900Smckusick struct iovec aio; 34338414Smckusick register struct mbuf *m; 34441900Smckusick struct mbuf *m2, *m3, *mnew, **mbp; 34541900Smckusick caddr_t fcp, tcp; 34641900Smckusick u_long len; 34741900Smckusick struct mbuf **getnam; 34841900Smckusick int error, siz, mlen, soflags, rcvflg = MSG_WAITALL; 34938414Smckusick 35041900Smckusick /* 35141900Smckusick * Set up arguments for soreceive() 35241900Smckusick */ 35341900Smckusick *mp = (struct mbuf *)0; 35441900Smckusick *aname = (struct mbuf *)0; 35541900Smckusick if (rep) 35641900Smckusick soflags = rep->r_nmp->nm_soflags; 35741900Smckusick else 35841900Smckusick soflags = so->so_proto->pr_flags; 35938414Smckusick 36041900Smckusick /* 36141900Smckusick * For reliable protocols, lock against other senders/receivers 36241900Smckusick * in case a reconnect is necessary. 36341900Smckusick * For SOCK_STREAM, first get the Record Mark to find out how much 36441900Smckusick * more there is to get. 36541900Smckusick * We must lock the socket against other receivers 36641900Smckusick * until we have an entire rpc request/reply. 36741900Smckusick */ 36841900Smckusick if (soflags & PR_CONNREQUIRED) { 36941900Smckusick tryagain: 37041900Smckusick /* 37141900Smckusick * Check for fatal errors and resending request. 37241900Smckusick */ 37341900Smckusick if (rep) { 37441900Smckusick /* 37541900Smckusick * Ugh: If a reconnect attempt just happened, nm_so 37641900Smckusick * would have changed. NULL indicates a failed 37741900Smckusick * attempt that has essentially shut down this 37841900Smckusick * mount point. 37941900Smckusick */ 38041900Smckusick if (rep->r_mrep || (so = rep->r_nmp->nm_so) == NULL || 38141900Smckusick (rep->r_flags & R_SOFTTERM)) 38241900Smckusick return (EINTR); 38341900Smckusick while (rep->r_flags & R_MUSTRESEND) { 38441900Smckusick m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); 38541900Smckusick nfsstats.rpcretries++; 38641900Smckusick if (error = nfs_send(so, rep->r_nmp->nm_nam, m, 38741900Smckusick rep)) 38841900Smckusick goto errout; 38940117Smckusick } 39041900Smckusick } 39141900Smckusick if ((soflags & PR_ATOMIC) == 0) { 39241900Smckusick aio.iov_base = (caddr_t) &len; 39341900Smckusick aio.iov_len = sizeof(u_long); 39441900Smckusick auio.uio_iov = &aio; 39541900Smckusick auio.uio_iovcnt = 1; 39641900Smckusick auio.uio_segflg = UIO_SYSSPACE; 39741900Smckusick auio.uio_rw = UIO_READ; 39841900Smckusick auio.uio_offset = 0; 39941900Smckusick auio.uio_resid = sizeof(u_long); 40041900Smckusick do { 40141900Smckusick error = soreceive(so, (struct mbuf **)0, &auio, 40241900Smckusick (struct mbuf **)0, (struct mbuf **)0, &rcvflg); 40341900Smckusick if (error == EWOULDBLOCK && rep) { 40441900Smckusick if (rep->r_flags & R_SOFTTERM) 40541900Smckusick return (EINTR); 40641900Smckusick if (rep->r_flags & R_MUSTRESEND) 40741900Smckusick goto tryagain; 40841900Smckusick } 40941900Smckusick } while (error == EWOULDBLOCK); 41041900Smckusick if (!error && auio.uio_resid > 0) 41141900Smckusick error = EPIPE; 41240761Skarels if (error) 41341900Smckusick goto errout; 41441900Smckusick len = ntohl(len) & ~0x80000000; 41541900Smckusick /* 41641900Smckusick * This is SERIOUS! We are out of sync with the sender 41741900Smckusick * and forcing a disconnect/reconnect is all I can do. 41841900Smckusick */ 41941900Smckusick if (len > NFS_MAXPACKET) { 42041900Smckusick error = EFBIG; 42141900Smckusick goto errout; 42241900Smckusick } 42341900Smckusick auio.uio_resid = len; 42441900Smckusick do { 42541900Smckusick error = soreceive(so, (struct mbuf **)0, 42641900Smckusick &auio, mp, (struct mbuf **)0, &rcvflg); 42741900Smckusick } while (error == EWOULDBLOCK || error == EINTR || 42841900Smckusick error == ERESTART); 42941900Smckusick if (!error && auio.uio_resid > 0) 43041900Smckusick error = EPIPE; 43140117Smckusick } else { 43241900Smckusick auio.uio_resid = len = 1000000; /* Anything Big */ 43341900Smckusick do { 43441900Smckusick error = soreceive(so, (struct mbuf **)0, 43541900Smckusick &auio, mp, (struct mbuf **)0, &rcvflg); 43641900Smckusick if (error == EWOULDBLOCK && rep) { 43741900Smckusick if (rep->r_flags & R_SOFTTERM) 43841900Smckusick return (EINTR); 43941900Smckusick if (rep->r_flags & R_MUSTRESEND) 44041900Smckusick goto tryagain; 44141900Smckusick } 44241900Smckusick } while (error == EWOULDBLOCK); 44341900Smckusick if (!error && *mp == NULL) 44441900Smckusick error = EPIPE; 44541900Smckusick len -= auio.uio_resid; 44640117Smckusick } 44741900Smckusick errout: 44841900Smckusick if (error && rep && error != EINTR && error != ERESTART) { 44941900Smckusick m_freem(*mp); 45041900Smckusick *mp = (struct mbuf *)0; 45141900Smckusick nfs_disconnect(rep->r_nmp); 45241900Smckusick error = nfs_reconnect(rep, rep->r_nmp); 45341900Smckusick if (!error) 45441900Smckusick goto tryagain; 45540117Smckusick } 45641900Smckusick } else { 45741900Smckusick if (so->so_state & SS_ISCONNECTED) 45841900Smckusick getnam = (struct mbuf **)0; 45941900Smckusick else 46041900Smckusick getnam = aname; 46141900Smckusick auio.uio_resid = len = 1000000; 46241900Smckusick do { 46341900Smckusick error = soreceive(so, getnam, &auio, mp, 46441900Smckusick (struct mbuf **)0, &rcvflg); 46541900Smckusick if (error == EWOULDBLOCK && rep && 46641900Smckusick (rep->r_flags & R_SOFTTERM)) 46741900Smckusick return (EINTR); 46841900Smckusick } while (error == EWOULDBLOCK); 46941900Smckusick len -= auio.uio_resid; 47041900Smckusick } 47141900Smckusick if (error) { 47241900Smckusick m_freem(*mp); 47341900Smckusick *mp = (struct mbuf *)0; 47441900Smckusick } 47541900Smckusick /* 47641900Smckusick * Search for any mbufs that are not a multiple of 4 bytes long. 47741900Smckusick * These could cause pointer alignment problems, so copy them to 47841900Smckusick * well aligned mbufs. 47941900Smckusick */ 48041900Smckusick m = *mp; 48141900Smckusick mbp = mp; 48241900Smckusick while (m) { 48341900Smckusick /* 48441900Smckusick * All this for something that may never happen. 48541900Smckusick */ 48641900Smckusick if (m->m_len & 0x3) { 48741900Smckusick printf("nfs_rcv odd length!\n"); 48841900Smckusick fcp = mtod(m, caddr_t); 48941900Smckusick mnew = m2 = (struct mbuf *)0; 49042243Smckusick #ifdef lint 49142243Smckusick m3 = (struct mbuf *)0; 49242243Smckusick mlen = 0; 49342243Smckusick #endif /* lint */ 49441900Smckusick while (m) { 49541900Smckusick if (m2 == NULL || mlen == 0) { 49641900Smckusick MGET(m2, M_WAIT, MT_DATA); 49741900Smckusick if (len > MINCLSIZE) 49841900Smckusick MCLGET(m2, M_WAIT); 49941900Smckusick m2->m_len = 0; 50041900Smckusick mlen = M_TRAILINGSPACE(m2); 50141900Smckusick tcp = mtod(m2, caddr_t); 50241900Smckusick if (mnew) { 50341900Smckusick m3->m_next = m2; 50441900Smckusick m3 = m2; 50541900Smckusick } else 50641900Smckusick mnew = m3 = m2; 50741900Smckusick } 50841900Smckusick siz = (mlen > m->m_len) ? m->m_len : mlen; 50941900Smckusick bcopy(fcp, tcp, siz); 51041900Smckusick m2->m_len += siz; 51141900Smckusick mlen -= siz; 51241900Smckusick len -= siz; 51341900Smckusick tcp += siz; 51441900Smckusick m->m_len -= siz; 51541900Smckusick fcp += siz; 51641900Smckusick if (m->m_len == 0) { 51741900Smckusick do { 51841900Smckusick m = m->m_next; 51941900Smckusick } while (m && m->m_len == 0); 52041900Smckusick if (m) 52141900Smckusick fcp = mtod(m, caddr_t); 52241900Smckusick } 52341900Smckusick } 52441900Smckusick m = *mbp; 52541900Smckusick *mbp = mnew; 52641900Smckusick m_freem(m); 52741900Smckusick break; 52840117Smckusick } 52941900Smckusick len -= m->m_len; 53041900Smckusick mbp = &m->m_next; 53141900Smckusick m = m->m_next; 53238414Smckusick } 53338414Smckusick return (error); 53438414Smckusick } 53538414Smckusick 53638414Smckusick struct rpc_replyhead { 53738414Smckusick u_long r_xid; 53838414Smckusick u_long r_rep; 53938414Smckusick }; 54038414Smckusick 54138414Smckusick /* 54241900Smckusick * Implement receipt of reply on a socket. 54338414Smckusick * We must search through the list of received datagrams matching them 54438414Smckusick * with outstanding requests using the xid, until ours is found. 54538414Smckusick */ 54641900Smckusick /* ARGSUSED */ 54741900Smckusick nfs_reply(nmp, myrep) 54841900Smckusick struct nfsmount *nmp; 54939344Smckusick struct nfsreq *myrep; 55038414Smckusick { 55138414Smckusick register struct mbuf *m; 55238414Smckusick register struct nfsreq *rep; 55341900Smckusick register int error = 0; 55438414Smckusick struct rpc_replyhead replyh; 55541900Smckusick struct mbuf *mp, *nam; 55641900Smckusick char *cp; 55741900Smckusick int cnt, xfer; 55838414Smckusick 55941900Smckusick /* 56041900Smckusick * Loop around until we get our own reply 56141900Smckusick */ 56241900Smckusick for (;;) { 56341900Smckusick /* 56441900Smckusick * Lock against other receivers so that I don't get stuck in 56541900Smckusick * sbwait() after someone else has received my reply for me. 56641900Smckusick * Also necessary for connection based protocols to avoid 56741900Smckusick * race conditions during a reconnect. 56841900Smckusick */ 56941900Smckusick nfs_solock(&nmp->nm_flag, 1); 57041900Smckusick /* Already received, bye bye */ 57141900Smckusick if (myrep->r_mrep != NULL) { 57241900Smckusick nfs_sounlock(&nmp->nm_flag); 57341900Smckusick return (0); 57440117Smckusick } 57541900Smckusick /* 57641900Smckusick * Get the next Rpc reply off the socket 57741900Smckusick */ 57841900Smckusick if (error = nfs_receive(nmp->nm_so, &nam, &mp, myrep)) { 57941900Smckusick nfs_sounlock(&nmp->nm_flag); 58038414Smckusick 58141900Smckusick /* 58241900Smckusick * Ignore routing errors on connectionless protocols?? 58341900Smckusick */ 58441900Smckusick if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) { 58541900Smckusick nmp->nm_so->so_error = 0; 58641900Smckusick continue; 58741900Smckusick } 58841900Smckusick 58941900Smckusick /* 59041900Smckusick * Otherwise cleanup and return a fatal error. 59141900Smckusick */ 59241900Smckusick if (myrep->r_flags & R_TIMING) { 59341900Smckusick myrep->r_flags &= ~R_TIMING; 59441900Smckusick nmp->nm_rtt = -1; 59541900Smckusick } 59641900Smckusick if (myrep->r_flags & R_SENT) { 59741900Smckusick myrep->r_flags &= ~R_SENT; 59841900Smckusick nmp->nm_sent--; 59941900Smckusick } 60041900Smckusick return (error); 60138414Smckusick } 60241900Smckusick 60341900Smckusick /* 60441900Smckusick * Get the xid and check that it is an rpc reply 60541900Smckusick */ 60641900Smckusick m = mp; 60741900Smckusick if (m->m_len >= 2*NFSX_UNSIGNED) 60841900Smckusick bcopy(mtod(m, caddr_t), (caddr_t)&replyh, 60941900Smckusick 2*NFSX_UNSIGNED); 61041900Smckusick else { 61141900Smckusick cnt = 2*NFSX_UNSIGNED; 61241900Smckusick cp = (caddr_t)&replyh; 61341900Smckusick while (m && cnt > 0) { 61441900Smckusick if (m->m_len > 0) { 61541900Smckusick xfer = (m->m_len >= cnt) ? cnt : 61641900Smckusick m->m_len; 61741900Smckusick bcopy(mtod(m, caddr_t), cp, xfer); 61841900Smckusick cnt -= xfer; 61941900Smckusick cp += xfer; 62041900Smckusick } 62141900Smckusick if (cnt > 0) 62241900Smckusick m = m->m_next; 62341900Smckusick } 62440117Smckusick } 62541900Smckusick if (replyh.r_rep != rpc_reply || m == NULL) { 62640117Smckusick nfsstats.rpcinvalid++; 62741900Smckusick m_freem(mp); 62841900Smckusick nfs_sounlock(&nmp->nm_flag); 62941900Smckusick continue; 63038414Smckusick } 63141900Smckusick /* 63241900Smckusick * Loop through the request list to match up the reply 63341900Smckusick * Iff no match, just drop the datagram 63441900Smckusick */ 63541900Smckusick m = mp; 63641900Smckusick rep = nfsreqh.r_next; 63741900Smckusick while (rep != &nfsreqh) { 63841900Smckusick if (rep->r_mrep == NULL && replyh.r_xid == rep->r_xid) { 63941900Smckusick /* Found it.. */ 64041900Smckusick rep->r_mrep = m; 64141900Smckusick /* 64241900Smckusick * Update timing 64341900Smckusick */ 64441900Smckusick if (rep->r_flags & R_TIMING) { 64541900Smckusick nfs_updatetimer(rep->r_nmp); 64641900Smckusick rep->r_flags &= ~R_TIMING; 64741900Smckusick rep->r_nmp->nm_rtt = -1; 64841900Smckusick } 64941900Smckusick if (rep->r_flags & R_SENT) { 65041900Smckusick rep->r_flags &= ~R_SENT; 65141900Smckusick rep->r_nmp->nm_sent--; 65241900Smckusick } 65340117Smckusick break; 65438414Smckusick } 65541900Smckusick rep = rep->r_next; 65638414Smckusick } 65741900Smckusick nfs_sounlock(&nmp->nm_flag); 65841900Smckusick if (nam) 65941900Smckusick m_freem(nam); 66041900Smckusick /* 66141900Smckusick * If not matched to a request, drop it. 66241900Smckusick * If it's mine, get out. 66341900Smckusick */ 66441900Smckusick if (rep == &nfsreqh) { 66541900Smckusick nfsstats.rpcunexpected++; 66641900Smckusick m_freem(m); 66741900Smckusick } else if (rep == myrep) 66841900Smckusick return (0); 66938414Smckusick } 67038414Smckusick } 67138414Smckusick 67238414Smckusick /* 67338414Smckusick * nfs_request - goes something like this 67438414Smckusick * - fill in request struct 67538414Smckusick * - links it into list 67641900Smckusick * - calls nfs_send() for first transmit 67741900Smckusick * - calls nfs_receive() to get reply 67838414Smckusick * - break down rpc header and return with nfs reply pointed to 67938414Smckusick * by mrep or error 68038414Smckusick * nb: always frees up mreq mbuf list 68138414Smckusick */ 68241900Smckusick nfs_request(vp, mreq, xid, procnum, procp, mp, mrp, mdp, dposp) 68338414Smckusick struct vnode *vp; 68438414Smckusick struct mbuf *mreq; 68538414Smckusick u_long xid; 68641900Smckusick int procnum; 68741900Smckusick struct proc *procp; 68838414Smckusick struct mount *mp; 68938414Smckusick struct mbuf **mrp; 69038414Smckusick struct mbuf **mdp; 69138414Smckusick caddr_t *dposp; 69238414Smckusick { 69338414Smckusick register struct mbuf *m, *mrep; 69438414Smckusick register struct nfsreq *rep; 69538414Smckusick register u_long *p; 69638414Smckusick register int len; 69741900Smckusick struct nfsmount *nmp; 69838414Smckusick struct mbuf *md; 69939344Smckusick struct nfsreq *reph; 70038414Smckusick caddr_t dpos; 70138414Smckusick char *cp2; 70238414Smckusick int t1; 70338414Smckusick int s; 70441900Smckusick int error = 0; 70538414Smckusick 70641900Smckusick nmp = VFSTONFS(mp); 70738414Smckusick m = mreq; 70838414Smckusick MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK); 70938414Smckusick rep->r_xid = xid; 71041900Smckusick rep->r_nmp = nmp; 71138414Smckusick rep->r_vp = vp; 71241900Smckusick rep->r_procp = procp; 71341900Smckusick if (nmp->nm_flag & NFSMNT_SOFT) 71441900Smckusick rep->r_retry = nmp->nm_retry; 71538414Smckusick else 71640117Smckusick rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */ 71740117Smckusick rep->r_flags = rep->r_rexmit = 0; 71841900Smckusick /* 71941900Smckusick * Three cases: 72041900Smckusick * - non-idempotent requests on SOCK_DGRAM use NFS_MINIDEMTIMEO 72141900Smckusick * - idempotent requests on SOCK_DGRAM use 0 72241900Smckusick * - Reliable transports, NFS_RELIABLETIMEO 72341900Smckusick * Timeouts are still done on reliable transports to ensure detection 72441900Smckusick * of connection loss. 72541900Smckusick */ 72641900Smckusick if (nmp->nm_sotype != SOCK_DGRAM) 72741900Smckusick rep->r_timerinit = -NFS_RELIABLETIMEO; 72841900Smckusick else if (nonidempotent[procnum]) 72941900Smckusick rep->r_timerinit = -NFS_MINIDEMTIMEO; 73041900Smckusick else 73141900Smckusick rep->r_timerinit = 0; 73241900Smckusick rep->r_timer = rep->r_timerinit; 73338414Smckusick rep->r_mrep = NULL; 73438414Smckusick len = 0; 73538414Smckusick while (m) { 73638414Smckusick len += m->m_len; 73738414Smckusick m = m->m_next; 73838414Smckusick } 73941900Smckusick mreq->m_pkthdr.len = len; 74041900Smckusick mreq->m_pkthdr.rcvif = (struct ifnet *)0; 74141900Smckusick /* 74241900Smckusick * For non-atomic protocols, insert a Sun RPC Record Mark. 74341900Smckusick */ 74441900Smckusick if ((nmp->nm_soflags & PR_ATOMIC) == 0) { 74541900Smckusick M_PREPEND(mreq, sizeof(u_long), M_WAIT); 74641900Smckusick *mtod(mreq, u_long *) = htonl(0x80000000 | len); 74741900Smckusick } 74841900Smckusick rep->r_mreq = mreq; 74938414Smckusick 75040117Smckusick /* 75140117Smckusick * Do the client side RPC. 75240117Smckusick */ 75340117Smckusick nfsstats.rpcrequests++; 75441900Smckusick /* 75541900Smckusick * Chain request into list of outstanding requests. Be sure 75641900Smckusick * to put it LAST so timer finds oldest requests first. 75741900Smckusick */ 75840117Smckusick s = splnet(); 75939344Smckusick reph = &nfsreqh; 76041900Smckusick reph->r_prev->r_next = rep; 76141900Smckusick rep->r_prev = reph->r_prev; 76239344Smckusick reph->r_prev = rep; 76339344Smckusick rep->r_next = reph; 76440117Smckusick /* 76540117Smckusick * If backing off another request or avoiding congestion, don't 76640117Smckusick * send this one now but let timer do it. If not timing a request, 76740117Smckusick * do it now. 76840117Smckusick */ 76941900Smckusick if (nmp->nm_sent <= 0 || nmp->nm_sotype != SOCK_DGRAM || 77041900Smckusick (nmp->nm_currexmit == 0 && nmp->nm_sent < nmp->nm_window)) { 77141900Smckusick nmp->nm_sent++; 77241900Smckusick rep->r_flags |= R_SENT; 77341900Smckusick if (nmp->nm_rtt == -1) { 77441900Smckusick nmp->nm_rtt = 0; 77541900Smckusick rep->r_flags |= R_TIMING; 77641900Smckusick } 77740117Smckusick splx(s); 77841900Smckusick m = m_copym(mreq, 0, M_COPYALL, M_WAIT); 77941900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) 78041900Smckusick nfs_solock(&nmp->nm_flag, 1); 78141900Smckusick error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep); 78241900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) 78341900Smckusick nfs_sounlock(&nmp->nm_flag); 78441900Smckusick if (error && NFSIGNORE_SOERROR(nmp->nm_soflags, error)) 78541900Smckusick nmp->nm_so->so_error = error = 0; 78641900Smckusick } else 78741900Smckusick splx(s); 78838414Smckusick 78938414Smckusick /* 79040117Smckusick * Wait for the reply from our send or the timer's. 79140117Smckusick */ 79241900Smckusick if (!error) 79341900Smckusick error = nfs_reply(nmp, rep); 79438414Smckusick 79540117Smckusick /* 79640117Smckusick * RPC done, unlink the request. 79740117Smckusick */ 79838414Smckusick s = splnet(); 79938414Smckusick rep->r_prev->r_next = rep->r_next; 80039344Smckusick rep->r_next->r_prev = rep->r_prev; 80138414Smckusick splx(s); 80241900Smckusick 80341900Smckusick /* 80441900Smckusick * If there was a successful reply and a tprintf msg. 80541900Smckusick * tprintf a response. 80641900Smckusick */ 80741900Smckusick if (!error && (rep->r_flags & R_TPRINTFMSG)) { 80841900Smckusick if (rep->r_procp) 80943061Smarc tprintf(rep->r_procp->p_session, 81041900Smckusick "Nfs server %s, is alive again\n", 81141900Smckusick rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 81241900Smckusick else 81343061Smarc tprintf(NULL, "Nfs server %s, is alive again\n", 81441900Smckusick rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 81541900Smckusick } 81638414Smckusick m_freem(rep->r_mreq); 81738414Smckusick mrep = md = rep->r_mrep; 81838414Smckusick FREE((caddr_t)rep, M_NFSREQ); 81938414Smckusick if (error) 82038414Smckusick return (error); 82138414Smckusick 82238414Smckusick /* 82338414Smckusick * break down the rpc header and check if ok 82438414Smckusick */ 82538414Smckusick dpos = mtod(md, caddr_t); 82638414Smckusick nfsm_disect(p, u_long *, 5*NFSX_UNSIGNED); 82738414Smckusick p += 2; 82838414Smckusick if (*p++ == rpc_msgdenied) { 82938414Smckusick if (*p == rpc_mismatch) 83038414Smckusick error = EOPNOTSUPP; 83138414Smckusick else 83238414Smckusick error = EACCES; 83338414Smckusick m_freem(mrep); 83438414Smckusick return (error); 83538414Smckusick } 83638414Smckusick /* 83738414Smckusick * skip over the auth_verf, someday we may want to cache auth_short's 83838414Smckusick * for nfs_reqhead(), but for now just dump it 83938414Smckusick */ 84038414Smckusick if (*++p != 0) { 84138414Smckusick len = nfsm_rndup(fxdr_unsigned(long, *p)); 84238414Smckusick nfsm_adv(len); 84338414Smckusick } 84438414Smckusick nfsm_disect(p, u_long *, NFSX_UNSIGNED); 84538414Smckusick /* 0 == ok */ 84638414Smckusick if (*p == 0) { 84738414Smckusick nfsm_disect(p, u_long *, NFSX_UNSIGNED); 84838414Smckusick if (*p != 0) { 84938414Smckusick error = fxdr_unsigned(int, *p); 85038414Smckusick m_freem(mrep); 85138414Smckusick return (error); 85238414Smckusick } 85338414Smckusick *mrp = mrep; 85438414Smckusick *mdp = md; 85538414Smckusick *dposp = dpos; 85638414Smckusick return (0); 85738414Smckusick } 85838414Smckusick m_freem(mrep); 85938414Smckusick return (EPROTONOSUPPORT); 86038414Smckusick nfsmout: 86138414Smckusick return (error); 86238414Smckusick } 86338414Smckusick 86438414Smckusick /* 86538414Smckusick * Get a request for the server main loop 86638414Smckusick * - receive a request via. nfs_soreceive() 86738414Smckusick * - verify it 86838414Smckusick * - fill in the cred struct. 86938414Smckusick */ 87042243Smckusick nfs_getreq(so, prog, vers, maxproc, nam, mrp, mdp, dposp, retxid, procnum, cr, 87141900Smckusick lockp, msk, mtch) 87238414Smckusick struct socket *so; 87338414Smckusick u_long prog; 87438414Smckusick u_long vers; 87538414Smckusick int maxproc; 87638414Smckusick struct mbuf **nam; 87738414Smckusick struct mbuf **mrp; 87838414Smckusick struct mbuf **mdp; 87938414Smckusick caddr_t *dposp; 88038414Smckusick u_long *retxid; 88142243Smckusick u_long *procnum; 88238414Smckusick register struct ucred *cr; 88341900Smckusick int *lockp; 88441900Smckusick struct mbuf *msk, *mtch; 88538414Smckusick { 88638414Smckusick register int i; 88739494Smckusick register u_long *p; 88839494Smckusick register long t1; 88939494Smckusick caddr_t dpos, cp2; 89039494Smckusick int error = 0; 89139494Smckusick struct mbuf *mrep, *md; 89239494Smckusick int len; 89338414Smckusick 89441900Smckusick if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 89541900Smckusick nfs_solock(lockp, 0); 89641900Smckusick error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0); 89741900Smckusick nfs_sounlock(lockp); 89841900Smckusick } else { 89941900Smckusick mrep = (struct mbuf *)0; 90041900Smckusick do { 90141900Smckusick if (mrep) { 90241900Smckusick m_freem(*nam); 90341900Smckusick m_freem(mrep); 90441900Smckusick } 90541900Smckusick error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0); 90641900Smckusick } while (!error && nfs_badnam(*nam, msk, mtch)); 90741900Smckusick } 90841900Smckusick if (error) 90938414Smckusick return (error); 91038414Smckusick md = mrep; 91138414Smckusick dpos = mtod(mrep, caddr_t); 91238414Smckusick nfsm_disect(p, u_long *, 10*NFSX_UNSIGNED); 91338414Smckusick *retxid = *p++; 91438414Smckusick if (*p++ != rpc_call) { 91538414Smckusick m_freem(mrep); 91638414Smckusick return (ERPCMISMATCH); 91738414Smckusick } 91838414Smckusick if (*p++ != rpc_vers) { 91938414Smckusick m_freem(mrep); 92038414Smckusick return (ERPCMISMATCH); 92138414Smckusick } 92238414Smckusick if (*p++ != prog) { 92338414Smckusick m_freem(mrep); 92438414Smckusick return (EPROGUNAVAIL); 92538414Smckusick } 92638414Smckusick if (*p++ != vers) { 92738414Smckusick m_freem(mrep); 92838414Smckusick return (EPROGMISMATCH); 92938414Smckusick } 93042243Smckusick *procnum = fxdr_unsigned(u_long, *p++); 93142243Smckusick if (*procnum == NFSPROC_NULL) { 93238414Smckusick *mrp = mrep; 93338414Smckusick return (0); 93438414Smckusick } 93542243Smckusick if (*procnum > maxproc || *p++ != rpc_auth_unix) { 93638414Smckusick m_freem(mrep); 93738414Smckusick return (EPROCUNAVAIL); 93838414Smckusick } 93941900Smckusick len = fxdr_unsigned(int, *p++); 94041900Smckusick if (len < 0 || len > RPCAUTH_MAXSIZ) { 94141900Smckusick m_freem(mrep); 94241900Smckusick return (EBADRPC); 94341900Smckusick } 94439494Smckusick len = fxdr_unsigned(int, *++p); 94541900Smckusick if (len < 0 || len > NFS_MAXNAMLEN) { 94641900Smckusick m_freem(mrep); 94741900Smckusick return (EBADRPC); 94841900Smckusick } 94939494Smckusick nfsm_adv(nfsm_rndup(len)); 95038414Smckusick nfsm_disect(p, u_long *, 3*NFSX_UNSIGNED); 95138414Smckusick cr->cr_uid = fxdr_unsigned(uid_t, *p++); 95238414Smckusick cr->cr_gid = fxdr_unsigned(gid_t, *p++); 95339494Smckusick len = fxdr_unsigned(int, *p); 95441900Smckusick if (len < 0 || len > RPCAUTH_UNIXGIDS) { 95538414Smckusick m_freem(mrep); 95638414Smckusick return (EBADRPC); 95738414Smckusick } 95839494Smckusick nfsm_disect(p, u_long *, (len + 2)*NFSX_UNSIGNED); 95939494Smckusick for (i = 1; i <= len; i++) 96041900Smckusick if (i < NGROUPS) 96141900Smckusick cr->cr_groups[i] = fxdr_unsigned(gid_t, *p++); 96241900Smckusick else 96341900Smckusick p++; 96441900Smckusick cr->cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1); 96538414Smckusick /* 96638414Smckusick * Do we have any use for the verifier. 96738414Smckusick * According to the "Remote Procedure Call Protocol Spec." it 96838414Smckusick * should be AUTH_NULL, but some clients make it AUTH_UNIX? 96938414Smckusick * For now, just skip over it 97038414Smckusick */ 97139494Smckusick len = fxdr_unsigned(int, *++p); 97241900Smckusick if (len < 0 || len > RPCAUTH_MAXSIZ) { 97341900Smckusick m_freem(mrep); 97441900Smckusick return (EBADRPC); 97541900Smckusick } 97639494Smckusick if (len > 0) 97739494Smckusick nfsm_adv(nfsm_rndup(len)); 97838414Smckusick *mrp = mrep; 97938414Smckusick *mdp = md; 98038414Smckusick *dposp = dpos; 98138414Smckusick return (0); 98238414Smckusick nfsmout: 98338414Smckusick return (error); 98438414Smckusick } 98538414Smckusick 98638414Smckusick /* 98738414Smckusick * Generate the rpc reply header 98838414Smckusick * siz arg. is used to decide if adding a cluster is worthwhile 98938414Smckusick */ 99038414Smckusick nfs_rephead(siz, retxid, err, mrq, mbp, bposp) 99138414Smckusick int siz; 99238414Smckusick u_long retxid; 99338414Smckusick int err; 99438414Smckusick struct mbuf **mrq; 99538414Smckusick struct mbuf **mbp; 99638414Smckusick caddr_t *bposp; 99738414Smckusick { 99839494Smckusick register u_long *p; 99939494Smckusick register long t1; 100039494Smckusick caddr_t bpos; 100139494Smckusick struct mbuf *mreq, *mb, *mb2; 100238414Smckusick 100338414Smckusick NFSMGETHDR(mreq); 100438414Smckusick mb = mreq; 100538414Smckusick if ((siz+RPC_REPLYSIZ) > MHLEN) 100641900Smckusick MCLGET(mreq, M_WAIT); 100738414Smckusick p = mtod(mreq, u_long *); 100838414Smckusick mreq->m_len = 6*NFSX_UNSIGNED; 100938414Smckusick bpos = ((caddr_t)p)+mreq->m_len; 101038414Smckusick *p++ = retxid; 101138414Smckusick *p++ = rpc_reply; 101238414Smckusick if (err == ERPCMISMATCH) { 101338414Smckusick *p++ = rpc_msgdenied; 101438414Smckusick *p++ = rpc_mismatch; 101538414Smckusick *p++ = txdr_unsigned(2); 101638414Smckusick *p = txdr_unsigned(2); 101738414Smckusick } else { 101838414Smckusick *p++ = rpc_msgaccepted; 101938414Smckusick *p++ = 0; 102038414Smckusick *p++ = 0; 102138414Smckusick switch (err) { 102238414Smckusick case EPROGUNAVAIL: 102338414Smckusick *p = txdr_unsigned(RPC_PROGUNAVAIL); 102438414Smckusick break; 102538414Smckusick case EPROGMISMATCH: 102638414Smckusick *p = txdr_unsigned(RPC_PROGMISMATCH); 102738414Smckusick nfsm_build(p, u_long *, 2*NFSX_UNSIGNED); 102838414Smckusick *p++ = txdr_unsigned(2); 102938414Smckusick *p = txdr_unsigned(2); /* someday 3 */ 103038414Smckusick break; 103138414Smckusick case EPROCUNAVAIL: 103238414Smckusick *p = txdr_unsigned(RPC_PROCUNAVAIL); 103338414Smckusick break; 103438414Smckusick default: 103538414Smckusick *p = 0; 103638414Smckusick if (err != VNOVAL) { 103738414Smckusick nfsm_build(p, u_long *, NFSX_UNSIGNED); 103838414Smckusick *p = txdr_unsigned(err); 103938414Smckusick } 104038414Smckusick break; 104138414Smckusick }; 104238414Smckusick } 104338414Smckusick *mrq = mreq; 104438414Smckusick *mbp = mb; 104538414Smckusick *bposp = bpos; 104638414Smckusick if (err != 0 && err != VNOVAL) 104738414Smckusick nfsstats.srvrpc_errs++; 104838414Smckusick return (0); 104938414Smckusick } 105038414Smckusick 105138414Smckusick /* 105238414Smckusick * Nfs timer routine 105338414Smckusick * Scan the nfsreq list and retranmit any requests that have timed out 105438414Smckusick * To avoid retransmission attempts on STREAM sockets (in the future) make 105540117Smckusick * sure to set the r_retry field to 0 (implies nm_retry == 0). 105638414Smckusick */ 105738414Smckusick nfs_timer() 105838414Smckusick { 105938414Smckusick register struct nfsreq *rep; 106038414Smckusick register struct mbuf *m; 106138414Smckusick register struct socket *so; 106241900Smckusick register struct nfsmount *nmp; 106340117Smckusick int s, error; 106438414Smckusick 106538414Smckusick s = splnet(); 106641900Smckusick for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) { 106741900Smckusick nmp = rep->r_nmp; 106841900Smckusick if (rep->r_mrep || (rep->r_flags & R_SOFTTERM) || 106941900Smckusick (so = nmp->nm_so) == NULL) 107041900Smckusick continue; 107141900Smckusick if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp)) { 107241900Smckusick rep->r_flags |= R_SOFTTERM; 107341900Smckusick continue; 107441900Smckusick } 107540117Smckusick if (rep->r_flags & R_TIMING) /* update rtt in mount */ 107641900Smckusick nmp->nm_rtt++; 107741900Smckusick if (nmp->nm_sotype != SOCK_DGRAM) 107840117Smckusick continue; 107941900Smckusick /* If not timed out */ 108041900Smckusick if (++rep->r_timer < nmp->nm_rto) 108141900Smckusick continue; 108241900Smckusick #ifdef notdef 108341900Smckusick if (nmp->nm_sotype != SOCK_DGRAM) { 108441900Smckusick rep->r_flags |= R_MUSTRESEND; 108541900Smckusick rep->r_timer = rep->r_timerinit; 108641900Smckusick continue; 108741900Smckusick } 108841900Smckusick #endif 108940117Smckusick /* Do backoff and save new timeout in mount */ 109040117Smckusick if (rep->r_flags & R_TIMING) { 109141900Smckusick nfs_backofftimer(nmp); 109240117Smckusick rep->r_flags &= ~R_TIMING; 109341900Smckusick nmp->nm_rtt = -1; 109440117Smckusick } 109540117Smckusick if (rep->r_flags & R_SENT) { 109640117Smckusick rep->r_flags &= ~R_SENT; 109741900Smckusick nmp->nm_sent--; 109840117Smckusick } 109941900Smckusick 110041900Smckusick /* 110141900Smckusick * Check for too many retries on soft mount. 110241900Smckusick * nb: For hard mounts, r_retry == NFS_MAXREXMIT+1 110341900Smckusick */ 110441900Smckusick if (++rep->r_rexmit > NFS_MAXREXMIT) 110540117Smckusick rep->r_rexmit = NFS_MAXREXMIT; 110640117Smckusick 110741900Smckusick /* 110841900Smckusick * Check for server not responding 110941900Smckusick */ 111041900Smckusick if ((rep->r_flags & R_TPRINTFMSG) == 0 && 111141900Smckusick rep->r_rexmit > 8) { 111241900Smckusick if (rep->r_procp && rep->r_procp->p_session) 111343061Smarc tprintf(rep->r_procp->p_session, 111441900Smckusick "Nfs server %s, not responding\n", 111541900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 111641900Smckusick else 111743061Smarc tprintf(NULL, 111841900Smckusick "Nfs server %s, not responding\n", 111941900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 112041900Smckusick rep->r_flags |= R_TPRINTFMSG; 112141900Smckusick } 112241900Smckusick if (rep->r_rexmit > rep->r_retry) { /* too many */ 112341900Smckusick nfsstats.rpctimeouts++; 112441900Smckusick rep->r_flags |= R_SOFTTERM; 112541900Smckusick continue; 112641900Smckusick } 112741900Smckusick 112841900Smckusick /* 112941900Smckusick * If there is enough space and the window allows.. 113041900Smckusick * Resend it 113141900Smckusick */ 113241900Smckusick if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len && 113341900Smckusick nmp->nm_sent < nmp->nm_window && 113441900Smckusick (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){ 113541900Smckusick nfsstats.rpcretries++; 113641900Smckusick if ((nmp->nm_flag & NFSMNT_NOCONN) == 0) 113741900Smckusick error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, 113841900Smckusick (caddr_t)0, (struct mbuf *)0, (struct mbuf *)0); 113941900Smckusick else 114041900Smckusick error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, 114141900Smckusick nmp->nm_nam, (struct mbuf *)0, (struct mbuf *)0); 114241900Smckusick if (error) { 114341900Smckusick if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) 114441900Smckusick so->so_error = 0; 114541900Smckusick } else { 114641900Smckusick /* 114741900Smckusick * We need to time the request even though we 114841900Smckusick * are retransmitting. 114941900Smckusick */ 115041900Smckusick nmp->nm_rtt = 0; 115141900Smckusick nmp->nm_sent++; 115241900Smckusick rep->r_flags |= (R_SENT|R_TIMING); 115341900Smckusick rep->r_timer = rep->r_timerinit; 115441900Smckusick } 115541900Smckusick } 115640117Smckusick } 115740117Smckusick splx(s); 115840117Smckusick timeout(nfs_timer, (caddr_t)0, hz/NFS_HZ); 115940117Smckusick } 116040117Smckusick 116140117Smckusick /* 116240117Smckusick * NFS timer update and backoff. The "Jacobson/Karels/Karn" scheme is 116340117Smckusick * used here. The timer state is held in the nfsmount structure and 116440117Smckusick * a single request is used to clock the response. When successful 116540117Smckusick * the rtt smoothing in nfs_updatetimer is used, when failed the backoff 116640117Smckusick * is done by nfs_backofftimer. We also log failure messages in these 116740117Smckusick * routines. 116840117Smckusick * 116940117Smckusick * Congestion variables are held in the nfshost structure which 117040117Smckusick * is referenced by nfsmounts and shared per-server. This separation 117140117Smckusick * makes it possible to do per-mount timing which allows varying disk 117240117Smckusick * access times to be dealt with, while preserving a network oriented 117340117Smckusick * congestion control scheme. 117440117Smckusick * 117540117Smckusick * The windowing implements the Jacobson/Karels slowstart algorithm 117640117Smckusick * with adjusted scaling factors. We start with one request, then send 117740117Smckusick * 4 more after each success until the ssthresh limit is reached, then 117840117Smckusick * we increment at a rate proportional to the window. On failure, we 117940117Smckusick * remember 3/4 the current window and clamp the send limit to 1. Note 118040117Smckusick * ICMP source quench is not reflected in so->so_error so we ignore that 118140117Smckusick * for now. 118240117Smckusick * 118340117Smckusick * NFS behaves much more like a transport protocol with these changes, 118440117Smckusick * shedding the teenage pedal-to-the-metal tendencies of "other" 118540117Smckusick * implementations. 118640117Smckusick * 118740117Smckusick * Timers and congestion avoidance by Tom Talpey, Open Software Foundation. 118840117Smckusick */ 118940117Smckusick 119040117Smckusick /* 119140117Smckusick * The TCP algorithm was not forgiving enough. Because the NFS server 119240117Smckusick * responds only after performing lookups/diskio/etc, we have to be 119340117Smckusick * more prepared to accept a spiky variance. The TCP algorithm is: 119441900Smckusick * TCP_RTO(nmp) ((((nmp)->nm_srtt >> 2) + (nmp)->nm_rttvar) >> 1) 119540117Smckusick */ 119641900Smckusick #define NFS_RTO(nmp) (((nmp)->nm_srtt >> 3) + (nmp)->nm_rttvar) 119740117Smckusick 119841900Smckusick nfs_updatetimer(nmp) 119941900Smckusick register struct nfsmount *nmp; 120040117Smckusick { 120140117Smckusick 120240117Smckusick /* If retransmitted, clear and return */ 120341900Smckusick if (nmp->nm_rexmit || nmp->nm_currexmit) { 120441900Smckusick nmp->nm_rexmit = nmp->nm_currexmit = 0; 120540117Smckusick return; 120640117Smckusick } 120740117Smckusick /* If have a measurement, do smoothing */ 120841900Smckusick if (nmp->nm_srtt) { 120940117Smckusick register short delta; 121041900Smckusick delta = nmp->nm_rtt - (nmp->nm_srtt >> 3); 121141900Smckusick if ((nmp->nm_srtt += delta) <= 0) 121241900Smckusick nmp->nm_srtt = 1; 121340117Smckusick if (delta < 0) 121440117Smckusick delta = -delta; 121541900Smckusick delta -= (nmp->nm_rttvar >> 2); 121641900Smckusick if ((nmp->nm_rttvar += delta) <= 0) 121741900Smckusick nmp->nm_rttvar = 1; 121840117Smckusick /* Else initialize */ 121940117Smckusick } else { 122041900Smckusick nmp->nm_rttvar = nmp->nm_rtt << 1; 122141900Smckusick if (nmp->nm_rttvar == 0) nmp->nm_rttvar = 2; 122241900Smckusick nmp->nm_srtt = nmp->nm_rttvar << 2; 122340117Smckusick } 122440117Smckusick /* Compute new Retransmission TimeOut and clip */ 122541900Smckusick nmp->nm_rto = NFS_RTO(nmp); 122641900Smckusick if (nmp->nm_rto < NFS_MINTIMEO) 122741900Smckusick nmp->nm_rto = NFS_MINTIMEO; 122841900Smckusick else if (nmp->nm_rto > NFS_MAXTIMEO) 122941900Smckusick nmp->nm_rto = NFS_MAXTIMEO; 123040117Smckusick 123140117Smckusick /* Update window estimate */ 123241900Smckusick if (nmp->nm_window < nmp->nm_ssthresh) /* quickly */ 123341900Smckusick nmp->nm_window += 4; 123440117Smckusick else { /* slowly */ 123541900Smckusick register long incr = ++nmp->nm_winext; 123641900Smckusick incr = (incr * incr) / nmp->nm_window; 123740117Smckusick if (incr > 0) { 123841900Smckusick nmp->nm_winext = 0; 123941900Smckusick ++nmp->nm_window; 124040117Smckusick } 124140117Smckusick } 124241900Smckusick if (nmp->nm_window > NFS_MAXWINDOW) 124341900Smckusick nmp->nm_window = NFS_MAXWINDOW; 124440117Smckusick } 124540117Smckusick 124641900Smckusick nfs_backofftimer(nmp) 124741900Smckusick register struct nfsmount *nmp; 124840117Smckusick { 124940117Smckusick register unsigned long newrto; 125040117Smckusick 125140117Smckusick /* Clip shift count */ 125241900Smckusick if (++nmp->nm_rexmit > 8 * sizeof nmp->nm_rto) 125341900Smckusick nmp->nm_rexmit = 8 * sizeof nmp->nm_rto; 125440117Smckusick /* Back off RTO exponentially */ 125541900Smckusick newrto = NFS_RTO(nmp); 125641900Smckusick newrto <<= (nmp->nm_rexmit - 1); 125740117Smckusick if (newrto == 0 || newrto > NFS_MAXTIMEO) 125840117Smckusick newrto = NFS_MAXTIMEO; 125941900Smckusick nmp->nm_rto = newrto; 126040117Smckusick 126140117Smckusick /* If too many retries, message, assume a bogus RTT and re-measure */ 126241900Smckusick if (nmp->nm_currexmit < nmp->nm_rexmit) { 126341900Smckusick nmp->nm_currexmit = nmp->nm_rexmit; 126441900Smckusick if (nmp->nm_currexmit >= nfsrexmtthresh) { 126541900Smckusick if (nmp->nm_currexmit == nfsrexmtthresh) { 126641900Smckusick nmp->nm_rttvar += (nmp->nm_srtt >> 2); 126741900Smckusick nmp->nm_srtt = 0; 126838414Smckusick } 126938414Smckusick } 127038414Smckusick } 127140117Smckusick /* Close down window but remember this point (3/4 current) for later */ 127241900Smckusick nmp->nm_ssthresh = ((nmp->nm_window << 1) + nmp->nm_window) >> 2; 127341900Smckusick nmp->nm_window = 1; 127441900Smckusick nmp->nm_winext = 0; 127538414Smckusick } 127638414Smckusick 127738414Smckusick /* 127841900Smckusick * Test for a termination signal pending on procp. 127941900Smckusick * This is used for NFSMNT_INT mounts. 128038414Smckusick */ 128141900Smckusick nfs_sigintr(p) 128241900Smckusick register struct proc *p; 128341900Smckusick { 128441900Smckusick if (p && p->p_sig && (((p->p_sig &~ p->p_sigmask) &~ p->p_sigignore) & 128541900Smckusick NFSINT_SIGMASK)) 128641900Smckusick return (1); 128741900Smckusick else 128841900Smckusick return (0); 128941900Smckusick } 129040117Smckusick 129141900Smckusick /* 129241900Smckusick * Lock a socket against others. 129341900Smckusick * Necessary for STREAM sockets to ensure you get an entire rpc request/reply 129441900Smckusick * and also to avoid race conditions between the processes with nfs requests 129541900Smckusick * in progress when a reconnect is necessary. 129641900Smckusick */ 129741900Smckusick nfs_solock(flagp, cant_intr) 129841900Smckusick int *flagp; 129941900Smckusick int cant_intr; 130038414Smckusick { 130140117Smckusick 130241900Smckusick while (*flagp & NFSMNT_SCKLOCK) { 130341900Smckusick *flagp |= NFSMNT_WANTSCK; 130441900Smckusick if (cant_intr) 130541900Smckusick (void) sleep((caddr_t)flagp, PZERO-7); 130641900Smckusick else 130741900Smckusick (void) tsleep((caddr_t)flagp, PZERO+1, "nfssolck", 0); 130840117Smckusick } 130941900Smckusick *flagp |= NFSMNT_SCKLOCK; 131041900Smckusick } 131140117Smckusick 131241900Smckusick /* 131341900Smckusick * Unlock the stream socket for others. 131441900Smckusick */ 131541900Smckusick nfs_sounlock(flagp) 131641900Smckusick int *flagp; 131741900Smckusick { 131841900Smckusick 131941900Smckusick if ((*flagp & NFSMNT_SCKLOCK) == 0) 132041900Smckusick panic("nfs sounlock"); 132141900Smckusick *flagp &= ~NFSMNT_SCKLOCK; 132241900Smckusick if (*flagp & NFSMNT_WANTSCK) { 132341900Smckusick *flagp &= ~NFSMNT_WANTSCK; 132441900Smckusick wakeup((caddr_t)flagp); 132540117Smckusick } 132638414Smckusick } 132741900Smckusick 132841900Smckusick /* 132941900Smckusick * This function compares two net addresses by family and returns TRUE 133041900Smckusick * if they are the same. 133141900Smckusick * If there is any doubt, return FALSE. 133241900Smckusick */ 133341900Smckusick nfs_netaddr_match(nam1, nam2) 133441900Smckusick struct mbuf *nam1, *nam2; 133541900Smckusick { 133641900Smckusick register struct sockaddr *saddr1, *saddr2; 133741900Smckusick 133841900Smckusick saddr1 = mtod(nam1, struct sockaddr *); 133941900Smckusick saddr2 = mtod(nam2, struct sockaddr *); 134041900Smckusick if (saddr1->sa_family != saddr2->sa_family) 134141900Smckusick return (0); 134241900Smckusick 134341900Smckusick /* 134441900Smckusick * Must do each address family separately since unused fields 134541900Smckusick * are undefined values and not always zeroed. 134641900Smckusick */ 134741900Smckusick switch (saddr1->sa_family) { 134841900Smckusick case AF_INET: 134941900Smckusick if (((struct sockaddr_in *)saddr1)->sin_addr.s_addr == 135041900Smckusick ((struct sockaddr_in *)saddr2)->sin_addr.s_addr) 135141900Smckusick return (1); 135241900Smckusick break; 135341900Smckusick default: 135441900Smckusick break; 135541900Smckusick }; 135641900Smckusick return (0); 135741900Smckusick } 135841900Smckusick 135941900Smckusick /* 136041900Smckusick * Check the hostname fields for nfsd's mask and match fields. 136141900Smckusick * By address family: 136241900Smckusick * - Bitwise AND the mask with the host address field 136341900Smckusick * - Compare for == with match 136441900Smckusick * return TRUE if not equal 136541900Smckusick */ 136641900Smckusick nfs_badnam(nam, msk, mtch) 136741900Smckusick register struct mbuf *nam, *msk, *mtch; 136841900Smckusick { 136941900Smckusick switch (mtod(nam, struct sockaddr *)->sa_family) { 137041900Smckusick case AF_INET: 137141900Smckusick return ((mtod(nam, struct sockaddr_in *)->sin_addr.s_addr & 137241900Smckusick mtod(msk, struct sockaddr_in *)->sin_addr.s_addr) != 137341900Smckusick mtod(mtch, struct sockaddr_in *)->sin_addr.s_addr); 137441900Smckusick default: 137541900Smckusick printf("nfs_badmatch, unknown sa_family\n"); 137641900Smckusick return (0); 137741900Smckusick }; 137841900Smckusick } 1379