138414Smckusick /* 238414Smckusick * Copyright (c) 1989 The Regents of the University of California. 338414Smckusick * All rights reserved. 438414Smckusick * 538414Smckusick * This code is derived from software contributed to Berkeley by 638414Smckusick * Rick Macklem at The University of Guelph. 738414Smckusick * 838414Smckusick * Redistribution and use in source and binary forms are permitted 938414Smckusick * provided that the above copyright notice and this paragraph are 1038414Smckusick * duplicated in all such forms and that any documentation, 1138414Smckusick * advertising materials, and other materials related to such 1238414Smckusick * distribution and use acknowledge that the software was developed 1338414Smckusick * by the University of California, Berkeley. The name of the 1438414Smckusick * University may not be used to endorse or promote products derived 1538414Smckusick * from this software without specific prior written permission. 1638414Smckusick * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 1738414Smckusick * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 1838414Smckusick * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 1938414Smckusick * 20*42243Smckusick * @(#)nfs_socket.c 7.13 (Berkeley) 05/18/90 2138414Smckusick */ 2238414Smckusick 2338414Smckusick /* 2441900Smckusick * Socket operations for use by nfs 2538414Smckusick */ 2638414Smckusick 2738414Smckusick #include "types.h" 2838414Smckusick #include "param.h" 2938414Smckusick #include "uio.h" 3038414Smckusick #include "user.h" 3140117Smckusick #include "proc.h" 3240117Smckusick #include "signal.h" 3338414Smckusick #include "mount.h" 3438414Smckusick #include "kernel.h" 3538414Smckusick #include "malloc.h" 3638414Smckusick #include "mbuf.h" 3738414Smckusick #include "vnode.h" 3838414Smckusick #include "domain.h" 3938414Smckusick #include "protosw.h" 4038414Smckusick #include "socket.h" 4138414Smckusick #include "socketvar.h" 4241900Smckusick #include "netinet/in.h" 4341900Smckusick #include "netinet/tcp.h" 4438414Smckusick #include "rpcv2.h" 4538414Smckusick #include "nfsv2.h" 4638414Smckusick #include "nfs.h" 4738414Smckusick #include "xdr_subs.h" 4838414Smckusick #include "nfsm_subs.h" 4938414Smckusick #include "nfsmount.h" 5038414Smckusick 5140117Smckusick #include "syslog.h" 5240117Smckusick 5338414Smckusick #define TRUE 1 5438414Smckusick 5540117Smckusick /* 5638414Smckusick * External data, mostly RPC constants in XDR form 5738414Smckusick */ 5838414Smckusick extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix, 5938414Smckusick rpc_msgaccepted, rpc_call; 6038414Smckusick extern u_long nfs_prog, nfs_vers; 6141900Smckusick extern int nonidempotent[NFS_NPROCS]; 6241900Smckusick int nfs_sbwait(); 6341900Smckusick void nfs_disconnect(); 6441900Smckusick 6538414Smckusick int nfsrv_null(), 6638414Smckusick nfsrv_getattr(), 6738414Smckusick nfsrv_setattr(), 6838414Smckusick nfsrv_lookup(), 6938414Smckusick nfsrv_readlink(), 7038414Smckusick nfsrv_read(), 7138414Smckusick nfsrv_write(), 7238414Smckusick nfsrv_create(), 7338414Smckusick nfsrv_remove(), 7438414Smckusick nfsrv_rename(), 7538414Smckusick nfsrv_link(), 7638414Smckusick nfsrv_symlink(), 7738414Smckusick nfsrv_mkdir(), 7838414Smckusick nfsrv_rmdir(), 7938414Smckusick nfsrv_readdir(), 8038414Smckusick nfsrv_statfs(), 8138414Smckusick nfsrv_noop(); 8238414Smckusick 8338414Smckusick int (*nfsrv_procs[NFS_NPROCS])() = { 8438414Smckusick nfsrv_null, 8538414Smckusick nfsrv_getattr, 8638414Smckusick nfsrv_setattr, 8738414Smckusick nfsrv_noop, 8838414Smckusick nfsrv_lookup, 8938414Smckusick nfsrv_readlink, 9038414Smckusick nfsrv_read, 9138414Smckusick nfsrv_noop, 9238414Smckusick nfsrv_write, 9338414Smckusick nfsrv_create, 9438414Smckusick nfsrv_remove, 9538414Smckusick nfsrv_rename, 9638414Smckusick nfsrv_link, 9738414Smckusick nfsrv_symlink, 9838414Smckusick nfsrv_mkdir, 9938414Smckusick nfsrv_rmdir, 10038414Smckusick nfsrv_readdir, 10138414Smckusick nfsrv_statfs, 10238414Smckusick }; 10338414Smckusick 10440117Smckusick struct nfsreq nfsreqh; 10540117Smckusick int nfsrexmtthresh = NFS_FISHY; 10641900Smckusick int nfs_tcpnodelay = 1; 10738414Smckusick 10838414Smckusick /* 10941900Smckusick * Initialize sockets and congestion for a new NFS connection. 11040117Smckusick * We do not free the sockaddr if error. 11138414Smckusick */ 11241900Smckusick nfs_connect(nmp) 11340117Smckusick register struct nfsmount *nmp; 11440117Smckusick { 11541900Smckusick register struct socket *so; 11641900Smckusick int s, error; 11740117Smckusick struct mbuf *m; 11840117Smckusick 11941900Smckusick nmp->nm_so = (struct socket *)0; 12041900Smckusick if (error = socreate(mtod(nmp->nm_nam, struct sockaddr *)->sa_family, 12141900Smckusick &nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto)) 12240117Smckusick goto bad; 12341900Smckusick so = nmp->nm_so; 12441900Smckusick nmp->nm_soflags = so->so_proto->pr_flags; 12540117Smckusick 12641900Smckusick /* 12741900Smckusick * Protocols that do not require connections may be optionally left 12841900Smckusick * unconnected for servers that reply from a port other than NFS_PORT. 12941900Smckusick */ 13041900Smckusick if (nmp->nm_flag & NFSMNT_NOCONN) { 13141900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) { 13241900Smckusick error = ENOTCONN; 13340117Smckusick goto bad; 13440117Smckusick } 13541900Smckusick } else { 13641900Smckusick if (error = soconnect(so, nmp->nm_nam)) 13740117Smckusick goto bad; 13841900Smckusick 13941900Smckusick /* 14041900Smckusick * Wait for the connection to complete. Cribbed from the 14141900Smckusick * connect system call but with the wait at negative prio. 14241900Smckusick */ 14341900Smckusick s = splnet(); 14441900Smckusick while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) 14541900Smckusick sleep((caddr_t)&so->so_timeo, PZERO-2); 14641900Smckusick splx(s); 14741900Smckusick if (so->so_error) { 14841900Smckusick error = so->so_error; 14941900Smckusick goto bad; 15041900Smckusick } 15140117Smckusick } 15241900Smckusick if (nmp->nm_sotype == SOCK_DGRAM) { 15341900Smckusick if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) { 15441900Smckusick so->so_rcv.sb_timeo = (5 * hz); 15541900Smckusick so->so_snd.sb_timeo = (5 * hz); 15641900Smckusick } else { 15741900Smckusick so->so_rcv.sb_timeo = 0; 15841900Smckusick so->so_snd.sb_timeo = 0; 15941900Smckusick } 16041900Smckusick if (error = soreserve(so, nmp->nm_wsize + NFS_MAXPKTHDR, 16141900Smckusick (nmp->nm_rsize + NFS_MAXPKTHDR) * 4)) 16241900Smckusick goto bad; 16341900Smckusick } else { 16441900Smckusick if (nmp->nm_flag & NFSMNT_INT) { 16541900Smckusick so->so_rcv.sb_timeo = (5 * hz); 16641900Smckusick so->so_snd.sb_timeo = (5 * hz); 16741900Smckusick } else { 16841900Smckusick so->so_rcv.sb_timeo = 0; 16941900Smckusick so->so_snd.sb_timeo = 0; 17041900Smckusick } 17141900Smckusick if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 17241900Smckusick MGET(m, M_WAIT, MT_SOOPTS); 17341900Smckusick *mtod(m, int *) = 1; 17441900Smckusick m->m_len = sizeof(int); 17541900Smckusick sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m); 17641900Smckusick } 17741900Smckusick if (so->so_proto->pr_domain->dom_family == AF_INET && 17841900Smckusick so->so_proto->pr_protocol == IPPROTO_TCP && 17941900Smckusick nfs_tcpnodelay) { 18041900Smckusick MGET(m, M_WAIT, MT_SOOPTS); 18141900Smckusick *mtod(m, int *) = 1; 18241900Smckusick m->m_len = sizeof(int); 18341900Smckusick sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m); 18441900Smckusick } 18541900Smckusick if (error = soreserve(so, 18641900Smckusick (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof(u_long)) * 2, 18741900Smckusick nmp->nm_rsize + NFS_MAXPKTHDR + sizeof(u_long))) 18841900Smckusick goto bad; 18941900Smckusick } 19041900Smckusick so->so_rcv.sb_flags |= SB_NOINTR; 19141900Smckusick so->so_snd.sb_flags |= SB_NOINTR; 19240117Smckusick 19341900Smckusick /* Initialize other non-zero congestion variables */ 19441900Smckusick nmp->nm_rto = NFS_TIMEO; 19541900Smckusick nmp->nm_window = 2; /* Initial send window */ 19641900Smckusick nmp->nm_ssthresh = NFS_MAXWINDOW; /* Slowstart threshold */ 19741900Smckusick nmp->nm_rttvar = nmp->nm_rto << 1; 19841900Smckusick nmp->nm_sent = 0; 19941900Smckusick nmp->nm_currexmit = 0; 20041900Smckusick return (0); 20140117Smckusick 20241900Smckusick bad: 20341900Smckusick nfs_disconnect(nmp); 20441900Smckusick return (error); 20541900Smckusick } 20640117Smckusick 20741900Smckusick /* 20841900Smckusick * Reconnect routine: 20941900Smckusick * Called when a connection is broken on a reliable protocol. 21041900Smckusick * - clean up the old socket 21141900Smckusick * - nfs_connect() again 21241900Smckusick * - set R_MUSTRESEND for all outstanding requests on mount point 21341900Smckusick * If this fails the mount point is DEAD! 21441900Smckusick * nb: Must be called with the nfs_solock() set on the mount point. 21541900Smckusick */ 21641900Smckusick nfs_reconnect(rep, nmp) 21741900Smckusick register struct nfsreq *rep; 21841900Smckusick register struct nfsmount *nmp; 21941900Smckusick { 22041900Smckusick register struct nfsreq *rp; 22141900Smckusick int error; 22240117Smckusick 22341900Smckusick if (rep->r_procp) 22441900Smckusick tprintf(rep->r_procp->p_session->s_ttyvp, 22541900Smckusick "Nfs server %s, trying reconnect\n", 22641900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 22741900Smckusick else 22841900Smckusick tprintf(NULLVP, "Nfs server %s, trying a reconnect\n", 22941900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 23041900Smckusick while (error = nfs_connect(nmp)) { 231*42243Smckusick #ifdef lint 232*42243Smckusick error = error; 233*42243Smckusick #endif /* lint */ 23441900Smckusick if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp)) 23541900Smckusick return (EINTR); 23641900Smckusick tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0); 23740117Smckusick } 23841900Smckusick if (rep->r_procp) 23941900Smckusick tprintf(rep->r_procp->p_session->s_ttyvp, 24041900Smckusick "Nfs server %s, reconnected\n", 24141900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 24241900Smckusick else 24341900Smckusick tprintf(NULLVP, "Nfs server %s, reconnected\n", 24441900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 24541900Smckusick 24641900Smckusick /* 24741900Smckusick * Loop through outstanding request list and fix up all requests 24841900Smckusick * on old socket. 24941900Smckusick */ 25041900Smckusick rp = nfsreqh.r_next; 25141900Smckusick while (rp != &nfsreqh) { 25241900Smckusick if (rp->r_nmp == nmp) 25341900Smckusick rp->r_flags |= R_MUSTRESEND; 25441900Smckusick rp = rp->r_next; 25540117Smckusick } 25640117Smckusick return (0); 25740117Smckusick } 25840117Smckusick 25940117Smckusick /* 26040117Smckusick * NFS disconnect. Clean up and unlink. 26140117Smckusick */ 26241900Smckusick void 26340117Smckusick nfs_disconnect(nmp) 26440117Smckusick register struct nfsmount *nmp; 26540117Smckusick { 26641900Smckusick register struct socket *so; 26740117Smckusick 26841900Smckusick if (nmp->nm_so) { 26941900Smckusick so = nmp->nm_so; 27041900Smckusick nmp->nm_so = (struct socket *)0; 27141900Smckusick soshutdown(so, 2); 27241900Smckusick soclose(so); 27340117Smckusick } 27440117Smckusick } 27540117Smckusick 27640117Smckusick /* 27741900Smckusick * This is the nfs send routine. For connection based socket types, it 27841900Smckusick * must be called with an nfs_solock() on the socket. 27941900Smckusick * "rep == NULL" indicates that it has been called from a server. 28040117Smckusick */ 28141900Smckusick nfs_send(so, nam, top, rep) 28238414Smckusick register struct socket *so; 28338414Smckusick struct mbuf *nam; 28441900Smckusick register struct mbuf *top; 28541900Smckusick struct nfsreq *rep; 28638414Smckusick { 28741900Smckusick struct mbuf *sendnam; 28841900Smckusick int error, soflags; 28938414Smckusick 29041900Smckusick if (rep) { 29141900Smckusick if (rep->r_flags & R_SOFTTERM) { 29240117Smckusick m_freem(top); 29341900Smckusick return (EINTR); 29440117Smckusick } 29541900Smckusick if ((so = rep->r_nmp->nm_so) == NULL && 29641900Smckusick (error = nfs_reconnect(rep, rep->r_nmp))) 29741900Smckusick return (error); 29841900Smckusick rep->r_flags &= ~R_MUSTRESEND; 29941900Smckusick soflags = rep->r_nmp->nm_soflags; 30041900Smckusick } else 30141900Smckusick soflags = so->so_proto->pr_flags; 30241900Smckusick if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED)) 30341900Smckusick sendnam = (struct mbuf *)0; 30441900Smckusick else 30541900Smckusick sendnam = nam; 30641900Smckusick 30741900Smckusick error = sosend(so, sendnam, (struct uio *)0, top, 30841900Smckusick (struct mbuf *)0, 0); 30941900Smckusick if (error == EWOULDBLOCK && rep) { 31041900Smckusick if (rep->r_flags & R_SOFTTERM) 31141900Smckusick error = EINTR; 31241900Smckusick else { 31341900Smckusick rep->r_flags |= R_MUSTRESEND; 31441900Smckusick error = 0; 31540117Smckusick } 31638414Smckusick } 31741900Smckusick /* 31841900Smckusick * Ignore socket errors?? 31941900Smckusick */ 32041900Smckusick if (error && error != EINTR && error != ERESTART) 32141900Smckusick error = 0; 32238414Smckusick return (error); 32338414Smckusick } 32438414Smckusick 32538414Smckusick /* 32641900Smckusick * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all 32741900Smckusick * done by soreceive(), but for SOCK_STREAM we must deal with the Record 32841900Smckusick * Mark and consolidate the data into a new mbuf list. 32941900Smckusick * nb: Sometimes TCP passes the data up to soreceive() in long lists of 33041900Smckusick * small mbufs. 33141900Smckusick * For SOCK_STREAM we must be very careful to read an entire record once 33241900Smckusick * we have read any of it, even if the system call has been interrupted. 33338414Smckusick */ 33441900Smckusick nfs_receive(so, aname, mp, rep) 33538414Smckusick register struct socket *so; 33638414Smckusick struct mbuf **aname; 33738414Smckusick struct mbuf **mp; 33841900Smckusick register struct nfsreq *rep; 33938414Smckusick { 34041900Smckusick struct uio auio; 34141900Smckusick struct iovec aio; 34238414Smckusick register struct mbuf *m; 34341900Smckusick struct mbuf *m2, *m3, *mnew, **mbp; 34441900Smckusick caddr_t fcp, tcp; 34541900Smckusick u_long len; 34641900Smckusick struct mbuf **getnam; 34741900Smckusick int error, siz, mlen, soflags, rcvflg = MSG_WAITALL; 34838414Smckusick 34941900Smckusick /* 35041900Smckusick * Set up arguments for soreceive() 35141900Smckusick */ 35241900Smckusick *mp = (struct mbuf *)0; 35341900Smckusick *aname = (struct mbuf *)0; 35441900Smckusick if (rep) 35541900Smckusick soflags = rep->r_nmp->nm_soflags; 35641900Smckusick else 35741900Smckusick soflags = so->so_proto->pr_flags; 35838414Smckusick 35941900Smckusick /* 36041900Smckusick * For reliable protocols, lock against other senders/receivers 36141900Smckusick * in case a reconnect is necessary. 36241900Smckusick * For SOCK_STREAM, first get the Record Mark to find out how much 36341900Smckusick * more there is to get. 36441900Smckusick * We must lock the socket against other receivers 36541900Smckusick * until we have an entire rpc request/reply. 36641900Smckusick */ 36741900Smckusick if (soflags & PR_CONNREQUIRED) { 36841900Smckusick tryagain: 36941900Smckusick /* 37041900Smckusick * Check for fatal errors and resending request. 37141900Smckusick */ 37241900Smckusick if (rep) { 37341900Smckusick /* 37441900Smckusick * Ugh: If a reconnect attempt just happened, nm_so 37541900Smckusick * would have changed. NULL indicates a failed 37641900Smckusick * attempt that has essentially shut down this 37741900Smckusick * mount point. 37841900Smckusick */ 37941900Smckusick if (rep->r_mrep || (so = rep->r_nmp->nm_so) == NULL || 38041900Smckusick (rep->r_flags & R_SOFTTERM)) 38141900Smckusick return (EINTR); 38241900Smckusick while (rep->r_flags & R_MUSTRESEND) { 38341900Smckusick m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); 38441900Smckusick nfsstats.rpcretries++; 38541900Smckusick if (error = nfs_send(so, rep->r_nmp->nm_nam, m, 38641900Smckusick rep)) 38741900Smckusick goto errout; 38840117Smckusick } 38941900Smckusick } 39041900Smckusick if ((soflags & PR_ATOMIC) == 0) { 39141900Smckusick aio.iov_base = (caddr_t) &len; 39241900Smckusick aio.iov_len = sizeof(u_long); 39341900Smckusick auio.uio_iov = &aio; 39441900Smckusick auio.uio_iovcnt = 1; 39541900Smckusick auio.uio_segflg = UIO_SYSSPACE; 39641900Smckusick auio.uio_rw = UIO_READ; 39741900Smckusick auio.uio_offset = 0; 39841900Smckusick auio.uio_resid = sizeof(u_long); 39941900Smckusick do { 40041900Smckusick error = soreceive(so, (struct mbuf **)0, &auio, 40141900Smckusick (struct mbuf **)0, (struct mbuf **)0, &rcvflg); 40241900Smckusick if (error == EWOULDBLOCK && rep) { 40341900Smckusick if (rep->r_flags & R_SOFTTERM) 40441900Smckusick return (EINTR); 40541900Smckusick if (rep->r_flags & R_MUSTRESEND) 40641900Smckusick goto tryagain; 40741900Smckusick } 40841900Smckusick } while (error == EWOULDBLOCK); 40941900Smckusick if (!error && auio.uio_resid > 0) 41041900Smckusick error = EPIPE; 41140761Skarels if (error) 41241900Smckusick goto errout; 41341900Smckusick len = ntohl(len) & ~0x80000000; 41441900Smckusick /* 41541900Smckusick * This is SERIOUS! We are out of sync with the sender 41641900Smckusick * and forcing a disconnect/reconnect is all I can do. 41741900Smckusick */ 41841900Smckusick if (len > NFS_MAXPACKET) { 41941900Smckusick error = EFBIG; 42041900Smckusick goto errout; 42141900Smckusick } 42241900Smckusick auio.uio_resid = len; 42341900Smckusick do { 42441900Smckusick error = soreceive(so, (struct mbuf **)0, 42541900Smckusick &auio, mp, (struct mbuf **)0, &rcvflg); 42641900Smckusick } while (error == EWOULDBLOCK || error == EINTR || 42741900Smckusick error == ERESTART); 42841900Smckusick if (!error && auio.uio_resid > 0) 42941900Smckusick error = EPIPE; 43040117Smckusick } else { 43141900Smckusick auio.uio_resid = len = 1000000; /* Anything Big */ 43241900Smckusick do { 43341900Smckusick error = soreceive(so, (struct mbuf **)0, 43441900Smckusick &auio, mp, (struct mbuf **)0, &rcvflg); 43541900Smckusick if (error == EWOULDBLOCK && rep) { 43641900Smckusick if (rep->r_flags & R_SOFTTERM) 43741900Smckusick return (EINTR); 43841900Smckusick if (rep->r_flags & R_MUSTRESEND) 43941900Smckusick goto tryagain; 44041900Smckusick } 44141900Smckusick } while (error == EWOULDBLOCK); 44241900Smckusick if (!error && *mp == NULL) 44341900Smckusick error = EPIPE; 44441900Smckusick len -= auio.uio_resid; 44540117Smckusick } 44641900Smckusick errout: 44741900Smckusick if (error && rep && error != EINTR && error != ERESTART) { 44841900Smckusick m_freem(*mp); 44941900Smckusick *mp = (struct mbuf *)0; 45041900Smckusick nfs_disconnect(rep->r_nmp); 45141900Smckusick error = nfs_reconnect(rep, rep->r_nmp); 45241900Smckusick if (!error) 45341900Smckusick goto tryagain; 45440117Smckusick } 45541900Smckusick } else { 45641900Smckusick if (so->so_state & SS_ISCONNECTED) 45741900Smckusick getnam = (struct mbuf **)0; 45841900Smckusick else 45941900Smckusick getnam = aname; 46041900Smckusick auio.uio_resid = len = 1000000; 46141900Smckusick do { 46241900Smckusick error = soreceive(so, getnam, &auio, mp, 46341900Smckusick (struct mbuf **)0, &rcvflg); 46441900Smckusick if (error == EWOULDBLOCK && rep && 46541900Smckusick (rep->r_flags & R_SOFTTERM)) 46641900Smckusick return (EINTR); 46741900Smckusick } while (error == EWOULDBLOCK); 46841900Smckusick len -= auio.uio_resid; 46941900Smckusick } 47041900Smckusick if (error) { 47141900Smckusick m_freem(*mp); 47241900Smckusick *mp = (struct mbuf *)0; 47341900Smckusick } 47441900Smckusick /* 47541900Smckusick * Search for any mbufs that are not a multiple of 4 bytes long. 47641900Smckusick * These could cause pointer alignment problems, so copy them to 47741900Smckusick * well aligned mbufs. 47841900Smckusick */ 47941900Smckusick m = *mp; 48041900Smckusick mbp = mp; 48141900Smckusick while (m) { 48241900Smckusick /* 48341900Smckusick * All this for something that may never happen. 48441900Smckusick */ 48541900Smckusick if (m->m_len & 0x3) { 48641900Smckusick printf("nfs_rcv odd length!\n"); 48741900Smckusick fcp = mtod(m, caddr_t); 48841900Smckusick mnew = m2 = (struct mbuf *)0; 489*42243Smckusick #ifdef lint 490*42243Smckusick m3 = (struct mbuf *)0; 491*42243Smckusick mlen = 0; 492*42243Smckusick #endif /* lint */ 49341900Smckusick while (m) { 49441900Smckusick if (m2 == NULL || mlen == 0) { 49541900Smckusick MGET(m2, M_WAIT, MT_DATA); 49641900Smckusick if (len > MINCLSIZE) 49741900Smckusick MCLGET(m2, M_WAIT); 49841900Smckusick m2->m_len = 0; 49941900Smckusick mlen = M_TRAILINGSPACE(m2); 50041900Smckusick tcp = mtod(m2, caddr_t); 50141900Smckusick if (mnew) { 50241900Smckusick m3->m_next = m2; 50341900Smckusick m3 = m2; 50441900Smckusick } else 50541900Smckusick mnew = m3 = m2; 50641900Smckusick } 50741900Smckusick siz = (mlen > m->m_len) ? m->m_len : mlen; 50841900Smckusick bcopy(fcp, tcp, siz); 50941900Smckusick m2->m_len += siz; 51041900Smckusick mlen -= siz; 51141900Smckusick len -= siz; 51241900Smckusick tcp += siz; 51341900Smckusick m->m_len -= siz; 51441900Smckusick fcp += siz; 51541900Smckusick if (m->m_len == 0) { 51641900Smckusick do { 51741900Smckusick m = m->m_next; 51841900Smckusick } while (m && m->m_len == 0); 51941900Smckusick if (m) 52041900Smckusick fcp = mtod(m, caddr_t); 52141900Smckusick } 52241900Smckusick } 52341900Smckusick m = *mbp; 52441900Smckusick *mbp = mnew; 52541900Smckusick m_freem(m); 52641900Smckusick break; 52740117Smckusick } 52841900Smckusick len -= m->m_len; 52941900Smckusick mbp = &m->m_next; 53041900Smckusick m = m->m_next; 53138414Smckusick } 53238414Smckusick return (error); 53338414Smckusick } 53438414Smckusick 53538414Smckusick struct rpc_replyhead { 53638414Smckusick u_long r_xid; 53738414Smckusick u_long r_rep; 53838414Smckusick }; 53938414Smckusick 54038414Smckusick /* 54141900Smckusick * Implement receipt of reply on a socket. 54238414Smckusick * We must search through the list of received datagrams matching them 54338414Smckusick * with outstanding requests using the xid, until ours is found. 54438414Smckusick */ 54541900Smckusick /* ARGSUSED */ 54641900Smckusick nfs_reply(nmp, myrep) 54741900Smckusick struct nfsmount *nmp; 54839344Smckusick struct nfsreq *myrep; 54938414Smckusick { 55038414Smckusick register struct mbuf *m; 55138414Smckusick register struct nfsreq *rep; 55241900Smckusick register int error = 0; 55338414Smckusick struct rpc_replyhead replyh; 55441900Smckusick struct mbuf *mp, *nam; 55541900Smckusick char *cp; 55641900Smckusick int cnt, xfer; 55738414Smckusick 55841900Smckusick /* 55941900Smckusick * Loop around until we get our own reply 56041900Smckusick */ 56141900Smckusick for (;;) { 56241900Smckusick /* 56341900Smckusick * Lock against other receivers so that I don't get stuck in 56441900Smckusick * sbwait() after someone else has received my reply for me. 56541900Smckusick * Also necessary for connection based protocols to avoid 56641900Smckusick * race conditions during a reconnect. 56741900Smckusick */ 56841900Smckusick nfs_solock(&nmp->nm_flag, 1); 56941900Smckusick /* Already received, bye bye */ 57041900Smckusick if (myrep->r_mrep != NULL) { 57141900Smckusick nfs_sounlock(&nmp->nm_flag); 57241900Smckusick return (0); 57340117Smckusick } 57441900Smckusick /* 57541900Smckusick * Get the next Rpc reply off the socket 57641900Smckusick */ 57741900Smckusick if (error = nfs_receive(nmp->nm_so, &nam, &mp, myrep)) { 57841900Smckusick nfs_sounlock(&nmp->nm_flag); 57938414Smckusick 58041900Smckusick /* 58141900Smckusick * Ignore routing errors on connectionless protocols?? 58241900Smckusick */ 58341900Smckusick if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) { 58441900Smckusick nmp->nm_so->so_error = 0; 58541900Smckusick continue; 58641900Smckusick } 58741900Smckusick 58841900Smckusick /* 58941900Smckusick * Otherwise cleanup and return a fatal error. 59041900Smckusick */ 59141900Smckusick if (myrep->r_flags & R_TIMING) { 59241900Smckusick myrep->r_flags &= ~R_TIMING; 59341900Smckusick nmp->nm_rtt = -1; 59441900Smckusick } 59541900Smckusick if (myrep->r_flags & R_SENT) { 59641900Smckusick myrep->r_flags &= ~R_SENT; 59741900Smckusick nmp->nm_sent--; 59841900Smckusick } 59941900Smckusick return (error); 60038414Smckusick } 60141900Smckusick 60241900Smckusick /* 60341900Smckusick * Get the xid and check that it is an rpc reply 60441900Smckusick */ 60541900Smckusick m = mp; 60641900Smckusick if (m->m_len >= 2*NFSX_UNSIGNED) 60741900Smckusick bcopy(mtod(m, caddr_t), (caddr_t)&replyh, 60841900Smckusick 2*NFSX_UNSIGNED); 60941900Smckusick else { 61041900Smckusick cnt = 2*NFSX_UNSIGNED; 61141900Smckusick cp = (caddr_t)&replyh; 61241900Smckusick while (m && cnt > 0) { 61341900Smckusick if (m->m_len > 0) { 61441900Smckusick xfer = (m->m_len >= cnt) ? cnt : 61541900Smckusick m->m_len; 61641900Smckusick bcopy(mtod(m, caddr_t), cp, xfer); 61741900Smckusick cnt -= xfer; 61841900Smckusick cp += xfer; 61941900Smckusick } 62041900Smckusick if (cnt > 0) 62141900Smckusick m = m->m_next; 62241900Smckusick } 62340117Smckusick } 62441900Smckusick if (replyh.r_rep != rpc_reply || m == NULL) { 62540117Smckusick nfsstats.rpcinvalid++; 62641900Smckusick m_freem(mp); 62741900Smckusick nfs_sounlock(&nmp->nm_flag); 62841900Smckusick continue; 62938414Smckusick } 63041900Smckusick /* 63141900Smckusick * Loop through the request list to match up the reply 63241900Smckusick * Iff no match, just drop the datagram 63341900Smckusick */ 63441900Smckusick m = mp; 63541900Smckusick rep = nfsreqh.r_next; 63641900Smckusick while (rep != &nfsreqh) { 63741900Smckusick if (rep->r_mrep == NULL && replyh.r_xid == rep->r_xid) { 63841900Smckusick /* Found it.. */ 63941900Smckusick rep->r_mrep = m; 64041900Smckusick /* 64141900Smckusick * Update timing 64241900Smckusick */ 64341900Smckusick if (rep->r_flags & R_TIMING) { 64441900Smckusick nfs_updatetimer(rep->r_nmp); 64541900Smckusick rep->r_flags &= ~R_TIMING; 64641900Smckusick rep->r_nmp->nm_rtt = -1; 64741900Smckusick } 64841900Smckusick if (rep->r_flags & R_SENT) { 64941900Smckusick rep->r_flags &= ~R_SENT; 65041900Smckusick rep->r_nmp->nm_sent--; 65141900Smckusick } 65240117Smckusick break; 65338414Smckusick } 65441900Smckusick rep = rep->r_next; 65538414Smckusick } 65641900Smckusick nfs_sounlock(&nmp->nm_flag); 65741900Smckusick if (nam) 65841900Smckusick m_freem(nam); 65941900Smckusick /* 66041900Smckusick * If not matched to a request, drop it. 66141900Smckusick * If it's mine, get out. 66241900Smckusick */ 66341900Smckusick if (rep == &nfsreqh) { 66441900Smckusick nfsstats.rpcunexpected++; 66541900Smckusick m_freem(m); 66641900Smckusick } else if (rep == myrep) 66741900Smckusick return (0); 66838414Smckusick } 66938414Smckusick } 67038414Smckusick 67138414Smckusick /* 67238414Smckusick * nfs_request - goes something like this 67338414Smckusick * - fill in request struct 67438414Smckusick * - links it into list 67541900Smckusick * - calls nfs_send() for first transmit 67641900Smckusick * - calls nfs_receive() to get reply 67738414Smckusick * - break down rpc header and return with nfs reply pointed to 67838414Smckusick * by mrep or error 67938414Smckusick * nb: always frees up mreq mbuf list 68038414Smckusick */ 68141900Smckusick nfs_request(vp, mreq, xid, procnum, procp, mp, mrp, mdp, dposp) 68238414Smckusick struct vnode *vp; 68338414Smckusick struct mbuf *mreq; 68438414Smckusick u_long xid; 68541900Smckusick int procnum; 68641900Smckusick struct proc *procp; 68738414Smckusick struct mount *mp; 68838414Smckusick struct mbuf **mrp; 68938414Smckusick struct mbuf **mdp; 69038414Smckusick caddr_t *dposp; 69138414Smckusick { 69238414Smckusick register struct mbuf *m, *mrep; 69338414Smckusick register struct nfsreq *rep; 69438414Smckusick register u_long *p; 69538414Smckusick register int len; 69641900Smckusick struct nfsmount *nmp; 69738414Smckusick struct mbuf *md; 69839344Smckusick struct nfsreq *reph; 69938414Smckusick caddr_t dpos; 70038414Smckusick char *cp2; 70138414Smckusick int t1; 70238414Smckusick int s; 70341900Smckusick int error = 0; 70438414Smckusick 70541900Smckusick nmp = VFSTONFS(mp); 70638414Smckusick m = mreq; 70738414Smckusick MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK); 70838414Smckusick rep->r_xid = xid; 70941900Smckusick rep->r_nmp = nmp; 71038414Smckusick rep->r_vp = vp; 71141900Smckusick rep->r_procp = procp; 71241900Smckusick if (nmp->nm_flag & NFSMNT_SOFT) 71341900Smckusick rep->r_retry = nmp->nm_retry; 71438414Smckusick else 71540117Smckusick rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */ 71640117Smckusick rep->r_flags = rep->r_rexmit = 0; 71741900Smckusick /* 71841900Smckusick * Three cases: 71941900Smckusick * - non-idempotent requests on SOCK_DGRAM use NFS_MINIDEMTIMEO 72041900Smckusick * - idempotent requests on SOCK_DGRAM use 0 72141900Smckusick * - Reliable transports, NFS_RELIABLETIMEO 72241900Smckusick * Timeouts are still done on reliable transports to ensure detection 72341900Smckusick * of connection loss. 72441900Smckusick */ 72541900Smckusick if (nmp->nm_sotype != SOCK_DGRAM) 72641900Smckusick rep->r_timerinit = -NFS_RELIABLETIMEO; 72741900Smckusick else if (nonidempotent[procnum]) 72841900Smckusick rep->r_timerinit = -NFS_MINIDEMTIMEO; 72941900Smckusick else 73041900Smckusick rep->r_timerinit = 0; 73141900Smckusick rep->r_timer = rep->r_timerinit; 73238414Smckusick rep->r_mrep = NULL; 73338414Smckusick len = 0; 73438414Smckusick while (m) { 73538414Smckusick len += m->m_len; 73638414Smckusick m = m->m_next; 73738414Smckusick } 73841900Smckusick mreq->m_pkthdr.len = len; 73941900Smckusick mreq->m_pkthdr.rcvif = (struct ifnet *)0; 74041900Smckusick /* 74141900Smckusick * For non-atomic protocols, insert a Sun RPC Record Mark. 74241900Smckusick */ 74341900Smckusick if ((nmp->nm_soflags & PR_ATOMIC) == 0) { 74441900Smckusick M_PREPEND(mreq, sizeof(u_long), M_WAIT); 74541900Smckusick *mtod(mreq, u_long *) = htonl(0x80000000 | len); 74641900Smckusick } 74741900Smckusick rep->r_mreq = mreq; 74838414Smckusick 74940117Smckusick /* 75040117Smckusick * Do the client side RPC. 75140117Smckusick */ 75240117Smckusick nfsstats.rpcrequests++; 75341900Smckusick /* 75441900Smckusick * Chain request into list of outstanding requests. Be sure 75541900Smckusick * to put it LAST so timer finds oldest requests first. 75641900Smckusick */ 75740117Smckusick s = splnet(); 75839344Smckusick reph = &nfsreqh; 75941900Smckusick reph->r_prev->r_next = rep; 76041900Smckusick rep->r_prev = reph->r_prev; 76139344Smckusick reph->r_prev = rep; 76239344Smckusick rep->r_next = reph; 76340117Smckusick /* 76440117Smckusick * If backing off another request or avoiding congestion, don't 76540117Smckusick * send this one now but let timer do it. If not timing a request, 76640117Smckusick * do it now. 76740117Smckusick */ 76841900Smckusick if (nmp->nm_sent <= 0 || nmp->nm_sotype != SOCK_DGRAM || 76941900Smckusick (nmp->nm_currexmit == 0 && nmp->nm_sent < nmp->nm_window)) { 77041900Smckusick nmp->nm_sent++; 77141900Smckusick rep->r_flags |= R_SENT; 77241900Smckusick if (nmp->nm_rtt == -1) { 77341900Smckusick nmp->nm_rtt = 0; 77441900Smckusick rep->r_flags |= R_TIMING; 77541900Smckusick } 77640117Smckusick splx(s); 77741900Smckusick m = m_copym(mreq, 0, M_COPYALL, M_WAIT); 77841900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) 77941900Smckusick nfs_solock(&nmp->nm_flag, 1); 78041900Smckusick error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep); 78141900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) 78241900Smckusick nfs_sounlock(&nmp->nm_flag); 78341900Smckusick if (error && NFSIGNORE_SOERROR(nmp->nm_soflags, error)) 78441900Smckusick nmp->nm_so->so_error = error = 0; 78541900Smckusick } else 78641900Smckusick splx(s); 78738414Smckusick 78838414Smckusick /* 78940117Smckusick * Wait for the reply from our send or the timer's. 79040117Smckusick */ 79141900Smckusick if (!error) 79241900Smckusick error = nfs_reply(nmp, rep); 79338414Smckusick 79440117Smckusick /* 79540117Smckusick * RPC done, unlink the request. 79640117Smckusick */ 79738414Smckusick s = splnet(); 79838414Smckusick rep->r_prev->r_next = rep->r_next; 79939344Smckusick rep->r_next->r_prev = rep->r_prev; 80038414Smckusick splx(s); 80141900Smckusick 80241900Smckusick /* 80341900Smckusick * If there was a successful reply and a tprintf msg. 80441900Smckusick * tprintf a response. 80541900Smckusick */ 80641900Smckusick if (!error && (rep->r_flags & R_TPRINTFMSG)) { 80741900Smckusick if (rep->r_procp) 80841900Smckusick tprintf(rep->r_procp->p_session->s_ttyvp, 80941900Smckusick "Nfs server %s, is alive again\n", 81041900Smckusick rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 81141900Smckusick else 81241900Smckusick tprintf(NULLVP, "Nfs server %s, is alive again\n", 81341900Smckusick rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 81441900Smckusick } 81538414Smckusick m_freem(rep->r_mreq); 81638414Smckusick mrep = md = rep->r_mrep; 81738414Smckusick FREE((caddr_t)rep, M_NFSREQ); 81838414Smckusick if (error) 81938414Smckusick return (error); 82038414Smckusick 82138414Smckusick /* 82238414Smckusick * break down the rpc header and check if ok 82338414Smckusick */ 82438414Smckusick dpos = mtod(md, caddr_t); 82538414Smckusick nfsm_disect(p, u_long *, 5*NFSX_UNSIGNED); 82638414Smckusick p += 2; 82738414Smckusick if (*p++ == rpc_msgdenied) { 82838414Smckusick if (*p == rpc_mismatch) 82938414Smckusick error = EOPNOTSUPP; 83038414Smckusick else 83138414Smckusick error = EACCES; 83238414Smckusick m_freem(mrep); 83338414Smckusick return (error); 83438414Smckusick } 83538414Smckusick /* 83638414Smckusick * skip over the auth_verf, someday we may want to cache auth_short's 83738414Smckusick * for nfs_reqhead(), but for now just dump it 83838414Smckusick */ 83938414Smckusick if (*++p != 0) { 84038414Smckusick len = nfsm_rndup(fxdr_unsigned(long, *p)); 84138414Smckusick nfsm_adv(len); 84238414Smckusick } 84338414Smckusick nfsm_disect(p, u_long *, NFSX_UNSIGNED); 84438414Smckusick /* 0 == ok */ 84538414Smckusick if (*p == 0) { 84638414Smckusick nfsm_disect(p, u_long *, NFSX_UNSIGNED); 84738414Smckusick if (*p != 0) { 84838414Smckusick error = fxdr_unsigned(int, *p); 84938414Smckusick m_freem(mrep); 85038414Smckusick return (error); 85138414Smckusick } 85238414Smckusick *mrp = mrep; 85338414Smckusick *mdp = md; 85438414Smckusick *dposp = dpos; 85538414Smckusick return (0); 85638414Smckusick } 85738414Smckusick m_freem(mrep); 85838414Smckusick return (EPROTONOSUPPORT); 85938414Smckusick nfsmout: 86038414Smckusick return (error); 86138414Smckusick } 86238414Smckusick 86338414Smckusick /* 86438414Smckusick * Get a request for the server main loop 86538414Smckusick * - receive a request via. nfs_soreceive() 86638414Smckusick * - verify it 86738414Smckusick * - fill in the cred struct. 86838414Smckusick */ 869*42243Smckusick nfs_getreq(so, prog, vers, maxproc, nam, mrp, mdp, dposp, retxid, procnum, cr, 87041900Smckusick lockp, msk, mtch) 87138414Smckusick struct socket *so; 87238414Smckusick u_long prog; 87338414Smckusick u_long vers; 87438414Smckusick int maxproc; 87538414Smckusick struct mbuf **nam; 87638414Smckusick struct mbuf **mrp; 87738414Smckusick struct mbuf **mdp; 87838414Smckusick caddr_t *dposp; 87938414Smckusick u_long *retxid; 880*42243Smckusick u_long *procnum; 88138414Smckusick register struct ucred *cr; 88241900Smckusick int *lockp; 88341900Smckusick struct mbuf *msk, *mtch; 88438414Smckusick { 88538414Smckusick register int i; 88639494Smckusick register u_long *p; 88739494Smckusick register long t1; 88839494Smckusick caddr_t dpos, cp2; 88939494Smckusick int error = 0; 89039494Smckusick struct mbuf *mrep, *md; 89139494Smckusick int len; 89238414Smckusick 89341900Smckusick if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 89441900Smckusick nfs_solock(lockp, 0); 89541900Smckusick error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0); 89641900Smckusick nfs_sounlock(lockp); 89741900Smckusick } else { 89841900Smckusick mrep = (struct mbuf *)0; 89941900Smckusick do { 90041900Smckusick if (mrep) { 90141900Smckusick m_freem(*nam); 90241900Smckusick m_freem(mrep); 90341900Smckusick } 90441900Smckusick error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0); 90541900Smckusick } while (!error && nfs_badnam(*nam, msk, mtch)); 90641900Smckusick } 90741900Smckusick if (error) 90838414Smckusick return (error); 90938414Smckusick md = mrep; 91038414Smckusick dpos = mtod(mrep, caddr_t); 91138414Smckusick nfsm_disect(p, u_long *, 10*NFSX_UNSIGNED); 91238414Smckusick *retxid = *p++; 91338414Smckusick if (*p++ != rpc_call) { 91438414Smckusick m_freem(mrep); 91538414Smckusick return (ERPCMISMATCH); 91638414Smckusick } 91738414Smckusick if (*p++ != rpc_vers) { 91838414Smckusick m_freem(mrep); 91938414Smckusick return (ERPCMISMATCH); 92038414Smckusick } 92138414Smckusick if (*p++ != prog) { 92238414Smckusick m_freem(mrep); 92338414Smckusick return (EPROGUNAVAIL); 92438414Smckusick } 92538414Smckusick if (*p++ != vers) { 92638414Smckusick m_freem(mrep); 92738414Smckusick return (EPROGMISMATCH); 92838414Smckusick } 929*42243Smckusick *procnum = fxdr_unsigned(u_long, *p++); 930*42243Smckusick if (*procnum == NFSPROC_NULL) { 93138414Smckusick *mrp = mrep; 93238414Smckusick return (0); 93338414Smckusick } 934*42243Smckusick if (*procnum > maxproc || *p++ != rpc_auth_unix) { 93538414Smckusick m_freem(mrep); 93638414Smckusick return (EPROCUNAVAIL); 93738414Smckusick } 93841900Smckusick len = fxdr_unsigned(int, *p++); 93941900Smckusick if (len < 0 || len > RPCAUTH_MAXSIZ) { 94041900Smckusick m_freem(mrep); 94141900Smckusick return (EBADRPC); 94241900Smckusick } 94339494Smckusick len = fxdr_unsigned(int, *++p); 94441900Smckusick if (len < 0 || len > NFS_MAXNAMLEN) { 94541900Smckusick m_freem(mrep); 94641900Smckusick return (EBADRPC); 94741900Smckusick } 94839494Smckusick nfsm_adv(nfsm_rndup(len)); 94938414Smckusick nfsm_disect(p, u_long *, 3*NFSX_UNSIGNED); 95038414Smckusick cr->cr_uid = fxdr_unsigned(uid_t, *p++); 95138414Smckusick cr->cr_gid = fxdr_unsigned(gid_t, *p++); 95239494Smckusick len = fxdr_unsigned(int, *p); 95341900Smckusick if (len < 0 || len > RPCAUTH_UNIXGIDS) { 95438414Smckusick m_freem(mrep); 95538414Smckusick return (EBADRPC); 95638414Smckusick } 95739494Smckusick nfsm_disect(p, u_long *, (len + 2)*NFSX_UNSIGNED); 95839494Smckusick for (i = 1; i <= len; i++) 95941900Smckusick if (i < NGROUPS) 96041900Smckusick cr->cr_groups[i] = fxdr_unsigned(gid_t, *p++); 96141900Smckusick else 96241900Smckusick p++; 96341900Smckusick cr->cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1); 96438414Smckusick /* 96538414Smckusick * Do we have any use for the verifier. 96638414Smckusick * According to the "Remote Procedure Call Protocol Spec." it 96738414Smckusick * should be AUTH_NULL, but some clients make it AUTH_UNIX? 96838414Smckusick * For now, just skip over it 96938414Smckusick */ 97039494Smckusick len = fxdr_unsigned(int, *++p); 97141900Smckusick if (len < 0 || len > RPCAUTH_MAXSIZ) { 97241900Smckusick m_freem(mrep); 97341900Smckusick return (EBADRPC); 97441900Smckusick } 97539494Smckusick if (len > 0) 97639494Smckusick nfsm_adv(nfsm_rndup(len)); 97738414Smckusick *mrp = mrep; 97838414Smckusick *mdp = md; 97938414Smckusick *dposp = dpos; 98038414Smckusick return (0); 98138414Smckusick nfsmout: 98238414Smckusick return (error); 98338414Smckusick } 98438414Smckusick 98538414Smckusick /* 98638414Smckusick * Generate the rpc reply header 98738414Smckusick * siz arg. is used to decide if adding a cluster is worthwhile 98838414Smckusick */ 98938414Smckusick nfs_rephead(siz, retxid, err, mrq, mbp, bposp) 99038414Smckusick int siz; 99138414Smckusick u_long retxid; 99238414Smckusick int err; 99338414Smckusick struct mbuf **mrq; 99438414Smckusick struct mbuf **mbp; 99538414Smckusick caddr_t *bposp; 99638414Smckusick { 99739494Smckusick register u_long *p; 99839494Smckusick register long t1; 99939494Smckusick caddr_t bpos; 100039494Smckusick struct mbuf *mreq, *mb, *mb2; 100138414Smckusick 100238414Smckusick NFSMGETHDR(mreq); 100338414Smckusick mb = mreq; 100438414Smckusick if ((siz+RPC_REPLYSIZ) > MHLEN) 100541900Smckusick MCLGET(mreq, M_WAIT); 100638414Smckusick p = mtod(mreq, u_long *); 100738414Smckusick mreq->m_len = 6*NFSX_UNSIGNED; 100838414Smckusick bpos = ((caddr_t)p)+mreq->m_len; 100938414Smckusick *p++ = retxid; 101038414Smckusick *p++ = rpc_reply; 101138414Smckusick if (err == ERPCMISMATCH) { 101238414Smckusick *p++ = rpc_msgdenied; 101338414Smckusick *p++ = rpc_mismatch; 101438414Smckusick *p++ = txdr_unsigned(2); 101538414Smckusick *p = txdr_unsigned(2); 101638414Smckusick } else { 101738414Smckusick *p++ = rpc_msgaccepted; 101838414Smckusick *p++ = 0; 101938414Smckusick *p++ = 0; 102038414Smckusick switch (err) { 102138414Smckusick case EPROGUNAVAIL: 102238414Smckusick *p = txdr_unsigned(RPC_PROGUNAVAIL); 102338414Smckusick break; 102438414Smckusick case EPROGMISMATCH: 102538414Smckusick *p = txdr_unsigned(RPC_PROGMISMATCH); 102638414Smckusick nfsm_build(p, u_long *, 2*NFSX_UNSIGNED); 102738414Smckusick *p++ = txdr_unsigned(2); 102838414Smckusick *p = txdr_unsigned(2); /* someday 3 */ 102938414Smckusick break; 103038414Smckusick case EPROCUNAVAIL: 103138414Smckusick *p = txdr_unsigned(RPC_PROCUNAVAIL); 103238414Smckusick break; 103338414Smckusick default: 103438414Smckusick *p = 0; 103538414Smckusick if (err != VNOVAL) { 103638414Smckusick nfsm_build(p, u_long *, NFSX_UNSIGNED); 103738414Smckusick *p = txdr_unsigned(err); 103838414Smckusick } 103938414Smckusick break; 104038414Smckusick }; 104138414Smckusick } 104238414Smckusick *mrq = mreq; 104338414Smckusick *mbp = mb; 104438414Smckusick *bposp = bpos; 104538414Smckusick if (err != 0 && err != VNOVAL) 104638414Smckusick nfsstats.srvrpc_errs++; 104738414Smckusick return (0); 104838414Smckusick } 104938414Smckusick 105038414Smckusick /* 105138414Smckusick * Nfs timer routine 105238414Smckusick * Scan the nfsreq list and retranmit any requests that have timed out 105338414Smckusick * To avoid retransmission attempts on STREAM sockets (in the future) make 105440117Smckusick * sure to set the r_retry field to 0 (implies nm_retry == 0). 105538414Smckusick */ 105638414Smckusick nfs_timer() 105738414Smckusick { 105838414Smckusick register struct nfsreq *rep; 105938414Smckusick register struct mbuf *m; 106038414Smckusick register struct socket *so; 106141900Smckusick register struct nfsmount *nmp; 106240117Smckusick int s, error; 106338414Smckusick 106438414Smckusick s = splnet(); 106541900Smckusick for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) { 106641900Smckusick nmp = rep->r_nmp; 106741900Smckusick if (rep->r_mrep || (rep->r_flags & R_SOFTTERM) || 106841900Smckusick (so = nmp->nm_so) == NULL) 106941900Smckusick continue; 107041900Smckusick if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp)) { 107141900Smckusick rep->r_flags |= R_SOFTTERM; 107241900Smckusick continue; 107341900Smckusick } 107440117Smckusick if (rep->r_flags & R_TIMING) /* update rtt in mount */ 107541900Smckusick nmp->nm_rtt++; 107641900Smckusick if (nmp->nm_sotype != SOCK_DGRAM) 107740117Smckusick continue; 107841900Smckusick /* If not timed out */ 107941900Smckusick if (++rep->r_timer < nmp->nm_rto) 108041900Smckusick continue; 108141900Smckusick #ifdef notdef 108241900Smckusick if (nmp->nm_sotype != SOCK_DGRAM) { 108341900Smckusick rep->r_flags |= R_MUSTRESEND; 108441900Smckusick rep->r_timer = rep->r_timerinit; 108541900Smckusick continue; 108641900Smckusick } 108741900Smckusick #endif 108840117Smckusick /* Do backoff and save new timeout in mount */ 108940117Smckusick if (rep->r_flags & R_TIMING) { 109041900Smckusick nfs_backofftimer(nmp); 109140117Smckusick rep->r_flags &= ~R_TIMING; 109241900Smckusick nmp->nm_rtt = -1; 109340117Smckusick } 109440117Smckusick if (rep->r_flags & R_SENT) { 109540117Smckusick rep->r_flags &= ~R_SENT; 109641900Smckusick nmp->nm_sent--; 109740117Smckusick } 109841900Smckusick 109941900Smckusick /* 110041900Smckusick * Check for too many retries on soft mount. 110141900Smckusick * nb: For hard mounts, r_retry == NFS_MAXREXMIT+1 110241900Smckusick */ 110341900Smckusick if (++rep->r_rexmit > NFS_MAXREXMIT) 110440117Smckusick rep->r_rexmit = NFS_MAXREXMIT; 110540117Smckusick 110641900Smckusick /* 110741900Smckusick * Check for server not responding 110841900Smckusick */ 110941900Smckusick if ((rep->r_flags & R_TPRINTFMSG) == 0 && 111041900Smckusick rep->r_rexmit > 8) { 111141900Smckusick if (rep->r_procp && rep->r_procp->p_session) 111241900Smckusick tprintf(rep->r_procp->p_session->s_ttyvp, 111341900Smckusick "Nfs server %s, not responding\n", 111441900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 111541900Smckusick else 111641900Smckusick tprintf(NULLVP, 111741900Smckusick "Nfs server %s, not responding\n", 111841900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 111941900Smckusick rep->r_flags |= R_TPRINTFMSG; 112041900Smckusick } 112141900Smckusick if (rep->r_rexmit > rep->r_retry) { /* too many */ 112241900Smckusick nfsstats.rpctimeouts++; 112341900Smckusick rep->r_flags |= R_SOFTTERM; 112441900Smckusick continue; 112541900Smckusick } 112641900Smckusick 112741900Smckusick /* 112841900Smckusick * If there is enough space and the window allows.. 112941900Smckusick * Resend it 113041900Smckusick */ 113141900Smckusick if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len && 113241900Smckusick nmp->nm_sent < nmp->nm_window && 113341900Smckusick (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){ 113441900Smckusick nfsstats.rpcretries++; 113541900Smckusick if ((nmp->nm_flag & NFSMNT_NOCONN) == 0) 113641900Smckusick error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, 113741900Smckusick (caddr_t)0, (struct mbuf *)0, (struct mbuf *)0); 113841900Smckusick else 113941900Smckusick error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, 114041900Smckusick nmp->nm_nam, (struct mbuf *)0, (struct mbuf *)0); 114141900Smckusick if (error) { 114241900Smckusick if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) 114341900Smckusick so->so_error = 0; 114441900Smckusick } else { 114541900Smckusick /* 114641900Smckusick * We need to time the request even though we 114741900Smckusick * are retransmitting. 114841900Smckusick */ 114941900Smckusick nmp->nm_rtt = 0; 115041900Smckusick nmp->nm_sent++; 115141900Smckusick rep->r_flags |= (R_SENT|R_TIMING); 115241900Smckusick rep->r_timer = rep->r_timerinit; 115341900Smckusick } 115441900Smckusick } 115540117Smckusick } 115640117Smckusick splx(s); 115740117Smckusick timeout(nfs_timer, (caddr_t)0, hz/NFS_HZ); 115840117Smckusick } 115940117Smckusick 116040117Smckusick /* 116140117Smckusick * NFS timer update and backoff. The "Jacobson/Karels/Karn" scheme is 116240117Smckusick * used here. The timer state is held in the nfsmount structure and 116340117Smckusick * a single request is used to clock the response. When successful 116440117Smckusick * the rtt smoothing in nfs_updatetimer is used, when failed the backoff 116540117Smckusick * is done by nfs_backofftimer. We also log failure messages in these 116640117Smckusick * routines. 116740117Smckusick * 116840117Smckusick * Congestion variables are held in the nfshost structure which 116940117Smckusick * is referenced by nfsmounts and shared per-server. This separation 117040117Smckusick * makes it possible to do per-mount timing which allows varying disk 117140117Smckusick * access times to be dealt with, while preserving a network oriented 117240117Smckusick * congestion control scheme. 117340117Smckusick * 117440117Smckusick * The windowing implements the Jacobson/Karels slowstart algorithm 117540117Smckusick * with adjusted scaling factors. We start with one request, then send 117640117Smckusick * 4 more after each success until the ssthresh limit is reached, then 117740117Smckusick * we increment at a rate proportional to the window. On failure, we 117840117Smckusick * remember 3/4 the current window and clamp the send limit to 1. Note 117940117Smckusick * ICMP source quench is not reflected in so->so_error so we ignore that 118040117Smckusick * for now. 118140117Smckusick * 118240117Smckusick * NFS behaves much more like a transport protocol with these changes, 118340117Smckusick * shedding the teenage pedal-to-the-metal tendencies of "other" 118440117Smckusick * implementations. 118540117Smckusick * 118640117Smckusick * Timers and congestion avoidance by Tom Talpey, Open Software Foundation. 118740117Smckusick */ 118840117Smckusick 118940117Smckusick /* 119040117Smckusick * The TCP algorithm was not forgiving enough. Because the NFS server 119140117Smckusick * responds only after performing lookups/diskio/etc, we have to be 119240117Smckusick * more prepared to accept a spiky variance. The TCP algorithm is: 119341900Smckusick * TCP_RTO(nmp) ((((nmp)->nm_srtt >> 2) + (nmp)->nm_rttvar) >> 1) 119440117Smckusick */ 119541900Smckusick #define NFS_RTO(nmp) (((nmp)->nm_srtt >> 3) + (nmp)->nm_rttvar) 119640117Smckusick 119741900Smckusick nfs_updatetimer(nmp) 119841900Smckusick register struct nfsmount *nmp; 119940117Smckusick { 120040117Smckusick 120140117Smckusick /* If retransmitted, clear and return */ 120241900Smckusick if (nmp->nm_rexmit || nmp->nm_currexmit) { 120341900Smckusick nmp->nm_rexmit = nmp->nm_currexmit = 0; 120440117Smckusick return; 120540117Smckusick } 120640117Smckusick /* If have a measurement, do smoothing */ 120741900Smckusick if (nmp->nm_srtt) { 120840117Smckusick register short delta; 120941900Smckusick delta = nmp->nm_rtt - (nmp->nm_srtt >> 3); 121041900Smckusick if ((nmp->nm_srtt += delta) <= 0) 121141900Smckusick nmp->nm_srtt = 1; 121240117Smckusick if (delta < 0) 121340117Smckusick delta = -delta; 121441900Smckusick delta -= (nmp->nm_rttvar >> 2); 121541900Smckusick if ((nmp->nm_rttvar += delta) <= 0) 121641900Smckusick nmp->nm_rttvar = 1; 121740117Smckusick /* Else initialize */ 121840117Smckusick } else { 121941900Smckusick nmp->nm_rttvar = nmp->nm_rtt << 1; 122041900Smckusick if (nmp->nm_rttvar == 0) nmp->nm_rttvar = 2; 122141900Smckusick nmp->nm_srtt = nmp->nm_rttvar << 2; 122240117Smckusick } 122340117Smckusick /* Compute new Retransmission TimeOut and clip */ 122441900Smckusick nmp->nm_rto = NFS_RTO(nmp); 122541900Smckusick if (nmp->nm_rto < NFS_MINTIMEO) 122641900Smckusick nmp->nm_rto = NFS_MINTIMEO; 122741900Smckusick else if (nmp->nm_rto > NFS_MAXTIMEO) 122841900Smckusick nmp->nm_rto = NFS_MAXTIMEO; 122940117Smckusick 123040117Smckusick /* Update window estimate */ 123141900Smckusick if (nmp->nm_window < nmp->nm_ssthresh) /* quickly */ 123241900Smckusick nmp->nm_window += 4; 123340117Smckusick else { /* slowly */ 123441900Smckusick register long incr = ++nmp->nm_winext; 123541900Smckusick incr = (incr * incr) / nmp->nm_window; 123640117Smckusick if (incr > 0) { 123741900Smckusick nmp->nm_winext = 0; 123841900Smckusick ++nmp->nm_window; 123940117Smckusick } 124040117Smckusick } 124141900Smckusick if (nmp->nm_window > NFS_MAXWINDOW) 124241900Smckusick nmp->nm_window = NFS_MAXWINDOW; 124340117Smckusick } 124440117Smckusick 124541900Smckusick nfs_backofftimer(nmp) 124641900Smckusick register struct nfsmount *nmp; 124740117Smckusick { 124840117Smckusick register unsigned long newrto; 124940117Smckusick 125040117Smckusick /* Clip shift count */ 125141900Smckusick if (++nmp->nm_rexmit > 8 * sizeof nmp->nm_rto) 125241900Smckusick nmp->nm_rexmit = 8 * sizeof nmp->nm_rto; 125340117Smckusick /* Back off RTO exponentially */ 125441900Smckusick newrto = NFS_RTO(nmp); 125541900Smckusick newrto <<= (nmp->nm_rexmit - 1); 125640117Smckusick if (newrto == 0 || newrto > NFS_MAXTIMEO) 125740117Smckusick newrto = NFS_MAXTIMEO; 125841900Smckusick nmp->nm_rto = newrto; 125940117Smckusick 126040117Smckusick /* If too many retries, message, assume a bogus RTT and re-measure */ 126141900Smckusick if (nmp->nm_currexmit < nmp->nm_rexmit) { 126241900Smckusick nmp->nm_currexmit = nmp->nm_rexmit; 126341900Smckusick if (nmp->nm_currexmit >= nfsrexmtthresh) { 126441900Smckusick if (nmp->nm_currexmit == nfsrexmtthresh) { 126541900Smckusick nmp->nm_rttvar += (nmp->nm_srtt >> 2); 126641900Smckusick nmp->nm_srtt = 0; 126738414Smckusick } 126838414Smckusick } 126938414Smckusick } 127040117Smckusick /* Close down window but remember this point (3/4 current) for later */ 127141900Smckusick nmp->nm_ssthresh = ((nmp->nm_window << 1) + nmp->nm_window) >> 2; 127241900Smckusick nmp->nm_window = 1; 127341900Smckusick nmp->nm_winext = 0; 127438414Smckusick } 127538414Smckusick 127638414Smckusick /* 127741900Smckusick * Test for a termination signal pending on procp. 127841900Smckusick * This is used for NFSMNT_INT mounts. 127938414Smckusick */ 128041900Smckusick nfs_sigintr(p) 128141900Smckusick register struct proc *p; 128241900Smckusick { 128341900Smckusick if (p && p->p_sig && (((p->p_sig &~ p->p_sigmask) &~ p->p_sigignore) & 128441900Smckusick NFSINT_SIGMASK)) 128541900Smckusick return (1); 128641900Smckusick else 128741900Smckusick return (0); 128841900Smckusick } 128940117Smckusick 129041900Smckusick /* 129141900Smckusick * Lock a socket against others. 129241900Smckusick * Necessary for STREAM sockets to ensure you get an entire rpc request/reply 129341900Smckusick * and also to avoid race conditions between the processes with nfs requests 129441900Smckusick * in progress when a reconnect is necessary. 129541900Smckusick */ 129641900Smckusick nfs_solock(flagp, cant_intr) 129741900Smckusick int *flagp; 129841900Smckusick int cant_intr; 129938414Smckusick { 130040117Smckusick 130141900Smckusick while (*flagp & NFSMNT_SCKLOCK) { 130241900Smckusick *flagp |= NFSMNT_WANTSCK; 130341900Smckusick if (cant_intr) 130441900Smckusick (void) sleep((caddr_t)flagp, PZERO-7); 130541900Smckusick else 130641900Smckusick (void) tsleep((caddr_t)flagp, PZERO+1, "nfssolck", 0); 130740117Smckusick } 130841900Smckusick *flagp |= NFSMNT_SCKLOCK; 130941900Smckusick } 131040117Smckusick 131141900Smckusick /* 131241900Smckusick * Unlock the stream socket for others. 131341900Smckusick */ 131441900Smckusick nfs_sounlock(flagp) 131541900Smckusick int *flagp; 131641900Smckusick { 131741900Smckusick 131841900Smckusick if ((*flagp & NFSMNT_SCKLOCK) == 0) 131941900Smckusick panic("nfs sounlock"); 132041900Smckusick *flagp &= ~NFSMNT_SCKLOCK; 132141900Smckusick if (*flagp & NFSMNT_WANTSCK) { 132241900Smckusick *flagp &= ~NFSMNT_WANTSCK; 132341900Smckusick wakeup((caddr_t)flagp); 132440117Smckusick } 132538414Smckusick } 132641900Smckusick 132741900Smckusick /* 132841900Smckusick * This function compares two net addresses by family and returns TRUE 132941900Smckusick * if they are the same. 133041900Smckusick * If there is any doubt, return FALSE. 133141900Smckusick */ 133241900Smckusick nfs_netaddr_match(nam1, nam2) 133341900Smckusick struct mbuf *nam1, *nam2; 133441900Smckusick { 133541900Smckusick register struct sockaddr *saddr1, *saddr2; 133641900Smckusick 133741900Smckusick saddr1 = mtod(nam1, struct sockaddr *); 133841900Smckusick saddr2 = mtod(nam2, struct sockaddr *); 133941900Smckusick if (saddr1->sa_family != saddr2->sa_family) 134041900Smckusick return (0); 134141900Smckusick 134241900Smckusick /* 134341900Smckusick * Must do each address family separately since unused fields 134441900Smckusick * are undefined values and not always zeroed. 134541900Smckusick */ 134641900Smckusick switch (saddr1->sa_family) { 134741900Smckusick case AF_INET: 134841900Smckusick if (((struct sockaddr_in *)saddr1)->sin_addr.s_addr == 134941900Smckusick ((struct sockaddr_in *)saddr2)->sin_addr.s_addr) 135041900Smckusick return (1); 135141900Smckusick break; 135241900Smckusick default: 135341900Smckusick break; 135441900Smckusick }; 135541900Smckusick return (0); 135641900Smckusick } 135741900Smckusick 135841900Smckusick /* 135941900Smckusick * Check the hostname fields for nfsd's mask and match fields. 136041900Smckusick * By address family: 136141900Smckusick * - Bitwise AND the mask with the host address field 136241900Smckusick * - Compare for == with match 136341900Smckusick * return TRUE if not equal 136441900Smckusick */ 136541900Smckusick nfs_badnam(nam, msk, mtch) 136641900Smckusick register struct mbuf *nam, *msk, *mtch; 136741900Smckusick { 136841900Smckusick switch (mtod(nam, struct sockaddr *)->sa_family) { 136941900Smckusick case AF_INET: 137041900Smckusick return ((mtod(nam, struct sockaddr_in *)->sin_addr.s_addr & 137141900Smckusick mtod(msk, struct sockaddr_in *)->sin_addr.s_addr) != 137241900Smckusick mtod(mtch, struct sockaddr_in *)->sin_addr.s_addr); 137341900Smckusick default: 137441900Smckusick printf("nfs_badmatch, unknown sa_family\n"); 137541900Smckusick return (0); 137641900Smckusick }; 137741900Smckusick } 1378