138414Smckusick /* 238414Smckusick * Copyright (c) 1989 The Regents of the University of California. 338414Smckusick * All rights reserved. 438414Smckusick * 538414Smckusick * This code is derived from software contributed to Berkeley by 638414Smckusick * Rick Macklem at The University of Guelph. 738414Smckusick * 838414Smckusick * Redistribution and use in source and binary forms are permitted 938414Smckusick * provided that the above copyright notice and this paragraph are 1038414Smckusick * duplicated in all such forms and that any documentation, 1138414Smckusick * advertising materials, and other materials related to such 1238414Smckusick * distribution and use acknowledge that the software was developed 1338414Smckusick * by the University of California, Berkeley. The name of the 1438414Smckusick * University may not be used to endorse or promote products derived 1538414Smckusick * from this software without specific prior written permission. 1638414Smckusick * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 1738414Smckusick * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 1838414Smckusick * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
1938414Smckusick * 20*41900Smckusick * @(#)nfs_socket.c 7.12 (Berkeley) 05/14/90 2138414Smckusick */ 2238414Smckusick 2338414Smckusick /* 24*41900Smckusick * Socket operations for use by nfs 2538414Smckusick */ 2638414Smckusick 2738414Smckusick #include "types.h" 2838414Smckusick #include "param.h" 2938414Smckusick #include "uio.h" 3038414Smckusick #include "user.h" 3140117Smckusick #include "proc.h" 3240117Smckusick #include "signal.h" 3338414Smckusick #include "mount.h" 3438414Smckusick #include "kernel.h" 3538414Smckusick #include "malloc.h" 3638414Smckusick #include "mbuf.h" 3738414Smckusick #include "vnode.h" 3838414Smckusick #include "domain.h" 3938414Smckusick #include "protosw.h" 4038414Smckusick #include "socket.h" 4138414Smckusick #include "socketvar.h" 42*41900Smckusick #include "netinet/in.h" 43*41900Smckusick #include "netinet/tcp.h" 4438414Smckusick #include "rpcv2.h" 4538414Smckusick #include "nfsv2.h" 4638414Smckusick #include "nfs.h" 4738414Smckusick #include "xdr_subs.h" 4838414Smckusick #include "nfsm_subs.h" 4938414Smckusick #include "nfsmount.h" 5038414Smckusick 5140117Smckusick #include "syslog.h" 5240117Smckusick 5338414Smckusick #define TRUE 1 5438414Smckusick 5540117Smckusick /* 5638414Smckusick * External data, mostly RPC constants in XDR form 5738414Smckusick */ 5838414Smckusick extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix, 5938414Smckusick rpc_msgaccepted, rpc_call; 6038414Smckusick extern u_long nfs_prog, nfs_vers; 61*41900Smckusick extern int nonidempotent[NFS_NPROCS]; 62*41900Smckusick int nfs_sbwait(); 63*41900Smckusick void nfs_disconnect(); 64*41900Smckusick 6538414Smckusick int nfsrv_null(), 6638414Smckusick nfsrv_getattr(), 6738414Smckusick nfsrv_setattr(), 6838414Smckusick nfsrv_lookup(), 6938414Smckusick nfsrv_readlink(), 7038414Smckusick nfsrv_read(), 7138414Smckusick nfsrv_write(), 7238414Smckusick nfsrv_create(), 7338414Smckusick nfsrv_remove(), 7438414Smckusick nfsrv_rename(), 
7538414Smckusick nfsrv_link(), 7638414Smckusick nfsrv_symlink(), 7738414Smckusick nfsrv_mkdir(), 7838414Smckusick nfsrv_rmdir(), 7938414Smckusick nfsrv_readdir(), 8038414Smckusick nfsrv_statfs(), 8138414Smckusick nfsrv_noop(); 8238414Smckusick 8338414Smckusick int (*nfsrv_procs[NFS_NPROCS])() = { 8438414Smckusick nfsrv_null, 8538414Smckusick nfsrv_getattr, 8638414Smckusick nfsrv_setattr, 8738414Smckusick nfsrv_noop, 8838414Smckusick nfsrv_lookup, 8938414Smckusick nfsrv_readlink, 9038414Smckusick nfsrv_read, 9138414Smckusick nfsrv_noop, 9238414Smckusick nfsrv_write, 9338414Smckusick nfsrv_create, 9438414Smckusick nfsrv_remove, 9538414Smckusick nfsrv_rename, 9638414Smckusick nfsrv_link, 9738414Smckusick nfsrv_symlink, 9838414Smckusick nfsrv_mkdir, 9938414Smckusick nfsrv_rmdir, 10038414Smckusick nfsrv_readdir, 10138414Smckusick nfsrv_statfs, 10238414Smckusick }; 10338414Smckusick 10440117Smckusick struct nfsreq nfsreqh; 10540117Smckusick int nfsrexmtthresh = NFS_FISHY; 106*41900Smckusick int nfs_tcpnodelay = 1; 10738414Smckusick 10838414Smckusick /* 109*41900Smckusick * Initialize sockets and congestion for a new NFS connection. 11040117Smckusick * We do not free the sockaddr if error. 
11138414Smckusick */ 112*41900Smckusick nfs_connect(nmp) 11340117Smckusick register struct nfsmount *nmp; 11440117Smckusick { 115*41900Smckusick register struct socket *so; 116*41900Smckusick int s, error; 11740117Smckusick struct mbuf *m; 11840117Smckusick 119*41900Smckusick nmp->nm_so = (struct socket *)0; 120*41900Smckusick if (error = socreate(mtod(nmp->nm_nam, struct sockaddr *)->sa_family, 121*41900Smckusick &nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto)) 12240117Smckusick goto bad; 123*41900Smckusick so = nmp->nm_so; 124*41900Smckusick nmp->nm_soflags = so->so_proto->pr_flags; 12540117Smckusick 126*41900Smckusick /* 127*41900Smckusick * Protocols that do not require connections may be optionally left 128*41900Smckusick * unconnected for servers that reply from a port other than NFS_PORT. 129*41900Smckusick */ 130*41900Smckusick if (nmp->nm_flag & NFSMNT_NOCONN) { 131*41900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) { 132*41900Smckusick error = ENOTCONN; 13340117Smckusick goto bad; 13440117Smckusick } 135*41900Smckusick } else { 136*41900Smckusick if (error = soconnect(so, nmp->nm_nam)) 13740117Smckusick goto bad; 138*41900Smckusick 139*41900Smckusick /* 140*41900Smckusick * Wait for the connection to complete. Cribbed from the 141*41900Smckusick * connect system call but with the wait at negative prio. 
142*41900Smckusick */ 143*41900Smckusick s = splnet(); 144*41900Smckusick while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) 145*41900Smckusick sleep((caddr_t)&so->so_timeo, PZERO-2); 146*41900Smckusick splx(s); 147*41900Smckusick if (so->so_error) { 148*41900Smckusick error = so->so_error; 149*41900Smckusick goto bad; 150*41900Smckusick } 15140117Smckusick } 152*41900Smckusick if (nmp->nm_sotype == SOCK_DGRAM) { 153*41900Smckusick if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) { 154*41900Smckusick so->so_rcv.sb_timeo = (5 * hz); 155*41900Smckusick so->so_snd.sb_timeo = (5 * hz); 156*41900Smckusick } else { 157*41900Smckusick so->so_rcv.sb_timeo = 0; 158*41900Smckusick so->so_snd.sb_timeo = 0; 159*41900Smckusick } 160*41900Smckusick if (error = soreserve(so, nmp->nm_wsize + NFS_MAXPKTHDR, 161*41900Smckusick (nmp->nm_rsize + NFS_MAXPKTHDR) * 4)) 162*41900Smckusick goto bad; 163*41900Smckusick } else { 164*41900Smckusick if (nmp->nm_flag & NFSMNT_INT) { 165*41900Smckusick so->so_rcv.sb_timeo = (5 * hz); 166*41900Smckusick so->so_snd.sb_timeo = (5 * hz); 167*41900Smckusick } else { 168*41900Smckusick so->so_rcv.sb_timeo = 0; 169*41900Smckusick so->so_snd.sb_timeo = 0; 170*41900Smckusick } 171*41900Smckusick if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 172*41900Smckusick MGET(m, M_WAIT, MT_SOOPTS); 173*41900Smckusick *mtod(m, int *) = 1; 174*41900Smckusick m->m_len = sizeof(int); 175*41900Smckusick sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m); 176*41900Smckusick } 177*41900Smckusick if (so->so_proto->pr_domain->dom_family == AF_INET && 178*41900Smckusick so->so_proto->pr_protocol == IPPROTO_TCP && 179*41900Smckusick nfs_tcpnodelay) { 180*41900Smckusick MGET(m, M_WAIT, MT_SOOPTS); 181*41900Smckusick *mtod(m, int *) = 1; 182*41900Smckusick m->m_len = sizeof(int); 183*41900Smckusick sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m); 184*41900Smckusick } 185*41900Smckusick if (error = soreserve(so, 186*41900Smckusick (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof(u_long)) * 2, 
187*41900Smckusick nmp->nm_rsize + NFS_MAXPKTHDR + sizeof(u_long))) 188*41900Smckusick goto bad; 189*41900Smckusick } 190*41900Smckusick so->so_rcv.sb_flags |= SB_NOINTR; 191*41900Smckusick so->so_snd.sb_flags |= SB_NOINTR; 19240117Smckusick 193*41900Smckusick /* Initialize other non-zero congestion variables */ 194*41900Smckusick nmp->nm_rto = NFS_TIMEO; 195*41900Smckusick nmp->nm_window = 2; /* Initial send window */ 196*41900Smckusick nmp->nm_ssthresh = NFS_MAXWINDOW; /* Slowstart threshold */ 197*41900Smckusick nmp->nm_rttvar = nmp->nm_rto << 1; 198*41900Smckusick nmp->nm_sent = 0; 199*41900Smckusick nmp->nm_currexmit = 0; 200*41900Smckusick return (0); 20140117Smckusick 202*41900Smckusick bad: 203*41900Smckusick nfs_disconnect(nmp); 204*41900Smckusick return (error); 205*41900Smckusick } 20640117Smckusick 207*41900Smckusick /* 208*41900Smckusick * Reconnect routine: 209*41900Smckusick * Called when a connection is broken on a reliable protocol. 210*41900Smckusick * - clean up the old socket 211*41900Smckusick * - nfs_connect() again 212*41900Smckusick * - set R_MUSTRESEND for all outstanding requests on mount point 213*41900Smckusick * If this fails the mount point is DEAD! 214*41900Smckusick * nb: Must be called with the nfs_solock() set on the mount point. 
215*41900Smckusick */ 216*41900Smckusick nfs_reconnect(rep, nmp) 217*41900Smckusick register struct nfsreq *rep; 218*41900Smckusick register struct nfsmount *nmp; 219*41900Smckusick { 220*41900Smckusick register struct nfsreq *rp; 221*41900Smckusick register struct socket *so; 222*41900Smckusick int error; 22340117Smckusick 224*41900Smckusick if (rep->r_procp) 225*41900Smckusick tprintf(rep->r_procp->p_session->s_ttyvp, 226*41900Smckusick "Nfs server %s, trying reconnect\n", 227*41900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 228*41900Smckusick else 229*41900Smckusick tprintf(NULLVP, "Nfs server %s, trying a reconnect\n", 230*41900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 231*41900Smckusick while (error = nfs_connect(nmp)) { 232*41900Smckusick if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp)) 233*41900Smckusick return (EINTR); 234*41900Smckusick tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0); 23540117Smckusick } 236*41900Smckusick if (rep->r_procp) 237*41900Smckusick tprintf(rep->r_procp->p_session->s_ttyvp, 238*41900Smckusick "Nfs server %s, reconnected\n", 239*41900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 240*41900Smckusick else 241*41900Smckusick tprintf(NULLVP, "Nfs server %s, reconnected\n", 242*41900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 243*41900Smckusick 244*41900Smckusick /* 245*41900Smckusick * Loop through outstanding request list and fix up all requests 246*41900Smckusick * on old socket. 247*41900Smckusick */ 248*41900Smckusick rp = nfsreqh.r_next; 249*41900Smckusick while (rp != &nfsreqh) { 250*41900Smckusick if (rp->r_nmp == nmp) 251*41900Smckusick rp->r_flags |= R_MUSTRESEND; 252*41900Smckusick rp = rp->r_next; 25340117Smckusick } 25440117Smckusick return (0); 25540117Smckusick } 25640117Smckusick 25740117Smckusick /* 25840117Smckusick * NFS disconnect. Clean up and unlink. 
25940117Smckusick */ 260*41900Smckusick void 26140117Smckusick nfs_disconnect(nmp) 26240117Smckusick register struct nfsmount *nmp; 26340117Smckusick { 264*41900Smckusick register struct socket *so; 26540117Smckusick 266*41900Smckusick if (nmp->nm_so) { 267*41900Smckusick so = nmp->nm_so; 268*41900Smckusick nmp->nm_so = (struct socket *)0; 269*41900Smckusick soshutdown(so, 2); 270*41900Smckusick soclose(so); 27140117Smckusick } 27240117Smckusick } 27340117Smckusick 27440117Smckusick /* 275*41900Smckusick * This is the nfs send routine. For connection based socket types, it 276*41900Smckusick * must be called with an nfs_solock() on the socket. 277*41900Smckusick * "rep == NULL" indicates that it has been called from a server. 27840117Smckusick */ 279*41900Smckusick nfs_send(so, nam, top, rep) 28038414Smckusick register struct socket *so; 28138414Smckusick struct mbuf *nam; 282*41900Smckusick register struct mbuf *top; 283*41900Smckusick struct nfsreq *rep; 28438414Smckusick { 285*41900Smckusick struct mbuf *sendnam; 286*41900Smckusick int error, soflags; 28738414Smckusick 288*41900Smckusick if (rep) { 289*41900Smckusick if (rep->r_flags & R_SOFTTERM) { 29040117Smckusick m_freem(top); 291*41900Smckusick return (EINTR); 29240117Smckusick } 293*41900Smckusick if ((so = rep->r_nmp->nm_so) == NULL && 294*41900Smckusick (error = nfs_reconnect(rep, rep->r_nmp))) 295*41900Smckusick return (error); 296*41900Smckusick rep->r_flags &= ~R_MUSTRESEND; 297*41900Smckusick soflags = rep->r_nmp->nm_soflags; 298*41900Smckusick } else 299*41900Smckusick soflags = so->so_proto->pr_flags; 300*41900Smckusick if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED)) 301*41900Smckusick sendnam = (struct mbuf *)0; 302*41900Smckusick else 303*41900Smckusick sendnam = nam; 304*41900Smckusick 305*41900Smckusick error = sosend(so, sendnam, (struct uio *)0, top, 306*41900Smckusick (struct mbuf *)0, 0); 307*41900Smckusick if (error == EWOULDBLOCK && rep) { 308*41900Smckusick if 
(rep->r_flags & R_SOFTTERM) 309*41900Smckusick error = EINTR; 310*41900Smckusick else { 311*41900Smckusick rep->r_flags |= R_MUSTRESEND; 312*41900Smckusick error = 0; 31340117Smckusick } 31438414Smckusick } 315*41900Smckusick /* 316*41900Smckusick * Ignore socket errors?? 317*41900Smckusick */ 318*41900Smckusick if (error && error != EINTR && error != ERESTART) 319*41900Smckusick error = 0; 32038414Smckusick return (error); 32138414Smckusick } 32238414Smckusick 32338414Smckusick /* 324*41900Smckusick * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all 325*41900Smckusick * done by soreceive(), but for SOCK_STREAM we must deal with the Record 326*41900Smckusick * Mark and consolidate the data into a new mbuf list. 327*41900Smckusick * nb: Sometimes TCP passes the data up to soreceive() in long lists of 328*41900Smckusick * small mbufs. 329*41900Smckusick * For SOCK_STREAM we must be very careful to read an entire record once 330*41900Smckusick * we have read any of it, even if the system call has been interrupted. 
33138414Smckusick */ 332*41900Smckusick nfs_receive(so, aname, mp, rep) 33338414Smckusick register struct socket *so; 33438414Smckusick struct mbuf **aname; 33538414Smckusick struct mbuf **mp; 336*41900Smckusick register struct nfsreq *rep; 33738414Smckusick { 338*41900Smckusick struct uio auio; 339*41900Smckusick struct iovec aio; 34038414Smckusick register struct mbuf *m; 341*41900Smckusick struct mbuf *m2, *m3, *mnew, **mbp; 342*41900Smckusick caddr_t fcp, tcp; 343*41900Smckusick u_long len; 344*41900Smckusick struct mbuf **getnam; 345*41900Smckusick int error, siz, mlen, soflags, rcvflg = MSG_WAITALL; 34638414Smckusick 347*41900Smckusick /* 348*41900Smckusick * Set up arguments for soreceive() 349*41900Smckusick */ 350*41900Smckusick *mp = (struct mbuf *)0; 351*41900Smckusick *aname = (struct mbuf *)0; 352*41900Smckusick if (rep) 353*41900Smckusick soflags = rep->r_nmp->nm_soflags; 354*41900Smckusick else 355*41900Smckusick soflags = so->so_proto->pr_flags; 35638414Smckusick 357*41900Smckusick /* 358*41900Smckusick * For reliable protocols, lock against other senders/receivers 359*41900Smckusick * in case a reconnect is necessary. 360*41900Smckusick * For SOCK_STREAM, first get the Record Mark to find out how much 361*41900Smckusick * more there is to get. 362*41900Smckusick * We must lock the socket against other receivers 363*41900Smckusick * until we have an entire rpc request/reply. 364*41900Smckusick */ 365*41900Smckusick if (soflags & PR_CONNREQUIRED) { 366*41900Smckusick tryagain: 367*41900Smckusick /* 368*41900Smckusick * Check for fatal errors and resending request. 369*41900Smckusick */ 370*41900Smckusick if (rep) { 371*41900Smckusick /* 372*41900Smckusick * Ugh: If a reconnect attempt just happened, nm_so 373*41900Smckusick * would have changed. NULL indicates a failed 374*41900Smckusick * attempt that has essentially shut down this 375*41900Smckusick * mount point. 
376*41900Smckusick */ 377*41900Smckusick if (rep->r_mrep || (so = rep->r_nmp->nm_so) == NULL || 378*41900Smckusick (rep->r_flags & R_SOFTTERM)) 379*41900Smckusick return (EINTR); 380*41900Smckusick while (rep->r_flags & R_MUSTRESEND) { 381*41900Smckusick m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); 382*41900Smckusick nfsstats.rpcretries++; 383*41900Smckusick if (error = nfs_send(so, rep->r_nmp->nm_nam, m, 384*41900Smckusick rep)) 385*41900Smckusick goto errout; 38640117Smckusick } 387*41900Smckusick } 388*41900Smckusick if ((soflags & PR_ATOMIC) == 0) { 389*41900Smckusick aio.iov_base = (caddr_t) &len; 390*41900Smckusick aio.iov_len = sizeof(u_long); 391*41900Smckusick auio.uio_iov = &aio; 392*41900Smckusick auio.uio_iovcnt = 1; 393*41900Smckusick auio.uio_segflg = UIO_SYSSPACE; 394*41900Smckusick auio.uio_rw = UIO_READ; 395*41900Smckusick auio.uio_offset = 0; 396*41900Smckusick auio.uio_resid = sizeof(u_long); 397*41900Smckusick do { 398*41900Smckusick error = soreceive(so, (struct mbuf **)0, &auio, 399*41900Smckusick (struct mbuf **)0, (struct mbuf **)0, &rcvflg); 400*41900Smckusick if (error == EWOULDBLOCK && rep) { 401*41900Smckusick if (rep->r_flags & R_SOFTTERM) 402*41900Smckusick return (EINTR); 403*41900Smckusick if (rep->r_flags & R_MUSTRESEND) 404*41900Smckusick goto tryagain; 405*41900Smckusick } 406*41900Smckusick } while (error == EWOULDBLOCK); 407*41900Smckusick if (!error && auio.uio_resid > 0) 408*41900Smckusick error = EPIPE; 40940761Skarels if (error) 410*41900Smckusick goto errout; 411*41900Smckusick len = ntohl(len) & ~0x80000000; 412*41900Smckusick /* 413*41900Smckusick * This is SERIOUS! We are out of sync with the sender 414*41900Smckusick * and forcing a disconnect/reconnect is all I can do. 
415*41900Smckusick */ 416*41900Smckusick if (len > NFS_MAXPACKET) { 417*41900Smckusick error = EFBIG; 418*41900Smckusick goto errout; 419*41900Smckusick } 420*41900Smckusick auio.uio_resid = len; 421*41900Smckusick do { 422*41900Smckusick error = soreceive(so, (struct mbuf **)0, 423*41900Smckusick &auio, mp, (struct mbuf **)0, &rcvflg); 424*41900Smckusick } while (error == EWOULDBLOCK || error == EINTR || 425*41900Smckusick error == ERESTART); 426*41900Smckusick if (!error && auio.uio_resid > 0) 427*41900Smckusick error = EPIPE; 42840117Smckusick } else { 429*41900Smckusick auio.uio_resid = len = 1000000; /* Anything Big */ 430*41900Smckusick do { 431*41900Smckusick error = soreceive(so, (struct mbuf **)0, 432*41900Smckusick &auio, mp, (struct mbuf **)0, &rcvflg); 433*41900Smckusick if (error == EWOULDBLOCK && rep) { 434*41900Smckusick if (rep->r_flags & R_SOFTTERM) 435*41900Smckusick return (EINTR); 436*41900Smckusick if (rep->r_flags & R_MUSTRESEND) 437*41900Smckusick goto tryagain; 438*41900Smckusick } 439*41900Smckusick } while (error == EWOULDBLOCK); 440*41900Smckusick if (!error && *mp == NULL) 441*41900Smckusick error = EPIPE; 442*41900Smckusick len -= auio.uio_resid; 44340117Smckusick } 444*41900Smckusick errout: 445*41900Smckusick if (error && rep && error != EINTR && error != ERESTART) { 446*41900Smckusick m_freem(*mp); 447*41900Smckusick *mp = (struct mbuf *)0; 448*41900Smckusick nfs_disconnect(rep->r_nmp); 449*41900Smckusick error = nfs_reconnect(rep, rep->r_nmp); 450*41900Smckusick if (!error) 451*41900Smckusick goto tryagain; 45240117Smckusick } 453*41900Smckusick } else { 454*41900Smckusick if (so->so_state & SS_ISCONNECTED) 455*41900Smckusick getnam = (struct mbuf **)0; 456*41900Smckusick else 457*41900Smckusick getnam = aname; 458*41900Smckusick auio.uio_resid = len = 1000000; 459*41900Smckusick do { 460*41900Smckusick error = soreceive(so, getnam, &auio, mp, 461*41900Smckusick (struct mbuf **)0, &rcvflg); 462*41900Smckusick if (error == 
EWOULDBLOCK && rep && 463*41900Smckusick (rep->r_flags & R_SOFTTERM)) 464*41900Smckusick return (EINTR); 465*41900Smckusick } while (error == EWOULDBLOCK); 466*41900Smckusick len -= auio.uio_resid; 467*41900Smckusick } 468*41900Smckusick if (error) { 469*41900Smckusick m_freem(*mp); 470*41900Smckusick *mp = (struct mbuf *)0; 471*41900Smckusick } 472*41900Smckusick /* 473*41900Smckusick * Search for any mbufs that are not a multiple of 4 bytes long. 474*41900Smckusick * These could cause pointer alignment problems, so copy them to 475*41900Smckusick * well aligned mbufs. 476*41900Smckusick */ 477*41900Smckusick m = *mp; 478*41900Smckusick mbp = mp; 479*41900Smckusick while (m) { 480*41900Smckusick /* 481*41900Smckusick * All this for something that may never happen. 482*41900Smckusick */ 483*41900Smckusick if (m->m_len & 0x3) { 484*41900Smckusick printf("nfs_rcv odd length!\n"); 485*41900Smckusick fcp = mtod(m, caddr_t); 486*41900Smckusick mnew = m2 = (struct mbuf *)0; 487*41900Smckusick while (m) { 488*41900Smckusick if (m2 == NULL || mlen == 0) { 489*41900Smckusick MGET(m2, M_WAIT, MT_DATA); 490*41900Smckusick if (len > MINCLSIZE) 491*41900Smckusick MCLGET(m2, M_WAIT); 492*41900Smckusick m2->m_len = 0; 493*41900Smckusick mlen = M_TRAILINGSPACE(m2); 494*41900Smckusick tcp = mtod(m2, caddr_t); 495*41900Smckusick if (mnew) { 496*41900Smckusick m3->m_next = m2; 497*41900Smckusick m3 = m2; 498*41900Smckusick } else 499*41900Smckusick mnew = m3 = m2; 500*41900Smckusick } 501*41900Smckusick siz = (mlen > m->m_len) ? 
m->m_len : mlen; 502*41900Smckusick bcopy(fcp, tcp, siz); 503*41900Smckusick m2->m_len += siz; 504*41900Smckusick mlen -= siz; 505*41900Smckusick len -= siz; 506*41900Smckusick tcp += siz; 507*41900Smckusick m->m_len -= siz; 508*41900Smckusick fcp += siz; 509*41900Smckusick if (m->m_len == 0) { 510*41900Smckusick do { 511*41900Smckusick m = m->m_next; 512*41900Smckusick } while (m && m->m_len == 0); 513*41900Smckusick if (m) 514*41900Smckusick fcp = mtod(m, caddr_t); 515*41900Smckusick } 516*41900Smckusick } 517*41900Smckusick m = *mbp; 518*41900Smckusick *mbp = mnew; 519*41900Smckusick m_freem(m); 520*41900Smckusick break; 52140117Smckusick } 522*41900Smckusick len -= m->m_len; 523*41900Smckusick mbp = &m->m_next; 524*41900Smckusick m = m->m_next; 52538414Smckusick } 52638414Smckusick return (error); 52738414Smckusick } 52838414Smckusick 52938414Smckusick struct rpc_replyhead { 53038414Smckusick u_long r_xid; 53138414Smckusick u_long r_rep; 53238414Smckusick }; 53338414Smckusick 53438414Smckusick /* 535*41900Smckusick * Implement receipt of reply on a socket. 53638414Smckusick * We must search through the list of received datagrams matching them 53738414Smckusick * with outstanding requests using the xid, until ours is found. 
53838414Smckusick */ 539*41900Smckusick /* ARGSUSED */ 540*41900Smckusick nfs_reply(nmp, myrep) 541*41900Smckusick struct nfsmount *nmp; 54239344Smckusick struct nfsreq *myrep; 54338414Smckusick { 54438414Smckusick register struct mbuf *m; 54538414Smckusick register struct nfsreq *rep; 546*41900Smckusick register int error = 0; 54738414Smckusick struct rpc_replyhead replyh; 548*41900Smckusick struct mbuf *mp, *nam; 549*41900Smckusick char *cp; 550*41900Smckusick int cnt, xfer; 55138414Smckusick 552*41900Smckusick /* 553*41900Smckusick * Loop around until we get our own reply 554*41900Smckusick */ 555*41900Smckusick for (;;) { 556*41900Smckusick /* 557*41900Smckusick * Lock against other receivers so that I don't get stuck in 558*41900Smckusick * sbwait() after someone else has received my reply for me. 559*41900Smckusick * Also necessary for connection based protocols to avoid 560*41900Smckusick * race conditions during a reconnect. 561*41900Smckusick */ 562*41900Smckusick nfs_solock(&nmp->nm_flag, 1); 563*41900Smckusick /* Already received, bye bye */ 564*41900Smckusick if (myrep->r_mrep != NULL) { 565*41900Smckusick nfs_sounlock(&nmp->nm_flag); 566*41900Smckusick return (0); 56740117Smckusick } 568*41900Smckusick /* 569*41900Smckusick * Get the next Rpc reply off the socket 570*41900Smckusick */ 571*41900Smckusick if (error = nfs_receive(nmp->nm_so, &nam, &mp, myrep)) { 572*41900Smckusick nfs_sounlock(&nmp->nm_flag); 57338414Smckusick 574*41900Smckusick /* 575*41900Smckusick * Ignore routing errors on connectionless protocols?? 576*41900Smckusick */ 577*41900Smckusick if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) { 578*41900Smckusick nmp->nm_so->so_error = 0; 579*41900Smckusick continue; 580*41900Smckusick } 581*41900Smckusick 582*41900Smckusick /* 583*41900Smckusick * Otherwise cleanup and return a fatal error. 
584*41900Smckusick */ 585*41900Smckusick if (myrep->r_flags & R_TIMING) { 586*41900Smckusick myrep->r_flags &= ~R_TIMING; 587*41900Smckusick nmp->nm_rtt = -1; 588*41900Smckusick } 589*41900Smckusick if (myrep->r_flags & R_SENT) { 590*41900Smckusick myrep->r_flags &= ~R_SENT; 591*41900Smckusick nmp->nm_sent--; 592*41900Smckusick } 593*41900Smckusick return (error); 59438414Smckusick } 595*41900Smckusick 596*41900Smckusick /* 597*41900Smckusick * Get the xid and check that it is an rpc reply 598*41900Smckusick */ 599*41900Smckusick m = mp; 600*41900Smckusick if (m->m_len >= 2*NFSX_UNSIGNED) 601*41900Smckusick bcopy(mtod(m, caddr_t), (caddr_t)&replyh, 602*41900Smckusick 2*NFSX_UNSIGNED); 603*41900Smckusick else { 604*41900Smckusick cnt = 2*NFSX_UNSIGNED; 605*41900Smckusick cp = (caddr_t)&replyh; 606*41900Smckusick while (m && cnt > 0) { 607*41900Smckusick if (m->m_len > 0) { 608*41900Smckusick xfer = (m->m_len >= cnt) ? cnt : 609*41900Smckusick m->m_len; 610*41900Smckusick bcopy(mtod(m, caddr_t), cp, xfer); 611*41900Smckusick cnt -= xfer; 612*41900Smckusick cp += xfer; 613*41900Smckusick } 614*41900Smckusick if (cnt > 0) 615*41900Smckusick m = m->m_next; 616*41900Smckusick } 61740117Smckusick } 618*41900Smckusick if (replyh.r_rep != rpc_reply || m == NULL) { 61940117Smckusick nfsstats.rpcinvalid++; 620*41900Smckusick m_freem(mp); 621*41900Smckusick nfs_sounlock(&nmp->nm_flag); 622*41900Smckusick continue; 62338414Smckusick } 624*41900Smckusick /* 625*41900Smckusick * Loop through the request list to match up the reply 626*41900Smckusick * Iff no match, just drop the datagram 627*41900Smckusick */ 628*41900Smckusick m = mp; 629*41900Smckusick rep = nfsreqh.r_next; 630*41900Smckusick while (rep != &nfsreqh) { 631*41900Smckusick if (rep->r_mrep == NULL && replyh.r_xid == rep->r_xid) { 632*41900Smckusick /* Found it.. 
*/ 633*41900Smckusick rep->r_mrep = m; 634*41900Smckusick /* 635*41900Smckusick * Update timing 636*41900Smckusick */ 637*41900Smckusick if (rep->r_flags & R_TIMING) { 638*41900Smckusick nfs_updatetimer(rep->r_nmp); 639*41900Smckusick rep->r_flags &= ~R_TIMING; 640*41900Smckusick rep->r_nmp->nm_rtt = -1; 641*41900Smckusick } 642*41900Smckusick if (rep->r_flags & R_SENT) { 643*41900Smckusick rep->r_flags &= ~R_SENT; 644*41900Smckusick rep->r_nmp->nm_sent--; 645*41900Smckusick } 64640117Smckusick break; 64738414Smckusick } 648*41900Smckusick rep = rep->r_next; 64938414Smckusick } 650*41900Smckusick nfs_sounlock(&nmp->nm_flag); 651*41900Smckusick if (nam) 652*41900Smckusick m_freem(nam); 653*41900Smckusick /* 654*41900Smckusick * If not matched to a request, drop it. 655*41900Smckusick * If it's mine, get out. 656*41900Smckusick */ 657*41900Smckusick if (rep == &nfsreqh) { 658*41900Smckusick nfsstats.rpcunexpected++; 659*41900Smckusick m_freem(m); 660*41900Smckusick } else if (rep == myrep) 661*41900Smckusick return (0); 66238414Smckusick } 66338414Smckusick } 66438414Smckusick 66538414Smckusick /* 66638414Smckusick * nfs_request - goes something like this 66738414Smckusick * - fill in request struct 66838414Smckusick * - links it into list 669*41900Smckusick * - calls nfs_send() for first transmit 670*41900Smckusick * - calls nfs_receive() to get reply 67138414Smckusick * - break down rpc header and return with nfs reply pointed to 67238414Smckusick * by mrep or error 67338414Smckusick * nb: always frees up mreq mbuf list 67438414Smckusick */ 675*41900Smckusick nfs_request(vp, mreq, xid, procnum, procp, mp, mrp, mdp, dposp) 67638414Smckusick struct vnode *vp; 67738414Smckusick struct mbuf *mreq; 67838414Smckusick u_long xid; 679*41900Smckusick int procnum; 680*41900Smckusick struct proc *procp; 68138414Smckusick struct mount *mp; 68238414Smckusick struct mbuf **mrp; 68338414Smckusick struct mbuf **mdp; 68438414Smckusick caddr_t *dposp; 68538414Smckusick { 
68638414Smckusick register struct mbuf *m, *mrep; 68738414Smckusick register struct nfsreq *rep; 68838414Smckusick register u_long *p; 68938414Smckusick register int len; 690*41900Smckusick struct nfsmount *nmp; 69138414Smckusick struct mbuf *md; 69239344Smckusick struct nfsreq *reph; 69338414Smckusick caddr_t dpos; 69438414Smckusick char *cp2; 69538414Smckusick int t1; 69638414Smckusick int s; 697*41900Smckusick int error = 0; 69838414Smckusick 699*41900Smckusick nmp = VFSTONFS(mp); 70038414Smckusick m = mreq; 70138414Smckusick MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK); 70238414Smckusick rep->r_xid = xid; 703*41900Smckusick rep->r_nmp = nmp; 70438414Smckusick rep->r_vp = vp; 705*41900Smckusick rep->r_procp = procp; 706*41900Smckusick if (nmp->nm_flag & NFSMNT_SOFT) 707*41900Smckusick rep->r_retry = nmp->nm_retry; 70838414Smckusick else 70940117Smckusick rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */ 71040117Smckusick rep->r_flags = rep->r_rexmit = 0; 711*41900Smckusick /* 712*41900Smckusick * Three cases: 713*41900Smckusick * - non-idempotent requests on SOCK_DGRAM use NFS_MINIDEMTIMEO 714*41900Smckusick * - idempotent requests on SOCK_DGRAM use 0 715*41900Smckusick * - Reliable transports, NFS_RELIABLETIMEO 716*41900Smckusick * Timeouts are still done on reliable transports to ensure detection 717*41900Smckusick * of connection loss. 
718*41900Smckusick */ 719*41900Smckusick if (nmp->nm_sotype != SOCK_DGRAM) 720*41900Smckusick rep->r_timerinit = -NFS_RELIABLETIMEO; 721*41900Smckusick else if (nonidempotent[procnum]) 722*41900Smckusick rep->r_timerinit = -NFS_MINIDEMTIMEO; 723*41900Smckusick else 724*41900Smckusick rep->r_timerinit = 0; 725*41900Smckusick rep->r_timer = rep->r_timerinit; 72638414Smckusick rep->r_mrep = NULL; 72738414Smckusick len = 0; 72838414Smckusick while (m) { 72938414Smckusick len += m->m_len; 73038414Smckusick m = m->m_next; 73138414Smckusick } 732*41900Smckusick mreq->m_pkthdr.len = len; 733*41900Smckusick mreq->m_pkthdr.rcvif = (struct ifnet *)0; 734*41900Smckusick /* 735*41900Smckusick * For non-atomic protocols, insert a Sun RPC Record Mark. 736*41900Smckusick */ 737*41900Smckusick if ((nmp->nm_soflags & PR_ATOMIC) == 0) { 738*41900Smckusick M_PREPEND(mreq, sizeof(u_long), M_WAIT); 739*41900Smckusick *mtod(mreq, u_long *) = htonl(0x80000000 | len); 740*41900Smckusick } 741*41900Smckusick rep->r_mreq = mreq; 74238414Smckusick 74340117Smckusick /* 74440117Smckusick * Do the client side RPC. 74540117Smckusick */ 74640117Smckusick nfsstats.rpcrequests++; 747*41900Smckusick /* 748*41900Smckusick * Chain request into list of outstanding requests. Be sure 749*41900Smckusick * to put it LAST so timer finds oldest requests first. 750*41900Smckusick */ 75140117Smckusick s = splnet(); 75239344Smckusick reph = &nfsreqh; 753*41900Smckusick reph->r_prev->r_next = rep; 754*41900Smckusick rep->r_prev = reph->r_prev; 75539344Smckusick reph->r_prev = rep; 75639344Smckusick rep->r_next = reph; 75740117Smckusick /* 75840117Smckusick * If backing off another request or avoiding congestion, don't 75940117Smckusick * send this one now but let timer do it. If not timing a request, 76040117Smckusick * do it now. 
76140117Smckusick */ 762*41900Smckusick if (nmp->nm_sent <= 0 || nmp->nm_sotype != SOCK_DGRAM || 763*41900Smckusick (nmp->nm_currexmit == 0 && nmp->nm_sent < nmp->nm_window)) { 764*41900Smckusick nmp->nm_sent++; 765*41900Smckusick rep->r_flags |= R_SENT; 766*41900Smckusick if (nmp->nm_rtt == -1) { 767*41900Smckusick nmp->nm_rtt = 0; 768*41900Smckusick rep->r_flags |= R_TIMING; 769*41900Smckusick } 77040117Smckusick splx(s); 771*41900Smckusick m = m_copym(mreq, 0, M_COPYALL, M_WAIT); 772*41900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) 773*41900Smckusick nfs_solock(&nmp->nm_flag, 1); 774*41900Smckusick error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep); 775*41900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) 776*41900Smckusick nfs_sounlock(&nmp->nm_flag); 777*41900Smckusick if (error && NFSIGNORE_SOERROR(nmp->nm_soflags, error)) 778*41900Smckusick nmp->nm_so->so_error = error = 0; 779*41900Smckusick } else 780*41900Smckusick splx(s); 78138414Smckusick 78238414Smckusick /* 78340117Smckusick * Wait for the reply from our send or the timer's. 78440117Smckusick */ 785*41900Smckusick if (!error) 786*41900Smckusick error = nfs_reply(nmp, rep); 78738414Smckusick 78840117Smckusick /* 78940117Smckusick * RPC done, unlink the request. 79040117Smckusick */ 79138414Smckusick s = splnet(); 79238414Smckusick rep->r_prev->r_next = rep->r_next; 79339344Smckusick rep->r_next->r_prev = rep->r_prev; 79438414Smckusick splx(s); 795*41900Smckusick 796*41900Smckusick /* 797*41900Smckusick * If there was a successful reply and a tprintf msg. 798*41900Smckusick * tprintf a response. 
799*41900Smckusick */ 800*41900Smckusick if (!error && (rep->r_flags & R_TPRINTFMSG)) { 801*41900Smckusick if (rep->r_procp) 802*41900Smckusick tprintf(rep->r_procp->p_session->s_ttyvp, 803*41900Smckusick "Nfs server %s, is alive again\n", 804*41900Smckusick rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 805*41900Smckusick else 806*41900Smckusick tprintf(NULLVP, "Nfs server %s, is alive again\n", 807*41900Smckusick rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 808*41900Smckusick } 80938414Smckusick m_freem(rep->r_mreq); 81038414Smckusick mrep = md = rep->r_mrep; 81138414Smckusick FREE((caddr_t)rep, M_NFSREQ); 81238414Smckusick if (error) 81338414Smckusick return (error); 81438414Smckusick 81538414Smckusick /* 81638414Smckusick * break down the rpc header and check if ok 81738414Smckusick */ 81838414Smckusick dpos = mtod(md, caddr_t); 81938414Smckusick nfsm_disect(p, u_long *, 5*NFSX_UNSIGNED); 82038414Smckusick p += 2; 82138414Smckusick if (*p++ == rpc_msgdenied) { 82238414Smckusick if (*p == rpc_mismatch) 82338414Smckusick error = EOPNOTSUPP; 82438414Smckusick else 82538414Smckusick error = EACCES; 82638414Smckusick m_freem(mrep); 82738414Smckusick return (error); 82838414Smckusick } 82938414Smckusick /* 83038414Smckusick * skip over the auth_verf, someday we may want to cache auth_short's 83138414Smckusick * for nfs_reqhead(), but for now just dump it 83238414Smckusick */ 83338414Smckusick if (*++p != 0) { 83438414Smckusick len = nfsm_rndup(fxdr_unsigned(long, *p)); 83538414Smckusick nfsm_adv(len); 83638414Smckusick } 83738414Smckusick nfsm_disect(p, u_long *, NFSX_UNSIGNED); 83838414Smckusick /* 0 == ok */ 83938414Smckusick if (*p == 0) { 84038414Smckusick nfsm_disect(p, u_long *, NFSX_UNSIGNED); 84138414Smckusick if (*p != 0) { 84238414Smckusick error = fxdr_unsigned(int, *p); 84338414Smckusick m_freem(mrep); 84438414Smckusick return (error); 84538414Smckusick } 84638414Smckusick *mrp = mrep; 84738414Smckusick *mdp = md; 84838414Smckusick *dposp = dpos; 
84938414Smckusick return (0); 85038414Smckusick } 85138414Smckusick m_freem(mrep); 85238414Smckusick return (EPROTONOSUPPORT); 85338414Smckusick nfsmout: 85438414Smckusick return (error); 85538414Smckusick } 85638414Smckusick 85738414Smckusick /* 85838414Smckusick * Get a request for the server main loop 85938414Smckusick * - receive a request via. nfs_soreceive() 86038414Smckusick * - verify it 86138414Smckusick * - fill in the cred struct. 86238414Smckusick */ 86339754Smckusick nfs_getreq(so, prog, vers, maxproc, nam, mrp, mdp, dposp, retxid, proc, cr, 864*41900Smckusick lockp, msk, mtch) 86538414Smckusick struct socket *so; 86638414Smckusick u_long prog; 86738414Smckusick u_long vers; 86838414Smckusick int maxproc; 86938414Smckusick struct mbuf **nam; 87038414Smckusick struct mbuf **mrp; 87138414Smckusick struct mbuf **mdp; 87238414Smckusick caddr_t *dposp; 87338414Smckusick u_long *retxid; 87438414Smckusick u_long *proc; 87538414Smckusick register struct ucred *cr; 876*41900Smckusick int *lockp; 877*41900Smckusick struct mbuf *msk, *mtch; 87838414Smckusick { 87938414Smckusick register int i; 88039494Smckusick register u_long *p; 88139494Smckusick register long t1; 88239494Smckusick caddr_t dpos, cp2; 88339494Smckusick int error = 0; 88439494Smckusick struct mbuf *mrep, *md; 88539494Smckusick int len; 88638414Smckusick 887*41900Smckusick if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 888*41900Smckusick nfs_solock(lockp, 0); 889*41900Smckusick error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0); 890*41900Smckusick nfs_sounlock(lockp); 891*41900Smckusick } else { 892*41900Smckusick mrep = (struct mbuf *)0; 893*41900Smckusick do { 894*41900Smckusick if (mrep) { 895*41900Smckusick m_freem(*nam); 896*41900Smckusick m_freem(mrep); 897*41900Smckusick } 898*41900Smckusick error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0); 899*41900Smckusick } while (!error && nfs_badnam(*nam, msk, mtch)); 900*41900Smckusick } 901*41900Smckusick if (error) 90238414Smckusick 
return (error); 90338414Smckusick md = mrep; 90438414Smckusick dpos = mtod(mrep, caddr_t); 90538414Smckusick nfsm_disect(p, u_long *, 10*NFSX_UNSIGNED); 90638414Smckusick *retxid = *p++; 90738414Smckusick if (*p++ != rpc_call) { 90838414Smckusick m_freem(mrep); 90938414Smckusick return (ERPCMISMATCH); 91038414Smckusick } 91138414Smckusick if (*p++ != rpc_vers) { 91238414Smckusick m_freem(mrep); 91338414Smckusick return (ERPCMISMATCH); 91438414Smckusick } 91538414Smckusick if (*p++ != prog) { 91638414Smckusick m_freem(mrep); 91738414Smckusick return (EPROGUNAVAIL); 91838414Smckusick } 91938414Smckusick if (*p++ != vers) { 92038414Smckusick m_freem(mrep); 92138414Smckusick return (EPROGMISMATCH); 92238414Smckusick } 92338414Smckusick *proc = fxdr_unsigned(u_long, *p++); 92438414Smckusick if (*proc == NFSPROC_NULL) { 92538414Smckusick *mrp = mrep; 92638414Smckusick return (0); 92738414Smckusick } 92838414Smckusick if (*proc > maxproc || *p++ != rpc_auth_unix) { 92938414Smckusick m_freem(mrep); 93038414Smckusick return (EPROCUNAVAIL); 93138414Smckusick } 932*41900Smckusick len = fxdr_unsigned(int, *p++); 933*41900Smckusick if (len < 0 || len > RPCAUTH_MAXSIZ) { 934*41900Smckusick m_freem(mrep); 935*41900Smckusick return (EBADRPC); 936*41900Smckusick } 93739494Smckusick len = fxdr_unsigned(int, *++p); 938*41900Smckusick if (len < 0 || len > NFS_MAXNAMLEN) { 939*41900Smckusick m_freem(mrep); 940*41900Smckusick return (EBADRPC); 941*41900Smckusick } 94239494Smckusick nfsm_adv(nfsm_rndup(len)); 94338414Smckusick nfsm_disect(p, u_long *, 3*NFSX_UNSIGNED); 94438414Smckusick cr->cr_uid = fxdr_unsigned(uid_t, *p++); 94538414Smckusick cr->cr_gid = fxdr_unsigned(gid_t, *p++); 94639494Smckusick len = fxdr_unsigned(int, *p); 947*41900Smckusick if (len < 0 || len > RPCAUTH_UNIXGIDS) { 94838414Smckusick m_freem(mrep); 94938414Smckusick return (EBADRPC); 95038414Smckusick } 95139494Smckusick nfsm_disect(p, u_long *, (len + 2)*NFSX_UNSIGNED); 95239494Smckusick for (i = 1; i <= len; 
i++) 953*41900Smckusick if (i < NGROUPS) 954*41900Smckusick cr->cr_groups[i] = fxdr_unsigned(gid_t, *p++); 955*41900Smckusick else 956*41900Smckusick p++; 957*41900Smckusick cr->cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1); 95838414Smckusick /* 95938414Smckusick * Do we have any use for the verifier. 96038414Smckusick * According to the "Remote Procedure Call Protocol Spec." it 96138414Smckusick * should be AUTH_NULL, but some clients make it AUTH_UNIX? 96238414Smckusick * For now, just skip over it 96338414Smckusick */ 96439494Smckusick len = fxdr_unsigned(int, *++p); 965*41900Smckusick if (len < 0 || len > RPCAUTH_MAXSIZ) { 966*41900Smckusick m_freem(mrep); 967*41900Smckusick return (EBADRPC); 968*41900Smckusick } 96939494Smckusick if (len > 0) 97039494Smckusick nfsm_adv(nfsm_rndup(len)); 97138414Smckusick *mrp = mrep; 97238414Smckusick *mdp = md; 97338414Smckusick *dposp = dpos; 97438414Smckusick return (0); 97538414Smckusick nfsmout: 97638414Smckusick return (error); 97738414Smckusick } 97838414Smckusick 97938414Smckusick /* 98038414Smckusick * Generate the rpc reply header 98138414Smckusick * siz arg. 
is used to decide if adding a cluster is worthwhile 98238414Smckusick */ 98338414Smckusick nfs_rephead(siz, retxid, err, mrq, mbp, bposp) 98438414Smckusick int siz; 98538414Smckusick u_long retxid; 98638414Smckusick int err; 98738414Smckusick struct mbuf **mrq; 98838414Smckusick struct mbuf **mbp; 98938414Smckusick caddr_t *bposp; 99038414Smckusick { 99139494Smckusick register u_long *p; 99239494Smckusick register long t1; 99339494Smckusick caddr_t bpos; 99439494Smckusick struct mbuf *mreq, *mb, *mb2; 99538414Smckusick 99638414Smckusick NFSMGETHDR(mreq); 99738414Smckusick mb = mreq; 99838414Smckusick if ((siz+RPC_REPLYSIZ) > MHLEN) 999*41900Smckusick MCLGET(mreq, M_WAIT); 100038414Smckusick p = mtod(mreq, u_long *); 100138414Smckusick mreq->m_len = 6*NFSX_UNSIGNED; 100238414Smckusick bpos = ((caddr_t)p)+mreq->m_len; 100338414Smckusick *p++ = retxid; 100438414Smckusick *p++ = rpc_reply; 100538414Smckusick if (err == ERPCMISMATCH) { 100638414Smckusick *p++ = rpc_msgdenied; 100738414Smckusick *p++ = rpc_mismatch; 100838414Smckusick *p++ = txdr_unsigned(2); 100938414Smckusick *p = txdr_unsigned(2); 101038414Smckusick } else { 101138414Smckusick *p++ = rpc_msgaccepted; 101238414Smckusick *p++ = 0; 101338414Smckusick *p++ = 0; 101438414Smckusick switch (err) { 101538414Smckusick case EPROGUNAVAIL: 101638414Smckusick *p = txdr_unsigned(RPC_PROGUNAVAIL); 101738414Smckusick break; 101838414Smckusick case EPROGMISMATCH: 101938414Smckusick *p = txdr_unsigned(RPC_PROGMISMATCH); 102038414Smckusick nfsm_build(p, u_long *, 2*NFSX_UNSIGNED); 102138414Smckusick *p++ = txdr_unsigned(2); 102238414Smckusick *p = txdr_unsigned(2); /* someday 3 */ 102338414Smckusick break; 102438414Smckusick case EPROCUNAVAIL: 102538414Smckusick *p = txdr_unsigned(RPC_PROCUNAVAIL); 102638414Smckusick break; 102738414Smckusick default: 102838414Smckusick *p = 0; 102938414Smckusick if (err != VNOVAL) { 103038414Smckusick nfsm_build(p, u_long *, NFSX_UNSIGNED); 103138414Smckusick *p = txdr_unsigned(err); 
103238414Smckusick } 103338414Smckusick break; 103438414Smckusick }; 103538414Smckusick } 103638414Smckusick *mrq = mreq; 103738414Smckusick *mbp = mb; 103838414Smckusick *bposp = bpos; 103938414Smckusick if (err != 0 && err != VNOVAL) 104038414Smckusick nfsstats.srvrpc_errs++; 104138414Smckusick return (0); 104238414Smckusick } 104338414Smckusick 104438414Smckusick /* 104538414Smckusick * Nfs timer routine 104638414Smckusick * Scan the nfsreq list and retranmit any requests that have timed out 104738414Smckusick * To avoid retransmission attempts on STREAM sockets (in the future) make 104840117Smckusick * sure to set the r_retry field to 0 (implies nm_retry == 0). 104938414Smckusick */ 105038414Smckusick nfs_timer() 105138414Smckusick { 105238414Smckusick register struct nfsreq *rep; 105338414Smckusick register struct mbuf *m; 105438414Smckusick register struct socket *so; 1055*41900Smckusick register struct nfsmount *nmp; 105640117Smckusick int s, error; 105738414Smckusick 105838414Smckusick s = splnet(); 1059*41900Smckusick for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) { 1060*41900Smckusick nmp = rep->r_nmp; 1061*41900Smckusick if (rep->r_mrep || (rep->r_flags & R_SOFTTERM) || 1062*41900Smckusick (so = nmp->nm_so) == NULL) 1063*41900Smckusick continue; 1064*41900Smckusick if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp)) { 1065*41900Smckusick rep->r_flags |= R_SOFTTERM; 1066*41900Smckusick continue; 1067*41900Smckusick } 106840117Smckusick if (rep->r_flags & R_TIMING) /* update rtt in mount */ 1069*41900Smckusick nmp->nm_rtt++; 1070*41900Smckusick if (nmp->nm_sotype != SOCK_DGRAM) 107140117Smckusick continue; 1072*41900Smckusick /* If not timed out */ 1073*41900Smckusick if (++rep->r_timer < nmp->nm_rto) 1074*41900Smckusick continue; 1075*41900Smckusick #ifdef notdef 1076*41900Smckusick if (nmp->nm_sotype != SOCK_DGRAM) { 1077*41900Smckusick rep->r_flags |= R_MUSTRESEND; 1078*41900Smckusick rep->r_timer = rep->r_timerinit; 
1079*41900Smckusick continue; 1080*41900Smckusick } 1081*41900Smckusick #endif 108240117Smckusick /* Do backoff and save new timeout in mount */ 108340117Smckusick if (rep->r_flags & R_TIMING) { 1084*41900Smckusick nfs_backofftimer(nmp); 108540117Smckusick rep->r_flags &= ~R_TIMING; 1086*41900Smckusick nmp->nm_rtt = -1; 108740117Smckusick } 108840117Smckusick if (rep->r_flags & R_SENT) { 108940117Smckusick rep->r_flags &= ~R_SENT; 1090*41900Smckusick nmp->nm_sent--; 109140117Smckusick } 1092*41900Smckusick 1093*41900Smckusick /* 1094*41900Smckusick * Check for too many retries on soft mount. 1095*41900Smckusick * nb: For hard mounts, r_retry == NFS_MAXREXMIT+1 1096*41900Smckusick */ 1097*41900Smckusick if (++rep->r_rexmit > NFS_MAXREXMIT) 109840117Smckusick rep->r_rexmit = NFS_MAXREXMIT; 109940117Smckusick 1100*41900Smckusick /* 1101*41900Smckusick * Check for server not responding 1102*41900Smckusick */ 1103*41900Smckusick if ((rep->r_flags & R_TPRINTFMSG) == 0 && 1104*41900Smckusick rep->r_rexmit > 8) { 1105*41900Smckusick if (rep->r_procp && rep->r_procp->p_session) 1106*41900Smckusick tprintf(rep->r_procp->p_session->s_ttyvp, 1107*41900Smckusick "Nfs server %s, not responding\n", 1108*41900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 1109*41900Smckusick else 1110*41900Smckusick tprintf(NULLVP, 1111*41900Smckusick "Nfs server %s, not responding\n", 1112*41900Smckusick nmp->nm_mountp->mnt_stat.f_mntfromname); 1113*41900Smckusick rep->r_flags |= R_TPRINTFMSG; 1114*41900Smckusick } 1115*41900Smckusick if (rep->r_rexmit > rep->r_retry) { /* too many */ 1116*41900Smckusick nfsstats.rpctimeouts++; 1117*41900Smckusick rep->r_flags |= R_SOFTTERM; 1118*41900Smckusick continue; 1119*41900Smckusick } 1120*41900Smckusick 1121*41900Smckusick /* 1122*41900Smckusick * If there is enough space and the window allows.. 
1123*41900Smckusick * Resend it 1124*41900Smckusick */ 1125*41900Smckusick if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len && 1126*41900Smckusick nmp->nm_sent < nmp->nm_window && 1127*41900Smckusick (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){ 1128*41900Smckusick nfsstats.rpcretries++; 1129*41900Smckusick if ((nmp->nm_flag & NFSMNT_NOCONN) == 0) 1130*41900Smckusick error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, 1131*41900Smckusick (caddr_t)0, (struct mbuf *)0, (struct mbuf *)0); 1132*41900Smckusick else 1133*41900Smckusick error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, 1134*41900Smckusick nmp->nm_nam, (struct mbuf *)0, (struct mbuf *)0); 1135*41900Smckusick if (error) { 1136*41900Smckusick if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) 1137*41900Smckusick so->so_error = 0; 1138*41900Smckusick } else { 1139*41900Smckusick /* 1140*41900Smckusick * We need to time the request even though we 1141*41900Smckusick * are retransmitting. 1142*41900Smckusick */ 1143*41900Smckusick nmp->nm_rtt = 0; 1144*41900Smckusick nmp->nm_sent++; 1145*41900Smckusick rep->r_flags |= (R_SENT|R_TIMING); 1146*41900Smckusick rep->r_timer = rep->r_timerinit; 1147*41900Smckusick } 1148*41900Smckusick } 114940117Smckusick } 115040117Smckusick splx(s); 115140117Smckusick timeout(nfs_timer, (caddr_t)0, hz/NFS_HZ); 115240117Smckusick } 115340117Smckusick 115440117Smckusick /* 115540117Smckusick * NFS timer update and backoff. The "Jacobson/Karels/Karn" scheme is 115640117Smckusick * used here. The timer state is held in the nfsmount structure and 115740117Smckusick * a single request is used to clock the response. When successful 115840117Smckusick * the rtt smoothing in nfs_updatetimer is used, when failed the backoff 115940117Smckusick * is done by nfs_backofftimer. We also log failure messages in these 116040117Smckusick * routines. 
*
 * Congestion variables are held in the nfshost structure which
 * is referenced by nfsmounts and shared per-server. This separation
 * makes it possible to do per-mount timing which allows varying disk
 * access times to be dealt with, while preserving a network oriented
 * congestion control scheme.
 * NOTE(review): the routines below read and write the window fields
 * (nm_window, nm_ssthresh, nm_winext) through the nfsmount pointer, so
 * the "nfshost structure" wording may predate that change -- confirm.
 *
 * The windowing implements the Jacobson/Karels slowstart algorithm
 * with adjusted scaling factors. We start with one request, then send
 * 4 more after each success until the ssthresh limit is reached, then
 * we increment at a rate proportional to the window. On failure, we
 * remember 3/4 the current window and clamp the send limit to 1. Note
 * ICMP source quench is not reflected in so->so_error so we ignore that
 * for now.
 *
 * NFS behaves much more like a transport protocol with these changes,
 * shedding the teenage pedal-to-the-metal tendencies of "other"
 * implementations.
 *
 * Timers and congestion avoidance by Tom Talpey, Open Software Foundation.
 */

/*
 * The TCP algorithm was not forgiving enough. Because the NFS server
 * responds only after performing lookups/diskio/etc, we have to be
 * more prepared to accept a spiky variance.
The TCP algorithm is: 1187*41900Smckusick * TCP_RTO(nmp) ((((nmp)->nm_srtt >> 2) + (nmp)->nm_rttvar) >> 1) 118840117Smckusick */ 1189*41900Smckusick #define NFS_RTO(nmp) (((nmp)->nm_srtt >> 3) + (nmp)->nm_rttvar) 119040117Smckusick 1191*41900Smckusick nfs_updatetimer(nmp) 1192*41900Smckusick register struct nfsmount *nmp; 119340117Smckusick { 119440117Smckusick 119540117Smckusick /* If retransmitted, clear and return */ 1196*41900Smckusick if (nmp->nm_rexmit || nmp->nm_currexmit) { 1197*41900Smckusick nmp->nm_rexmit = nmp->nm_currexmit = 0; 119840117Smckusick return; 119940117Smckusick } 120040117Smckusick /* If have a measurement, do smoothing */ 1201*41900Smckusick if (nmp->nm_srtt) { 120240117Smckusick register short delta; 1203*41900Smckusick delta = nmp->nm_rtt - (nmp->nm_srtt >> 3); 1204*41900Smckusick if ((nmp->nm_srtt += delta) <= 0) 1205*41900Smckusick nmp->nm_srtt = 1; 120640117Smckusick if (delta < 0) 120740117Smckusick delta = -delta; 1208*41900Smckusick delta -= (nmp->nm_rttvar >> 2); 1209*41900Smckusick if ((nmp->nm_rttvar += delta) <= 0) 1210*41900Smckusick nmp->nm_rttvar = 1; 121140117Smckusick /* Else initialize */ 121240117Smckusick } else { 1213*41900Smckusick nmp->nm_rttvar = nmp->nm_rtt << 1; 1214*41900Smckusick if (nmp->nm_rttvar == 0) nmp->nm_rttvar = 2; 1215*41900Smckusick nmp->nm_srtt = nmp->nm_rttvar << 2; 121640117Smckusick } 121740117Smckusick /* Compute new Retransmission TimeOut and clip */ 1218*41900Smckusick nmp->nm_rto = NFS_RTO(nmp); 1219*41900Smckusick if (nmp->nm_rto < NFS_MINTIMEO) 1220*41900Smckusick nmp->nm_rto = NFS_MINTIMEO; 1221*41900Smckusick else if (nmp->nm_rto > NFS_MAXTIMEO) 1222*41900Smckusick nmp->nm_rto = NFS_MAXTIMEO; 122340117Smckusick 122440117Smckusick /* Update window estimate */ 1225*41900Smckusick if (nmp->nm_window < nmp->nm_ssthresh) /* quickly */ 1226*41900Smckusick nmp->nm_window += 4; 122740117Smckusick else { /* slowly */ 1228*41900Smckusick register long incr = ++nmp->nm_winext; 1229*41900Smckusick 
incr = (incr * incr) / nmp->nm_window; 123040117Smckusick if (incr > 0) { 1231*41900Smckusick nmp->nm_winext = 0; 1232*41900Smckusick ++nmp->nm_window; 123340117Smckusick } 123440117Smckusick } 1235*41900Smckusick if (nmp->nm_window > NFS_MAXWINDOW) 1236*41900Smckusick nmp->nm_window = NFS_MAXWINDOW; 123740117Smckusick } 123840117Smckusick 1239*41900Smckusick nfs_backofftimer(nmp) 1240*41900Smckusick register struct nfsmount *nmp; 124140117Smckusick { 124240117Smckusick register unsigned long newrto; 124340117Smckusick 124440117Smckusick /* Clip shift count */ 1245*41900Smckusick if (++nmp->nm_rexmit > 8 * sizeof nmp->nm_rto) 1246*41900Smckusick nmp->nm_rexmit = 8 * sizeof nmp->nm_rto; 124740117Smckusick /* Back off RTO exponentially */ 1248*41900Smckusick newrto = NFS_RTO(nmp); 1249*41900Smckusick newrto <<= (nmp->nm_rexmit - 1); 125040117Smckusick if (newrto == 0 || newrto > NFS_MAXTIMEO) 125140117Smckusick newrto = NFS_MAXTIMEO; 1252*41900Smckusick nmp->nm_rto = newrto; 125340117Smckusick 125440117Smckusick /* If too many retries, message, assume a bogus RTT and re-measure */ 1255*41900Smckusick if (nmp->nm_currexmit < nmp->nm_rexmit) { 1256*41900Smckusick nmp->nm_currexmit = nmp->nm_rexmit; 1257*41900Smckusick if (nmp->nm_currexmit >= nfsrexmtthresh) { 1258*41900Smckusick if (nmp->nm_currexmit == nfsrexmtthresh) { 1259*41900Smckusick nmp->nm_rttvar += (nmp->nm_srtt >> 2); 1260*41900Smckusick nmp->nm_srtt = 0; 126138414Smckusick } 126238414Smckusick } 126338414Smckusick } 126440117Smckusick /* Close down window but remember this point (3/4 current) for later */ 1265*41900Smckusick nmp->nm_ssthresh = ((nmp->nm_window << 1) + nmp->nm_window) >> 2; 1266*41900Smckusick nmp->nm_window = 1; 1267*41900Smckusick nmp->nm_winext = 0; 126838414Smckusick } 126938414Smckusick 127038414Smckusick /* 1271*41900Smckusick * Test for a termination signal pending on procp. 1272*41900Smckusick * This is used for NFSMNT_INT mounts. 
127338414Smckusick */ 1274*41900Smckusick nfs_sigintr(p) 1275*41900Smckusick register struct proc *p; 1276*41900Smckusick { 1277*41900Smckusick if (p && p->p_sig && (((p->p_sig &~ p->p_sigmask) &~ p->p_sigignore) & 1278*41900Smckusick NFSINT_SIGMASK)) 1279*41900Smckusick return (1); 1280*41900Smckusick else 1281*41900Smckusick return (0); 1282*41900Smckusick } 128340117Smckusick 1284*41900Smckusick /* 1285*41900Smckusick * Lock a socket against others. 1286*41900Smckusick * Necessary for STREAM sockets to ensure you get an entire rpc request/reply 1287*41900Smckusick * and also to avoid race conditions between the processes with nfs requests 1288*41900Smckusick * in progress when a reconnect is necessary. 1289*41900Smckusick */ 1290*41900Smckusick nfs_solock(flagp, cant_intr) 1291*41900Smckusick int *flagp; 1292*41900Smckusick int cant_intr; 129338414Smckusick { 129440117Smckusick 1295*41900Smckusick while (*flagp & NFSMNT_SCKLOCK) { 1296*41900Smckusick *flagp |= NFSMNT_WANTSCK; 1297*41900Smckusick if (cant_intr) 1298*41900Smckusick (void) sleep((caddr_t)flagp, PZERO-7); 1299*41900Smckusick else 1300*41900Smckusick (void) tsleep((caddr_t)flagp, PZERO+1, "nfssolck", 0); 130140117Smckusick } 1302*41900Smckusick *flagp |= NFSMNT_SCKLOCK; 1303*41900Smckusick } 130440117Smckusick 1305*41900Smckusick /* 1306*41900Smckusick * Unlock the stream socket for others. 
1307*41900Smckusick */ 1308*41900Smckusick nfs_sounlock(flagp) 1309*41900Smckusick int *flagp; 1310*41900Smckusick { 1311*41900Smckusick 1312*41900Smckusick if ((*flagp & NFSMNT_SCKLOCK) == 0) 1313*41900Smckusick panic("nfs sounlock"); 1314*41900Smckusick *flagp &= ~NFSMNT_SCKLOCK; 1315*41900Smckusick if (*flagp & NFSMNT_WANTSCK) { 1316*41900Smckusick *flagp &= ~NFSMNT_WANTSCK; 1317*41900Smckusick wakeup((caddr_t)flagp); 131840117Smckusick } 131938414Smckusick } 1320*41900Smckusick 1321*41900Smckusick /* 1322*41900Smckusick * This function compares two net addresses by family and returns TRUE 1323*41900Smckusick * if they are the same. 1324*41900Smckusick * If there is any doubt, return FALSE. 1325*41900Smckusick */ 1326*41900Smckusick nfs_netaddr_match(nam1, nam2) 1327*41900Smckusick struct mbuf *nam1, *nam2; 1328*41900Smckusick { 1329*41900Smckusick register struct sockaddr *saddr1, *saddr2; 1330*41900Smckusick 1331*41900Smckusick saddr1 = mtod(nam1, struct sockaddr *); 1332*41900Smckusick saddr2 = mtod(nam2, struct sockaddr *); 1333*41900Smckusick if (saddr1->sa_family != saddr2->sa_family) 1334*41900Smckusick return (0); 1335*41900Smckusick 1336*41900Smckusick /* 1337*41900Smckusick * Must do each address family separately since unused fields 1338*41900Smckusick * are undefined values and not always zeroed. 1339*41900Smckusick */ 1340*41900Smckusick switch (saddr1->sa_family) { 1341*41900Smckusick case AF_INET: 1342*41900Smckusick if (((struct sockaddr_in *)saddr1)->sin_addr.s_addr == 1343*41900Smckusick ((struct sockaddr_in *)saddr2)->sin_addr.s_addr) 1344*41900Smckusick return (1); 1345*41900Smckusick break; 1346*41900Smckusick default: 1347*41900Smckusick break; 1348*41900Smckusick }; 1349*41900Smckusick return (0); 1350*41900Smckusick } 1351*41900Smckusick 1352*41900Smckusick /* 1353*41900Smckusick * Check the hostname fields for nfsd's mask and match fields. 
1354*41900Smckusick * By address family: 1355*41900Smckusick * - Bitwise AND the mask with the host address field 1356*41900Smckusick * - Compare for == with match 1357*41900Smckusick * return TRUE if not equal 1358*41900Smckusick */ 1359*41900Smckusick nfs_badnam(nam, msk, mtch) 1360*41900Smckusick register struct mbuf *nam, *msk, *mtch; 1361*41900Smckusick { 1362*41900Smckusick switch (mtod(nam, struct sockaddr *)->sa_family) { 1363*41900Smckusick case AF_INET: 1364*41900Smckusick return ((mtod(nam, struct sockaddr_in *)->sin_addr.s_addr & 1365*41900Smckusick mtod(msk, struct sockaddr_in *)->sin_addr.s_addr) != 1366*41900Smckusick mtod(mtch, struct sockaddr_in *)->sin_addr.s_addr); 1367*41900Smckusick default: 1368*41900Smckusick printf("nfs_badmatch, unknown sa_family\n"); 1369*41900Smckusick return (0); 1370*41900Smckusick }; 1371*41900Smckusick } 1372