138414Smckusick /*
2*68653Smckusick * Copyright (c) 1989, 1991, 1993, 1995
363484Sbostic * The Regents of the University of California. All rights reserved.
438414Smckusick *
538414Smckusick * This code is derived from software contributed to Berkeley by
638414Smckusick * Rick Macklem at The University of Guelph.
738414Smckusick *
844511Sbostic * %sccs.include.redist.c%
938414Smckusick *
10*68653Smckusick * @(#)nfs_socket.c 8.5 (Berkeley) 03/30/95
1138414Smckusick */
1238414Smckusick
1338414Smckusick /*
1441900Smckusick * Socket operations for use by nfs
1538414Smckusick */
1638414Smckusick
1754615Smckusick #include <sys/param.h>
1855063Spendry #include <sys/systm.h>
1954615Smckusick #include <sys/proc.h>
2054615Smckusick #include <sys/mount.h>
2154615Smckusick #include <sys/kernel.h>
2254615Smckusick #include <sys/mbuf.h>
2354615Smckusick #include <sys/vnode.h>
2454615Smckusick #include <sys/domain.h>
2554615Smckusick #include <sys/protosw.h>
2654615Smckusick #include <sys/socket.h>
2754615Smckusick #include <sys/socketvar.h>
2854615Smckusick #include <sys/syslog.h>
2954615Smckusick #include <sys/tprintf.h>
3056535Sbostic
3154615Smckusick #include <netinet/in.h>
3254615Smckusick #include <netinet/tcp.h>
33*68653Smckusick
3454615Smckusick #include <nfs/rpcv2.h>
35*68653Smckusick #include <nfs/nfsproto.h>
3654615Smckusick #include <nfs/nfs.h>
3754615Smckusick #include <nfs/xdr_subs.h>
3854615Smckusick #include <nfs/nfsm_subs.h>
3954615Smckusick #include <nfs/nfsmount.h>
4054615Smckusick #include <nfs/nfsnode.h>
4154615Smckusick #include <nfs/nfsrtt.h>
4254615Smckusick #include <nfs/nqnfs.h>
4338414Smckusick
4438414Smckusick #define TRUE 1
4543351Smckusick #define FALSE 0
4638414Smckusick
/*
 * Estimate rto for an nfs rpc sent via an unreliable datagram.
 * Use the mean and mean deviation of rtt for the appropriate type of rpc
 * for the frequent rpcs and a default for the others.
 * The justification for doing "other" this way is that these rpcs
 * happen so infrequently that timer est. would probably be stale.
 * Also, since many of these rpcs are
 * non-idempotent, a conservative timeout is desired.
 * getattr, lookup - A+2D
 * read, write     - A+4D
 * other	   - nm_timeo
 *
 * NFS_RTO(n, t): "n" is the mount point, "t" the timer class (0 selects
 * nm_timeo, 1-4 select nm_srtt[t - 1] / nm_sdrtt[t - 1]).  Every use of
 * "t" is parenthesized so that an arbitrary expression may be passed.
 *
 * NFS_SRTT(r) / NFS_SDRTT(r): the smoothed rtt / rtt deviation slot for
 * request "r"; both are lvalues.  They index with proct[] - 1, so they
 * must only be used for requests whose timer class is non-zero.
 */
#define	NFS_RTO(n, t) \
	((t) == 0 ? (n)->nm_timeo : \
	 ((t) < 3 ? \
	  (((((n)->nm_srtt[(t) - 1] + 3) >> 2) + \
	    (n)->nm_sdrtt[(t) - 1] + 1) >> 1) : \
	  ((((n)->nm_srtt[(t) - 1] + 7) >> 3) + \
	    (n)->nm_sdrtt[(t) - 1] + 1)))
#define	NFS_SRTT(r)	((r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1])
#define	NFS_SDRTT(r)	((r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1])
6652196Smckusick /*
6738414Smckusick * External data, mostly RPC constants in XDR form
6838414Smckusick */
6938414Smckusick extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
70*68653Smckusick rpc_msgaccepted, rpc_call, rpc_autherr,
7152196Smckusick rpc_auth_kerb;
72*68653Smckusick extern u_long nfs_prog, nqnfs_prog;
7352196Smckusick extern time_t nqnfsstarttime;
74*68653Smckusick extern struct nfsstats nfsstats;
75*68653Smckusick extern int nfsv3_procid[NFS_NPROCS];
76*68653Smckusick extern int nfs_ticks;
7752196Smckusick
7852196Smckusick /*
7952196Smckusick * Defines which timer to use for the procnum.
8052196Smckusick * 0 - default
8152196Smckusick * 1 - getattr
8252196Smckusick * 2 - lookup
8352196Smckusick * 3 - read
8452196Smckusick * 4 - write
8552196Smckusick */
8652196Smckusick static int proct[NFS_NPROCS] = {
87*68653Smckusick 0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0,
88*68653Smckusick 0, 0, 0,
8952196Smckusick };
9052196Smckusick
9152196Smckusick /*
9252196Smckusick * There is a congestion window for outstanding rpcs maintained per mount
9352196Smckusick * point. The cwnd size is adjusted in roughly the way that:
9452196Smckusick * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
9552196Smckusick * SIGCOMM '88". ACM, August 1988.
9652196Smckusick * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
9752196Smckusick * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
9852196Smckusick * of rpcs is in progress.
9952196Smckusick * (The sent count and cwnd are scaled for integer arith.)
10052196Smckusick * Variants of "slow start" were tried and were found to be too much of a
10152196Smckusick * performance hit (ave. rtt 3 times larger),
10252196Smckusick * I suspect due to the large rtt that nfs rpcs have.
10352196Smckusick */
10452196Smckusick #define NFS_CWNDSCALE 256
10552196Smckusick #define NFS_MAXCWND (NFS_CWNDSCALE * 32)
10652196Smckusick static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
10741900Smckusick int nfs_sbwait();
10852196Smckusick void nfs_disconnect(), nfs_realign(), nfsrv_wakenfsd(), nfs_sndunlock();
10956286Smckusick void nfs_rcvunlock(), nqnfs_serverd(), nqnfs_clientlease();
11052196Smckusick struct mbuf *nfsm_rpchead();
11152196Smckusick int nfsrtton = 0;
11252196Smckusick struct nfsrtt nfsrtt;
11341900Smckusick
/* Server-side procedure handlers referenced by nfsrv3_procs[]. */
int nfsrv_null();
int nfsrv_getattr();
int nfsrv_setattr();
int nfsrv_lookup();
int nfsrv3_access();
int nfsrv_readlink();
int nfsrv_read();
int nfsrv_write();
int nfsrv_create();
int nfsrv_mknod();
int nfsrv_remove();
int nfsrv_rename();
int nfsrv_link();
int nfsrv_symlink();
int nfsrv_mkdir();
int nfsrv_rmdir();
int nfsrv_readdir();
int nfsrv_readdirplus();
int nfsrv_statfs();
int nfsrv_fsinfo();
int nfsrv_pathconf();
int nfsrv_commit();
int nfsrv_noop();
int nqnfsrv_getlease();
int nqnfsrv_vacated();
13938414Smckusick
140*68653Smckusick int (*nfsrv3_procs[NFS_NPROCS])() = {
14138414Smckusick nfsrv_null,
14238414Smckusick nfsrv_getattr,
14338414Smckusick nfsrv_setattr,
14438414Smckusick nfsrv_lookup,
145*68653Smckusick nfsrv3_access,
14638414Smckusick nfsrv_readlink,
14738414Smckusick nfsrv_read,
14838414Smckusick nfsrv_write,
14938414Smckusick nfsrv_create,
150*68653Smckusick nfsrv_mkdir,
151*68653Smckusick nfsrv_symlink,
152*68653Smckusick nfsrv_mknod,
15338414Smckusick nfsrv_remove,
154*68653Smckusick nfsrv_rmdir,
15538414Smckusick nfsrv_rename,
15638414Smckusick nfsrv_link,
15738414Smckusick nfsrv_readdir,
158*68653Smckusick nfsrv_readdirplus,
15938414Smckusick nfsrv_statfs,
160*68653Smckusick nfsrv_fsinfo,
161*68653Smckusick nfsrv_pathconf,
162*68653Smckusick nfsrv_commit,
16352196Smckusick nqnfsrv_getlease,
16452196Smckusick nqnfsrv_vacated,
16556361Smckusick nfsrv_noop,
166*68653Smckusick nfsrv_noop
16738414Smckusick };
16838414Smckusick
16938414Smckusick /*
17041900Smckusick * Initialize sockets and congestion for a new NFS connection.
17140117Smckusick * We do not free the sockaddr if error.
17238414Smckusick */
173*68653Smckusick int
nfs_connect(nmp,rep)17452196Smckusick nfs_connect(nmp, rep)
17540117Smckusick register struct nfsmount *nmp;
17652196Smckusick struct nfsreq *rep;
17740117Smckusick {
17841900Smckusick register struct socket *so;
17952196Smckusick int s, error, rcvreserve, sndreserve;
18052988Smckusick struct sockaddr *saddr;
18152988Smckusick struct sockaddr_in *sin;
18240117Smckusick struct mbuf *m;
18352988Smckusick u_short tport;
18440117Smckusick
18541900Smckusick nmp->nm_so = (struct socket *)0;
18652988Smckusick saddr = mtod(nmp->nm_nam, struct sockaddr *);
187*68653Smckusick error = socreate(saddr->sa_family, &nmp->nm_so, nmp->nm_sotype,
188*68653Smckusick nmp->nm_soproto);
189*68653Smckusick if (error)
19040117Smckusick goto bad;
19141900Smckusick so = nmp->nm_so;
19241900Smckusick nmp->nm_soflags = so->so_proto->pr_flags;
19340117Smckusick
19441900Smckusick /*
19552988Smckusick * Some servers require that the client port be a reserved port number.
19652988Smckusick */
19752988Smckusick if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
19852988Smckusick MGET(m, M_WAIT, MT_SONAME);
19952988Smckusick sin = mtod(m, struct sockaddr_in *);
20052988Smckusick sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
20152988Smckusick sin->sin_family = AF_INET;
20252988Smckusick sin->sin_addr.s_addr = INADDR_ANY;
20352988Smckusick tport = IPPORT_RESERVED - 1;
20452988Smckusick sin->sin_port = htons(tport);
20552988Smckusick while ((error = sobind(so, m)) == EADDRINUSE &&
20652988Smckusick --tport > IPPORT_RESERVED / 2)
20752988Smckusick sin->sin_port = htons(tport);
20852988Smckusick m_freem(m);
20952988Smckusick if (error)
21052988Smckusick goto bad;
21152988Smckusick }
21252988Smckusick
21352988Smckusick /*
21441900Smckusick * Protocols that do not require connections may be optionally left
21541900Smckusick * unconnected for servers that reply from a port other than NFS_PORT.
21641900Smckusick */
21741900Smckusick if (nmp->nm_flag & NFSMNT_NOCONN) {
21841900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED) {
21941900Smckusick error = ENOTCONN;
22040117Smckusick goto bad;
22140117Smckusick }
22241900Smckusick } else {
223*68653Smckusick error = soconnect(so, nmp->nm_nam);
224*68653Smckusick if (error)
22540117Smckusick goto bad;
22641900Smckusick
22741900Smckusick /*
22841900Smckusick * Wait for the connection to complete. Cribbed from the
22952196Smckusick * connect system call but with the wait timing out so
23052196Smckusick * that interruptible mounts don't hang here for a long time.
23141900Smckusick */
23241900Smckusick s = splnet();
23352196Smckusick while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
23452196Smckusick (void) tsleep((caddr_t)&so->so_timeo, PSOCK,
23552196Smckusick "nfscon", 2 * hz);
23652196Smckusick if ((so->so_state & SS_ISCONNECTING) &&
23752196Smckusick so->so_error == 0 && rep &&
23852196Smckusick (error = nfs_sigintr(nmp, rep, rep->r_procp))) {
23952196Smckusick so->so_state &= ~SS_ISCONNECTING;
24052196Smckusick splx(s);
24152196Smckusick goto bad;
24252196Smckusick }
24352196Smckusick }
24441900Smckusick if (so->so_error) {
24541900Smckusick error = so->so_error;
24652196Smckusick so->so_error = 0;
24752196Smckusick splx(s);
24841900Smckusick goto bad;
24941900Smckusick }
25052196Smckusick splx(s);
25140117Smckusick }
25252196Smckusick if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
25352196Smckusick so->so_rcv.sb_timeo = (5 * hz);
25452196Smckusick so->so_snd.sb_timeo = (5 * hz);
25552196Smckusick } else {
25652196Smckusick so->so_rcv.sb_timeo = 0;
25752196Smckusick so->so_snd.sb_timeo = 0;
25852196Smckusick }
25941900Smckusick if (nmp->nm_sotype == SOCK_DGRAM) {
26052196Smckusick sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR;
26152196Smckusick rcvreserve = nmp->nm_rsize + NFS_MAXPKTHDR;
26252196Smckusick } else if (nmp->nm_sotype == SOCK_SEQPACKET) {
26352196Smckusick sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
26452196Smckusick rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * 2;
26541900Smckusick } else {
26652196Smckusick if (nmp->nm_sotype != SOCK_STREAM)
26752196Smckusick panic("nfscon sotype");
26841900Smckusick if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
26941900Smckusick MGET(m, M_WAIT, MT_SOOPTS);
27041900Smckusick *mtod(m, int *) = 1;
27141900Smckusick m->m_len = sizeof(int);
27241900Smckusick sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
27341900Smckusick }
27452196Smckusick if (so->so_proto->pr_protocol == IPPROTO_TCP) {
27541900Smckusick MGET(m, M_WAIT, MT_SOOPTS);
27641900Smckusick *mtod(m, int *) = 1;
27741900Smckusick m->m_len = sizeof(int);
27841900Smckusick sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
27941900Smckusick }
28052196Smckusick sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long))
28152196Smckusick * 2;
28252196Smckusick rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long))
28352196Smckusick * 2;
28441900Smckusick }
285*68653Smckusick error = soreserve(so, sndreserve, rcvreserve);
286*68653Smckusick if (error)
28752196Smckusick goto bad;
28841900Smckusick so->so_rcv.sb_flags |= SB_NOINTR;
28941900Smckusick so->so_snd.sb_flags |= SB_NOINTR;
29040117Smckusick
29141900Smckusick /* Initialize other non-zero congestion variables */
29252196Smckusick nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] =
29352196Smckusick nmp->nm_srtt[4] = (NFS_TIMEO << 3);
29452196Smckusick nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
29552196Smckusick nmp->nm_sdrtt[3] = nmp->nm_sdrtt[4] = 0;
29652196Smckusick nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */
29741900Smckusick nmp->nm_sent = 0;
29852196Smckusick nmp->nm_timeouts = 0;
29941900Smckusick return (0);
30040117Smckusick
30141900Smckusick bad:
30241900Smckusick nfs_disconnect(nmp);
30341900Smckusick return (error);
30441900Smckusick }
30540117Smckusick
30641900Smckusick /*
30741900Smckusick * Reconnect routine:
30841900Smckusick * Called when a connection is broken on a reliable protocol.
30941900Smckusick * - clean up the old socket
31041900Smckusick * - nfs_connect() again
31141900Smckusick * - set R_MUSTRESEND for all outstanding requests on mount point
31241900Smckusick * If this fails the mount point is DEAD!
31352196Smckusick * nb: Must be called with the nfs_sndlock() set on the mount point.
31441900Smckusick */
315*68653Smckusick int
nfs_reconnect(rep)31652196Smckusick nfs_reconnect(rep)
31741900Smckusick register struct nfsreq *rep;
31841900Smckusick {
31941900Smckusick register struct nfsreq *rp;
32052196Smckusick register struct nfsmount *nmp = rep->r_nmp;
32141900Smckusick int error;
32240117Smckusick
32352196Smckusick nfs_disconnect(nmp);
324*68653Smckusick while ((error = nfs_connect(nmp, rep))) {
32552196Smckusick if (error == EINTR || error == ERESTART)
32641900Smckusick return (EINTR);
32743351Smckusick (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
32840117Smckusick }
32941900Smckusick
33041900Smckusick /*
33141900Smckusick * Loop through outstanding request list and fix up all requests
33241900Smckusick * on old socket.
33341900Smckusick */
33467708Smckusick for (rp = nfs_reqq.tqh_first; rp != 0; rp = rp->r_chain.tqe_next) {
33541900Smckusick if (rp->r_nmp == nmp)
33641900Smckusick rp->r_flags |= R_MUSTRESEND;
33740117Smckusick }
33840117Smckusick return (0);
33940117Smckusick }
34040117Smckusick
34140117Smckusick /*
34240117Smckusick * NFS disconnect. Clean up and unlink.
34340117Smckusick */
34441900Smckusick void
nfs_disconnect(nmp)34540117Smckusick nfs_disconnect(nmp)
34640117Smckusick register struct nfsmount *nmp;
34740117Smckusick {
34841900Smckusick register struct socket *so;
34940117Smckusick
35041900Smckusick if (nmp->nm_so) {
35141900Smckusick so = nmp->nm_so;
35241900Smckusick nmp->nm_so = (struct socket *)0;
35341900Smckusick soshutdown(so, 2);
35441900Smckusick soclose(so);
35540117Smckusick }
35640117Smckusick }
35740117Smckusick
35840117Smckusick /*
35941900Smckusick * This is the nfs send routine. For connection based socket types, it
36052196Smckusick * must be called with an nfs_sndlock() on the socket.
36141900Smckusick * "rep == NULL" indicates that it has been called from a server.
36252196Smckusick * For the client side:
36352196Smckusick * - return EINTR if the RPC is terminated, 0 otherwise
36452196Smckusick * - set R_MUSTRESEND if the send fails for any reason
36552196Smckusick * - do any cleanup required by recoverable socket errors (???)
36652196Smckusick * For the server side:
36752196Smckusick * - return EINTR or ERESTART if interrupted by a signal
36852196Smckusick * - return EPIPE if a connection is lost for connection based sockets (TCP...)
36952196Smckusick * - do any cleanup required by recoverable socket errors (???)
37040117Smckusick */
371*68653Smckusick int
nfs_send(so,nam,top,rep)37241900Smckusick nfs_send(so, nam, top, rep)
37338414Smckusick register struct socket *so;
37438414Smckusick struct mbuf *nam;
37541900Smckusick register struct mbuf *top;
37641900Smckusick struct nfsreq *rep;
37738414Smckusick {
37841900Smckusick struct mbuf *sendnam;
37952196Smckusick int error, soflags, flags;
38038414Smckusick
38141900Smckusick if (rep) {
38241900Smckusick if (rep->r_flags & R_SOFTTERM) {
38340117Smckusick m_freem(top);
38441900Smckusick return (EINTR);
38540117Smckusick }
38652196Smckusick if ((so = rep->r_nmp->nm_so) == NULL) {
38752196Smckusick rep->r_flags |= R_MUSTRESEND;
38852196Smckusick m_freem(top);
38952196Smckusick return (0);
39052196Smckusick }
39141900Smckusick rep->r_flags &= ~R_MUSTRESEND;
39241900Smckusick soflags = rep->r_nmp->nm_soflags;
39341900Smckusick } else
39441900Smckusick soflags = so->so_proto->pr_flags;
39541900Smckusick if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
39641900Smckusick sendnam = (struct mbuf *)0;
39741900Smckusick else
39841900Smckusick sendnam = nam;
39952196Smckusick if (so->so_type == SOCK_SEQPACKET)
40052196Smckusick flags = MSG_EOR;
40152196Smckusick else
40252196Smckusick flags = 0;
40341900Smckusick
40441900Smckusick error = sosend(so, sendnam, (struct uio *)0, top,
40552196Smckusick (struct mbuf *)0, flags);
40652196Smckusick if (error) {
40752196Smckusick if (rep) {
40852934Smckusick log(LOG_INFO, "nfs send error %d for server %s\n",error,
40952934Smckusick rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
41052196Smckusick /*
41152196Smckusick * Deal with errors for the client side.
41252196Smckusick */
41352196Smckusick if (rep->r_flags & R_SOFTTERM)
41452196Smckusick error = EINTR;
41552196Smckusick else
41652196Smckusick rep->r_flags |= R_MUSTRESEND;
41752934Smckusick } else
41852934Smckusick log(LOG_INFO, "nfsd send error %d\n", error);
41952196Smckusick
42052196Smckusick /*
42152196Smckusick * Handle any recoverable (soft) socket errors here. (???)
42252196Smckusick */
42352196Smckusick if (error != EINTR && error != ERESTART &&
42452196Smckusick error != EWOULDBLOCK && error != EPIPE)
42541900Smckusick error = 0;
42638414Smckusick }
42738414Smckusick return (error);
42838414Smckusick }
42938414Smckusick
43038414Smckusick /*
43141900Smckusick * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
43241900Smckusick * done by soreceive(), but for SOCK_STREAM we must deal with the Record
43341900Smckusick * Mark and consolidate the data into a new mbuf list.
43441900Smckusick * nb: Sometimes TCP passes the data up to soreceive() in long lists of
43541900Smckusick * small mbufs.
43641900Smckusick * For SOCK_STREAM we must be very careful to read an entire record once
43741900Smckusick * we have read any of it, even if the system call has been interrupted.
43838414Smckusick */
439*68653Smckusick int
nfs_receive(rep,aname,mp)44052196Smckusick nfs_receive(rep, aname, mp)
44152196Smckusick register struct nfsreq *rep;
44238414Smckusick struct mbuf **aname;
44338414Smckusick struct mbuf **mp;
44438414Smckusick {
44552196Smckusick register struct socket *so;
44641900Smckusick struct uio auio;
44741900Smckusick struct iovec aio;
44838414Smckusick register struct mbuf *m;
44952196Smckusick struct mbuf *control;
45041900Smckusick u_long len;
45141900Smckusick struct mbuf **getnam;
45252196Smckusick int error, sotype, rcvflg;
45352932Smckusick struct proc *p = curproc; /* XXX */
45438414Smckusick
45541900Smckusick /*
45641900Smckusick * Set up arguments for soreceive()
45741900Smckusick */
45841900Smckusick *mp = (struct mbuf *)0;
45941900Smckusick *aname = (struct mbuf *)0;
46052196Smckusick sotype = rep->r_nmp->nm_sotype;
46138414Smckusick
46241900Smckusick /*
46341900Smckusick * For reliable protocols, lock against other senders/receivers
46441900Smckusick * in case a reconnect is necessary.
46541900Smckusick * For SOCK_STREAM, first get the Record Mark to find out how much
46641900Smckusick * more there is to get.
46741900Smckusick * We must lock the socket against other receivers
46841900Smckusick * until we have an entire rpc request/reply.
46941900Smckusick */
47052196Smckusick if (sotype != SOCK_DGRAM) {
471*68653Smckusick error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
472*68653Smckusick if (error)
47352196Smckusick return (error);
47441900Smckusick tryagain:
47541900Smckusick /*
47641900Smckusick * Check for fatal errors and resending request.
47741900Smckusick */
47852196Smckusick /*
47952196Smckusick * Ugh: If a reconnect attempt just happened, nm_so
48052196Smckusick * would have changed. NULL indicates a failed
48152196Smckusick * attempt that has essentially shut down this
48252196Smckusick * mount point.
48352196Smckusick */
48452196Smckusick if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
48552196Smckusick nfs_sndunlock(&rep->r_nmp->nm_flag);
48652196Smckusick return (EINTR);
48752196Smckusick }
488*68653Smckusick so = rep->r_nmp->nm_so;
489*68653Smckusick if (!so) {
490*68653Smckusick error = nfs_reconnect(rep);
491*68653Smckusick if (error) {
49252196Smckusick nfs_sndunlock(&rep->r_nmp->nm_flag);
49352196Smckusick return (error);
49440117Smckusick }
49552196Smckusick goto tryagain;
49641900Smckusick }
49752196Smckusick while (rep->r_flags & R_MUSTRESEND) {
49852196Smckusick m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
49952196Smckusick nfsstats.rpcretries++;
500*68653Smckusick error = nfs_send(so, rep->r_nmp->nm_nam, m, rep);
501*68653Smckusick if (error) {
50252196Smckusick if (error == EINTR || error == ERESTART ||
50352196Smckusick (error = nfs_reconnect(rep))) {
50452196Smckusick nfs_sndunlock(&rep->r_nmp->nm_flag);
50552196Smckusick return (error);
50652196Smckusick }
50752196Smckusick goto tryagain;
50852196Smckusick }
50952196Smckusick }
51052196Smckusick nfs_sndunlock(&rep->r_nmp->nm_flag);
51152196Smckusick if (sotype == SOCK_STREAM) {
51241900Smckusick aio.iov_base = (caddr_t) &len;
51341900Smckusick aio.iov_len = sizeof(u_long);
51441900Smckusick auio.uio_iov = &aio;
51541900Smckusick auio.uio_iovcnt = 1;
51641900Smckusick auio.uio_segflg = UIO_SYSSPACE;
51741900Smckusick auio.uio_rw = UIO_READ;
51841900Smckusick auio.uio_offset = 0;
51941900Smckusick auio.uio_resid = sizeof(u_long);
52052932Smckusick auio.uio_procp = p;
52141900Smckusick do {
52252196Smckusick rcvflg = MSG_WAITALL;
52352196Smckusick error = soreceive(so, (struct mbuf **)0, &auio,
52441900Smckusick (struct mbuf **)0, (struct mbuf **)0, &rcvflg);
52552196Smckusick if (error == EWOULDBLOCK && rep) {
52641900Smckusick if (rep->r_flags & R_SOFTTERM)
52741900Smckusick return (EINTR);
52852196Smckusick }
52941900Smckusick } while (error == EWOULDBLOCK);
53047737Skarels if (!error && auio.uio_resid > 0) {
53152934Smckusick log(LOG_INFO,
53252934Smckusick "short receive (%d/%d) from nfs server %s\n",
53352934Smckusick sizeof(u_long) - auio.uio_resid,
53452934Smckusick sizeof(u_long),
53547737Skarels rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
53647737Skarels error = EPIPE;
53747737Skarels }
53840761Skarels if (error)
53941900Smckusick goto errout;
54041900Smckusick len = ntohl(len) & ~0x80000000;
54141900Smckusick /*
54241900Smckusick * This is SERIOUS! We are out of sync with the sender
54341900Smckusick * and forcing a disconnect/reconnect is all I can do.
54441900Smckusick */
54541900Smckusick if (len > NFS_MAXPACKET) {
54652934Smckusick log(LOG_ERR, "%s (%d) from nfs server %s\n",
54752934Smckusick "impossible packet length",
54852934Smckusick len,
54952934Smckusick rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
55047737Skarels error = EFBIG;
55147737Skarels goto errout;
55241900Smckusick }
55341900Smckusick auio.uio_resid = len;
55441900Smckusick do {
55547737Skarels rcvflg = MSG_WAITALL;
55641900Smckusick error = soreceive(so, (struct mbuf **)0,
55741900Smckusick &auio, mp, (struct mbuf **)0, &rcvflg);
55841900Smckusick } while (error == EWOULDBLOCK || error == EINTR ||
55941900Smckusick error == ERESTART);
56047737Skarels if (!error && auio.uio_resid > 0) {
56152934Smckusick log(LOG_INFO,
56252934Smckusick "short receive (%d/%d) from nfs server %s\n",
56352934Smckusick len - auio.uio_resid, len,
56452934Smckusick rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
56547737Skarels error = EPIPE;
56647737Skarels }
56740117Smckusick } else {
56852196Smckusick /*
56952196Smckusick * NB: Since uio_resid is big, MSG_WAITALL is ignored
57052196Smckusick * and soreceive() will return when it has either a
57152196Smckusick * control msg or a data msg.
57252196Smckusick * We have no use for control msg., but must grab them
57352196Smckusick * and then throw them away so we know what is going
57452196Smckusick * on.
57552196Smckusick */
57652196Smckusick auio.uio_resid = len = 100000000; /* Anything Big */
57752932Smckusick auio.uio_procp = p;
57841900Smckusick do {
57947737Skarels rcvflg = 0;
58041900Smckusick error = soreceive(so, (struct mbuf **)0,
58152196Smckusick &auio, mp, &control, &rcvflg);
58252196Smckusick if (control)
58352196Smckusick m_freem(control);
58441900Smckusick if (error == EWOULDBLOCK && rep) {
58541900Smckusick if (rep->r_flags & R_SOFTTERM)
58641900Smckusick return (EINTR);
58741900Smckusick }
58852196Smckusick } while (error == EWOULDBLOCK ||
58952196Smckusick (!error && *mp == NULL && control));
59052196Smckusick if ((rcvflg & MSG_EOR) == 0)
59152196Smckusick printf("Egad!!\n");
59241900Smckusick if (!error && *mp == NULL)
59341900Smckusick error = EPIPE;
59441900Smckusick len -= auio.uio_resid;
59540117Smckusick }
59641900Smckusick errout:
59752196Smckusick if (error && error != EINTR && error != ERESTART) {
59841900Smckusick m_freem(*mp);
59941900Smckusick *mp = (struct mbuf *)0;
60052934Smckusick if (error != EPIPE)
60147737Skarels log(LOG_INFO,
60247737Skarels "receive error %d from nfs server %s\n",
60347737Skarels error,
60447737Skarels rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
60552196Smckusick error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
60641900Smckusick if (!error)
60752196Smckusick error = nfs_reconnect(rep);
60852196Smckusick if (!error)
60941900Smckusick goto tryagain;
61040117Smckusick }
61141900Smckusick } else {
61252196Smckusick if ((so = rep->r_nmp->nm_so) == NULL)
61352196Smckusick return (EACCES);
61441900Smckusick if (so->so_state & SS_ISCONNECTED)
61541900Smckusick getnam = (struct mbuf **)0;
61641900Smckusick else
61741900Smckusick getnam = aname;
61841900Smckusick auio.uio_resid = len = 1000000;
61952932Smckusick auio.uio_procp = p;
62041900Smckusick do {
62147737Skarels rcvflg = 0;
62241900Smckusick error = soreceive(so, getnam, &auio, mp,
62341900Smckusick (struct mbuf **)0, &rcvflg);
62452196Smckusick if (error == EWOULDBLOCK &&
62541900Smckusick (rep->r_flags & R_SOFTTERM))
62641900Smckusick return (EINTR);
62741900Smckusick } while (error == EWOULDBLOCK);
62841900Smckusick len -= auio.uio_resid;
62941900Smckusick }
63041900Smckusick if (error) {
63141900Smckusick m_freem(*mp);
63241900Smckusick *mp = (struct mbuf *)0;
63341900Smckusick }
63441900Smckusick /*
63552196Smckusick * Search for any mbufs that are not a multiple of 4 bytes long
63652196Smckusick * or with m_data not longword aligned.
63741900Smckusick * These could cause pointer alignment problems, so copy them to
63841900Smckusick * well aligned mbufs.
63941900Smckusick */
64052196Smckusick nfs_realign(*mp, 5 * NFSX_UNSIGNED);
64138414Smckusick return (error);
64238414Smckusick }
64338414Smckusick
64438414Smckusick /*
64541900Smckusick * Implement receipt of reply on a socket.
64638414Smckusick * We must search through the list of received datagrams matching them
64738414Smckusick * with outstanding requests using the xid, until ours is found.
64838414Smckusick */
64941900Smckusick /* ARGSUSED */
650*68653Smckusick int
nfs_reply(myrep)65152196Smckusick nfs_reply(myrep)
65239344Smckusick struct nfsreq *myrep;
65338414Smckusick {
65438414Smckusick register struct nfsreq *rep;
65552196Smckusick register struct nfsmount *nmp = myrep->r_nmp;
65652196Smckusick register long t1;
65752196Smckusick struct mbuf *mrep, *nam, *md;
65852196Smckusick u_long rxid, *tl;
65952196Smckusick caddr_t dpos, cp2;
66052196Smckusick int error;
66138414Smckusick
66241900Smckusick /*
66341900Smckusick * Loop around until we get our own reply
66441900Smckusick */
66541900Smckusick for (;;) {
66641900Smckusick /*
66741900Smckusick * Lock against other receivers so that I don't get stuck in
66841900Smckusick * sbwait() after someone else has received my reply for me.
66941900Smckusick * Also necessary for connection based protocols to avoid
67041900Smckusick * race conditions during a reconnect.
67141900Smckusick */
672*68653Smckusick error = nfs_rcvlock(myrep);
673*68653Smckusick if (error)
67452196Smckusick return (error);
67541900Smckusick /* Already received, bye bye */
67641900Smckusick if (myrep->r_mrep != NULL) {
67752196Smckusick nfs_rcvunlock(&nmp->nm_flag);
67841900Smckusick return (0);
67940117Smckusick }
68041900Smckusick /*
68141900Smckusick * Get the next Rpc reply off the socket
68241900Smckusick */
68352196Smckusick error = nfs_receive(myrep, &nam, &mrep);
68452196Smckusick nfs_rcvunlock(&nmp->nm_flag);
68552196Smckusick if (error) {
68638414Smckusick
68741900Smckusick /*
68841900Smckusick * Ignore routing errors on connectionless protocols??
68941900Smckusick */
69041900Smckusick if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
69141900Smckusick nmp->nm_so->so_error = 0;
69259387Smckusick if (myrep->r_flags & R_GETONEREP)
69359387Smckusick return (0);
69441900Smckusick continue;
69541900Smckusick }
69641900Smckusick return (error);
69738414Smckusick }
69852196Smckusick if (nam)
69952196Smckusick m_freem(nam);
70041900Smckusick
70141900Smckusick /*
70241900Smckusick * Get the xid and check that it is an rpc reply
70341900Smckusick */
70452196Smckusick md = mrep;
70552196Smckusick dpos = mtod(md, caddr_t);
70652196Smckusick nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
70752196Smckusick rxid = *tl++;
70852196Smckusick if (*tl != rpc_reply) {
70952196Smckusick if (nmp->nm_flag & NFSMNT_NQNFS) {
71052196Smckusick if (nqnfs_callback(nmp, mrep, md, dpos))
71152196Smckusick nfsstats.rpcinvalid++;
71252196Smckusick } else {
71352196Smckusick nfsstats.rpcinvalid++;
71452196Smckusick m_freem(mrep);
71552196Smckusick }
71652196Smckusick nfsmout:
71759387Smckusick if (myrep->r_flags & R_GETONEREP)
71859387Smckusick return (0);
71941900Smckusick continue;
72038414Smckusick }
72152196Smckusick
72241900Smckusick /*
72341900Smckusick * Loop through the request list to match up the reply
72441900Smckusick * Iff no match, just drop the datagram
72541900Smckusick */
72667708Smckusick for (rep = nfs_reqq.tqh_first; rep != 0;
72767708Smckusick rep = rep->r_chain.tqe_next) {
72845281Smckusick if (rep->r_mrep == NULL && rxid == rep->r_xid) {
72941900Smckusick /* Found it.. */
73052196Smckusick rep->r_mrep = mrep;
73152196Smckusick rep->r_md = md;
73252196Smckusick rep->r_dpos = dpos;
73352196Smckusick if (nfsrtton) {
73452196Smckusick struct rttl *rt;
73552196Smckusick
73652196Smckusick rt = &nfsrtt.rttl[nfsrtt.pos];
73752196Smckusick rt->proc = rep->r_procnum;
73852196Smckusick rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]);
73952196Smckusick rt->sent = nmp->nm_sent;
74052196Smckusick rt->cwnd = nmp->nm_cwnd;
74152196Smckusick rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
74252196Smckusick rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
74352196Smckusick rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
74452196Smckusick rt->tstamp = time;
74552196Smckusick if (rep->r_flags & R_TIMING)
74652196Smckusick rt->rtt = rep->r_rtt;
74752196Smckusick else
74852196Smckusick rt->rtt = 1000000;
74952196Smckusick nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
75052196Smckusick }
75141900Smckusick /*
75252196Smckusick * Update congestion window.
75352196Smckusick * Do the additive increase of
75452196Smckusick * one rpc/rtt.
75541900Smckusick */
75652196Smckusick if (nmp->nm_cwnd <= nmp->nm_sent) {
75752196Smckusick nmp->nm_cwnd +=
75852196Smckusick (NFS_CWNDSCALE * NFS_CWNDSCALE +
75952196Smckusick (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
76052196Smckusick if (nmp->nm_cwnd > NFS_MAXCWND)
76152196Smckusick nmp->nm_cwnd = NFS_MAXCWND;
76252196Smckusick }
76356671Smckusick rep->r_flags &= ~R_SENT;
76456671Smckusick nmp->nm_sent -= NFS_CWNDSCALE;
76552196Smckusick /*
76652196Smckusick * Update rtt using a gain of 0.125 on the mean
76752196Smckusick * and a gain of 0.25 on the deviation.
76852196Smckusick */
76941900Smckusick if (rep->r_flags & R_TIMING) {
77052196Smckusick /*
77152196Smckusick * Since the timer resolution of
77252196Smckusick * NFS_HZ is so coarse, it can often
77352196Smckusick * result in r_rtt == 0. Since
77452196Smckusick * r_rtt == N means that the actual
77552196Smckusick * rtt is between N+dt and N+2-dt ticks,
77652196Smckusick * add 1.
77752196Smckusick */
77852196Smckusick t1 = rep->r_rtt + 1;
77952196Smckusick t1 -= (NFS_SRTT(rep) >> 3);
78052196Smckusick NFS_SRTT(rep) += t1;
78152196Smckusick if (t1 < 0)
78252196Smckusick t1 = -t1;
78352196Smckusick t1 -= (NFS_SDRTT(rep) >> 2);
78452196Smckusick NFS_SDRTT(rep) += t1;
78541900Smckusick }
78652196Smckusick nmp->nm_timeouts = 0;
78740117Smckusick break;
78838414Smckusick }
78938414Smckusick }
79041900Smckusick /*
79141900Smckusick * If not matched to a request, drop it.
79241900Smckusick * If it's mine, get out.
79341900Smckusick */
79467708Smckusick if (rep == 0) {
79541900Smckusick nfsstats.rpcunexpected++;
79652196Smckusick m_freem(mrep);
79753426Smckusick } else if (rep == myrep) {
79853426Smckusick if (rep->r_mrep == NULL)
79953426Smckusick panic("nfsreply nil");
80041900Smckusick return (0);
80153426Smckusick }
80259387Smckusick if (myrep->r_flags & R_GETONEREP)
80359387Smckusick return (0);
80438414Smckusick }
80538414Smckusick }
80638414Smckusick
80738414Smckusick /*
80838414Smckusick * nfs_request - goes something like this
80938414Smckusick * - fill in request struct
81038414Smckusick * - links it into list
81141900Smckusick * - calls nfs_send() for first transmit
81241900Smckusick * - calls nfs_receive() to get reply
81338414Smckusick * - break down rpc header and return with nfs reply pointed to
81438414Smckusick * by mrep or error
81538414Smckusick * nb: always frees up mreq mbuf list
81638414Smckusick */
817*68653Smckusick int
nfs_request(vp,mrest,procnum,procp,cred,mrp,mdp,dposp)81852196Smckusick nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp)
81938414Smckusick struct vnode *vp;
82052196Smckusick struct mbuf *mrest;
82141900Smckusick int procnum;
82241900Smckusick struct proc *procp;
82352196Smckusick struct ucred *cred;
82438414Smckusick struct mbuf **mrp;
82538414Smckusick struct mbuf **mdp;
82638414Smckusick caddr_t *dposp;
82738414Smckusick {
82838414Smckusick register struct mbuf *m, *mrep;
82938414Smckusick register struct nfsreq *rep;
83048048Smckusick register u_long *tl;
83152196Smckusick register int i;
83241900Smckusick struct nfsmount *nmp;
83352196Smckusick struct mbuf *md, *mheadend;
83465623Sbostic struct nfsnode *np;
835*68653Smckusick char nickv[RPCX_NICKVERF];
83652196Smckusick time_t reqtime, waituntil;
83752196Smckusick caddr_t dpos, cp2;
83852196Smckusick int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type;
83952196Smckusick int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0;
840*68653Smckusick int verf_len, verf_type;
84152196Smckusick u_long xid;
84256286Smckusick u_quad_t frev;
843*68653Smckusick char *auth_str, *verf_str;
844*68653Smckusick NFSKERBKEY_T key; /* save session key */
84538414Smckusick
84652196Smckusick nmp = VFSTONFS(vp->v_mount);
84738414Smckusick MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
84841900Smckusick rep->r_nmp = nmp;
84938414Smckusick rep->r_vp = vp;
85041900Smckusick rep->r_procp = procp;
85152196Smckusick rep->r_procnum = procnum;
85252196Smckusick i = 0;
85352196Smckusick m = mrest;
85438414Smckusick while (m) {
85552196Smckusick i += m->m_len;
85638414Smckusick m = m->m_next;
85738414Smckusick }
85852196Smckusick mrest_len = i;
85952196Smckusick
86052196Smckusick /*
86152196Smckusick * Get the RPC header with authorization.
86252196Smckusick */
86352196Smckusick kerbauth:
864*68653Smckusick verf_str = auth_str = (char *)0;
86552196Smckusick if (nmp->nm_flag & NFSMNT_KERB) {
866*68653Smckusick verf_str = nickv;
867*68653Smckusick verf_len = sizeof (nickv);
868*68653Smckusick auth_type = RPCAUTH_KERB4;
869*68653Smckusick bzero((caddr_t)key, sizeof (key));
870*68653Smckusick if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str,
871*68653Smckusick &auth_len, verf_str, verf_len)) {
872*68653Smckusick error = nfs_getauth(nmp, rep, cred, &auth_str,
873*68653Smckusick &auth_len, verf_str, &verf_len, key);
87452196Smckusick if (error) {
87552196Smckusick free((caddr_t)rep, M_NFSREQ);
87652196Smckusick m_freem(mrest);
87752196Smckusick return (error);
87852196Smckusick }
87945281Smckusick }
88052196Smckusick } else {
88152196Smckusick auth_type = RPCAUTH_UNIX;
88253426Smckusick if (cred->cr_ngroups < 1)
88353426Smckusick panic("nfsreq nogrps");
88452196Smckusick auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
88552196Smckusick nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) +
88652196Smckusick 5 * NFSX_UNSIGNED;
88745281Smckusick }
888*68653Smckusick m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
889*68653Smckusick auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid);
89052196Smckusick if (auth_str)
89152196Smckusick free(auth_str, M_TEMP);
89252196Smckusick
89341900Smckusick /*
89452196Smckusick * For stream protocols, insert a Sun RPC Record Mark.
89541900Smckusick */
89652196Smckusick if (nmp->nm_sotype == SOCK_STREAM) {
89752196Smckusick M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
89852196Smckusick *mtod(m, u_long *) = htonl(0x80000000 |
89952196Smckusick (m->m_pkthdr.len - NFSX_UNSIGNED));
90041900Smckusick }
90152196Smckusick rep->r_mreq = m;
90252196Smckusick rep->r_xid = xid;
90352196Smckusick tryagain:
90452196Smckusick if (nmp->nm_flag & NFSMNT_SOFT)
90552196Smckusick rep->r_retry = nmp->nm_retry;
90652196Smckusick else
90752196Smckusick rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */
90852196Smckusick rep->r_rtt = rep->r_rexmit = 0;
90952196Smckusick if (proct[procnum] > 0)
91052196Smckusick rep->r_flags = R_TIMING;
91152196Smckusick else
91252196Smckusick rep->r_flags = 0;
91352196Smckusick rep->r_mrep = NULL;
91438414Smckusick
91540117Smckusick /*
91640117Smckusick * Do the client side RPC.
91740117Smckusick */
91840117Smckusick nfsstats.rpcrequests++;
91941900Smckusick /*
92041900Smckusick * Chain request into list of outstanding requests. Be sure
92141900Smckusick * to put it LAST so timer finds oldest requests first.
92241900Smckusick */
92352196Smckusick s = splsoftclock();
92467708Smckusick TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);
92552196Smckusick
92652196Smckusick /* Get send time for nqnfs */
92752196Smckusick reqtime = time.tv_sec;
92852196Smckusick
92940117Smckusick /*
93040117Smckusick * If backing off another request or avoiding congestion, don't
93140117Smckusick * send this one now but let timer do it. If not timing a request,
93240117Smckusick * do it now.
93340117Smckusick */
93452196Smckusick if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
93552196Smckusick (nmp->nm_flag & NFSMNT_DUMBTIMR) ||
93652196Smckusick nmp->nm_sent < nmp->nm_cwnd)) {
93740117Smckusick splx(s);
93841900Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED)
93952196Smckusick error = nfs_sndlock(&nmp->nm_flag, rep);
94052196Smckusick if (!error) {
94152196Smckusick m = m_copym(m, 0, M_COPYALL, M_WAIT);
94252196Smckusick error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
94352196Smckusick if (nmp->nm_soflags & PR_CONNREQUIRED)
94452196Smckusick nfs_sndunlock(&nmp->nm_flag);
94552196Smckusick }
94652196Smckusick if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
94752196Smckusick nmp->nm_sent += NFS_CWNDSCALE;
94852196Smckusick rep->r_flags |= R_SENT;
94952196Smckusick }
95052196Smckusick } else {
95141900Smckusick splx(s);
95252196Smckusick rep->r_rtt = -1;
95352196Smckusick }
95438414Smckusick
95538414Smckusick /*
95640117Smckusick * Wait for the reply from our send or the timer's.
95740117Smckusick */
95854610Smckusick if (!error || error == EPIPE)
95952196Smckusick error = nfs_reply(rep);
96038414Smckusick
96140117Smckusick /*
96240117Smckusick * RPC done, unlink the request.
96340117Smckusick */
96452196Smckusick s = splsoftclock();
96567708Smckusick TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
96638414Smckusick splx(s);
96741900Smckusick
96841900Smckusick /*
96956602Smckusick * Decrement the outstanding request count.
97056602Smckusick */
97156671Smckusick if (rep->r_flags & R_SENT) {
97256671Smckusick rep->r_flags &= ~R_SENT; /* paranoia */
97356602Smckusick nmp->nm_sent -= NFS_CWNDSCALE;
97456671Smckusick }
97556602Smckusick
97656602Smckusick /*
97741900Smckusick * If there was a successful reply and a tprintf msg.
97841900Smckusick * tprintf a response.
97941900Smckusick */
98047737Skarels if (!error && (rep->r_flags & R_TPRINTFMSG))
98147737Skarels nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
98247737Skarels "is alive again");
98345281Smckusick mrep = rep->r_mrep;
98452196Smckusick md = rep->r_md;
98552196Smckusick dpos = rep->r_dpos;
98652196Smckusick if (error) {
98752196Smckusick m_freem(rep->r_mreq);
98852196Smckusick free((caddr_t)rep, M_NFSREQ);
98938414Smckusick return (error);
99052196Smckusick }
99138414Smckusick
99238414Smckusick /*
99338414Smckusick * break down the rpc header and check if ok
99438414Smckusick */
995*68653Smckusick nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
99648048Smckusick if (*tl++ == rpc_msgdenied) {
99748048Smckusick if (*tl == rpc_mismatch)
99838414Smckusick error = EOPNOTSUPP;
99952196Smckusick else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
1000*68653Smckusick if (!failed_auth) {
100152196Smckusick failed_auth++;
100252196Smckusick mheadend->m_next = (struct mbuf *)0;
100352196Smckusick m_freem(mrep);
100452196Smckusick m_freem(rep->r_mreq);
100552196Smckusick goto kerbauth;
100652196Smckusick } else
100752196Smckusick error = EAUTH;
100852196Smckusick } else
100938414Smckusick error = EACCES;
101038414Smckusick m_freem(mrep);
101152196Smckusick m_freem(rep->r_mreq);
101252196Smckusick free((caddr_t)rep, M_NFSREQ);
101338414Smckusick return (error);
101438414Smckusick }
101552196Smckusick
101638414Smckusick /*
1017*68653Smckusick * Grab any Kerberos verifier, otherwise just throw it away.
101838414Smckusick */
1019*68653Smckusick verf_type = fxdr_unsigned(int, *tl++);
1020*68653Smckusick i = fxdr_unsigned(int, *tl);
1021*68653Smckusick if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) {
1022*68653Smckusick error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep);
1023*68653Smckusick if (error)
1024*68653Smckusick goto nfsmout;
1025*68653Smckusick } else if (i > 0)
1026*68653Smckusick nfsm_adv(nfsm_rndup(i));
102752196Smckusick nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
102838414Smckusick /* 0 == ok */
102948048Smckusick if (*tl == 0) {
103052196Smckusick nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
103148048Smckusick if (*tl != 0) {
103248048Smckusick error = fxdr_unsigned(int, *tl);
1033*68653Smckusick if ((nmp->nm_flag & NFSMNT_NFSV3) &&
1034*68653Smckusick error == NFSERR_TRYLATER) {
1035*68653Smckusick m_freem(mrep);
103652196Smckusick error = 0;
103752196Smckusick waituntil = time.tv_sec + trylater_delay;
103852196Smckusick while (time.tv_sec < waituntil)
103952196Smckusick (void) tsleep((caddr_t)&lbolt,
104052196Smckusick PSOCK, "nqnfstry", 0);
104152196Smckusick trylater_delay *= nfs_backoff[trylater_cnt];
104252196Smckusick if (trylater_cnt < 7)
104352196Smckusick trylater_cnt++;
104452196Smckusick goto tryagain;
104552196Smckusick }
104656286Smckusick
104756286Smckusick /*
104856286Smckusick * If the File Handle was stale, invalidate the
104956286Smckusick * lookup cache, just in case.
105056286Smckusick */
105156286Smckusick if (error == ESTALE)
105256286Smckusick cache_purge(vp);
1053*68653Smckusick if (nmp->nm_flag & NFSMNT_NFSV3) {
1054*68653Smckusick *mrp = mrep;
1055*68653Smckusick *mdp = md;
1056*68653Smckusick *dposp = dpos;
1057*68653Smckusick error |= NFSERR_RETERR;
1058*68653Smckusick } else
1059*68653Smckusick m_freem(mrep);
106052196Smckusick m_freem(rep->r_mreq);
106152196Smckusick free((caddr_t)rep, M_NFSREQ);
106238414Smckusick return (error);
106338414Smckusick }
106452196Smckusick
106552196Smckusick /*
106652196Smckusick * For nqnfs, get any lease in reply
106752196Smckusick */
106852196Smckusick if (nmp->nm_flag & NFSMNT_NQNFS) {
106952196Smckusick nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
107052196Smckusick if (*tl) {
107152196Smckusick np = VTONFS(vp);
107252196Smckusick nqlflag = fxdr_unsigned(int, *tl);
107352196Smckusick nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED);
107452196Smckusick cachable = fxdr_unsigned(int, *tl++);
107552196Smckusick reqtime += fxdr_unsigned(int, *tl++);
107652196Smckusick if (reqtime > time.tv_sec) {
107756286Smckusick fxdr_hyper(tl, &frev);
107856286Smckusick nqnfs_clientlease(nmp, np, nqlflag,
107956286Smckusick cachable, reqtime, frev);
108052196Smckusick }
108152196Smckusick }
108252196Smckusick }
108338414Smckusick *mrp = mrep;
108438414Smckusick *mdp = md;
108538414Smckusick *dposp = dpos;
108652196Smckusick m_freem(rep->r_mreq);
108752196Smckusick FREE((caddr_t)rep, M_NFSREQ);
108838414Smckusick return (0);
108938414Smckusick }
109038414Smckusick m_freem(mrep);
1091*68653Smckusick error = EPROTONOSUPPORT;
1092*68653Smckusick nfsmout:
109352196Smckusick m_freem(rep->r_mreq);
109452196Smckusick free((caddr_t)rep, M_NFSREQ);
109538414Smckusick return (error);
109638414Smckusick }
109738414Smckusick
109838414Smckusick /*
109938414Smckusick * Generate the rpc reply header
110038414Smckusick * siz arg. is used to decide if adding a cluster is worthwhile
110138414Smckusick */
1102*68653Smckusick int
nfs_rephead(siz,nd,slp,err,cache,frev,mrq,mbp,bposp)1103*68653Smckusick nfs_rephead(siz, nd, slp, err, cache, frev, mrq, mbp, bposp)
110438414Smckusick int siz;
1105*68653Smckusick struct nfsrv_descript *nd;
1106*68653Smckusick struct nfssvc_sock *slp;
110738414Smckusick int err;
110852196Smckusick int cache;
110952196Smckusick u_quad_t *frev;
111038414Smckusick struct mbuf **mrq;
111138414Smckusick struct mbuf **mbp;
111238414Smckusick caddr_t *bposp;
111338414Smckusick {
111448048Smckusick register u_long *tl;
111552196Smckusick register struct mbuf *mreq;
111639494Smckusick caddr_t bpos;
111752196Smckusick struct mbuf *mb, *mb2;
111838414Smckusick
111952196Smckusick MGETHDR(mreq, M_WAIT, MT_DATA);
112038414Smckusick mb = mreq;
112152196Smckusick /*
112252196Smckusick * If this is a big reply, use a cluster else
112352196Smckusick * try and leave leading space for the lower level headers.
112452196Smckusick */
112552196Smckusick siz += RPC_REPLYSIZ;
112652196Smckusick if (siz >= MINCLSIZE) {
112741900Smckusick MCLGET(mreq, M_WAIT);
112852196Smckusick } else
112952196Smckusick mreq->m_data += max_hdr;
113048048Smckusick tl = mtod(mreq, u_long *);
1131*68653Smckusick mreq->m_len = 6 * NFSX_UNSIGNED;
1132*68653Smckusick bpos = ((caddr_t)tl) + mreq->m_len;
1133*68653Smckusick *tl++ = txdr_unsigned(nd->nd_retxid);
113448048Smckusick *tl++ = rpc_reply;
1135*68653Smckusick if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
113648048Smckusick *tl++ = rpc_msgdenied;
1137*68653Smckusick if (err & NFSERR_AUTHERR) {
113852196Smckusick *tl++ = rpc_autherr;
1139*68653Smckusick *tl = txdr_unsigned(err & ~NFSERR_AUTHERR);
114052196Smckusick mreq->m_len -= NFSX_UNSIGNED;
114152196Smckusick bpos -= NFSX_UNSIGNED;
114252196Smckusick } else {
114352196Smckusick *tl++ = rpc_mismatch;
1144*68653Smckusick *tl++ = txdr_unsigned(RPC_VER2);
1145*68653Smckusick *tl = txdr_unsigned(RPC_VER2);
114652196Smckusick }
114738414Smckusick } else {
114848048Smckusick *tl++ = rpc_msgaccepted;
1149*68653Smckusick
1150*68653Smckusick /*
1151*68653Smckusick * For Kerberos authentication, we must send the nickname
1152*68653Smckusick * verifier back, otherwise just RPCAUTH_NULL.
1153*68653Smckusick */
1154*68653Smckusick if (nd->nd_flag & ND_KERBFULL) {
1155*68653Smckusick register struct nfsuid *nuidp;
1156*68653Smckusick struct timeval ktvin, ktvout;
1157*68653Smckusick NFSKERBKEYSCHED_T keys; /* stores key schedule */
1158*68653Smckusick
1159*68653Smckusick for (nuidp = NUIDHASH(slp, nd->nd_cr.cr_uid)->lh_first;
1160*68653Smckusick nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
1161*68653Smckusick if (nuidp->nu_cr.cr_uid == nd->nd_cr.cr_uid &&
1162*68653Smckusick (!nd->nd_nam2 || netaddr_match(NU_NETFAM(nuidp),
1163*68653Smckusick &nuidp->nu_haddr, nd->nd_nam2)))
1164*68653Smckusick break;
1165*68653Smckusick }
1166*68653Smckusick if (nuidp) {
1167*68653Smckusick ktvin.tv_sec =
1168*68653Smckusick txdr_unsigned(nuidp->nu_timestamp.tv_sec - 1);
1169*68653Smckusick ktvin.tv_usec =
1170*68653Smckusick txdr_unsigned(nuidp->nu_timestamp.tv_usec);
1171*68653Smckusick
1172*68653Smckusick /*
1173*68653Smckusick * Encrypt the timestamp in ecb mode using the
1174*68653Smckusick * session key.
1175*68653Smckusick */
1176*68653Smckusick #ifdef NFSKERB
1177*68653Smckusick XXX
1178*68653Smckusick #endif
1179*68653Smckusick
1180*68653Smckusick *tl++ = rpc_auth_kerb;
1181*68653Smckusick *tl++ = txdr_unsigned(3 * NFSX_UNSIGNED);
1182*68653Smckusick *tl = ktvout.tv_sec;
1183*68653Smckusick nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);
1184*68653Smckusick *tl++ = ktvout.tv_usec;
1185*68653Smckusick *tl++ = txdr_unsigned(nuidp->nu_cr.cr_uid);
1186*68653Smckusick } else {
1187*68653Smckusick *tl++ = 0;
1188*68653Smckusick *tl++ = 0;
1189*68653Smckusick }
1190*68653Smckusick } else {
1191*68653Smckusick *tl++ = 0;
1192*68653Smckusick *tl++ = 0;
1193*68653Smckusick }
119438414Smckusick switch (err) {
119538414Smckusick case EPROGUNAVAIL:
119648048Smckusick *tl = txdr_unsigned(RPC_PROGUNAVAIL);
119738414Smckusick break;
119838414Smckusick case EPROGMISMATCH:
119948048Smckusick *tl = txdr_unsigned(RPC_PROGMISMATCH);
1200*68653Smckusick nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
1201*68653Smckusick if (nd->nd_flag & ND_NQNFS) {
1202*68653Smckusick *tl++ = txdr_unsigned(3);
1203*68653Smckusick *tl = txdr_unsigned(3);
1204*68653Smckusick } else {
1205*68653Smckusick *tl++ = txdr_unsigned(2);
1206*68653Smckusick *tl = txdr_unsigned(3);
1207*68653Smckusick }
120838414Smckusick break;
120938414Smckusick case EPROCUNAVAIL:
121048048Smckusick *tl = txdr_unsigned(RPC_PROCUNAVAIL);
121138414Smckusick break;
1212*68653Smckusick case EBADRPC:
1213*68653Smckusick *tl = txdr_unsigned(RPC_GARBAGE);
1214*68653Smckusick break;
121538414Smckusick default:
121648048Smckusick *tl = 0;
1217*68653Smckusick if (err != NFSERR_RETVOID) {
121848048Smckusick nfsm_build(tl, u_long *, NFSX_UNSIGNED);
121952196Smckusick if (err)
1220*68653Smckusick *tl = txdr_unsigned(nfsrv_errmap(nd, err));
122152196Smckusick else
1222*68653Smckusick *tl = 0;
122338414Smckusick }
122438414Smckusick break;
122538414Smckusick };
122638414Smckusick }
122752196Smckusick
122852196Smckusick /*
122952196Smckusick * For nqnfs, piggyback lease as requested.
123052196Smckusick */
1231*68653Smckusick if ((nd->nd_flag & ND_NQNFS) && err == 0) {
1232*68653Smckusick if (nd->nd_flag & ND_LEASE) {
1233*68653Smckusick nfsm_build(tl, u_long *, 5 * NFSX_UNSIGNED);
1234*68653Smckusick *tl++ = txdr_unsigned(nd->nd_flag & ND_LEASE);
123552196Smckusick *tl++ = txdr_unsigned(cache);
123652196Smckusick *tl++ = txdr_unsigned(nd->nd_duration);
123752196Smckusick txdr_hyper(frev, tl);
123852196Smckusick } else {
123952196Smckusick nfsm_build(tl, u_long *, NFSX_UNSIGNED);
124052196Smckusick *tl = 0;
124152196Smckusick }
124252196Smckusick }
124338414Smckusick *mrq = mreq;
124438414Smckusick *mbp = mb;
124538414Smckusick *bposp = bpos;
1246*68653Smckusick if (err != 0 && err != NFSERR_RETVOID)
124738414Smckusick nfsstats.srvrpc_errs++;
124838414Smckusick return (0);
124938414Smckusick }
125038414Smckusick
125138414Smckusick /*
125238414Smckusick * Nfs timer routine
125338414Smckusick * Scan the nfsreq list and retransmit any requests that have timed out
125438414Smckusick * To avoid retransmission attempts on STREAM sockets (in the future) make
125540117Smckusick * sure to set the r_retry field to 0 (implies nm_retry == 0).
125638414Smckusick */
125755079Smckusick void
nfs_timer(arg)125855079Smckusick nfs_timer(arg)
1259*68653Smckusick void *arg; /* never used */
126038414Smckusick {
126138414Smckusick register struct nfsreq *rep;
126238414Smckusick register struct mbuf *m;
126338414Smckusick register struct socket *so;
126441900Smckusick register struct nfsmount *nmp;
126552196Smckusick register int timeo;
1266*68653Smckusick register struct nfssvc_sock *slp;
126752196Smckusick static long lasttime = 0;
126840117Smckusick int s, error;
1269*68653Smckusick u_quad_t cur_usec;
127038414Smckusick
127138414Smckusick s = splnet();
127267708Smckusick for (rep = nfs_reqq.tqh_first; rep != 0; rep = rep->r_chain.tqe_next) {
127341900Smckusick nmp = rep->r_nmp;
127452196Smckusick if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
127541900Smckusick continue;
127652196Smckusick if (nfs_sigintr(nmp, rep, rep->r_procp)) {
127741900Smckusick rep->r_flags |= R_SOFTTERM;
127841900Smckusick continue;
127941900Smckusick }
128052196Smckusick if (rep->r_rtt >= 0) {
128152196Smckusick rep->r_rtt++;
128252196Smckusick if (nmp->nm_flag & NFSMNT_DUMBTIMR)
128352196Smckusick timeo = nmp->nm_timeo;
128452196Smckusick else
128552196Smckusick timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
128652196Smckusick if (nmp->nm_timeouts > 0)
128752196Smckusick timeo *= nfs_backoff[nmp->nm_timeouts - 1];
128852196Smckusick if (rep->r_rtt <= timeo)
128952196Smckusick continue;
129052196Smckusick if (nmp->nm_timeouts < 8)
129152196Smckusick nmp->nm_timeouts++;
129240117Smckusick }
129341900Smckusick /*
129441900Smckusick * Check for server not responding
129541900Smckusick */
129641900Smckusick if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
129752196Smckusick rep->r_rexmit > nmp->nm_deadthresh) {
129847737Skarels nfs_msg(rep->r_procp,
129947737Skarels nmp->nm_mountp->mnt_stat.f_mntfromname,
130047737Skarels "not responding");
130141900Smckusick rep->r_flags |= R_TPRINTFMSG;
130241900Smckusick }
130343351Smckusick if (rep->r_rexmit >= rep->r_retry) { /* too many */
130441900Smckusick nfsstats.rpctimeouts++;
130541900Smckusick rep->r_flags |= R_SOFTTERM;
130641900Smckusick continue;
130741900Smckusick }
130852196Smckusick if (nmp->nm_sotype != SOCK_DGRAM) {
130952196Smckusick if (++rep->r_rexmit > NFS_MAXREXMIT)
131052196Smckusick rep->r_rexmit = NFS_MAXREXMIT;
131143351Smckusick continue;
131252196Smckusick }
131352196Smckusick if ((so = nmp->nm_so) == NULL)
131452196Smckusick continue;
131541900Smckusick
131641900Smckusick /*
131741900Smckusick * If there is enough space and the window allows..
131841900Smckusick * Resend it
131952196Smckusick * Set r_rtt to -1 in case we fail to send it now.
132041900Smckusick */
132152196Smckusick rep->r_rtt = -1;
132241900Smckusick if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
132352196Smckusick ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
132452196Smckusick (rep->r_flags & R_SENT) ||
132552196Smckusick nmp->nm_sent < nmp->nm_cwnd) &&
132652196Smckusick (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
132741900Smckusick if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
132841900Smckusick error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
132952196Smckusick (struct mbuf *)0, (struct mbuf *)0);
133041900Smckusick else
133141900Smckusick error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
133252196Smckusick nmp->nm_nam, (struct mbuf *)0);
133341900Smckusick if (error) {
133441900Smckusick if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
133541900Smckusick so->so_error = 0;
133641900Smckusick } else {
133741900Smckusick /*
133852196Smckusick * Iff first send, start timing
133952196Smckusick * else turn timing off, backoff timer
134052196Smckusick * and divide congestion window by 2.
134141900Smckusick */
134252196Smckusick if (rep->r_flags & R_SENT) {
134352196Smckusick rep->r_flags &= ~R_TIMING;
134452196Smckusick if (++rep->r_rexmit > NFS_MAXREXMIT)
134552196Smckusick rep->r_rexmit = NFS_MAXREXMIT;
134652196Smckusick nmp->nm_cwnd >>= 1;
134752196Smckusick if (nmp->nm_cwnd < NFS_CWNDSCALE)
134852196Smckusick nmp->nm_cwnd = NFS_CWNDSCALE;
134952196Smckusick nfsstats.rpcretries++;
135052196Smckusick } else {
135152196Smckusick rep->r_flags |= R_SENT;
135252196Smckusick nmp->nm_sent += NFS_CWNDSCALE;
135352196Smckusick }
135452196Smckusick rep->r_rtt = 0;
135541900Smckusick }
135641900Smckusick }
135740117Smckusick }
135852196Smckusick
135952196Smckusick /*
136052196Smckusick * Call the nqnfs server timer once a second to handle leases.
136152196Smckusick */
136252196Smckusick if (lasttime != time.tv_sec) {
136352196Smckusick lasttime = time.tv_sec;
136452196Smckusick nqnfs_serverd();
136552196Smckusick }
1366*68653Smckusick
1367*68653Smckusick /*
1368*68653Smckusick * Scan the write gathering queues for writes that need to be
1369*68653Smckusick * completed now.
1370*68653Smckusick */
1371*68653Smckusick cur_usec = (u_quad_t)time.tv_sec * 1000000 + (u_quad_t)time.tv_usec;
1372*68653Smckusick for (slp = nfssvc_sockhead.tqh_first; slp != 0;
1373*68653Smckusick slp = slp->ns_chain.tqe_next) {
1374*68653Smckusick if (slp->ns_tq.lh_first && slp->ns_tq.lh_first->nd_time<=cur_usec)
1375*68653Smckusick nfsrv_wakenfsd(slp);
1376*68653Smckusick }
137740117Smckusick splx(s);
1378*68653Smckusick timeout(nfs_timer, (void *)0, nfs_ticks);
137940117Smckusick }
138040117Smckusick
138140117Smckusick /*
138252196Smckusick * Test for a termination condition pending on the process.
138352196Smckusick * This is used for NFSMNT_INT mounts.
138440117Smckusick */
1385*68653Smckusick int
nfs_sigintr(nmp,rep,p)138652196Smckusick nfs_sigintr(nmp, rep, p)
138752196Smckusick struct nfsmount *nmp;
138852196Smckusick struct nfsreq *rep;
138952196Smckusick register struct proc *p;
139052196Smckusick {
139140117Smckusick
139252196Smckusick if (rep && (rep->r_flags & R_SOFTTERM))
139352196Smckusick return (EINTR);
139452196Smckusick if (!(nmp->nm_flag & NFSMNT_INT))
139552196Smckusick return (0);
139664595Sbostic if (p && p->p_siglist &&
139764595Sbostic (((p->p_siglist & ~p->p_sigmask) & ~p->p_sigignore) &
139852196Smckusick NFSINT_SIGMASK))
139952196Smckusick return (EINTR);
140052196Smckusick return (0);
140152196Smckusick }
140252196Smckusick
140340117Smckusick /*
140452196Smckusick * Lock a socket against others.
140552196Smckusick * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
140652196Smckusick * and also to avoid race conditions between the processes with nfs requests
140752196Smckusick * in progress when a reconnect is necessary.
140840117Smckusick */
1409*68653Smckusick int
nfs_sndlock(flagp,rep)141052196Smckusick nfs_sndlock(flagp, rep)
141152196Smckusick register int *flagp;
141252196Smckusick struct nfsreq *rep;
141352196Smckusick {
141452196Smckusick struct proc *p;
141557786Smckusick int slpflag = 0, slptimeo = 0;
141640117Smckusick
141757786Smckusick if (rep) {
141852196Smckusick p = rep->r_procp;
141957786Smckusick if (rep->r_nmp->nm_flag & NFSMNT_INT)
142057786Smckusick slpflag = PCATCH;
142157786Smckusick } else
142252196Smckusick p = (struct proc *)0;
142352196Smckusick while (*flagp & NFSMNT_SNDLOCK) {
142452196Smckusick if (nfs_sigintr(rep->r_nmp, rep, p))
142552196Smckusick return (EINTR);
142652196Smckusick *flagp |= NFSMNT_WANTSND;
142757786Smckusick (void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsndlck",
142857786Smckusick slptimeo);
142957786Smckusick if (slpflag == PCATCH) {
143057786Smckusick slpflag = 0;
143157786Smckusick slptimeo = 2 * hz;
143257786Smckusick }
143352196Smckusick }
143452196Smckusick *flagp |= NFSMNT_SNDLOCK;
143552196Smckusick return (0);
143652196Smckusick }
143752196Smckusick
143852196Smckusick /*
143952196Smckusick * Unlock the stream socket for others.
144052196Smckusick */
144152196Smckusick void
nfs_sndunlock(flagp)144252196Smckusick nfs_sndunlock(flagp)
144352196Smckusick register int *flagp;
144440117Smckusick {
144540117Smckusick
144652196Smckusick if ((*flagp & NFSMNT_SNDLOCK) == 0)
144752196Smckusick panic("nfs sndunlock");
144852196Smckusick *flagp &= ~NFSMNT_SNDLOCK;
144952196Smckusick if (*flagp & NFSMNT_WANTSND) {
145052196Smckusick *flagp &= ~NFSMNT_WANTSND;
145152196Smckusick wakeup((caddr_t)flagp);
145240117Smckusick }
145352196Smckusick }
145452196Smckusick
1455*68653Smckusick int
nfs_rcvlock(rep)145652196Smckusick nfs_rcvlock(rep)
145752196Smckusick register struct nfsreq *rep;
145852196Smckusick {
145952196Smckusick register int *flagp = &rep->r_nmp->nm_flag;
146057786Smckusick int slpflag, slptimeo = 0;
146152196Smckusick
146257786Smckusick if (*flagp & NFSMNT_INT)
146357786Smckusick slpflag = PCATCH;
146457786Smckusick else
146557786Smckusick slpflag = 0;
146652196Smckusick while (*flagp & NFSMNT_RCVLOCK) {
146752196Smckusick if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp))
146852196Smckusick return (EINTR);
146952196Smckusick *flagp |= NFSMNT_WANTRCV;
147057786Smckusick (void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsrcvlk",
147157786Smckusick slptimeo);
147257786Smckusick if (slpflag == PCATCH) {
147357786Smckusick slpflag = 0;
147457786Smckusick slptimeo = 2 * hz;
147557786Smckusick }
147640117Smckusick }
147752196Smckusick *flagp |= NFSMNT_RCVLOCK;
147852196Smckusick return (0);
147952196Smckusick }
148040117Smckusick
148152196Smckusick /*
148252196Smckusick * Unlock the stream socket for others.
148352196Smckusick */
148452196Smckusick void
nfs_rcvunlock(flagp)148552196Smckusick nfs_rcvunlock(flagp)
148652196Smckusick register int *flagp;
148752196Smckusick {
148852196Smckusick
148952196Smckusick if ((*flagp & NFSMNT_RCVLOCK) == 0)
149052196Smckusick panic("nfs rcvunlock");
149152196Smckusick *flagp &= ~NFSMNT_RCVLOCK;
149252196Smckusick if (*flagp & NFSMNT_WANTRCV) {
149352196Smckusick *flagp &= ~NFSMNT_WANTRCV;
149452196Smckusick wakeup((caddr_t)flagp);
149552196Smckusick }
149652196Smckusick }
149752196Smckusick
149852196Smckusick /*
149952196Smckusick * Check for badly aligned mbuf data areas and
150052196Smckusick * realign data in an mbuf list by copying the data areas up, as required.
150152196Smckusick */
150252196Smckusick void
nfs_realign(m,hsiz)150352196Smckusick nfs_realign(m, hsiz)
150452196Smckusick register struct mbuf *m;
150552196Smckusick int hsiz;
150647737Skarels {
150752196Smckusick register struct mbuf *m2;
150852196Smckusick register int siz, mlen, olen;
150952196Smckusick register caddr_t tcp, fcp;
151052196Smckusick struct mbuf *mnew;
151147737Skarels
151252196Smckusick while (m) {
151352196Smckusick /*
151452196Smckusick * This never happens for UDP, rarely happens for TCP
151552196Smckusick * but frequently happens for iso transport.
151652196Smckusick */
151752196Smckusick if ((m->m_len & 0x3) || (mtod(m, int) & 0x3)) {
151852196Smckusick olen = m->m_len;
151952196Smckusick fcp = mtod(m, caddr_t);
152063481Smckusick if ((int)fcp & 0x3) {
152163481Smckusick m->m_flags &= ~M_PKTHDR;
152263481Smckusick if (m->m_flags & M_EXT)
152363481Smckusick m->m_data = m->m_ext.ext_buf +
152463481Smckusick ((m->m_ext.ext_size - olen) & ~0x3);
152563481Smckusick else
152663481Smckusick m->m_data = m->m_dat;
152763481Smckusick }
152852196Smckusick m->m_len = 0;
152952196Smckusick tcp = mtod(m, caddr_t);
153052196Smckusick mnew = m;
153152196Smckusick m2 = m->m_next;
153252196Smckusick
153352196Smckusick /*
153452196Smckusick * If possible, only put the first invariant part
153552196Smckusick * of the RPC header in the first mbuf.
153652196Smckusick */
153763481Smckusick mlen = M_TRAILINGSPACE(m);
153863481Smckusick if (olen <= hsiz && mlen > hsiz)
153952196Smckusick mlen = hsiz;
154052196Smckusick
154152196Smckusick /*
154252196Smckusick * Loop through the mbuf list consolidating data.
154352196Smckusick */
154452196Smckusick while (m) {
154552196Smckusick while (olen > 0) {
154652196Smckusick if (mlen == 0) {
154752196Smckusick m2->m_flags &= ~M_PKTHDR;
154852196Smckusick if (m2->m_flags & M_EXT)
154952196Smckusick m2->m_data = m2->m_ext.ext_buf;
155052196Smckusick else
155152196Smckusick m2->m_data = m2->m_dat;
155252196Smckusick m2->m_len = 0;
155352196Smckusick mlen = M_TRAILINGSPACE(m2);
155452196Smckusick tcp = mtod(m2, caddr_t);
155552196Smckusick mnew = m2;
155652196Smckusick m2 = m2->m_next;
155752196Smckusick }
155855057Spendry siz = min(mlen, olen);
155952196Smckusick if (tcp != fcp)
156052196Smckusick bcopy(fcp, tcp, siz);
156152196Smckusick mnew->m_len += siz;
156252196Smckusick mlen -= siz;
156352196Smckusick olen -= siz;
156452196Smckusick tcp += siz;
156552196Smckusick fcp += siz;
156652196Smckusick }
156752196Smckusick m = m->m_next;
156852196Smckusick if (m) {
156952196Smckusick olen = m->m_len;
157052196Smckusick fcp = mtod(m, caddr_t);
157152196Smckusick }
157252196Smckusick }
157352196Smckusick
157452196Smckusick /*
157552196Smckusick * Finally, set m_len == 0 for any trailing mbufs that have
157652196Smckusick * been copied out of.
157752196Smckusick */
157852196Smckusick while (m2) {
157952196Smckusick m2->m_len = 0;
158052196Smckusick m2 = m2->m_next;
158152196Smckusick }
158252196Smckusick return;
158352196Smckusick }
158452196Smckusick m = m->m_next;
158552196Smckusick }
158647737Skarels }
158747737Skarels
158841900Smckusick /*
158952196Smckusick * Socket upcall routine for the nfsd sockets.
159052196Smckusick * The caddr_t arg is a pointer to the "struct nfssvc_sock".
159152196Smckusick * Essentially do as much as possible non-blocking, else punt and it will
159252196Smckusick * be called with M_WAIT from an nfsd.
159341900Smckusick */
159452196Smckusick void
nfsrv_rcv(so,arg,waitflag)159552196Smckusick nfsrv_rcv(so, arg, waitflag)
159652196Smckusick struct socket *so;
159752196Smckusick caddr_t arg;
159852196Smckusick int waitflag;
159938414Smckusick {
160052196Smckusick register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
160152196Smckusick register struct mbuf *m;
160252196Smckusick struct mbuf *mp, *nam;
160352196Smckusick struct uio auio;
160452196Smckusick int flags, error;
160540117Smckusick
160652903Smckusick if ((slp->ns_flag & SLP_VALID) == 0)
160752903Smckusick return;
160852903Smckusick #ifdef notdef
160952903Smckusick /*
161052903Smckusick * Define this to test for nfsds handling this under heavy load.
161152903Smckusick */
161252903Smckusick if (waitflag == M_DONTWAIT) {
161352903Smckusick slp->ns_flag |= SLP_NEEDQ; goto dorecs;
161452903Smckusick }
161552903Smckusick #endif
161652932Smckusick auio.uio_procp = NULL;
161752196Smckusick if (so->so_type == SOCK_STREAM) {
161852196Smckusick /*
161952196Smckusick * If there are already records on the queue, defer soreceive()
162052196Smckusick * to an nfsd so that there is feedback to the TCP layer that
162152196Smckusick * the nfs servers are heavily loaded.
162252196Smckusick */
162352196Smckusick if (slp->ns_rec && waitflag == M_DONTWAIT) {
162452196Smckusick slp->ns_flag |= SLP_NEEDQ;
162552903Smckusick goto dorecs;
162652196Smckusick }
162752196Smckusick
162852196Smckusick /*
162952196Smckusick * Do soreceive().
163052196Smckusick */
163152196Smckusick auio.uio_resid = 1000000000;
163252196Smckusick flags = MSG_DONTWAIT;
163352196Smckusick error = soreceive(so, &nam, &auio, &mp, (struct mbuf **)0, &flags);
163452196Smckusick if (error || mp == (struct mbuf *)0) {
163552903Smckusick if (error == EWOULDBLOCK)
163652903Smckusick slp->ns_flag |= SLP_NEEDQ;
163752903Smckusick else
163852196Smckusick slp->ns_flag |= SLP_DISCONN;
163952196Smckusick goto dorecs;
164052196Smckusick }
164152196Smckusick m = mp;
164252196Smckusick if (slp->ns_rawend) {
164352196Smckusick slp->ns_rawend->m_next = m;
164452196Smckusick slp->ns_cc += 1000000000 - auio.uio_resid;
164552196Smckusick } else {
164652196Smckusick slp->ns_raw = m;
164752196Smckusick slp->ns_cc = 1000000000 - auio.uio_resid;
164852196Smckusick }
164952196Smckusick while (m->m_next)
165052196Smckusick m = m->m_next;
165152196Smckusick slp->ns_rawend = m;
165252196Smckusick
165352196Smckusick /*
165452196Smckusick * Now try and parse record(s) out of the raw stream data.
165552196Smckusick */
1656*68653Smckusick error = nfsrv_getstream(slp, waitflag);
1657*68653Smckusick if (error) {
165852196Smckusick if (error == EPERM)
165952196Smckusick slp->ns_flag |= SLP_DISCONN;
166052903Smckusick else
166152196Smckusick slp->ns_flag |= SLP_NEEDQ;
166252196Smckusick }
166352196Smckusick } else {
166452196Smckusick do {
166552196Smckusick auio.uio_resid = 1000000000;
166652196Smckusick flags = MSG_DONTWAIT;
166752196Smckusick error = soreceive(so, &nam, &auio, &mp,
166852196Smckusick (struct mbuf **)0, &flags);
166952196Smckusick if (mp) {
167052196Smckusick nfs_realign(mp, 10 * NFSX_UNSIGNED);
167152196Smckusick if (nam) {
167252196Smckusick m = nam;
167352196Smckusick m->m_next = mp;
167452196Smckusick } else
167552196Smckusick m = mp;
167652196Smckusick if (slp->ns_recend)
167752196Smckusick slp->ns_recend->m_nextpkt = m;
167852196Smckusick else
167952196Smckusick slp->ns_rec = m;
168052196Smckusick slp->ns_recend = m;
168152196Smckusick m->m_nextpkt = (struct mbuf *)0;
168252196Smckusick }
168352196Smckusick if (error) {
168452196Smckusick if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
168552196Smckusick && error != EWOULDBLOCK) {
168652196Smckusick slp->ns_flag |= SLP_DISCONN;
168752903Smckusick goto dorecs;
168852196Smckusick }
168952196Smckusick }
169052196Smckusick } while (mp);
169140117Smckusick }
169252196Smckusick
169352196Smckusick /*
169452196Smckusick * Now try and process the request records, non-blocking.
169552196Smckusick */
169652196Smckusick dorecs:
169752903Smckusick if (waitflag == M_DONTWAIT &&
169852903Smckusick (slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN))))
169952196Smckusick nfsrv_wakenfsd(slp);
170041900Smckusick }
170140117Smckusick
170241900Smckusick /*
170352196Smckusick * Try and extract an RPC request from the mbuf data list received on a
170452196Smckusick * stream socket. The "waitflag" argument indicates whether or not it
170552196Smckusick * can sleep.
170641900Smckusick */
1707*68653Smckusick int
nfsrv_getstream(slp,waitflag)170852196Smckusick nfsrv_getstream(slp, waitflag)
170952196Smckusick register struct nfssvc_sock *slp;
171052196Smckusick int waitflag;
171141900Smckusick {
1712*68653Smckusick register struct mbuf *m, **mpp;
171352196Smckusick register char *cp1, *cp2;
171452196Smckusick register int len;
1715*68653Smckusick struct mbuf *om, *m2, *recm = 0;
171652196Smckusick u_long recmark;
171741900Smckusick
171852196Smckusick if (slp->ns_flag & SLP_GETSTREAM)
171952196Smckusick panic("nfs getstream");
172052196Smckusick slp->ns_flag |= SLP_GETSTREAM;
172152196Smckusick for (;;) {
172252196Smckusick if (slp->ns_reclen == 0) {
172352196Smckusick if (slp->ns_cc < NFSX_UNSIGNED) {
172452196Smckusick slp->ns_flag &= ~SLP_GETSTREAM;
172552196Smckusick return (0);
172652196Smckusick }
172752196Smckusick m = slp->ns_raw;
172852196Smckusick if (m->m_len >= NFSX_UNSIGNED) {
172952196Smckusick bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED);
173052196Smckusick m->m_data += NFSX_UNSIGNED;
173152196Smckusick m->m_len -= NFSX_UNSIGNED;
173252196Smckusick } else {
173352196Smckusick cp1 = (caddr_t)&recmark;
173452196Smckusick cp2 = mtod(m, caddr_t);
173552196Smckusick while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
173652196Smckusick while (m->m_len == 0) {
173752196Smckusick m = m->m_next;
173852196Smckusick cp2 = mtod(m, caddr_t);
173952196Smckusick }
174052196Smckusick *cp1++ = *cp2++;
174152196Smckusick m->m_data++;
174252196Smckusick m->m_len--;
174352196Smckusick }
174452196Smckusick }
174552196Smckusick slp->ns_cc -= NFSX_UNSIGNED;
1746*68653Smckusick recmark = ntohl(recmark);
1747*68653Smckusick slp->ns_reclen = recmark & ~0x80000000;
1748*68653Smckusick if (recmark & 0x80000000)
1749*68653Smckusick slp->ns_flag |= SLP_LASTFRAG;
1750*68653Smckusick else
1751*68653Smckusick slp->ns_flag &= ~SLP_LASTFRAG;
175252196Smckusick if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
175352196Smckusick slp->ns_flag &= ~SLP_GETSTREAM;
175452196Smckusick return (EPERM);
175552196Smckusick }
175652196Smckusick }
175752196Smckusick
175852196Smckusick /*
175952196Smckusick * Now get the record part.
176052196Smckusick */
176152196Smckusick if (slp->ns_cc == slp->ns_reclen) {
176252196Smckusick recm = slp->ns_raw;
176352196Smckusick slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
176452196Smckusick slp->ns_cc = slp->ns_reclen = 0;
176552196Smckusick } else if (slp->ns_cc > slp->ns_reclen) {
176652196Smckusick len = 0;
176752196Smckusick m = slp->ns_raw;
176852196Smckusick om = (struct mbuf *)0;
176952196Smckusick while (len < slp->ns_reclen) {
177052196Smckusick if ((len + m->m_len) > slp->ns_reclen) {
177152196Smckusick m2 = m_copym(m, 0, slp->ns_reclen - len,
177252196Smckusick waitflag);
177352196Smckusick if (m2) {
177452196Smckusick if (om) {
177552196Smckusick om->m_next = m2;
177652196Smckusick recm = slp->ns_raw;
177752196Smckusick } else
177852196Smckusick recm = m2;
177952196Smckusick m->m_data += slp->ns_reclen - len;
178052196Smckusick m->m_len -= slp->ns_reclen - len;
178152196Smckusick len = slp->ns_reclen;
178252196Smckusick } else {
178352196Smckusick slp->ns_flag &= ~SLP_GETSTREAM;
178452196Smckusick return (EWOULDBLOCK);
178552196Smckusick }
178652196Smckusick } else if ((len + m->m_len) == slp->ns_reclen) {
178752196Smckusick om = m;
178852196Smckusick len += m->m_len;
178952196Smckusick m = m->m_next;
179052196Smckusick recm = slp->ns_raw;
179152196Smckusick om->m_next = (struct mbuf *)0;
179252196Smckusick } else {
179352196Smckusick om = m;
179452196Smckusick len += m->m_len;
179552196Smckusick m = m->m_next;
179652196Smckusick }
179752196Smckusick }
179852196Smckusick slp->ns_raw = m;
179952196Smckusick slp->ns_cc -= len;
180052196Smckusick slp->ns_reclen = 0;
180152196Smckusick } else {
180252196Smckusick slp->ns_flag &= ~SLP_GETSTREAM;
180352196Smckusick return (0);
180452196Smckusick }
1805*68653Smckusick
1806*68653Smckusick /*
1807*68653Smckusick * Accumulate the fragments into a record.
1808*68653Smckusick */
1809*68653Smckusick mpp = &slp->ns_frag;
1810*68653Smckusick while (*mpp)
1811*68653Smckusick mpp = &((*mpp)->m_next);
1812*68653Smckusick *mpp = recm;
1813*68653Smckusick if (slp->ns_flag & SLP_LASTFRAG) {
1814*68653Smckusick nfs_realign(slp->ns_frag, 10 * NFSX_UNSIGNED);
1815*68653Smckusick if (slp->ns_recend)
1816*68653Smckusick slp->ns_recend->m_nextpkt = slp->ns_frag;
1817*68653Smckusick else
1818*68653Smckusick slp->ns_rec = slp->ns_frag;
1819*68653Smckusick slp->ns_recend = slp->ns_frag;
1820*68653Smckusick slp->ns_frag = (struct mbuf *)0;
1821*68653Smckusick }
182240117Smckusick }
182338414Smckusick }
182441900Smckusick
182541900Smckusick /*
182652196Smckusick * Parse an RPC header.
182741900Smckusick */
1828*68653Smckusick int
nfsrv_dorec(slp,nfsd,ndp)1829*68653Smckusick nfsrv_dorec(slp, nfsd, ndp)
183052196Smckusick register struct nfssvc_sock *slp;
1831*68653Smckusick struct nfsd *nfsd;
1832*68653Smckusick struct nfsrv_descript **ndp;
183341900Smckusick {
1834*68653Smckusick register struct mbuf *m, *nam;
1835*68653Smckusick register struct nfsrv_descript *nd;
183652196Smckusick int error;
183741900Smckusick
1838*68653Smckusick *ndp = NULL;
183952903Smckusick if ((slp->ns_flag & SLP_VALID) == 0 ||
184052196Smckusick (m = slp->ns_rec) == (struct mbuf *)0)
184152196Smckusick return (ENOBUFS);
1842*68653Smckusick slp->ns_rec = m->m_nextpkt;
1843*68653Smckusick if (slp->ns_rec)
184452196Smckusick m->m_nextpkt = (struct mbuf *)0;
184552196Smckusick else
184652196Smckusick slp->ns_recend = (struct mbuf *)0;
184752196Smckusick if (m->m_type == MT_SONAME) {
1848*68653Smckusick nam = m;
1849*68653Smckusick m = m->m_next;
1850*68653Smckusick nam->m_next = NULL;
1851*68653Smckusick } else
1852*68653Smckusick nam = NULL;
1853*68653Smckusick MALLOC(nd, struct nfsrv_descript *, sizeof (struct nfsrv_descript),
1854*68653Smckusick M_NFSRVDESC, M_WAITOK);
1855*68653Smckusick nd->nd_md = nd->nd_mrep = m;
1856*68653Smckusick nd->nd_nam2 = nam;
1857*68653Smckusick nd->nd_dpos = mtod(m, caddr_t);
1858*68653Smckusick error = nfs_getreq(nd, nfsd, TRUE);
1859*68653Smckusick if (error) {
1860*68653Smckusick m_freem(nam);
1861*68653Smckusick free((caddr_t)nd, M_NFSRVDESC);
186252196Smckusick return (error);
186352196Smckusick }
1864*68653Smckusick *ndp = nd;
1865*68653Smckusick nfsd->nfsd_nd = nd;
186652196Smckusick return (0);
186752196Smckusick }
186852196Smckusick
186952196Smckusick /*
187052196Smckusick * Parse an RPC request
187152196Smckusick * - verify it
187252196Smckusick * - fill in the cred struct.
187352196Smckusick */
1874*68653Smckusick int
nfs_getreq(nd,nfsd,has_header)1875*68653Smckusick nfs_getreq(nd, nfsd, has_header)
1876*68653Smckusick register struct nfsrv_descript *nd;
1877*68653Smckusick struct nfsd *nfsd;
187852196Smckusick int has_header;
187952196Smckusick {
188052196Smckusick register int len, i;
188152196Smckusick register u_long *tl;
188252196Smckusick register long t1;
188352196Smckusick struct uio uio;
188452196Smckusick struct iovec iov;
1885*68653Smckusick caddr_t dpos, cp2, cp;
188652196Smckusick u_long nfsvers, auth_type;
1887*68653Smckusick uid_t nickuid;
1888*68653Smckusick int error = 0, nqnfs = 0, ticklen;
188952196Smckusick struct mbuf *mrep, *md;
1890*68653Smckusick register struct nfsuid *nuidp;
1891*68653Smckusick struct timeval tvin, tvout;
1892*68653Smckusick NFSKERBKEYSCHED_T keys; /* stores key schedule */
189352196Smckusick
189452196Smckusick mrep = nd->nd_mrep;
189552196Smckusick md = nd->nd_md;
189652196Smckusick dpos = nd->nd_dpos;
189752196Smckusick if (has_header) {
1898*68653Smckusick nfsm_dissect(tl, u_long *, 10 * NFSX_UNSIGNED);
1899*68653Smckusick nd->nd_retxid = fxdr_unsigned(u_long, *tl++);
190052196Smckusick if (*tl++ != rpc_call) {
190152196Smckusick m_freem(mrep);
190252196Smckusick return (EBADRPC);
190352196Smckusick }
1904*68653Smckusick } else
1905*68653Smckusick nfsm_dissect(tl, u_long *, 8 * NFSX_UNSIGNED);
190652196Smckusick nd->nd_repstat = 0;
1907*68653Smckusick nd->nd_flag = 0;
190852196Smckusick if (*tl++ != rpc_vers) {
190952196Smckusick nd->nd_repstat = ERPCMISMATCH;
191052196Smckusick nd->nd_procnum = NFSPROC_NOOP;
191141900Smckusick return (0);
191252196Smckusick }
191352196Smckusick if (*tl != nfs_prog) {
1914*68653Smckusick if (*tl == nqnfs_prog)
191552196Smckusick nqnfs++;
1916*68653Smckusick else {
191752196Smckusick nd->nd_repstat = EPROGUNAVAIL;
191852196Smckusick nd->nd_procnum = NFSPROC_NOOP;
191952196Smckusick return (0);
192052196Smckusick }
192152196Smckusick }
192252196Smckusick tl++;
1923*68653Smckusick nfsvers = fxdr_unsigned(u_long, *tl++);
1924*68653Smckusick if (((nfsvers < NFS_VER2 || nfsvers > NFS_VER3) && !nqnfs) ||
1925*68653Smckusick (nfsvers != NQNFS_VER3 && nqnfs)) {
192652196Smckusick nd->nd_repstat = EPROGMISMATCH;
192752196Smckusick nd->nd_procnum = NFSPROC_NOOP;
192852196Smckusick return (0);
192952196Smckusick }
1930*68653Smckusick if (nqnfs)
1931*68653Smckusick nd->nd_flag = (ND_NFSV3 | ND_NQNFS);
1932*68653Smckusick else if (nfsvers == NFS_VER3)
1933*68653Smckusick nd->nd_flag = ND_NFSV3;
193452196Smckusick nd->nd_procnum = fxdr_unsigned(u_long, *tl++);
193552196Smckusick if (nd->nd_procnum == NFSPROC_NULL)
193652196Smckusick return (0);
193752196Smckusick if (nd->nd_procnum >= NFS_NPROCS ||
1938*68653Smckusick (!nqnfs && nd->nd_procnum >= NQNFSPROC_GETLEASE) ||
1939*68653Smckusick (!nd->nd_flag && nd->nd_procnum > NFSV2PROC_STATFS)) {
194052196Smckusick nd->nd_repstat = EPROCUNAVAIL;
194152196Smckusick nd->nd_procnum = NFSPROC_NOOP;
194252196Smckusick return (0);
194352196Smckusick }
1944*68653Smckusick if ((nd->nd_flag & ND_NFSV3) == 0)
1945*68653Smckusick nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
194652196Smckusick auth_type = *tl++;
194752196Smckusick len = fxdr_unsigned(int, *tl++);
194852196Smckusick if (len < 0 || len > RPCAUTH_MAXSIZ) {
194952196Smckusick m_freem(mrep);
195052196Smckusick return (EBADRPC);
195152196Smckusick }
195241900Smckusick
1953*68653Smckusick nd->nd_flag &= ~ND_KERBAUTH;
195441900Smckusick /*
195552196Smckusick * Handle auth_unix or auth_kerb.
195641900Smckusick */
195752196Smckusick if (auth_type == rpc_auth_unix) {
195852196Smckusick len = fxdr_unsigned(int, *++tl);
195952196Smckusick if (len < 0 || len > NFS_MAXNAMLEN) {
196052196Smckusick m_freem(mrep);
196152196Smckusick return (EBADRPC);
196252196Smckusick }
196352196Smckusick nfsm_adv(nfsm_rndup(len));
1964*68653Smckusick nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
1965*68653Smckusick bzero((caddr_t)&nd->nd_cr, sizeof (struct ucred));
1966*68653Smckusick nd->nd_cr.cr_ref = 1;
196752196Smckusick nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
196852196Smckusick nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
196952196Smckusick len = fxdr_unsigned(int, *tl);
197052196Smckusick if (len < 0 || len > RPCAUTH_UNIXGIDS) {
197152196Smckusick m_freem(mrep);
197252196Smckusick return (EBADRPC);
197352196Smckusick }
1974*68653Smckusick nfsm_dissect(tl, u_long *, (len + 2) * NFSX_UNSIGNED);
197552196Smckusick for (i = 1; i <= len; i++)
1976*68653Smckusick if (i < NGROUPS)
1977*68653Smckusick nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
1978*68653Smckusick else
1979*68653Smckusick tl++;
198052196Smckusick nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
1981*68653Smckusick if (nd->nd_cr.cr_ngroups > 1)
1982*68653Smckusick nfsrvw_sort(nd->nd_cr.cr_groups, nd->nd_cr.cr_ngroups);
1983*68653Smckusick len = fxdr_unsigned(int, *++tl);
1984*68653Smckusick if (len < 0 || len > RPCAUTH_MAXSIZ) {
198552196Smckusick m_freem(mrep);
198652196Smckusick return (EBADRPC);
198752196Smckusick }
1988*68653Smckusick if (len > 0)
1989*68653Smckusick nfsm_adv(nfsm_rndup(len));
1990*68653Smckusick } else if (auth_type == rpc_auth_kerb) {
1991*68653Smckusick switch (fxdr_unsigned(int, *tl++)) {
1992*68653Smckusick case RPCAKN_FULLNAME:
1993*68653Smckusick ticklen = fxdr_unsigned(int, *tl);
1994*68653Smckusick *((u_long *)nfsd->nfsd_authstr) = *tl;
1995*68653Smckusick uio.uio_resid = nfsm_rndup(ticklen) + NFSX_UNSIGNED;
1996*68653Smckusick nfsd->nfsd_authlen = uio.uio_resid + NFSX_UNSIGNED;
1997*68653Smckusick if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) {
1998*68653Smckusick m_freem(mrep);
1999*68653Smckusick return (EBADRPC);
2000*68653Smckusick }
2001*68653Smckusick uio.uio_offset = 0;
2002*68653Smckusick uio.uio_iov = &iov;
2003*68653Smckusick uio.uio_iovcnt = 1;
2004*68653Smckusick uio.uio_segflg = UIO_SYSSPACE;
2005*68653Smckusick iov.iov_base = (caddr_t)&nfsd->nfsd_authstr[4];
2006*68653Smckusick iov.iov_len = RPCAUTH_MAXSIZ - 4;
2007*68653Smckusick nfsm_mtouio(&uio, uio.uio_resid);
2008*68653Smckusick nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
2009*68653Smckusick if (*tl++ != rpc_auth_kerb ||
2010*68653Smckusick fxdr_unsigned(int, *tl) != 4 * NFSX_UNSIGNED) {
2011*68653Smckusick printf("Bad kerb verifier\n");
2012*68653Smckusick nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
2013*68653Smckusick nd->nd_procnum = NFSPROC_NOOP;
2014*68653Smckusick return (0);
2015*68653Smckusick }
2016*68653Smckusick nfsm_dissect(cp, caddr_t, 4 * NFSX_UNSIGNED);
2017*68653Smckusick tl = (u_long *)cp;
2018*68653Smckusick if (fxdr_unsigned(int, *tl) != RPCAKN_FULLNAME) {
2019*68653Smckusick printf("Not fullname kerb verifier\n");
2020*68653Smckusick nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
2021*68653Smckusick nd->nd_procnum = NFSPROC_NOOP;
2022*68653Smckusick return (0);
2023*68653Smckusick }
2024*68653Smckusick cp += NFSX_UNSIGNED;
2025*68653Smckusick bcopy(cp, nfsd->nfsd_verfstr, 3 * NFSX_UNSIGNED);
2026*68653Smckusick nfsd->nfsd_verflen = 3 * NFSX_UNSIGNED;
2027*68653Smckusick nd->nd_flag |= ND_KERBFULL;
2028*68653Smckusick nfsd->nfsd_flag |= NFSD_NEEDAUTH;
2029*68653Smckusick break;
2030*68653Smckusick case RPCAKN_NICKNAME:
2031*68653Smckusick if (len != 2 * NFSX_UNSIGNED) {
2032*68653Smckusick printf("Kerb nickname short\n");
2033*68653Smckusick nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADCRED);
2034*68653Smckusick nd->nd_procnum = NFSPROC_NOOP;
2035*68653Smckusick return (0);
2036*68653Smckusick }
2037*68653Smckusick nickuid = fxdr_unsigned(uid_t, *tl);
2038*68653Smckusick nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
2039*68653Smckusick if (*tl++ != rpc_auth_kerb ||
2040*68653Smckusick fxdr_unsigned(int, *tl) != 3 * NFSX_UNSIGNED) {
2041*68653Smckusick printf("Kerb nick verifier bad\n");
2042*68653Smckusick nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
2043*68653Smckusick nd->nd_procnum = NFSPROC_NOOP;
2044*68653Smckusick return (0);
2045*68653Smckusick }
2046*68653Smckusick nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
2047*68653Smckusick tvin.tv_sec = *tl++;
2048*68653Smckusick tvin.tv_usec = *tl;
204952196Smckusick
2050*68653Smckusick for (nuidp = NUIDHASH(nfsd->nfsd_slp,nickuid)->lh_first;
2051*68653Smckusick nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
2052*68653Smckusick if (nuidp->nu_cr.cr_uid == nickuid &&
2053*68653Smckusick (!nd->nd_nam2 ||
2054*68653Smckusick netaddr_match(NU_NETFAM(nuidp),
2055*68653Smckusick &nuidp->nu_haddr, nd->nd_nam2)))
2056*68653Smckusick break;
2057*68653Smckusick }
2058*68653Smckusick if (!nuidp) {
2059*68653Smckusick nd->nd_repstat =
2060*68653Smckusick (NFSERR_AUTHERR|AUTH_REJECTCRED);
2061*68653Smckusick nd->nd_procnum = NFSPROC_NOOP;
2062*68653Smckusick return (0);
2063*68653Smckusick }
2064*68653Smckusick
2065*68653Smckusick /*
2066*68653Smckusick * Now, decrypt the timestamp using the session key
2067*68653Smckusick * and validate it.
2068*68653Smckusick */
2069*68653Smckusick #ifdef NFSKERB
2070*68653Smckusick XXX
2071*68653Smckusick #endif
2072*68653Smckusick
2073*68653Smckusick tvout.tv_sec = fxdr_unsigned(long, tvout.tv_sec);
2074*68653Smckusick tvout.tv_usec = fxdr_unsigned(long, tvout.tv_usec);
2075*68653Smckusick if (nuidp->nu_expire < time.tv_sec ||
2076*68653Smckusick nuidp->nu_timestamp.tv_sec > tvout.tv_sec ||
2077*68653Smckusick (nuidp->nu_timestamp.tv_sec == tvout.tv_sec &&
2078*68653Smckusick nuidp->nu_timestamp.tv_usec > tvout.tv_usec)) {
2079*68653Smckusick nuidp->nu_expire = 0;
2080*68653Smckusick nd->nd_repstat =
2081*68653Smckusick (NFSERR_AUTHERR|AUTH_REJECTVERF);
2082*68653Smckusick nd->nd_procnum = NFSPROC_NOOP;
2083*68653Smckusick return (0);
2084*68653Smckusick }
2085*68653Smckusick nfsrv_setcred(&nuidp->nu_cr, &nd->nd_cr);
2086*68653Smckusick nd->nd_flag |= ND_KERBNICK;
2087*68653Smckusick };
2088*68653Smckusick } else {
2089*68653Smckusick nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
2090*68653Smckusick nd->nd_procnum = NFSPROC_NOOP;
2091*68653Smckusick return (0);
209252196Smckusick }
209352196Smckusick
209452196Smckusick /*
209552196Smckusick * For nqnfs, get piggybacked lease request.
209652196Smckusick */
209752196Smckusick if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) {
209852196Smckusick nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
2099*68653Smckusick nd->nd_flag |= fxdr_unsigned(int, *tl);
2100*68653Smckusick if (nd->nd_flag & ND_LEASE) {
210152196Smckusick nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
210252196Smckusick nd->nd_duration = fxdr_unsigned(int, *tl);
210352196Smckusick } else
210452196Smckusick nd->nd_duration = NQ_MINLEASE;
2105*68653Smckusick } else
210652196Smckusick nd->nd_duration = NQ_MINLEASE;
210752196Smckusick nd->nd_md = md;
210852196Smckusick nd->nd_dpos = dpos;
210941900Smckusick return (0);
211052196Smckusick nfsmout:
211152196Smckusick return (error);
211241900Smckusick }
211341900Smckusick
211441900Smckusick /*
211552196Smckusick * Search for a sleeping nfsd and wake it up.
211652196Smckusick * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
211752196Smckusick * running nfsds will go look for the work in the nfssvc_sock list.
211841900Smckusick */
211952196Smckusick void
nfsrv_wakenfsd(slp)212052196Smckusick nfsrv_wakenfsd(slp)
212152196Smckusick struct nfssvc_sock *slp;
212241900Smckusick {
212367708Smckusick register struct nfsd *nd;
212452196Smckusick
212552903Smckusick if ((slp->ns_flag & SLP_VALID) == 0)
212652903Smckusick return;
2127*68653Smckusick for (nd = nfsd_head.tqh_first; nd != 0; nd = nd->nfsd_chain.tqe_next) {
2128*68653Smckusick if (nd->nfsd_flag & NFSD_WAITING) {
2129*68653Smckusick nd->nfsd_flag &= ~NFSD_WAITING;
2130*68653Smckusick if (nd->nfsd_slp)
213152196Smckusick panic("nfsd wakeup");
213252978Smckusick slp->ns_sref++;
2133*68653Smckusick nd->nfsd_slp = slp;
213452196Smckusick wakeup((caddr_t)nd);
213552196Smckusick return;
213652196Smckusick }
213752196Smckusick }
213852903Smckusick slp->ns_flag |= SLP_DOREC;
213967708Smckusick nfsd_head_flag |= NFSD_CHECKSLP;
214041900Smckusick }
214152196Smckusick
2142*68653Smckusick int
nfs_msg(p,server,msg)214352196Smckusick nfs_msg(p, server, msg)
214452196Smckusick struct proc *p;
214552196Smckusick char *server, *msg;
214652196Smckusick {
214752196Smckusick tpr_t tpr;
214852196Smckusick
214952196Smckusick if (p)
215052196Smckusick tpr = tprintf_open(p);
215152196Smckusick else
215252196Smckusick tpr = NULL;
215352196Smckusick tprintf(tpr, "nfs server %s: %s\n", server, msg);
215452196Smckusick tprintf_close(tpr);
2155*68653Smckusick return (0);
215652196Smckusick }
2157