xref: /csrg-svn/sys/nfs/nfs_nqlease.c (revision 56284)
1 /*
2  * Copyright (c) 1992 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * %sccs.include.redist.c%
9  *
10  *	@(#)nfs_nqlease.c	7.11 (Berkeley) 09/16/92
11  */
12 
13 /*
14  * References:
15  *	Cary G. Gray and David R. Cheriton, "Leases: An Efficient Fault-Tolerant
16  *		Mechanism for Distributed File Cache Consistency",
17  *		In Proc. of the Twelfth ACM Symposium on Operating Systems
18  *		Principles, pg. 202-210, Litchfield Park, AZ, Dec. 1989.
19  *	Michael N. Nelson, Brent B. Welch and John K. Ousterhout, "Caching
20  *		in the Sprite Network File System", ACM TOCS 6(1),
21  *		pages 134-154, February 1988.
22  *	V. Srinivasan and Jeffrey C. Mogul, "Spritely NFS: Implementation and
23  *		Performance of Cache-Consistency Protocols", Digital
24  *		Equipment Corporation WRL Research Report 89/5, May 1989.
25  */
26 #include <sys/param.h>
27 #include <sys/vnode.h>
28 #include <sys/mount.h>
29 #include <sys/kernel.h>
30 #include <sys/proc.h>
31 #include <sys/systm.h>
32 #include <sys/mbuf.h>
33 #include <sys/socket.h>
34 #include <sys/socketvar.h>
35 #include <sys/file.h>
36 #include <sys/buf.h>
37 #include <sys/stat.h>
38 #include <sys/protosw.h>
39 #include <netinet/in.h>
40 #include <nfs/rpcv2.h>
41 #include <nfs/nfsv2.h>
42 #include <nfs/nfs.h>
43 #include <nfs/nfsm_subs.h>
44 #include <nfs/xdr_subs.h>
45 #include <nfs/nqnfs.h>
46 #include <nfs/nfsnode.h>
47 #include <nfs/nfsmount.h>
48 
49 /*
50  * List head for the lease queue and other global data.
51  * At any time a lease is linked into a list ordered by increasing expiry time.
52  */
53 #define	NQFHHASH(f)	((*((u_long *)(f)))&nqfheadhash)
54 
55 union nqsrvthead nqthead;
56 struct nqlease **nqfhead;
57 u_long nqfheadhash;
58 time_t nqnfsstarttime = (time_t)0;
59 u_long nqnfs_prog, nqnfs_vers;
60 int nqsrv_clockskew = NQ_CLOCKSKEW;
61 int nqsrv_writeslack = NQ_WRITESLACK;
62 int nqsrv_maxlease = NQ_MAXLEASE;
63 int nqsrv_maxnumlease = NQ_MAXNUMLEASE;
64 void nqsrv_instimeq(), nqsrv_send_eviction(), nfs_sndunlock();
65 void nqsrv_unlocklease(), nqsrv_waitfor_expiry(), nfsrv_slpderef();
66 void nqsrv_addhost(), nqsrv_locklease(), nqnfs_serverd();
67 void nqnfs_clientlease();
68 struct mbuf *nfsm_rpchead();
69 
70 /*
71  * Signifies which rpcs can have piggybacked lease requests
72  */
73 int nqnfs_piggy[NFS_NPROCS] = {
74 	0,
75 	NQL_READ,
76 	NQL_WRITE,
77 	0,
78 	NQL_READ,
79 	NQL_READ,
80 	NQL_READ,
81 	0,
82 	NQL_WRITE,
83 	0,
84 	0,
85 	0,
86 	0,
87 	0,
88 	0,
89 	0,
90 	NQL_READ,
91 	0,
92 	NQL_READ,
93 	0,
94 	0,
95 	0,
96 };
97 
98 int nnnnnn = sizeof (struct nqlease);
99 int oooooo = sizeof (struct nfsnode);
100 extern nfstype nfs_type[9];
101 extern struct nfssvc_sock *nfs_udpsock, *nfs_cltpsock;
102 extern struct nfsd nfsd_head;
103 extern int nfsd_waiting;
104 
105 #define TRUE	1
106 #define	FALSE	0
107 
108 /*
109  * Get or check for a lease for "vp", based on NQL_CHECK flag.
110  * The rules are as follows:
111  * - if a current non-caching lease, reply non-caching
112  * - if a current lease for same host only, extend lease
113  * - if a read cachable lease and a read lease request
 *	add host to list and reply cachable
115  * - else { set non-cachable for read-write sharing }
116  *	send eviction notice messages to all other hosts that have lease
117  *	wait for lease termination { either by receiving vacated messages
118  *					from all the other hosts or expiry
119  *					via. timeout }
120  *	modify lease to non-cachable
121  * - else if no current lease, issue new one
122  * - reply
 * - returns 0 (also when the vnode type is not leasable), or the error
 *   returned by VOP_GETATTR() or VFS_VPTOFH()
 *
 * On return *duration has been clamped to nqsrv_maxlease, *frev holds the
 * file revision from the vnode attributes and, when a lease exists or was
 * created, *cachablep says whether the client may cache.
124  * NB: Since nqnfs_serverd() is called from a timer, any potential tsleep()
125  *     in here must be framed by nqsrv_locklease() and nqsrv_unlocklease().
126  *     nqsrv_locklease() is coded such that at least one of LC_LOCKED and
127  *     LC_WANTED is set whenever a process is tsleeping in it. The exception
128  *     is when a new lease is being allocated, since it is not in the timer
129  *     queue yet. (Ditto for the splsoftclock() and splx(s) calls)
130  */
131 nqsrv_getlease(vp, duration, flags, nd, nam, cachablep, frev, cred)
132 	struct vnode *vp;
133 	u_long *duration;
134 	int flags;
135 	struct nfsd *nd;
136 	struct mbuf *nam;
137 	int *cachablep;
138 	u_quad_t *frev;
139 	struct ucred *cred;
140 {
141 	register struct nqlease *lp, *lq, **lpp;
142 	register struct nqhost *lph;
143 	struct nqlease *tlp;
144 	struct nqm **lphp;
145 	struct vattr vattr;
146 	fhandle_t fh;
147 	int i, ok, error, s;
148 
	/* Leases are only handed out for regular files, directories and symlinks. */
149 	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
150 		return (0);
	/* Never grant more than the configured maximum term. */
151 	if (*duration > nqsrv_maxlease)
152 		*duration = nqsrv_maxlease;
153 	if (error = VOP_GETATTR(vp, &vattr, cred, nd->nd_procp))
154 		return (error);
155 	*frev = vattr.va_filerev;
156 	s = splsoftclock();
	/* Fast path: the vnode may already point at its lease. */
157 	tlp = vp->v_lease;
158 	if ((flags & NQL_CHECK) == 0)
159 		nfsstats.srvnqnfs_getleases++;
160 	if (tlp == (struct nqlease *)0) {
161 
162 		/*
163 		 * Find the lease by searching the hash list.
164 		 */
165 		fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
166 		if (error = VFS_VPTOFH(vp, &fh.fh_fid)) {
167 			splx(s);
168 			return (error);
169 		}
		/* lpp and fh are reused below if a new lease has to be created. */
170 		lpp = &nqfhead[NQFHHASH(fh.fh_fid.fid_data)];
171 		for (lp = *lpp; lp; lp = lp->lc_fhnext)
172 			if (fh.fh_fsid.val[0] == lp->lc_fsid.val[0] &&
173 			    fh.fh_fsid.val[1] == lp->lc_fsid.val[1] &&
174 			    !bcmp(fh.fh_fid.fid_data, lp->lc_fiddata,
175 				  fh.fh_fid.fid_len - sizeof (long))) {
176 				/* Found it */
177 				lp->lc_vp = vp;
178 				vp->v_lease = lp;
179 				tlp = lp;
180 				break;
181 			}
182 	}
183 	lp = tlp;
184 	if (lp) {
		/*
		 * Nothing to change if the lease is already non-caching, or
		 * if the requester is the only host holding it.
		 */
185 		if ((lp->lc_flag & LC_NONCACHABLE) ||
186 		    (lp->lc_morehosts == (struct nqm *)0 &&
187 		     nqsrv_cmpnam(nd->nd_slp, nam, &lp->lc_host)))
188 			goto doreply;
		/* Read request on a lease without LC_WRITE: hosts may share it. */
189 		if ((flags & NQL_READ) && (lp->lc_flag & LC_WRITE)==0) {
190 			if (flags & NQL_CHECK)
191 				goto doreply;
192 			if (nqsrv_cmpnam(nd->nd_slp, nam, &lp->lc_host))
193 				goto doreply;
			/*
			 * Walk the overflow host blocks looking for the
			 * requester; the walk stops at the first slot without
			 * LC_VALID or when it runs off the last block (ok == 0).
			 */
194 			i = 0;
195 			if (lp->lc_morehosts) {
196 				lph = lp->lc_morehosts->lpm_hosts;
197 				lphp = &lp->lc_morehosts->lpm_next;
198 				ok = 1;
199 			} else {
200 				lphp = &lp->lc_morehosts;
201 				ok = 0;
202 			}
203 			while (ok && (lph->lph_flag & LC_VALID)) {
204 				if (nqsrv_cmpnam(nd->nd_slp, nam, lph))
205 					goto doreply;
206 				if (++i == LC_MOREHOSTSIZ) {
207 					i = 0;
208 					if (*lphp) {
209 						lph = (*lphp)->lpm_hosts;
210 						lphp = &((*lphp)->lpm_next);
211 					} else
212 						ok = 0;
213 				} else
214 					lph++;
215 			}
			/* The M_WAITOK allocation below is framed by the lease lock. */
216 			nqsrv_locklease(lp);
217 			if (!ok) {
				/* No free slot left: chain on a zeroed host block. */
218 				*lphp = (struct nqm *)
219 					malloc(sizeof (struct nqm),
220 						M_NQMHOST, M_WAITOK);
221 				bzero((caddr_t)*lphp, sizeof (struct nqm));
222 				lph = (*lphp)->lpm_hosts;
223 			}
224 			nqsrv_addhost(lph, nd->nd_slp, nam);
225 			nqsrv_unlocklease(lp);
226 		} else {
			/*
			 * Conflicting access: mark the lease non-cachable, notify
			 * the other holders and wait until they have vacated or
			 * the lease has expired. The vnode is unlocked while
			 * waiting.
			 */
227 			lp->lc_flag |= LC_NONCACHABLE;
228 			nqsrv_locklease(lp);
229 			VOP_UNLOCK(vp);
230 			nqsrv_send_eviction(vp, lp, nd->nd_slp, nam, cred);
231 			nqsrv_waitfor_expiry(lp);
232 			VOP_LOCK(vp);
233 			nqsrv_unlocklease(lp);
234 		}
235 doreply:
236 		/*
237 		 * Update the lease and return
238 		 */
		/* A pure check (NQL_CHECK) never extends the lease. */
239 		if ((flags & NQL_CHECK) == 0)
240 			nqsrv_instimeq(lp, *duration);
241 		if (lp->lc_flag & LC_NONCACHABLE)
242 			*cachablep = 0;
243 		else {
244 			*cachablep = 1;
245 			if (flags & NQL_WRITE)
246 				lp->lc_flag |= LC_WRITTEN;
247 		}
248 		splx(s);
249 		return (0);
250 	}
251 	splx(s);
	/* No lease exists; a check-only call has nothing more to do. */
252 	if (flags & NQL_CHECK)
253 		return (0);
254 
255 	/*
256 	 * Allocate new lease
257 	 * The value of nqsrv_maxnumlease should be set generously, so that
258 	 * the following "printf" happens infrequently.
259 	 */
260 	if (nfsstats.srvnqnfs_leases > nqsrv_maxnumlease) {
261 		printf("Nqnfs server, too many leases\n");
262 		do {
263 			(void) tsleep((caddr_t)&lbolt, PSOCK,
264 					"nqsrvnuml", 0);
265 		} while (nfsstats.srvnqnfs_leases > nqsrv_maxnumlease);
266 	}
267 	MALLOC(lp, struct nqlease *, sizeof (struct nqlease), M_NQLEASE, M_WAITOK);
268 	bzero((caddr_t)lp, sizeof (struct nqlease));
269 	if (flags & NQL_WRITE)
270 		lp->lc_flag |= (LC_WRITE | LC_WRITTEN);
271 	nqsrv_addhost(&lp->lc_host, nd->nd_slp, nam);
272 	lp->lc_vp = vp;
273 	lp->lc_fsid = fh.fh_fsid;
274 	bcopy(fh.fh_fid.fid_data, lp->lc_fiddata, fh.fh_fid.fid_len - sizeof (long));
	/* Link the new lease in at the head of its hash chain. */
275 	if (lq = *lpp)
276 		lq->lc_fhprev = &lp->lc_fhnext;
277 	lp->lc_fhnext = lq;
278 	lp->lc_fhprev = lpp;
279 	*lpp = lp;
280 	vp->v_lease = lp;
	/* Only the timer queue insertion needs the raised spl. */
281 	s = splsoftclock();
282 	nqsrv_instimeq(lp, *duration);
283 	splx(s);
284 	*cachablep = 1;
	/* Maintain the current and high-water lease counts. */
285 	if (++nfsstats.srvnqnfs_leases > nfsstats.srvnqnfs_maxleases)
286 		nfsstats.srvnqnfs_maxleases = nfsstats.srvnqnfs_leases;
287 	return (0);
288 }
289 
290 /*
291  * Local lease check for server syscalls.
292  * Just set up args and let nqsrv_getlease() do the rest.
293  */
294 void
295 lease_check(vp, p, cred, flag)
296 	struct vnode *vp;
297 	struct proc *p;
298 	struct ucred *cred;
299 	int flag;
300 {
301 	int duration, cache;
302 	struct nfsd nfsd;
303 	u_quad_t frev;
304 
305 	nfsd.nd_slp = NQLOCALSLP;
306 	nfsd.nd_procp = p;
307 	(void) nqsrv_getlease(vp, &duration, NQL_CHECK | flag, &nfsd,
308 		(struct mbuf *)0, &cache, &frev, cred);
309 }
310 
311 /*
312  * Add a host to an nqhost structure for a lease.
313  */
314 void
315 nqsrv_addhost(lph, slp, nam)
316 	register struct nqhost *lph;
317 	struct nfssvc_sock *slp;
318 	struct mbuf *nam;
319 {
320 	register struct sockaddr_in *saddr;
321 
322 	if (slp == NQLOCALSLP)
323 		lph->lph_flag |= (LC_VALID | LC_LOCAL);
324 	else if (slp == nfs_udpsock) {
325 		saddr = mtod(nam, struct sockaddr_in *);
326 		lph->lph_flag |= (LC_VALID | LC_UDP);
327 		lph->lph_inetaddr = saddr->sin_addr.s_addr;
328 		lph->lph_port = saddr->sin_port;
329 	} else if (slp == nfs_cltpsock) {
330 		lph->lph_nam = m_copym(nam, 0, M_COPYALL, M_WAIT);
331 		lph->lph_flag |= (LC_VALID | LC_CLTP);
332 	} else {
333 		lph->lph_flag |= (LC_VALID | LC_SREF);
334 		lph->lph_slp = slp;
335 		slp->ns_sref++;
336 	}
337 }
338 
339 /*
340  * Update the lease expiry time and position it in the timer queue correctly.
341  */
342 void
343 nqsrv_instimeq(lp, duration)
344 	register struct nqlease *lp;
345 	u_long duration;
346 {
347 	register struct nqlease *tlp;
348 	time_t newexpiry;
349 
350 	newexpiry = time.tv_sec + duration + nqsrv_clockskew;
351 	if (lp->lc_expiry == newexpiry)
352 		return;
353 	if (lp->lc_chain1[0])
354 		remque(lp);
355 	lp->lc_expiry = newexpiry;
356 
357 	/*
358 	 * Find where in the queue it should be.
359 	 */
360 	tlp = nqthead.th_chain[1];
361 	while (tlp->lc_expiry > newexpiry && tlp != (struct nqlease *)&nqthead)
362 		tlp = tlp->lc_chain1[1];
363 	if (tlp == nqthead.th_chain[1])
364 		NQSTORENOVRAM(newexpiry);
365 	insque(lp, tlp);
366 }
367 
368 /*
369  * Compare the requesting host address with the lph entry in the lease.
370  * Return true iff it is the same.
371  * This is somewhat messy due to the union in the nqhost structure.
372  * The local host is indicated by the special value of NQLOCALSLP for slp.
373  */
374 nqsrv_cmpnam(slp, nam, lph)
375 	register struct nfssvc_sock *slp;
376 	struct mbuf *nam;
377 	register struct nqhost *lph;
378 {
379 	register struct sockaddr_in *saddr;
380 	struct mbuf *addr;
381 	union nethostaddr lhaddr;
382 	int ret;
383 
384 	if (slp == NQLOCALSLP) {
385 		if (lph->lph_flag & LC_LOCAL)
386 			return (1);
387 		else
388 			return (0);
389 	}
390 	if (slp == nfs_udpsock || slp == nfs_cltpsock)
391 		addr = nam;
392 	else
393 		addr = slp->ns_nam;
394 	if (lph->lph_flag & LC_UDP)
395 		ret = netaddr_match(AF_INET, &lph->lph_haddr,
396 			(union nethostaddr *)0, addr);
397 	else if (lph->lph_flag & LC_CLTP)
398 		ret = netaddr_match(AF_ISO, &lph->lph_claddr,
399 			(union nethostaddr *)0, addr);
400 	else {
401 		if ((lph->lph_slp->ns_flag & SLP_VALID) == 0)
402 			return (0);
403 		saddr = mtod(lph->lph_slp->ns_nam, struct sockaddr_in *);
404 		if (saddr->sin_family == AF_INET)
405 			lhaddr.had_inetaddr = saddr->sin_addr.s_addr;
406 		else
407 			lhaddr.had_nam = lph->lph_slp->ns_nam;
408 		ret = netaddr_match(saddr->sin_family, &lhaddr,
409 			(union nethostaddr *)0, addr);
410 	}
411 	return (ret);
412 }
413 
414 /*
415  * Send out eviction notice messages to all other hosts for the lease.
 *
 * vp		vnode the lease is for; its file handle goes in the message
 * lp		the lease whose host list is walked
 * slp, nam	identify the requesting host, which is marked LC_VACATED
 *		instead of being sent a notice
 * cred		credentials passed to nfsm_rpchead()
 *
 * Local hosts and hosts already marked LC_VACATED are skipped. The result
 * of nfs_send() is ignored, so delivery is best effort.
416  */
417 void
418 nqsrv_send_eviction(vp, lp, slp, nam, cred)
419 	struct vnode *vp;
420 	register struct nqlease *lp;
421 	struct nfssvc_sock *slp;
422 	struct mbuf *nam;
423 	struct ucred *cred;
424 {
425 	register struct nqhost *lph = &lp->lc_host;
426 	register struct mbuf *m;
427 	register int siz;
428 	struct nqm *lphnext = lp->lc_morehosts;
429 	struct mbuf *mreq, *mb, *mb2, *nam2, *mheadend;
430 	struct socket *so;
431 	struct sockaddr_in *saddr;
432 	fhandle_t *fhp;
433 	caddr_t bpos, cp;
434 	u_long xid;
	/* len is 1 for the host embedded in the lease, LC_MOREHOSTSIZ after. */
435 	int len = 1, ok = 1, i = 0;
436 	int sotype, *solockp;
437 
438 	while (ok && (lph->lph_flag & LC_VALID)) {
439 		if (nqsrv_cmpnam(slp, nam, lph))
440 			lph->lph_flag |= LC_VACATED;
441 		else if ((lph->lph_flag & (LC_LOCAL | LC_VACATED)) == 0) {
			/* Pick the destination address and socket for this host. */
442 			if (lph->lph_flag & LC_UDP) {
				/* Build a temporary sockaddr_in; freed at the end. */
443 				MGET(nam2, M_WAIT, MT_SONAME);
444 				saddr = mtod(nam2, struct sockaddr_in *);
445 				nam2->m_len = saddr->sin_len =
446 					sizeof (struct sockaddr_in);
447 				saddr->sin_family = AF_INET;
448 				saddr->sin_addr.s_addr = lph->lph_inetaddr;
449 				saddr->sin_port = lph->lph_port;
450 				so = nfs_udpsock->ns_so;
451 			} else if (lph->lph_flag & LC_CLTP) {
452 				nam2 = lph->lph_nam;
453 				so = nfs_cltpsock->ns_so;
454 			} else if (lph->lph_slp->ns_flag & SLP_VALID) {
455 				nam2 = (struct mbuf *)0;
456 				so = lph->lph_slp->ns_so;
457 			} else
458 				goto nextone;
459 			sotype = so->so_type;
			/* Connection oriented sockets are guarded by a send lock. */
460 			if (so->so_proto->pr_flags & PR_CONNREQUIRED)
461 				solockp = &lph->lph_slp->ns_solock;
462 			else
463 				solockp = (int *)0;
			/* The request body is just the file handle of vp. */
464 			nfsm_reqhead((struct vnode *)0, NQNFSPROC_EVICTED,
465 				NFSX_FH);
466 			nfsm_build(cp, caddr_t, NFSX_FH);
467 			bzero(cp, NFSX_FH);
468 			fhp = (fhandle_t *)cp;
469 			fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
			/* NOTE(review): the VFS_VPTOFH() result is not checked here. */
470 			VFS_VPTOFH(vp, &fhp->fh_fid);
			/* Total up the length of the request mbuf chain. */
471 			m = mreq;
472 			siz = 0;
473 			while (m) {
474 				siz += m->m_len;
475 				m = m->m_next;
476 			}
477 			if (siz <= 0 || siz > NFS_MAXPACKET) {
478 				printf("mbuf siz=%d\n",siz);
479 				panic("Bad nfs svc reply");
480 			}
481 			m = nfsm_rpchead(cred, TRUE, NQNFSPROC_EVICTED,
482 				RPCAUTH_UNIX, 5*NFSX_UNSIGNED, (char *)0,
483 				mreq, siz, &mheadend, &xid);
484 			/*
485 			 * For stream protocols, prepend a Sun RPC
486 			 * Record Mark.
487 			 */
488 			if (sotype == SOCK_STREAM) {
489 				M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
490 				*mtod(m, u_long *) = htonl(0x80000000 |
491 					(m->m_pkthdr.len - NFSX_UNSIGNED));
492 			}
			/*
			 * Drop the notice if the socket has become invalid or the
			 * send lock is already held; otherwise take the lock
			 * (when there is one), send, and release it.
			 */
493 			if (((lph->lph_flag & (LC_UDP | LC_CLTP)) == 0 &&
494 			    (lph->lph_slp->ns_flag & SLP_VALID) == 0) ||
495 			    (solockp && (*solockp & NFSMNT_SNDLOCK)))
496 				m_freem(m);
497 			else {
498 				if (solockp)
499 					*solockp |= NFSMNT_SNDLOCK;
500 				(void) nfs_send(so, nam2, m,
501 						(struct nfsreq *)0);
502 				if (solockp)
503 					nfs_sndunlock(solockp);
504 			}
			/* Release the sockaddr mbuf allocated above for UDP. */
505 			if (lph->lph_flag & LC_UDP)
506 				MFREE(nam2, m);
507 		}
508 nextone:
		/* Advance to the next slot, moving to the next host block as needed. */
509 		if (++i == len) {
510 			if (lphnext) {
511 				i = 0;
512 				len = LC_MOREHOSTSIZ;
513 				lph = lphnext->lpm_hosts;
514 				lphnext = lphnext->lpm_next;
515 			} else
516 				ok = 0;
517 		} else
518 			lph++;
519 	}
520 }
521 
522 /*
523  * Wait for the lease to expire.
524  * This will occur when all clients have sent "vacated" messages to
525  * this server OR when it expires do to timeout.
526  */
527 void
528 nqsrv_waitfor_expiry(lp)
529 	register struct nqlease *lp;
530 {
531 	register struct nqhost *lph;
532 	register int i;
533 	struct nqm *lphnext;
534 	int len, ok;
535 
536 tryagain:
537 	if (time.tv_sec > lp->lc_expiry)
538 		return;
539 	lph = &lp->lc_host;
540 	lphnext = lp->lc_morehosts;
541 	len = 1;
542 	i = 0;
543 	ok = 1;
544 	while (ok && (lph->lph_flag & LC_VALID)) {
545 		if ((lph->lph_flag & (LC_LOCAL | LC_VACATED)) == 0) {
546 			lp->lc_flag |= LC_EXPIREDWANTED;
547 			(void) tsleep((caddr_t)&lp->lc_flag, PSOCK,
548 					"nqexp", 0);
549 			goto tryagain;
550 		}
551 		if (++i == len) {
552 			if (lphnext) {
553 				i = 0;
554 				len = LC_MOREHOSTSIZ;
555 				lph = lphnext->lpm_hosts;
556 				lphnext = lphnext->lpm_next;
557 			} else
558 				ok = 0;
559 		} else
560 			lph++;
561 	}
562 }
563 
564 /*
565  * Nqnfs server timer that maintains the server lease queue.
566  * Scan the lease queue for expired entries:
567  * - when one is found, wakeup anyone waiting for it
568  *   else dequeue and free
 *
 * The scan starts at nqthead.th_chain[0] and stops at the first lease that
 * has not expired yet. A lease with LC_LOCKED or LC_WANTED set is left
 * alone.
569  */
570 void
571 nqnfs_serverd()
572 {
573 	register struct nqlease *lp, *lq;
574 	register struct nqhost *lph;
575 	struct nqlease *nextlp;
576 	struct nqm *lphnext, *olphnext;
577 	struct mbuf *n;
578 	union nqsrvthead *lhp;
579 	int i, len, ok;
580 
581 	lp = nqthead.th_chain[0];
582 	while (lp != (struct nqlease *)&nqthead) {
583 		if (lp->lc_expiry >= time.tv_sec)
584 			break;
		/* Remember the successor now: lp may be requeued or freed below. */
585 		nextlp = lp->lc_chain1[0];
586 		if (lp->lc_flag & LC_EXPIREDWANTED) {
			/* Someone sleeps in nqsrv_waitfor_expiry(); let it finish. */
587 			lp->lc_flag &= ~LC_EXPIREDWANTED;
588 			wakeup((caddr_t)&lp->lc_flag);
589 		} else if ((lp->lc_flag & (LC_LOCKED | LC_WANTED)) == 0) {
590 		    /*
591 		     * Make a best effort at keeping a write caching lease long
592 		     * enough by not deleting it until it has been explicitly
593 		     * vacated or there have been no writes in the previous
594 		     * write_slack seconds since expiry and the nfsds are not
595 		     * all busy. The assumption is that if the nfsds are not
596 		     * all busy now (no queue of nfs requests), then the client
597 		     * would have been able to do at least one write to the
598 		     * file during the last write_slack seconds if it was still
599 		     * trying to push writes to the server.
600 		     */
601 		    if ((lp->lc_flag & (LC_WRITE | LC_VACATED)) == LC_WRITE &&
602 			((lp->lc_flag & LC_WRITTEN) || nfsd_waiting == 0)) {
603 			lp->lc_flag &= ~LC_WRITTEN;
604 			nqsrv_instimeq(lp, nqsrv_writeslack);
605 		    } else {
			/* Unlink from the timer queue and from the hash chain. */
606 			remque(lp);
607 			if (lq = lp->lc_fhnext)
608 				lq->lc_fhprev = lp->lc_fhprev;
609 			*lp->lc_fhprev = lq;
610 			/*
611 			 * This soft reference may no longer be valid, but
612 			 * no harm done. The worst case is if the vnode was
613 			 * recycled and has another valid lease reference,
614 			 * which is dereferenced prematurely.
615 			 */
616 			lp->lc_vp->v_lease = (struct nqlease *)0;
			/*
			 * Release what each host slot holds. olphnext trails the
			 * walk: it is the host block currently being scanned and
			 * is freed only once the walk has moved past it.
			 */
617 			lph = &lp->lc_host;
618 			lphnext = lp->lc_morehosts;
619 			olphnext = (struct nqm *)0;
620 			len = 1;
621 			i = 0;
622 			ok = 1;
623 			while (ok && (lph->lph_flag & LC_VALID)) {
624 				if (lph->lph_flag & LC_CLTP)
625 					MFREE(lph->lph_nam, n);
626 				if (lph->lph_flag & LC_SREF)
627 					nfsrv_slpderef(lph->lph_slp);
628 				if (++i == len) {
629 					if (olphnext) {
630 						free((caddr_t)olphnext, M_NQMHOST);
631 						olphnext = (struct nqm *)0;
632 					}
633 					if (lphnext) {
634 						olphnext = lphnext;
635 						i = 0;
636 						len = LC_MOREHOSTSIZ;
637 						lph = lphnext->lpm_hosts;
638 						lphnext = lphnext->lpm_next;
639 					} else
640 						ok = 0;
641 				} else
642 					lph++;
643 			}
644 			FREE((caddr_t)lp, M_NQLEASE);
			/* The walk ended part way through a block: free that block. */
645 			if (olphnext)
646 				free((caddr_t)olphnext, M_NQMHOST);
647 			nfsstats.srvnqnfs_leases--;
648 		    }
649 		}
650 		lp = nextlp;
651 	}
652 }
653 
654 /*
655  * Called from nfssvc_nfsd() for a getlease rpc request.
656  * Do the from/to xdr translation and call nqsrv_getlease() to
657  * do the real work.
658  */
659 nqnfsrv_getlease(nfsd, mrep, md, dpos, cred, nam, mrq)
660 	struct nfsd *nfsd;
661 	struct mbuf *mrep, *md;
662 	caddr_t dpos;
663 	struct ucred *cred;
664 	struct mbuf *nam, **mrq;
665 {
666 	register struct nfsv2_fattr *fp;
667 	struct vattr va;
668 	register struct vattr *vap = &va;
669 	struct vnode *vp;
670 	nfsv2fh_t nfh;
671 	fhandle_t *fhp;
672 	register u_long *tl;
673 	register long t1;
674 	u_quad_t frev;
675 	caddr_t bpos;
676 	int error = 0;
677 	char *cp2;
678 	struct mbuf *mb, *mb2, *mreq;
679 	int flags, rdonly, cache;
680 
681 	fhp = &nfh.fh_generic;
682 	nfsm_srvmtofh(fhp);
683 	nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
684 	flags = fxdr_unsigned(int, *tl++);
685 	nfsd->nd_duration = fxdr_unsigned(int, *tl);
686 	if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
687 		nfsm_reply(0);
688 	if (rdonly && flags == NQL_WRITE) {
689 		error = EROFS;
690 		nfsm_reply(0);
691 	}
692 	(void) nqsrv_getlease(vp, &nfsd->nd_duration, flags, nfsd,
693 		nam, &cache, &frev, cred);
694 	error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
695 	vput(vp);
696 	nfsm_reply(NFSX_NQFATTR + 4*NFSX_UNSIGNED);
697 	nfsm_build(tl, u_long *, 4*NFSX_UNSIGNED);
698 	*tl++ = txdr_unsigned(cache);
699 	*tl++ = txdr_unsigned(nfsd->nd_duration);
700 	txdr_hyper(&frev, tl);
701 	nfsm_build(fp, struct nfsv2_fattr *, NFSX_NQFATTR);
702 	nfsm_srvfillattr;
703 	nfsm_srvdone;
704 }
705 
706 /*
707  * Called from nfssvc_nfsd() when a "vacated" message is received from a
708  * client. Find the entry and expire it.
709  */
710 nqnfsrv_vacated(nfsd, mrep, md, dpos, cred, nam, mrq)
711 	struct nfsd *nfsd;
712 	struct mbuf *mrep, *md;
713 	caddr_t dpos;
714 	struct ucred *cred;
715 	struct mbuf *nam, **mrq;
716 {
717 	register struct nqlease *lp;
718 	register struct nqhost *lph;
719 	struct nqlease *tlp = (struct nqlease *)0;
720 	struct vnode *vp;
721 	nfsv2fh_t nfh;
722 	fhandle_t *fhp;
723 	register u_long *tl;
724 	register long t1;
725 	struct nqm *lphnext;
726 	union nqsrvthead *lhp;
727 	u_quad_t frev;
728 	int error = 0, i, len, ok, rdonly, gotit = 0;
729 	char *cp2;
730 
731 	fhp = &nfh.fh_generic;
732 	nfsm_srvmtofh(fhp);
733 	if (error = nfsrv_fhtovp(fhp, FALSE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
734 		return (error);
735 	m_freem(mrep);
736 	tlp = vp->v_lease;
737 	if (tlp == (struct nqlease *)0) {
738 		/*
739 		 * Find the lease by searching the hash list.
740 		 */
741 		for (lp = nqfhead[NQFHHASH(fhp->fh_fid.fid_data)]; lp;
742 		     lp = lp->lc_fhnext)
743 			if (fhp->fh_fsid.val[0] == lp->lc_fsid.val[0] &&
744 			    fhp->fh_fsid.val[1] == lp->lc_fsid.val[1] &&
745 			    !bcmp(fhp->fh_fid.fid_data, lp->lc_fiddata,
746 				  MAXFIDSZ)) {
747 				/* Found it */
748 				lp->lc_vp = vp;
749 				vp->v_lease = lp;
750 				tlp = lp;
751 				break;
752 			}
753 	}
754 	vrele(vp);
755 	if (tlp) {
756 		lp = tlp;
757 		len = 1;
758 		i = 0;
759 		lph = &lp->lc_host;
760 		lphnext = lp->lc_morehosts;
761 		ok = 1;
762 		while (ok && (lph->lph_flag & LC_VALID)) {
763 			if (nqsrv_cmpnam(nfsd->nd_slp, nam, lph)) {
764 				lph->lph_flag |= LC_VACATED;
765 				gotit++;
766 				break;
767 			}
768 			if (++i == len) {
769 				if (lphnext) {
770 					len = LC_MOREHOSTSIZ;
771 					i = 0;
772 					lph = lphnext->lpm_hosts;
773 					lphnext = lphnext->lpm_next;
774 				} else
775 					ok = 0;
776 			} else
777 				lph++;
778 		}
779 		if ((lp->lc_flag & LC_EXPIREDWANTED) && gotit) {
780 			lp->lc_flag &= ~LC_EXPIREDWANTED;
781 			wakeup((caddr_t)&lp->lc_flag);
782 		}
783 nfsmout:
784 		return (EPERM);
785 	}
786 	return (EPERM);
787 }
788 
789 /*
790  * Client get lease rpc function.
791  */
792 nqnfs_getlease(vp, rwflag, cred, p)
793 	register struct vnode *vp;
794 	int rwflag;
795 	struct ucred *cred;
796 	struct proc *p;
797 {
798 	register u_long *tl;
799 	register caddr_t cp;
800 	register long t1;
801 	register struct nfsnode *np, *tp;
802 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
803 	caddr_t bpos, dpos, cp2;
804 	time_t reqtime;
805 	int error = 0;
806 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
807 	int cachable;
808 	u_quad_t frev;
809 
810 	nfsstats.rpccnt[NQNFSPROC_GETLEASE]++;
811 	mb = mreq = nfsm_reqh(vp, NQNFSPROC_GETLEASE, NFSX_FH+2*NFSX_UNSIGNED,
812 		 &bpos);
813 	nfsm_fhtom(vp);
814 	nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
815 	*tl++ = txdr_unsigned(rwflag);
816 	*tl = txdr_unsigned(nmp->nm_leaseterm);
817 	reqtime = time.tv_sec;
818 	nfsm_request(vp, NQNFSPROC_GETLEASE, p, cred);
819 	np = VTONFS(vp);
820 	nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED);
821 	cachable = fxdr_unsigned(int, *tl++);
822 	reqtime += fxdr_unsigned(int, *tl++);
823 	if (reqtime > time.tv_sec) {
824 		fxdr_hyper(tl, &frev);
825 		nqnfs_clientlease(nmp, np, rwflag, cachable, reqtime, frev);
826 		nfsm_loadattr(vp, (struct vattr *)0);
827 	} else
828 		error = NQNFS_EXPIRED;
829 	nfsm_reqdone;
830 	return (error);
831 }
832 
833 /*
834  * Client vacated message function.
835  */
836 nqnfs_vacated(vp, cred)
837 	register struct vnode *vp;
838 	struct ucred *cred;
839 {
840 	register caddr_t cp;
841 	register struct mbuf *m;
842 	register int i;
843 	caddr_t bpos;
844 	u_long xid;
845 	int error = 0;
846 	struct mbuf *mreq, *mb, *mb2, *mheadend;
847 	struct nfsmount *nmp;
848 	struct nfsreq myrep;
849 
850 	nmp = VFSTONFS(vp->v_mount);
851 	nfsstats.rpccnt[NQNFSPROC_VACATED]++;
852 	nfsm_reqhead(vp, NQNFSPROC_VACATED, NFSX_FH);
853 	nfsm_fhtom(vp);
854 	m = mreq;
855 	i = 0;
856 	while (m) {
857 		i += m->m_len;
858 		m = m->m_next;
859 	}
860 	m = nfsm_rpchead(cred, TRUE, NQNFSPROC_VACATED,
861 		RPCAUTH_UNIX, 5*NFSX_UNSIGNED, (char *)0,
862 		mreq, i, &mheadend, &xid);
863 	if (nmp->nm_sotype == SOCK_STREAM) {
864 		M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
865 		*mtod(m, u_long *) = htonl(0x80000000 | (m->m_pkthdr.len -
866 			NFSX_UNSIGNED));
867 	}
868 	myrep.r_flags = 0;
869 	myrep.r_nmp = nmp;
870 	if (nmp->nm_soflags & PR_CONNREQUIRED)
871 		(void) nfs_sndlock(&nmp->nm_flag, (struct nfsreq *)0);
872 	(void) nfs_send(nmp->nm_so, nmp->nm_nam, m, &myrep);
873 	if (nmp->nm_soflags & PR_CONNREQUIRED)
874 		nfs_sndunlock(&nmp->nm_flag);
875 	return (error);
876 }
877 
878 /*
879  * Called for client side callbacks
880  */
881 nqnfs_callback(nmp, mrep, md, dpos)
882 	struct nfsmount *nmp;
883 	struct mbuf *mrep, *md;
884 	caddr_t dpos;
885 {
886 	register struct vnode *vp;
887 	register u_long *tl;
888 	register long t1;
889 	nfsv2fh_t nfh;
890 	fhandle_t *fhp;
891 	struct nfsnode *np;
892 	struct nfsd nd;
893 	int error;
894 	char *cp2;
895 
896 	nd.nd_mrep = mrep;
897 	nd.nd_md = md;
898 	nd.nd_dpos = dpos;
899 	if (error = nfs_getreq(&nd, FALSE))
900 		return (error);
901 	md = nd.nd_md;
902 	dpos = nd.nd_dpos;
903 	if (nd.nd_procnum != NQNFSPROC_EVICTED) {
904 		m_freem(mrep);
905 		return (EPERM);
906 	}
907 	fhp = &nfh.fh_generic;
908 	nfsm_srvmtofh(fhp);
909 	m_freem(mrep);
910 	if (error = nfs_nget(nmp->nm_mountp, fhp, &np))
911 		return (error);
912 	vp = NFSTOV(np);
913 	if (np->n_tnext) {
914 		np->n_expiry = 0;
915 		np->n_flag |= NQNFSEVICTED;
916 		if (np->n_tprev != (struct nfsnode *)nmp) {
917 			if (np->n_tnext == (struct nfsnode *)nmp)
918 				nmp->nm_tprev = np->n_tprev;
919 			else
920 				np->n_tnext->n_tprev = np->n_tprev;
921 			np->n_tprev->n_tnext = np->n_tnext;
922 			np->n_tnext = nmp->nm_tnext;
923 			nmp->nm_tnext = np;
924 			np->n_tprev = (struct nfsnode *)nmp;
925 			if (np->n_tnext == (struct nfsnode *)nmp)
926 				nmp->nm_tprev = np;
927 			else
928 				np->n_tnext->n_tprev = np;
929 		}
930 	}
931 	vrele(vp);
932 	nfsm_srvdone;
933 }
934 
935 /*
936  * Nqnfs client helper daemon. Runs once a second to expire leases.
937  * It also get authorization strings for "kerb" mounts.
938  * It must start at the beginning of the list again after any potential
939  * "sleep" since nfs_reclaim() called from vclean() can pull a node off
940  * the list asynchronously.
941  */
942 nqnfs_clientd(nmp, cred, ncd, flag, argp, p)
943 	register struct nfsmount *nmp;
944 	struct ucred *cred;
945 	struct nfsd_cargs *ncd;
946 	int flag;
947 	caddr_t argp;
948 	struct proc *p;
949 {
950 	register struct nfsnode *np;
951 	struct vnode *vp;
952 	int error, vpid;
953 
954 	/*
955 	 * First initialize some variables
956 	 */
957 	nqnfs_prog = txdr_unsigned(NQNFS_PROG);
958 	nqnfs_vers = txdr_unsigned(NQNFS_VER1);
959 
960 	/*
961 	 * If an authorization string is being passed in, get it.
962 	 */
963 	if ((flag & NFSSVC_GOTAUTH) &&
964 		(nmp->nm_flag & (NFSMNT_WAITAUTH | NFSMNT_DISMNT)) == 0) {
965 		if (nmp->nm_flag & NFSMNT_HASAUTH)
966 			panic("cld kerb");
967 		if ((flag & NFSSVC_AUTHINFAIL) == 0) {
968 			if (ncd->ncd_authlen <= RPCAUTH_MAXSIZ &&
969 				copyin(ncd->ncd_authstr, nmp->nm_authstr,
970 				ncd->ncd_authlen) == 0) {
971 				nmp->nm_authtype = ncd->ncd_authtype;
972 				nmp->nm_authlen = ncd->ncd_authlen;
973 			} else
974 				nmp->nm_flag |= NFSMNT_AUTHERR;
975 		} else
976 			nmp->nm_flag |= NFSMNT_AUTHERR;
977 		nmp->nm_flag |= NFSMNT_HASAUTH;
978 		wakeup((caddr_t)&nmp->nm_authlen);
979 	} else
980 		nmp->nm_flag |= NFSMNT_WAITAUTH;
981 
982 	/*
983 	 * Loop every second updating queue until there is a termination sig.
984 	 */
985 	while ((nmp->nm_flag & NFSMNT_DISMNT) == 0) {
986 	    if (nmp->nm_flag & NFSMNT_NQNFS) {
987 		np = nmp->nm_tnext;
988 		while (np != (struct nfsnode *)nmp &&
989 		       (nmp->nm_flag & NFSMNT_DISMINPROG) == 0) {
990 			vp = NFSTOV(np);
991 if (vp->v_mount->mnt_stat.f_fsid.val[1] != MOUNT_NFS) panic("trash2");
992 			vpid = vp->v_id;
993 			if (np->n_expiry < time.tv_sec) {
994 			   if (vget(vp) == 0) {
995 			     nmp->nm_inprog = vp;
996 			     if (vpid == vp->v_id) {
997 if (vp->v_mount->mnt_stat.f_fsid.val[1] != MOUNT_NFS) panic("trash3");
998 				if (np->n_tnext == (struct nfsnode *)nmp)
999 					nmp->nm_tprev = np->n_tprev;
1000 				else
1001 					np->n_tnext->n_tprev = np->n_tprev;
1002 				if (np->n_tprev == (struct nfsnode *)nmp)
1003 					nmp->nm_tnext = np->n_tnext;
1004 				else
1005 					np->n_tprev->n_tnext = np->n_tnext;
1006 				np->n_tnext = (struct nfsnode *)0;
1007 				if ((np->n_flag & (NMODIFIED | NQNFSEVICTED))
1008 				    && vp->v_type == VREG) {
1009 					np->n_flag &= ~NMODIFIED;
1010 					if (np->n_flag & NQNFSEVICTED) {
1011 						(void) vinvalbuf(vp, TRUE,
1012 						    cred, p);
1013 						np->n_flag &= ~NQNFSEVICTED;
1014 						(void) nqnfs_vacated(vp, cred);
1015 					} else
1016 						(void) VOP_FSYNC(vp, cred,
1017 						    MNT_WAIT, p);
1018 				}
1019 			      }
1020 			      vrele(vp);
1021 			      nmp->nm_inprog = NULLVP;
1022 			    }
1023 			    if (np != nmp->nm_tnext)
1024 				np = nmp->nm_tnext;
1025 			    else
1026 				break;
1027 			} else if ((np->n_expiry - NQ_RENEWAL) < time.tv_sec) {
1028 			    if ((np->n_flag & (NQNFSWRITE | NQNFSNONCACHE))
1029 				 == NQNFSWRITE && vp->v_dirtyblkhd &&
1030 				 vget(vp) == 0) {
1031 				 nmp->nm_inprog = vp;
1032 if (vp->v_mount->mnt_stat.f_fsid.val[1] != MOUNT_NFS) panic("trash4");
1033 				 if (vpid == vp->v_id &&
1034 				     nqnfs_getlease(vp, NQL_WRITE, cred, p)==0)
1035 					np->n_brev = np->n_lrev;
1036 				 vrele(vp);
1037 				 nmp->nm_inprog = NULLVP;
1038 			    }
1039 			    if (np != nmp->nm_tnext)
1040 				np = nmp->nm_tnext;
1041 			    else
1042 				break;
1043 			} else
1044 				break;
1045 		}
1046 	    }
1047 
1048 	    /*
1049 	     * Get an authorization string, if required.
1050 	     */
1051 	    if ((nmp->nm_flag & (NFSMNT_WAITAUTH | NFSMNT_DISMNT | NFSMNT_HASAUTH)) == 0) {
1052 		ncd->ncd_authuid = nmp->nm_authuid;
1053 		if (copyout((caddr_t)ncd, argp, sizeof (*ncd)))
1054 			nmp->nm_flag |= NFSMNT_WAITAUTH;
1055 		else
1056 			return (ENEEDAUTH);
1057 	    }
1058 
1059 	    /*
1060 	     * Wait a bit (no pun) and do it again.
1061 	     */
1062 	    if ((nmp->nm_flag & NFSMNT_DISMNT) == 0 &&
1063 		(nmp->nm_flag & (NFSMNT_WAITAUTH | NFSMNT_HASAUTH))) {
1064 		    error = tsleep((caddr_t)&nmp->nm_authstr, PSOCK | PCATCH,
1065 			"nqnfstimr", hz / 3);
1066 		    if (error == EINTR || error == ERESTART)
1067 			(void) dounmount(nmp->nm_mountp, MNT_NOFORCE);
1068 	    }
1069 	}
1070 	free((caddr_t)nmp, M_NFSMNT);
1071 	if (error == EWOULDBLOCK)
1072 		error = 0;
1073 	return (error);
1074 }
1075 
1076 /*
1077  * Adjust all timer queue expiry times when the time of day clock is changed.
1078  * Called from the settimeofday() syscall.
1079  */
1080 void
1081 lease_updatetime(deltat)
1082 	register int deltat;
1083 {
1084 	register struct nqlease *lp;
1085 	register struct nfsnode *np;
1086 	struct mount *mp;
1087 	struct nfsmount *nmp;
1088 	int s;
1089 
1090 	if (nqnfsstarttime != 0)
1091 		nqnfsstarttime += deltat;
1092 	s = splsoftclock();
1093 	lp = nqthead.th_chain[0];
1094 	while (lp != (struct nqlease *)&nqthead) {
1095 		lp->lc_expiry += deltat;
1096 		lp = lp->lc_chain1[0];
1097 	}
1098 	splx(s);
1099 
1100 	/*
1101 	 * Search the mount list for all nqnfs mounts and do their timer
1102 	 * queues.
1103 	 */
1104 	mp = rootfs;
1105 	do {
1106 		if (mp->mnt_stat.f_fsid.val[1] == MOUNT_NFS) {
1107 			nmp = VFSTONFS(mp);
1108 			if (nmp->nm_flag & NFSMNT_NQNFS) {
1109 				np = nmp->nm_tnext;
1110 				while (np != (struct nfsnode *)nmp) {
1111 					np->n_expiry += deltat;
1112 					np = np->n_tnext;
1113 				}
1114 			}
1115 		}
1116 		mp = mp->mnt_next;
1117 	} while (mp != rootfs);
1118 }
1119 
1120 /*
1121  * Lock a server lease.
1122  */
1123 void
1124 nqsrv_locklease(lp)
1125 	struct nqlease *lp;
1126 {
1127 
1128 	while (lp->lc_flag & LC_LOCKED) {
1129 		lp->lc_flag |= LC_WANTED;
1130 		(void) tsleep((caddr_t)lp, PSOCK, "nqlc", 0);
1131 	}
1132 	lp->lc_flag |= LC_LOCKED;
1133 	lp->lc_flag &= ~LC_WANTED;
1134 }
1135 
1136 /*
1137  * Unlock a server lease.
1138  */
1139 void
1140 nqsrv_unlocklease(lp)
1141 	struct nqlease *lp;
1142 {
1143 
1144 	lp->lc_flag &= ~LC_LOCKED;
1145 	if (lp->lc_flag & LC_WANTED)
1146 		wakeup((caddr_t)lp);
1147 }
1148 
1149 /*
1150  * Update a client lease.
1151  */
1152 void
1153 nqnfs_clientlease(nmp, np, rwflag, cachable, expiry, frev)
1154 	register struct nfsmount *nmp;
1155 	register struct nfsnode *np;
1156 	int rwflag, cachable;
1157 	time_t expiry;
1158 	u_quad_t frev;
1159 {
1160 	register struct nfsnode *tp;
1161 
1162 	if (np->n_tnext) {
1163 		if (np->n_tnext == (struct nfsnode *)nmp)
1164 			nmp->nm_tprev = np->n_tprev;
1165 		else
1166 			np->n_tnext->n_tprev = np->n_tprev;
1167 		if (np->n_tprev == (struct nfsnode *)nmp)
1168 			nmp->nm_tnext = np->n_tnext;
1169 		else
1170 			np->n_tprev->n_tnext = np->n_tnext;
1171 		if (rwflag == NQL_WRITE)
1172 			np->n_flag |= NQNFSWRITE;
1173 	} else if (rwflag == NQL_READ)
1174 		np->n_flag &= ~NQNFSWRITE;
1175 	else
1176 		np->n_flag |= NQNFSWRITE;
1177 	if (cachable)
1178 		np->n_flag &= ~NQNFSNONCACHE;
1179 	else
1180 		np->n_flag |= NQNFSNONCACHE;
1181 	np->n_expiry = expiry;
1182 	np->n_lrev = frev;
1183 	tp = nmp->nm_tprev;
1184 	while (tp != (struct nfsnode *)nmp && tp->n_expiry > np->n_expiry)
1185 		tp = tp->n_tprev;
1186 	if (tp == (struct nfsnode *)nmp) {
1187 		np->n_tnext = nmp->nm_tnext;
1188 		nmp->nm_tnext = np;
1189 	} else {
1190 		np->n_tnext = tp->n_tnext;
1191 		tp->n_tnext = np;
1192 	}
1193 	np->n_tprev = tp;
1194 	if (np->n_tnext == (struct nfsnode *)nmp)
1195 		nmp->nm_tprev = np;
1196 	else
1197 		np->n_tnext->n_tprev = np;
1198 }
1199