xref: /openbsd-src/sys/nfs/nfs_syscalls.c (revision a28daedfc357b214be5c701aa8ba8adb29a7f1c2)
1 /*	$OpenBSD: nfs_syscalls.c,v 1.76 2009/01/28 12:02:00 bluhm Exp $	*/
2 /*	$NetBSD: nfs_syscalls.c,v 1.19 1996/02/18 11:53:52 fvdl Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Rick Macklem at The University of Guelph.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  *	@(#)nfs_syscalls.c	8.5 (Berkeley) 3/30/95
36  */
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/kernel.h>
41 #include <sys/file.h>
42 #include <sys/stat.h>
43 #include <sys/vnode.h>
44 #include <sys/mount.h>
45 #include <sys/proc.h>
46 #include <sys/uio.h>
47 #include <sys/malloc.h>
48 #include <sys/buf.h>
49 #include <sys/mbuf.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/domain.h>
53 #include <sys/protosw.h>
54 #include <sys/namei.h>
55 #include <sys/syslog.h>
56 #include <sys/filedesc.h>
57 #include <sys/signalvar.h>
58 #include <sys/kthread.h>
59 #include <sys/queue.h>
60 
61 #include <sys/syscallargs.h>
62 
63 #include <netinet/in.h>
64 #include <netinet/tcp.h>
65 #include <nfs/xdr_subs.h>
66 #include <nfs/rpcv2.h>
67 #include <nfs/nfsproto.h>
68 #include <nfs/nfs.h>
69 #include <nfs/nfsm_subs.h>
70 #include <nfs/nfsrvcache.h>
71 #include <nfs/nfsmount.h>
72 #include <nfs/nfsnode.h>
73 #include <nfs/nfsrtt.h>
74 #include <nfs/nfs_var.h>
75 
76 /* Global defs.  The extern objects are only declared in this file. */
77 extern int nfs_numasync;	/* ++/-- by nfssvc_iod() as an iod registers/exits */
78 extern int nfsrtton;		/* non-zero: nfssvc_nfsd() logs replies via nfsd_rt() */
79 extern struct nfsstats nfsstats;	/* srv_errs / srvrpccnt[] updated by nfssvc_nfsd() */
80 extern int nfsrvw_procrastinate;	/* > 0: non-v3 WRITEs go through nfsrv_writegather() */
81 extern struct timeval nfsrvw_procrastinate_tv;	/* not referenced in this file */
82 struct nfssvc_sock *nfs_udpsock;	/* allocated by nfsrv_init(); reused for UDP by nfssvc_addsock() */
83 int nfsd_waiting = 0;		/* nfsds currently asleep in nfssvc_nfsd()'s idle tsleep() */
84 
85 #ifdef NFSSERVER
/* Registered nfsds; when the last one leaves, nfssvc_nfsd() calls nfsrv_init(1). */
86 static int nfs_numnfsd = 0;
/* Circular response-time log filled in by nfsd_rt(). */
87 static struct nfsdrt nfsdrt;
/*
 * Server procedure dispatch table.  nfssvc_nfsd() indexes it with
 * nd->nd_procnum; the trailing comments give each slot's index.
 */
88 int (*nfsrv3_procs[NFS_NPROCS])(struct nfsrv_descript *,
89     struct nfssvc_sock *, struct proc *, struct mbuf **) = {
90 	nfsrv_null,		/* 0 */
91 	nfsrv_getattr,		/* 1 */
92 	nfsrv_setattr,		/* 2 */
93 	nfsrv_lookup,		/* 3 */
94 	nfsrv3_access,		/* 4 */
95 	nfsrv_readlink,		/* 5 */
96 	nfsrv_read,		/* 6 */
97 	nfsrv_write,		/* 7 */
98 	nfsrv_create,		/* 8 */
99 	nfsrv_mkdir,		/* 9 */
100 	nfsrv_symlink,		/* 10 */
101 	nfsrv_mknod,		/* 11 */
102 	nfsrv_remove,		/* 12 */
103 	nfsrv_rmdir,		/* 13 */
104 	nfsrv_rename,		/* 14 */
105 	nfsrv_link,		/* 15 */
106 	nfsrv_readdir,		/* 16 */
107 	nfsrv_readdirplus,	/* 17 */
108 	nfsrv_statfs,		/* 18 */
109 	nfsrv_fsinfo,		/* 19 */
110 	nfsrv_pathconf,		/* 20 */
111 	nfsrv_commit,		/* 21 */
112 	nfsrv_noop,		/* 22 */
113 	nfsrv_noop,		/* 23 */
114 	nfsrv_noop,		/* 24 */
115 	nfsrv_noop		/* 25 */
116 };
117 #endif
118 
/* List of server sockets; entries are added by nfssvc_addsock() and nfsrv_init(). */
119 struct nfssvc_sockhead nfssvc_sockhead;
/* List of nfsds that have entered nfssvc_nfsd(). */
120 struct nfsdhead nfsd_head;
121 
/* SLP_INIT / SLP_WANTINIT handshake bits used by sys_nfssvc() and nfsrv_init(). */
122 int nfssvc_sockhead_flag;
/* NFSD_CHECKSLP: tells nfssvc_nfsd() to scan nfssvc_sockhead for work. */
123 int nfsd_head_flag;
124 
125 #ifdef NFSCLIENT
/* One slot per running async I/O thread; a NULL slot is free. */
126 struct proc *nfs_asyncdaemon[NFS_MAXASYNCDAEMON];
/* Requested number of I/O threads; see nfs_getset_niothreads(). */
127 int nfs_niothreads = -1;
128 #endif
129 
130 #ifdef NFSSERVER
131 static void nfsd_rt(int, struct nfsrv_descript *, int);
132 #endif
133 
134 /*
135  * NFS server pseudo system call for the nfsd's
136  * Based on the flag value it either:
137  * - adds a socket to the selection list
138  * - remains in the kernel as an nfsd
139  */
140 int
141 sys_nfssvc(struct proc *p, void *v, register_t *retval)
142 {
143 	int error = 0;
144 #ifdef NFSSERVER
145 	struct sys_nfssvc_args /* {
146 		syscallarg(int) flag;
147 		syscallarg(caddr_t) argp;
148 	} */ *uap = v;
149 	int flags = SCARG(uap, flag);
150 	struct file *fp;
151 	struct mbuf *nam;
152 	struct nfsd_args nfsdarg;
153 	struct nfsd_srvargs nfsd_srvargs, *nsd = &nfsd_srvargs;
154 #endif
155 
156 	/* Must be super user */
157 	error = suser(p, 0);
158 	if (error)
159 		return (error);
160 
161 #ifndef NFSSERVER
162 	error = ENOSYS;
163 #else
164 
165 	while (nfssvc_sockhead_flag & SLP_INIT) {
166 		nfssvc_sockhead_flag |= SLP_WANTINIT;
167 		tsleep(&nfssvc_sockhead, PSOCK, "nfsd init", 0);
168 	}
169 
170 	switch (flags) {
171 	case NFSSVC_ADDSOCK:
172 		error = copyin(SCARG(uap, argp), &nfsdarg, sizeof(nfsdarg));
173 		if (error)
174 			return (error);
175 
176 		error = getsock(p->p_fd, nfsdarg.sock, &fp);
177 		if (error)
178 			return (error);
179 
180 		/*
181 		 * Get the client address for connected sockets.
182 		 */
183 		if (nfsdarg.name == NULL || nfsdarg.namelen == 0)
184 			nam = NULL;
185 		else {
186 			error = sockargs(&nam, nfsdarg.name, nfsdarg.namelen,
187 				MT_SONAME);
188 			if (error) {
189 				FRELE(fp);
190 				return (error);
191 			}
192 		}
193 		error = nfssvc_addsock(fp, nam);
194 		FRELE(fp);
195 		break;
196 	case NFSSVC_NFSD:
197 		error = copyin(SCARG(uap, argp), nsd, sizeof(*nsd));
198 		if (error)
199 			return (error);
200 
201 		error = nfssvc_nfsd(nsd, SCARG(uap, argp), p);
202 		break;
203 	default:
204 		error = EINVAL;
205 		break;
206 	}
207 
208 	if (error == EINTR || error == ERESTART)
209 		error = 0;
210 #endif	/* !NFSSERVER */
211 
212 	return (error);
213 }
214 
215 #ifdef NFSSERVER
216 /*
217  * Adds a socket to the list for servicing by nfsds.
218  */
219 int
220 nfssvc_addsock(fp, mynam)
221 	struct file *fp;
222 	struct mbuf *mynam;
223 {
224 	struct mbuf *m;
225 	int siz;
226 	struct nfssvc_sock *slp;
227 	struct socket *so;
228 	struct nfssvc_sock *tslp;
229 	int error, s;
230 
231 	so = (struct socket *)fp->f_data;
232 	tslp = (struct nfssvc_sock *)0;
233 	/*
234 	 * Add it to the list, as required.
235 	 */
236 	if (so->so_proto->pr_protocol == IPPROTO_UDP) {
237 		tslp = nfs_udpsock;
238 		if (tslp->ns_flag & SLP_VALID) {
239 			m_freem(mynam);
240 			return (EPERM);
241 		}
242 	}
243 	if (so->so_type == SOCK_STREAM)
244 		siz = NFS_MAXPACKET + sizeof (u_long);
245 	else
246 		siz = NFS_MAXPACKET;
247 	error = soreserve(so, siz, siz);
248 	if (error) {
249 		m_freem(mynam);
250 		return (error);
251 	}
252 
253 	/*
254 	 * Set protocol specific options { for now TCP only } and
255 	 * reserve some space. For datagram sockets, this can get called
256 	 * repeatedly for the same socket, but that isn't harmful.
257 	 */
258 	if (so->so_type == SOCK_STREAM) {
259 		MGET(m, M_WAIT, MT_SOOPTS);
260 		*mtod(m, int32_t *) = 1;
261 		m->m_len = sizeof(int32_t);
262 		sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
263 	}
264 	if (so->so_proto->pr_domain->dom_family == AF_INET &&
265 	    so->so_proto->pr_protocol == IPPROTO_TCP) {
266 		MGET(m, M_WAIT, MT_SOOPTS);
267 		*mtod(m, int32_t *) = 1;
268 		m->m_len = sizeof(int32_t);
269 		sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
270 	}
271 	so->so_rcv.sb_flags &= ~SB_NOINTR;
272 	so->so_rcv.sb_timeo = 0;
273 	so->so_snd.sb_flags &= ~SB_NOINTR;
274 	so->so_snd.sb_timeo = 0;
275 	if (tslp)
276 		slp = tslp;
277 	else {
278 		slp = malloc(sizeof(struct nfssvc_sock), M_NFSSVC,
279 		    M_WAITOK|M_ZERO);
280 		TAILQ_INSERT_TAIL(&nfssvc_sockhead, slp, ns_chain);
281 	}
282 	slp->ns_so = so;
283 	slp->ns_nam = mynam;
284 	fp->f_count++;
285 	slp->ns_fp = fp;
286 	s = splsoftnet();
287 	so->so_upcallarg = (caddr_t)slp;
288 	so->so_upcall = nfsrv_rcv;
289 	slp->ns_flag = (SLP_VALID | SLP_NEEDQ);
290 	nfsrv_wakenfsd(slp);
291 	splx(s);
292 	return (0);
293 }
294 
295 /*
296  * Called by nfssvc() for nfsds. Just loops around servicing rpc requests
297  * until it is killed by a signal.
298  */
299 int
300 nfssvc_nfsd(nsd, argp, p)
301 	struct nfsd_srvargs *nsd;
302 	caddr_t argp;
303 	struct proc *p;
304 {
305 	struct mbuf *m;
306 	int siz;
307 	struct nfssvc_sock *slp;
308 	struct socket *so;
309 	int *solockp;
310 	struct nfsd *nfsd = nsd->nsd_nfsd;
311 	struct nfsrv_descript *nd = NULL;
312 	struct mbuf *mreq;
313 	int error = 0, cacherep, s, sotype, writes_todo;
314 	struct timeval tv;
315 
316 	cacherep = RC_DOIT;
317 	writes_todo = 0;
318 
319 	s = splsoftnet();
320 	if (nfsd == NULL) {
321 		nsd->nsd_nfsd = nfsd = malloc(sizeof(struct nfsd), M_NFSD,
322 		    M_WAITOK|M_ZERO);
323 		nfsd->nfsd_procp = p;
324 		TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
325 		nfs_numnfsd++;
326 	}
327 	/*
328 	 * Loop getting rpc requests until SIGKILL.
329 	 */
330 	for (;;) {
331 		if ((nfsd->nfsd_flag & NFSD_REQINPROG) == 0) {
332 			while (nfsd->nfsd_slp == (struct nfssvc_sock *)0 &&
333 			    (nfsd_head_flag & NFSD_CHECKSLP) == 0) {
334 				nfsd->nfsd_flag |= NFSD_WAITING;
335 				nfsd_waiting++;
336 				error = tsleep((caddr_t)nfsd, PSOCK | PCATCH,
337 				    "nfsd", 0);
338 				nfsd_waiting--;
339 				if (error)
340 					goto done;
341 			}
342 			if (nfsd->nfsd_slp == NULL &&
343 			    (nfsd_head_flag & NFSD_CHECKSLP) != 0) {
344 				TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) {
345 				    if ((slp->ns_flag & (SLP_VALID | SLP_DOREC))
346 					== (SLP_VALID | SLP_DOREC)) {
347 					    slp->ns_flag &= ~SLP_DOREC;
348 					    slp->ns_sref++;
349 					    nfsd->nfsd_slp = slp;
350 					    break;
351 				    }
352 				}
353 				if (slp == 0)
354 					nfsd_head_flag &= ~NFSD_CHECKSLP;
355 			}
356 			if ((slp = nfsd->nfsd_slp) == (struct nfssvc_sock *)0)
357 				continue;
358 			if (slp->ns_flag & SLP_VALID) {
359 				if (slp->ns_flag & SLP_DISCONN)
360 					nfsrv_zapsock(slp);
361 				else if (slp->ns_flag & SLP_NEEDQ) {
362 					slp->ns_flag &= ~SLP_NEEDQ;
363 					(void) nfs_sndlock(&slp->ns_solock,
364 						(struct nfsreq *)0);
365 					nfsrv_rcv(slp->ns_so, (caddr_t)slp,
366 						M_WAIT);
367 					nfs_sndunlock(&slp->ns_solock);
368 				}
369 				error = nfsrv_dorec(slp, nfsd, &nd);
370 				getmicrotime(&tv);
371 				if (error && LIST_FIRST(&slp->ns_tq) &&
372 				    timercmp(&LIST_FIRST(&slp->ns_tq)->nd_time,
373 				    &tv, <=)) {
374 					error = 0;
375 					cacherep = RC_DOIT;
376 					writes_todo = 1;
377 				} else
378 					writes_todo = 0;
379 				nfsd->nfsd_flag |= NFSD_REQINPROG;
380 			}
381 		} else {
382 			error = 0;
383 			slp = nfsd->nfsd_slp;
384 		}
385 		if (error || (slp->ns_flag & SLP_VALID) == 0) {
386 			if (nd) {
387 				free((caddr_t)nd, M_NFSRVDESC);
388 				nd = NULL;
389 			}
390 			nfsd->nfsd_slp = (struct nfssvc_sock *)0;
391 			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
392 			nfsrv_slpderef(slp);
393 			continue;
394 		}
395 		splx(s);
396 		so = slp->ns_so;
397 		sotype = so->so_type;
398 		if (so->so_proto->pr_flags & PR_CONNREQUIRED)
399 			solockp = &slp->ns_solock;
400 		else
401 			solockp = (int *)0;
402 		if (nd) {
403 		    getmicrotime(&nd->nd_starttime);
404 		    if (nd->nd_nam2)
405 			nd->nd_nam = nd->nd_nam2;
406 		    else
407 			nd->nd_nam = slp->ns_nam;
408 
409 		    cacherep = nfsrv_getcache(nd, slp, &mreq);
410 		}
411 
412 		/*
413 		 * Loop to get all the write rpc relies that have been
414 		 * gathered together.
415 		 */
416 		do {
417 		    switch (cacherep) {
418 		    case RC_DOIT:
419 			if (writes_todo || (!(nd->nd_flag & ND_NFSV3) &&
420 			    nd->nd_procnum == NFSPROC_WRITE &&
421 			    nfsrvw_procrastinate > 0))
422 				error = nfsrv_writegather(&nd, slp,
423 				    nfsd->nfsd_procp, &mreq);
424 			else
425 				error = (*(nfsrv3_procs[nd->nd_procnum]))(nd,
426 				    slp, nfsd->nfsd_procp, &mreq);
427 			if (mreq == NULL) {
428 				if (nd != NULL) {
429 					m_freem(nd->nd_nam2);
430 					m_freem(nd->nd_mrep);
431 				}
432 				break;
433 			}
434 			if (error) {
435 				nfsstats.srv_errs++;
436 				nfsrv_updatecache(nd, 0, mreq);
437 				if (nd->nd_nam2)
438 					m_freem(nd->nd_nam2);
439 				break;
440 			}
441 			nfsstats.srvrpccnt[nd->nd_procnum]++;
442 			nfsrv_updatecache(nd, 1, mreq);
443 			nd->nd_mrep = (struct mbuf *)0;
444 
445 			/* FALLTHROUGH */
446 		    case RC_REPLY:
447 			m = mreq;
448 			siz = 0;
449 			while (m) {
450 				siz += m->m_len;
451 				m = m->m_next;
452 			}
453 			if (siz <= 0 || siz > NFS_MAXPACKET) {
454 				printf("mbuf siz=%d\n",siz);
455 				panic("Bad nfs svc reply");
456 			}
457 			m = mreq;
458 			m->m_pkthdr.len = siz;
459 			m->m_pkthdr.rcvif = (struct ifnet *)0;
460 			/*
461 			 * For stream protocols, prepend a Sun RPC
462 			 * Record Mark.
463 			 */
464 			if (sotype == SOCK_STREAM) {
465 				M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
466 				*mtod(m, u_int32_t *) = htonl(0x80000000 | siz);
467 			}
468 			if (solockp)
469 				(void) nfs_sndlock(solockp, (struct nfsreq *)0);
470 			if (slp->ns_flag & SLP_VALID)
471 			    error = nfs_send(so, nd->nd_nam2, m, NULL);
472 			else {
473 			    error = EPIPE;
474 			    m_freem(m);
475 			}
476 			if (nfsrtton)
477 				nfsd_rt(sotype, nd, cacherep);
478 			if (nd->nd_nam2)
479 				m_freem(nd->nd_nam2);
480 			if (nd->nd_mrep)
481 				m_freem(nd->nd_mrep);
482 			if (error == EPIPE)
483 				nfsrv_zapsock(slp);
484 			if (solockp)
485 				nfs_sndunlock(solockp);
486 			if (error == EINTR || error == ERESTART) {
487 				free((caddr_t)nd, M_NFSRVDESC);
488 				nfsrv_slpderef(slp);
489 				s = splsoftnet();
490 				goto done;
491 			}
492 			break;
493 		    case RC_DROPIT:
494 			if (nfsrtton)
495 				nfsd_rt(sotype, nd, cacherep);
496 			m_freem(nd->nd_mrep);
497 			m_freem(nd->nd_nam2);
498 			break;
499 		    };
500 		    if (nd) {
501 		    	free(nd, M_NFSRVDESC);
502 			nd = NULL;
503 		    }
504 
505 		    /*
506 		     * Check to see if there are outstanding writes that
507 		     * need to be serviced.
508 		     */
509 		    getmicrotime(&tv);
510 		    s = splsoftclock();
511 		    if (LIST_FIRST(&slp->ns_tq) &&
512 			timercmp(&LIST_FIRST(&slp->ns_tq)->nd_time, &tv, <=)) {
513 			cacherep = RC_DOIT;
514 			writes_todo = 1;
515 		    } else
516 			writes_todo = 0;
517 		    splx(s);
518 		} while (writes_todo);
519 		s = splsoftnet();
520 		if (nfsrv_dorec(slp, nfsd, &nd)) {
521 			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
522 			nfsd->nfsd_slp = NULL;
523 			nfsrv_slpderef(slp);
524 		}
525 	}
526 done:
527 	TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
528 	splx(s);
529 	free((caddr_t)nfsd, M_NFSD);
530 	nsd->nsd_nfsd = (struct nfsd *)0;
531 	if (--nfs_numnfsd == 0)
532 		nfsrv_init(1);	/* Reinitialize everything */
533 	return (error);
534 }
535 
536 /*
537  * Shut down a socket associated with an nfssvc_sock structure.
538  * Should be called with the send lock set, if required.
539  * The trick here is to increment the sref at the start, so that the nfsds
540  * will stop using it and clear ns_flag at the end so that it will not be
541  * reassigned during cleanup.
542  */
543 void
544 nfsrv_zapsock(slp)
545 	struct nfssvc_sock *slp;
546 {
547 	struct nfsrv_descript *nwp, *nnwp;
548 	struct socket *so;
549 	struct file *fp;
550 	struct mbuf *m, *n;
551 	int s;
552 
553 	slp->ns_flag &= ~SLP_ALLFLAGS;
554 	fp = slp->ns_fp;
555 	if (fp) {
556 		FREF(fp);
557 		slp->ns_fp = NULL;
558 		so = slp->ns_so;
559 		so->so_upcall = NULL;
560 		soshutdown(so, SHUT_RDWR);
561 		closef(fp, NULL);
562 		if (slp->ns_nam)
563 			MFREE(slp->ns_nam, m);
564 		m_freem(slp->ns_raw);
565 		m = slp->ns_rec;
566 		while (m) {
567 			n = m->m_nextpkt;
568 			m_freem(m);
569 			m = n;
570 		}
571 		s = splsoftclock();
572 		for (nwp = LIST_FIRST(&slp->ns_tq); nwp != NULL; nwp = nnwp) {
573 			nnwp = LIST_NEXT(nwp, nd_tq);
574 			LIST_REMOVE(nwp, nd_tq);
575 			free((caddr_t)nwp, M_NFSRVDESC);
576 		}
577 		LIST_INIT(&slp->ns_tq);
578 		splx(s);
579 	}
580 }
581 
582 /*
583  * Derefence a server socket structure. If it has no more references and
584  * is no longer valid, you can throw it away.
585  */
586 void
587 nfsrv_slpderef(slp)
588 	struct nfssvc_sock *slp;
589 {
590 	if (--(slp->ns_sref) == 0 && (slp->ns_flag & SLP_VALID) == 0) {
591 		TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain);
592 		free((caddr_t)slp, M_NFSSVC);
593 	}
594 }
595 
596 /*
597  * Initialize the data structures for the server.
598  * Handshake with any new nfsds starting up to avoid any chance of
599  * corruption.
600  */
601 void
602 nfsrv_init(terminating)
603 	int terminating;
604 {
605 	struct nfssvc_sock *slp, *nslp;
606 
607 	if (nfssvc_sockhead_flag & SLP_INIT)
608 		panic("nfsd init");
609 	nfssvc_sockhead_flag |= SLP_INIT;
610 	if (terminating) {
611 		for (slp = TAILQ_FIRST(&nfssvc_sockhead); slp != NULL;
612 		    slp = nslp) {
613 			nslp = TAILQ_NEXT(slp, ns_chain);
614 			if (slp->ns_flag & SLP_VALID)
615 				nfsrv_zapsock(slp);
616 			TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain);
617 			free((caddr_t)slp, M_NFSSVC);
618 		}
619 		nfsrv_cleancache();	/* And clear out server cache */
620 	}
621 
622 	TAILQ_INIT(&nfssvc_sockhead);
623 	nfssvc_sockhead_flag &= ~SLP_INIT;
624 	if (nfssvc_sockhead_flag & SLP_WANTINIT) {
625 		nfssvc_sockhead_flag &= ~SLP_WANTINIT;
626 		wakeup((caddr_t)&nfssvc_sockhead);
627 	}
628 
629 	TAILQ_INIT(&nfsd_head);
630 	nfsd_head_flag &= ~NFSD_CHECKSLP;
631 
632 	nfs_udpsock =  malloc(sizeof(struct nfssvc_sock), M_NFSSVC,
633 	    M_WAITOK|M_ZERO);
634 	TAILQ_INSERT_HEAD(&nfssvc_sockhead, nfs_udpsock, ns_chain);
635 }
636 
637 /*
638  * Add entries to the server monitor log.
639  */
640 static void
641 nfsd_rt(sotype, nd, cacherep)
642 	int sotype;
643 	struct nfsrv_descript *nd;
644 	int cacherep;
645 {
646 	struct drt *rt;
647 
648 	rt = &nfsdrt.drt[nfsdrt.pos];
649 	if (cacherep == RC_DOIT)
650 		rt->flag = 0;
651 	else if (cacherep == RC_REPLY)
652 		rt->flag = DRT_CACHEREPLY;
653 	else
654 		rt->flag = DRT_CACHEDROP;
655 	if (sotype == SOCK_STREAM)
656 		rt->flag |= DRT_TCP;
657 	else if (nd->nd_flag & ND_NFSV3)
658 		rt->flag |= DRT_NFSV3;
659 	rt->proc = nd->nd_procnum;
660 	if (mtod(nd->nd_nam, struct sockaddr *)->sa_family == AF_INET)
661 		rt->ipadr = mtod(nd->nd_nam, struct sockaddr_in *)->sin_addr.s_addr;
662 	else
663 		rt->ipadr = INADDR_ANY;
664 	getmicrotime(&rt->tstamp);
665 	rt->resptime =
666 	    ((rt->tstamp.tv_sec - nd->nd_starttime.tv_sec) * 1000000) +
667 		(rt->tstamp.tv_usec - nd->nd_starttime.tv_usec);
668 	nfsdrt.pos = (nfsdrt.pos + 1) % NFSRTTLOGSIZ;
669 }
670 #endif /* NFSSERVER */
671 
672 #ifdef NFSCLIENT
673 /*
674  * Asynchronous I/O threads for client nfs.
675  * They do read-ahead and write-behind operations on the block I/O cache.
676  * Never returns unless it fails or gets killed.
677  */
678 void
679 nfssvc_iod(void *arg)
680 {
681 	struct proc *p = (struct proc *)arg;
682 	struct buf *bp, *nbp;
683 	int i, myiod;
684 	struct vnode *vp;
685 	int error = 0, s, bufcount;
686 
687 	bufcount = 256;	/* XXX: Big enough? sysctl, constant ? */
688 
689 	/* Assign my position or return error if too many already running. */
690 	myiod = -1;
691 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++) {
692 		if (nfs_asyncdaemon[i] == NULL) {
693 			myiod = i;
694 			break;
695 		}
696 	}
697 	if (myiod == -1)
698 		kthread_exit(EBUSY);
699 
700 	nfs_asyncdaemon[myiod] = p;
701 	nfs_numasync++;
702 
703 	/* Upper limit on how many bufs we'll queue up for this iod. */
704 	if (nfs_bufqmax > bcstats.numbufs / 4) {
705 		nfs_bufqmax = bcstats.numbufs / 4; /* limit to 1/4 of bufs */
706 		bufcount = 0;
707 	}
708 
709 	nfs_bufqmax += bufcount;
710 
711 	/* Just loop around doin our stuff until SIGKILL. */
712 	for (;;) {
713 	    while (TAILQ_FIRST(&nfs_bufq) == NULL && error == 0) {
714 		    error = tsleep(&nfs_bufq,
715 			PWAIT | PCATCH, "nfsidl", 0);
716 	    }
717 	    while ((bp = TAILQ_FIRST(&nfs_bufq)) != NULL) {
718 		/* Take one off the front of the list */
719 		TAILQ_REMOVE(&nfs_bufq, bp, b_freelist);
720 		nfs_bufqlen--;
721 		if (bp->b_flags & B_READ)
722 		    (void) nfs_doio(bp, NULL);
723 		else do {
724 		    /*
725 		     * Look for a delayed write for the same vnode, so I can do
726 		     * it now. We must grab it before calling nfs_doio() to
727 		     * avoid any risk of the vnode getting vclean()'d while
728 		     * we are doing the write rpc.
729 		     */
730 		    vp = bp->b_vp;
731 		    s = splbio();
732 		    LIST_FOREACH(nbp, &vp->v_dirtyblkhd, b_vnbufs) {
733 			if ((nbp->b_flags &
734 			    (B_BUSY|B_DELWRI|B_NEEDCOMMIT|B_NOCACHE))!=B_DELWRI)
735 			    continue;
736 			bremfree(nbp);
737 			nbp->b_flags |= B_ASYNC;
738 			buf_acquire(nbp);
739 			break;
740 		    }
741 		    /*
742 		     * For the delayed write, do the first part of nfs_bwrite()
743 		     * up to, but not including nfs_strategy().
744 		     */
745 		    if (nbp) {
746 			nbp->b_flags &= ~(B_READ|B_DONE|B_ERROR);
747 			buf_undirty(nbp);
748 			nbp->b_vp->v_numoutput++;
749 		    }
750 		    splx(s);
751 
752 		    (void) nfs_doio(bp, NULL);
753 		} while ((bp = nbp) != NULL);
754 	    }
755 	    if (error) {
756 		nfs_asyncdaemon[myiod] = NULL;
757 		nfs_numasync--;
758 		nfs_bufqmax -= bufcount;
759 		kthread_exit(error);
760 	    }
761 	}
762 }
763 
764 void
765 nfs_getset_niothreads(set)
766 	int set;
767 {
768 	struct proc *p;
769 	int i, have, start;
770 
771 	for (have = 0, i = 0; i < NFS_MAXASYNCDAEMON; i++)
772 		if (nfs_asyncdaemon[i] != NULL)
773 			have++;
774 
775 	if (set) {
776 		/* clamp to sane range */
777 		nfs_niothreads = max(0, min(nfs_niothreads, NFS_MAXASYNCDAEMON));
778 
779 		start = nfs_niothreads - have;
780 
781 		while (start > 0) {
782 			kthread_create(nfssvc_iod, p, &p, "nfsio");
783 			start--;
784 		}
785 
786 		for (i = 0; (start < 0) && (i < NFS_MAXASYNCDAEMON); i++)
787 			if (nfs_asyncdaemon[i] != NULL) {
788 				psignal(nfs_asyncdaemon[i], SIGKILL);
789 				start++;
790 			}
791 	} else {
792 		if (nfs_niothreads >= 0)
793 			nfs_niothreads = have;
794 	}
795 }
796 #endif /* NFSCLIENT */
797