19ec7b004SRick Macklem /*-
251369649SPedro F. Giffuni * SPDX-License-Identifier: BSD-3-Clause
351369649SPedro F. Giffuni *
49ec7b004SRick Macklem * Copyright (c) 1989, 1993
59ec7b004SRick Macklem * The Regents of the University of California. All rights reserved.
69ec7b004SRick Macklem *
79ec7b004SRick Macklem * This code is derived from software contributed to Berkeley by
89ec7b004SRick Macklem * Rick Macklem at The University of Guelph.
99ec7b004SRick Macklem *
109ec7b004SRick Macklem * Redistribution and use in source and binary forms, with or without
119ec7b004SRick Macklem * modification, are permitted provided that the following conditions
129ec7b004SRick Macklem * are met:
139ec7b004SRick Macklem * 1. Redistributions of source code must retain the above copyright
149ec7b004SRick Macklem * notice, this list of conditions and the following disclaimer.
159ec7b004SRick Macklem * 2. Redistributions in binary form must reproduce the above copyright
169ec7b004SRick Macklem * notice, this list of conditions and the following disclaimer in the
179ec7b004SRick Macklem * documentation and/or other materials provided with the distribution.
18fbbd9655SWarner Losh * 3. Neither the name of the University nor the names of its contributors
199ec7b004SRick Macklem * may be used to endorse or promote products derived from this software
209ec7b004SRick Macklem * without specific prior written permission.
219ec7b004SRick Macklem *
229ec7b004SRick Macklem * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
239ec7b004SRick Macklem * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
249ec7b004SRick Macklem * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
259ec7b004SRick Macklem * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
269ec7b004SRick Macklem * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
279ec7b004SRick Macklem * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
289ec7b004SRick Macklem * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
299ec7b004SRick Macklem * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
309ec7b004SRick Macklem * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
319ec7b004SRick Macklem * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
329ec7b004SRick Macklem * SUCH DAMAGE.
339ec7b004SRick Macklem *
349ec7b004SRick Macklem * from nfs_syscalls.c 8.5 (Berkeley) 3/30/95
359ec7b004SRick Macklem */
369ec7b004SRick Macklem
379ec7b004SRick Macklem #include <sys/param.h>
389ec7b004SRick Macklem #include <sys/systm.h>
399ec7b004SRick Macklem #include <sys/sysproto.h>
409ec7b004SRick Macklem #include <sys/kernel.h>
419ec7b004SRick Macklem #include <sys/sysctl.h>
429ec7b004SRick Macklem #include <sys/file.h>
439ec7b004SRick Macklem #include <sys/vnode.h>
449ec7b004SRick Macklem #include <sys/malloc.h>
459ec7b004SRick Macklem #include <sys/mount.h>
469ec7b004SRick Macklem #include <sys/proc.h>
479ec7b004SRick Macklem #include <sys/bio.h>
489ec7b004SRick Macklem #include <sys/buf.h>
499ec7b004SRick Macklem #include <sys/mbuf.h>
509ec7b004SRick Macklem #include <sys/socket.h>
519ec7b004SRick Macklem #include <sys/socketvar.h>
529ec7b004SRick Macklem #include <sys/domain.h>
539ec7b004SRick Macklem #include <sys/protosw.h>
549ec7b004SRick Macklem #include <sys/namei.h>
559ec7b004SRick Macklem #include <sys/unistd.h>
569ec7b004SRick Macklem #include <sys/kthread.h>
579ec7b004SRick Macklem #include <sys/fcntl.h>
589ec7b004SRick Macklem #include <sys/lockf.h>
599ec7b004SRick Macklem #include <sys/mutex.h>
607b8c319bSRick Macklem #include <sys/taskqueue.h>
619ec7b004SRick Macklem
629ec7b004SRick Macklem #include <netinet/in.h>
639ec7b004SRick Macklem #include <netinet/tcp.h>
649ec7b004SRick Macklem
659ec7b004SRick Macklem #include <fs/nfs/nfsport.h>
669ec7b004SRick Macklem #include <fs/nfsclient/nfsmount.h>
679ec7b004SRick Macklem #include <fs/nfsclient/nfs.h>
689ec7b004SRick Macklem #include <fs/nfsclient/nfsnode.h>
699ec7b004SRick Macklem
709ec7b004SRick Macklem extern struct mtx ncl_iod_mutex;
717b8c319bSRick Macklem extern struct task ncl_nfsiodnew_task;
729ec7b004SRick Macklem
739ec7b004SRick Macklem int ncl_numasync;
747b8c319bSRick Macklem enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
757b8c319bSRick Macklem struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
769ec7b004SRick Macklem
779ec7b004SRick Macklem static void nfssvc_iod(void *);
789ec7b004SRick Macklem
797b8c319bSRick Macklem static int nfs_asyncdaemon[NFS_MAXASYNCDAEMON];
809ec7b004SRick Macklem
811f376590SRick Macklem SYSCTL_DECL(_vfs_nfs);
829ec7b004SRick Macklem
839ec7b004SRick Macklem /* Maximum number of seconds a nfsiod kthread will sleep before exiting */
847b8c319bSRick Macklem static unsigned int nfs_iodmaxidle = 120;
851f376590SRick Macklem SYSCTL_UINT(_vfs_nfs, OID_AUTO, iodmaxidle, CTLFLAG_RW, &nfs_iodmaxidle, 0,
867b8c319bSRick Macklem "Max number of seconds an nfsiod kthread will sleep before exiting");
879ec7b004SRick Macklem
889ec7b004SRick Macklem /* Maximum number of nfsiod kthreads */
897b8c319bSRick Macklem unsigned int ncl_iodmax = 20;
909ec7b004SRick Macklem
919ec7b004SRick Macklem /* Minimum number of nfsiod kthreads to keep as spares */
929ec7b004SRick Macklem static unsigned int nfs_iodmin = 0;
939ec7b004SRick Macklem
947b8c319bSRick Macklem static int nfs_nfsiodnew_sync(void);
957b8c319bSRick Macklem
969ec7b004SRick Macklem static int
sysctl_iodmin(SYSCTL_HANDLER_ARGS)979ec7b004SRick Macklem sysctl_iodmin(SYSCTL_HANDLER_ARGS)
989ec7b004SRick Macklem {
999ec7b004SRick Macklem int error, i;
1009ec7b004SRick Macklem int newmin;
1019ec7b004SRick Macklem
1029ec7b004SRick Macklem newmin = nfs_iodmin;
1039ec7b004SRick Macklem error = sysctl_handle_int(oidp, &newmin, 0, req);
1049ec7b004SRick Macklem if (error || (req->newptr == NULL))
1059ec7b004SRick Macklem return (error);
106b662b41eSRick Macklem NFSLOCKIOD();
1079ec7b004SRick Macklem if (newmin > ncl_iodmax) {
1089ec7b004SRick Macklem error = EINVAL;
1099ec7b004SRick Macklem goto out;
1109ec7b004SRick Macklem }
1119ec7b004SRick Macklem nfs_iodmin = newmin;
1129ec7b004SRick Macklem if (ncl_numasync >= nfs_iodmin)
1139ec7b004SRick Macklem goto out;
1149ec7b004SRick Macklem /*
1159ec7b004SRick Macklem * If the current number of nfsiod is lower
1169ec7b004SRick Macklem * than the new minimum, create some more.
1179ec7b004SRick Macklem */
1189ec7b004SRick Macklem for (i = nfs_iodmin - ncl_numasync; i > 0; i--)
1197b8c319bSRick Macklem nfs_nfsiodnew_sync();
1209ec7b004SRick Macklem out:
121b662b41eSRick Macklem NFSUNLOCKIOD();
1229ec7b004SRick Macklem return (0);
1239ec7b004SRick Macklem }
1247029da5cSPawel Biernacki SYSCTL_PROC(_vfs_nfs, OID_AUTO, iodmin,
12595c01e9bSZhenlei Huang CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE,
12695c01e9bSZhenlei Huang 0, sizeof (nfs_iodmin), sysctl_iodmin, "IU",
1277b8c319bSRick Macklem "Min number of nfsiod kthreads to keep as spares");
1289ec7b004SRick Macklem
1299ec7b004SRick Macklem static int
sysctl_iodmax(SYSCTL_HANDLER_ARGS)1309ec7b004SRick Macklem sysctl_iodmax(SYSCTL_HANDLER_ARGS)
1319ec7b004SRick Macklem {
1329ec7b004SRick Macklem int error, i;
1339ec7b004SRick Macklem int iod, newmax;
1349ec7b004SRick Macklem
1359ec7b004SRick Macklem newmax = ncl_iodmax;
1369ec7b004SRick Macklem error = sysctl_handle_int(oidp, &newmax, 0, req);
1379ec7b004SRick Macklem if (error || (req->newptr == NULL))
1389ec7b004SRick Macklem return (error);
1397b8c319bSRick Macklem if (newmax > NFS_MAXASYNCDAEMON)
1409ec7b004SRick Macklem return (EINVAL);
141b662b41eSRick Macklem NFSLOCKIOD();
1429ec7b004SRick Macklem ncl_iodmax = newmax;
1439ec7b004SRick Macklem if (ncl_numasync <= ncl_iodmax)
1449ec7b004SRick Macklem goto out;
1459ec7b004SRick Macklem /*
1469ec7b004SRick Macklem * If there are some asleep nfsiods that should
1479ec7b004SRick Macklem * exit, wakeup() them so that they check ncl_iodmax
1489ec7b004SRick Macklem * and exit. Those who are active will exit as
1499ec7b004SRick Macklem * soon as they finish I/O.
1509ec7b004SRick Macklem */
1519ec7b004SRick Macklem iod = ncl_numasync - 1;
1529ec7b004SRick Macklem for (i = 0; i < ncl_numasync - ncl_iodmax; i++) {
15380169e41SRick Macklem if (ncl_iodwant[iod] == NFSIOD_AVAILABLE)
1549ec7b004SRick Macklem wakeup(&ncl_iodwant[iod]);
1559ec7b004SRick Macklem iod--;
1569ec7b004SRick Macklem }
1579ec7b004SRick Macklem out:
158b662b41eSRick Macklem NFSUNLOCKIOD();
1599ec7b004SRick Macklem return (0);
1609ec7b004SRick Macklem }
1617029da5cSPawel Biernacki SYSCTL_PROC(_vfs_nfs, OID_AUTO, iodmax,
1627029da5cSPawel Biernacki CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, sizeof (ncl_iodmax),
1637029da5cSPawel Biernacki sysctl_iodmax, "IU",
1647b8c319bSRick Macklem "Max number of nfsiod kthreads");
1659ec7b004SRick Macklem
1667b8c319bSRick Macklem static int
nfs_nfsiodnew_sync(void)1677b8c319bSRick Macklem nfs_nfsiodnew_sync(void)
1689ec7b004SRick Macklem {
1699ec7b004SRick Macklem int error, i;
1709ec7b004SRick Macklem
171ee7201a7SRick Macklem NFSASSERTIOD();
1727b8c319bSRick Macklem for (i = 0; i < ncl_iodmax; i++) {
1739ec7b004SRick Macklem if (nfs_asyncdaemon[i] == 0) {
1747b8c319bSRick Macklem nfs_asyncdaemon[i] = 1;
1759ec7b004SRick Macklem break;
1769ec7b004SRick Macklem }
17780169e41SRick Macklem }
1787b8c319bSRick Macklem if (i == ncl_iodmax)
1797b8c319bSRick Macklem return (0);
180b662b41eSRick Macklem NFSUNLOCKIOD();
1817b8c319bSRick Macklem error = kproc_create(nfssvc_iod, nfs_asyncdaemon + i, NULL,
1827b8c319bSRick Macklem RFHIGHPID, 0, "newnfs %d", i);
183b662b41eSRick Macklem NFSLOCKIOD();
1847b8c319bSRick Macklem if (error == 0) {
1859ec7b004SRick Macklem ncl_numasync++;
1867b8c319bSRick Macklem ncl_iodwant[i] = NFSIOD_AVAILABLE;
1877b8c319bSRick Macklem } else
1887b8c319bSRick Macklem nfs_asyncdaemon[i] = 0;
1897b8c319bSRick Macklem return (error);
1907b8c319bSRick Macklem }
1917b8c319bSRick Macklem
1927b8c319bSRick Macklem void
ncl_nfsiodnew_tq(__unused void * arg,int pending)1937b8c319bSRick Macklem ncl_nfsiodnew_tq(__unused void *arg, int pending)
1947b8c319bSRick Macklem {
1957b8c319bSRick Macklem
196b662b41eSRick Macklem NFSLOCKIOD();
1977b8c319bSRick Macklem while (pending > 0) {
1987b8c319bSRick Macklem pending--;
1997b8c319bSRick Macklem nfs_nfsiodnew_sync();
2007b8c319bSRick Macklem }
201b662b41eSRick Macklem NFSUNLOCKIOD();
2027b8c319bSRick Macklem }
2037b8c319bSRick Macklem
2047b8c319bSRick Macklem void
ncl_nfsiodnew(void)2057b8c319bSRick Macklem ncl_nfsiodnew(void)
2067b8c319bSRick Macklem {
2077b8c319bSRick Macklem
208ee7201a7SRick Macklem NFSASSERTIOD();
2097b8c319bSRick Macklem taskqueue_enqueue(taskqueue_thread, &ncl_nfsiodnew_task);
2109ec7b004SRick Macklem }
2119ec7b004SRick Macklem
2129ec7b004SRick Macklem static void
nfsiod_setup(void * dummy)2139ec7b004SRick Macklem nfsiod_setup(void *dummy)
2149ec7b004SRick Macklem {
2159ec7b004SRick Macklem int error;
2169ec7b004SRick Macklem
2171f376590SRick Macklem TUNABLE_INT_FETCH("vfs.nfs.iodmin", &nfs_iodmin);
2189ec7b004SRick Macklem nfscl_init();
219b662b41eSRick Macklem NFSLOCKIOD();
2209ec7b004SRick Macklem /* Silently limit the start number of nfsiod's */
2217b8c319bSRick Macklem if (nfs_iodmin > NFS_MAXASYNCDAEMON)
2227b8c319bSRick Macklem nfs_iodmin = NFS_MAXASYNCDAEMON;
2239ec7b004SRick Macklem
2247b8c319bSRick Macklem while (ncl_numasync < nfs_iodmin) {
2257b8c319bSRick Macklem error = nfs_nfsiodnew_sync();
2269ec7b004SRick Macklem if (error == -1)
2277b8c319bSRick Macklem panic("nfsiod_setup: nfs_nfsiodnew failed");
2289ec7b004SRick Macklem }
229b662b41eSRick Macklem NFSUNLOCKIOD();
2309ec7b004SRick Macklem }
2319ec7b004SRick Macklem SYSINIT(newnfsiod, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, nfsiod_setup, NULL);
2329ec7b004SRick Macklem
2339ec7b004SRick Macklem static int nfs_defect = 0;
2341f376590SRick Macklem SYSCTL_INT(_vfs_nfs, OID_AUTO, defect, CTLFLAG_RW, &nfs_defect, 0,
2357b8c319bSRick Macklem "Allow nfsiods to migrate serving different mounts");
2369ec7b004SRick Macklem
2379ec7b004SRick Macklem /*
2389ec7b004SRick Macklem * Asynchronous I/O daemons for client nfs.
2399ec7b004SRick Macklem * They do read-ahead and write-behind operations on the block I/O cache.
2409ec7b004SRick Macklem * Returns if we hit the timeout defined by the iodmaxidle sysctl.
2419ec7b004SRick Macklem */
2429ec7b004SRick Macklem static void
nfssvc_iod(void * instance)2439ec7b004SRick Macklem nfssvc_iod(void *instance)
2449ec7b004SRick Macklem {
2459ec7b004SRick Macklem struct buf *bp;
2469ec7b004SRick Macklem struct nfsmount *nmp;
2479ec7b004SRick Macklem int myiod, timo;
2489ec7b004SRick Macklem int error = 0;
2499ec7b004SRick Macklem
250b662b41eSRick Macklem NFSLOCKIOD();
2519ec7b004SRick Macklem myiod = (int *)instance - nfs_asyncdaemon;
2529ec7b004SRick Macklem /*
2539ec7b004SRick Macklem * Main loop
2549ec7b004SRick Macklem */
2559ec7b004SRick Macklem for (;;) {
2569ec7b004SRick Macklem while (((nmp = ncl_iodmount[myiod]) == NULL)
2579ec7b004SRick Macklem || !TAILQ_FIRST(&nmp->nm_bufq)) {
2589ec7b004SRick Macklem if (myiod >= ncl_iodmax)
2599ec7b004SRick Macklem goto finish;
2609ec7b004SRick Macklem if (nmp)
2619ec7b004SRick Macklem nmp->nm_bufqiods--;
26280169e41SRick Macklem if (ncl_iodwant[myiod] == NFSIOD_NOT_AVAILABLE)
26380169e41SRick Macklem ncl_iodwant[myiod] = NFSIOD_AVAILABLE;
2649ec7b004SRick Macklem ncl_iodmount[myiod] = NULL;
2659ec7b004SRick Macklem /*
2669ec7b004SRick Macklem * Always keep at least nfs_iodmin kthreads.
2679ec7b004SRick Macklem */
2687b8c319bSRick Macklem timo = (myiod < nfs_iodmin) ? 0 : nfs_iodmaxidle * hz;
2699ec7b004SRick Macklem error = msleep(&ncl_iodwant[myiod], &ncl_iod_mutex, PWAIT | PCATCH,
2709ec7b004SRick Macklem "-", timo);
2719ec7b004SRick Macklem if (error) {
2729ec7b004SRick Macklem nmp = ncl_iodmount[myiod];
2739ec7b004SRick Macklem /*
2749ec7b004SRick Macklem * Rechecking the nm_bufq closes a rare race where the
2759ec7b004SRick Macklem * nfsiod is woken up at the exact time the idle timeout
2769ec7b004SRick Macklem * fires
2779ec7b004SRick Macklem */
2789ec7b004SRick Macklem if (nmp && TAILQ_FIRST(&nmp->nm_bufq))
2799ec7b004SRick Macklem error = 0;
2809ec7b004SRick Macklem break;
2819ec7b004SRick Macklem }
2829ec7b004SRick Macklem }
2839ec7b004SRick Macklem if (error)
2849ec7b004SRick Macklem break;
2859ec7b004SRick Macklem while ((bp = TAILQ_FIRST(&nmp->nm_bufq)) != NULL) {
2869ec7b004SRick Macklem /* Take one off the front of the list */
2879ec7b004SRick Macklem TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist);
2889ec7b004SRick Macklem nmp->nm_bufqlen--;
2899ec7b004SRick Macklem if (nmp->nm_bufqwant && nmp->nm_bufqlen <= ncl_numasync) {
2909ec7b004SRick Macklem nmp->nm_bufqwant = 0;
2919ec7b004SRick Macklem wakeup(&nmp->nm_bufq);
2929ec7b004SRick Macklem }
293b662b41eSRick Macklem NFSUNLOCKIOD();
294*03a39a17SRick Macklem KASSERT((bp->b_flags & B_DIRECT) == 0,
295*03a39a17SRick Macklem ("nfssvc_iod: B_DIRECT set"));
2969ec7b004SRick Macklem if (bp->b_iocmd == BIO_READ)
29767c5c2d2SRick Macklem (void) ncl_doio(bp->b_vp, bp, bp->b_rcred,
29867c5c2d2SRick Macklem NULL, 0);
2999ec7b004SRick Macklem else
30067c5c2d2SRick Macklem (void) ncl_doio(bp->b_vp, bp, bp->b_wcred,
30167c5c2d2SRick Macklem NULL, 0);
302b662b41eSRick Macklem NFSLOCKIOD();
3039ec7b004SRick Macklem /*
30464a0e848SRick Macklem * Make sure the nmp hasn't been dismounted as soon as
30564a0e848SRick Macklem * ncl_doio() completes for the last buffer.
30664a0e848SRick Macklem */
30764a0e848SRick Macklem nmp = ncl_iodmount[myiod];
30864a0e848SRick Macklem if (nmp == NULL)
30964a0e848SRick Macklem break;
31064a0e848SRick Macklem
31164a0e848SRick Macklem /*
3129ec7b004SRick Macklem * If there are more than one iod on this mount, then defect
3139ec7b004SRick Macklem * so that the iods can be shared out fairly between the mounts
3149ec7b004SRick Macklem */
3159ec7b004SRick Macklem if (nfs_defect && nmp->nm_bufqiods > 1) {
3169ec7b004SRick Macklem NFS_DPF(ASYNCIO,
3179ec7b004SRick Macklem ("nfssvc_iod: iod %d defecting from mount %p\n",
3189ec7b004SRick Macklem myiod, nmp));
3199ec7b004SRick Macklem ncl_iodmount[myiod] = NULL;
3209ec7b004SRick Macklem nmp->nm_bufqiods--;
3219ec7b004SRick Macklem break;
3229ec7b004SRick Macklem }
3239ec7b004SRick Macklem }
3249ec7b004SRick Macklem }
3259ec7b004SRick Macklem finish:
3269ec7b004SRick Macklem nfs_asyncdaemon[myiod] = 0;
3279ec7b004SRick Macklem if (nmp)
3289ec7b004SRick Macklem nmp->nm_bufqiods--;
32980169e41SRick Macklem ncl_iodwant[myiod] = NFSIOD_NOT_AVAILABLE;
3309ec7b004SRick Macklem ncl_iodmount[myiod] = NULL;
3319ec7b004SRick Macklem /* Someone may be waiting for the last nfsiod to terminate. */
3329ec7b004SRick Macklem if (--ncl_numasync == 0)
3339ec7b004SRick Macklem wakeup(&ncl_numasync);
334b662b41eSRick Macklem NFSUNLOCKIOD();
3359ec7b004SRick Macklem if ((error == 0) || (error == EWOULDBLOCK))
3369ec7b004SRick Macklem kproc_exit(0);
3379ec7b004SRick Macklem /* Abnormal termination */
3389ec7b004SRick Macklem kproc_exit(1);
3399ec7b004SRick Macklem }
340