/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
 * $FreeBSD: src/sys/kern/vfs_subr.c,v 1.249.2.30 2003/04/04 20:35:57 tegge Exp $
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/dirent.h>
#include <sys/domain.h>
#include <sys/eventhandler.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/reboot.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <machine/limits.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

#include <sys/buf2.h>

/*
 * The workitem queue.
 */
#define SYNCER_MAXDELAY		32
static int sysctl_kern_syncdelay(SYSCTL_HANDLER_ARGS);
time_t syncdelay = 30;		/* max time to delay syncing data */
SYSCTL_PROC(_kern, OID_AUTO, syncdelay, CTLTYPE_INT | CTLFLAG_RW, 0, 0,
	    sysctl_kern_syncdelay, "I", "VFS data synchronization delay");
time_t filedelay = 30;		/* time to delay syncing files */
SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW,
	   &filedelay, 0, "File synchronization delay");
time_t dirdelay = 29;		/* time to delay syncing directories */
SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW,
	   &dirdelay, 0, "Directory synchronization delay");
time_t metadelay = 28;		/* time to delay syncing metadata */
SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW,
	   &metadelay, 0, "VFS metadata synchronization delay");
time_t retrydelay = 1;		/* retry delay after failure */
SYSCTL_INT(_kern, OID_AUTO, retrydelay, CTLFLAG_RW,
	   &retrydelay, 0, "VFS retry synchronization delay");
static int rushjob;			/* number of slots to run ASAP */
static int stat_rush_requests;		/* number of times I/O speeded up */
SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW,
	   &stat_rush_requests, 0, "");

LIST_HEAD(synclist, vnode);

#define	SC_FLAG_EXIT	(0x1)		/* request syncer exit */
#define	SC_FLAG_DONE	(0x2)		/* syncer confirm exit */

struct syncer_ctx {
	struct mount *sc_mp;
	struct lwkt_token sc_token;
	struct thread *sc_thread;
	int sc_flags;
	struct synclist *syncer_workitem_pending;
	long syncer_mask;
	int syncer_delayno;
	int syncer_forced;
	int syncer_rushjob;	/* sequence vnodes faster */
	int syncer_trigger;	/* trigger full sync */
	long syncer_count;
};

static void syncer_thread(void *);

static int
sysctl_kern_syncdelay(SYSCTL_HANDLER_ARGS)
{
	int error;
	int v = syncdelay;

	error = sysctl_handle_int(oidp, &v, 0, req);
	if (error || !req->newptr)
		return (error);
	if (v < 1)
		v = 1;
	if (v > SYNCER_MAXDELAY)
		v = SYNCER_MAXDELAY;
	syncdelay = v;

	return(0);
}
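
/*
 * Usage example: the SYSCTL_PROC above exports the handler as
 * kern.syncdelay, so the base flush delay can be tuned at runtime,
 * for example:
 *
 *	sysctl kern.syncdelay=10
 *
 * Values are clamped to the range [1, SYNCER_MAXDELAY].
 */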

/*
 * The workitem queue.
 *
 * It is useful to delay writes of file data and filesystem metadata
 * for tens of seconds so that quickly created and deleted files need
 * not waste disk bandwidth being created and removed.  To realize this,
 * we append vnodes to a "workitem" queue.  When running with a soft
 * updates implementation, most pending metadata dependencies should
 * not wait for more than a few seconds.  Thus, metadata written to
 * mounted block devices is delayed only about half the time that file
 * data is delayed.  Similarly, directory updates are more critical,
 * so are only delayed about a third the time that file data is delayed.
 * Thus, there are SYNCER_MAXDELAY queues that are processed round-robin
 * at a rate of one each second (driven off the filesystem syncer
 * process).  The syncer_delayno variable indicates the next queue that
 * is to be processed.  Items that need to be processed soon are placed
 * in this queue:
 *
 *	syncer_workitem_pending[syncer_delayno]
 *
 * A delay of fifteen seconds is done by placing the request fifteen
 * entries later in the queue:
 *
 *	syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
 *
 */
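
/*
 * Illustrative example (not from the original sources): with
 * SYNCER_MAXDELAY == 32, hashinit() produces a 32-entry table and a
 * syncer_mask of 31.  If syncer_delayno is currently 20, a vnode added
 * with a delay of 15 lands in slot (20 + 15) & 31 == 3, which the
 * syncer will not reach for another 15 one-second sweeps.
 */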

/*
 * Return the number of vnodes on the syncer's timed list.  This will
 * include the syncer vnode (mp->mnt_syncer), so if one is present a
 * minimum value of 1 will be returned.
 */
long
vn_syncer_count(struct mount *mp)
{
	struct syncer_ctx *ctx;

	ctx = mp->mnt_syncer_ctx;
	if (ctx)
		return (ctx->syncer_count);
	return 0;
}

/*
 * Add an item to the syncer work queue.
 *
 * WARNING: Cannot get vp->v_token here if not already held, we must
 *	    depend on the syncer_token (which might already be held by
 *	    the caller) to protect v_synclist and VONWORKLST.
 *
 * WARNING: The syncer depends on this function not blocking if the caller
 *	    already holds the syncer token.
 */
void
vn_syncer_add(struct vnode *vp, int delay)
{
	struct syncer_ctx *ctx;
	int slot;

	ctx = vp->v_mount->mnt_syncer_ctx;
	lwkt_gettoken(&ctx->sc_token);

	if (vp->v_flag & VONWORKLST) {
		LIST_REMOVE(vp, v_synclist);
		--ctx->syncer_count;
	}
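	/*
	 * A delay of zero or less selects the absolute slot index
	 * (-delay) rather than a slot relative to syncer_delayno;
	 * vsyncscan() uses this form to reposition a vnode relative
	 * to the bucket it was found in.
	 */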
	if (delay <= 0) {
		slot = -delay & ctx->syncer_mask;
	} else {
		if (delay > SYNCER_MAXDELAY - 2)
			delay = SYNCER_MAXDELAY - 2;
		slot = (ctx->syncer_delayno + delay) & ctx->syncer_mask;
	}

	LIST_INSERT_HEAD(&ctx->syncer_workitem_pending[slot], vp, v_synclist);
	vsetflags(vp, VONWORKLST);
	++ctx->syncer_count;

	lwkt_reltoken(&ctx->sc_token);
}

/*
 * Removes the vnode from the syncer list.  Since we might block while
 * acquiring the syncer_token we have to [re]check conditions to determine
 * that it is ok to remove the vnode.
 *
 * Force removal if force != 0.  This can only occur during a forced unmount.
 *
 * vp->v_token held on call
 */
void
vn_syncer_remove(struct vnode *vp, int force)
{
	struct syncer_ctx *ctx;

	ctx = vp->v_mount->mnt_syncer_ctx;
	lwkt_gettoken(&ctx->sc_token);

	if ((vp->v_flag & (VISDIRTY | VONWORKLST | VOBJDIRTY)) == VONWORKLST &&
	    RB_EMPTY(&vp->v_rbdirty_tree)) {
		vclrflags(vp, VONWORKLST);
		LIST_REMOVE(vp, v_synclist);
		--ctx->syncer_count;
	} else if (force && (vp->v_flag & VONWORKLST)) {
		vclrflags(vp, VONWORKLST);
		LIST_REMOVE(vp, v_synclist);
		--ctx->syncer_count;
	}

	lwkt_reltoken(&ctx->sc_token);
}

25350e4012aSVenkatesh Srinivas /*
2540f79f6b2SMatthew Dillon * vnode must be locked
2550f79f6b2SMatthew Dillon */
2560f79f6b2SMatthew Dillon void
vclrisdirty(struct vnode * vp)2570f79f6b2SMatthew Dillon vclrisdirty(struct vnode *vp)
2580f79f6b2SMatthew Dillon {
2590f79f6b2SMatthew Dillon vclrflags(vp, VISDIRTY);
2600f79f6b2SMatthew Dillon if (vp->v_flag & VONWORKLST)
261f4428f2fSMatthew Dillon vn_syncer_remove(vp, 0);
2620f79f6b2SMatthew Dillon }
2630f79f6b2SMatthew Dillon
264fd2da346SMatthew Dillon void
vclrobjdirty(struct vnode * vp)265fd2da346SMatthew Dillon vclrobjdirty(struct vnode *vp)
266fd2da346SMatthew Dillon {
267fd2da346SMatthew Dillon vclrflags(vp, VOBJDIRTY);
268fd2da346SMatthew Dillon if (vp->v_flag & VONWORKLST)
269f4428f2fSMatthew Dillon vn_syncer_remove(vp, 0);
270fd2da346SMatthew Dillon }
271fd2da346SMatthew Dillon
2720f79f6b2SMatthew Dillon /*
2730f79f6b2SMatthew Dillon * vnode must be stable
2740f79f6b2SMatthew Dillon */
2750f79f6b2SMatthew Dillon void
vsetisdirty(struct vnode * vp)2760f79f6b2SMatthew Dillon vsetisdirty(struct vnode *vp)
2770f79f6b2SMatthew Dillon {
278fd2da346SMatthew Dillon struct syncer_ctx *ctx;
279fd2da346SMatthew Dillon
2800f79f6b2SMatthew Dillon if ((vp->v_flag & VISDIRTY) == 0) {
281cf6a53caSMatthew Dillon ctx = vp->v_mount->mnt_syncer_ctx;
2820f79f6b2SMatthew Dillon vsetflags(vp, VISDIRTY);
283fd2da346SMatthew Dillon lwkt_gettoken(&ctx->sc_token);
284fd2da346SMatthew Dillon if ((vp->v_flag & VONWORKLST) == 0)
2850f79f6b2SMatthew Dillon vn_syncer_add(vp, syncdelay);
286fd2da346SMatthew Dillon lwkt_reltoken(&ctx->sc_token);
287fd2da346SMatthew Dillon }
288fd2da346SMatthew Dillon }
289fd2da346SMatthew Dillon
290fd2da346SMatthew Dillon void
vsetobjdirty(struct vnode * vp)291fd2da346SMatthew Dillon vsetobjdirty(struct vnode *vp)
292fd2da346SMatthew Dillon {
293fd2da346SMatthew Dillon struct syncer_ctx *ctx;
294fd2da346SMatthew Dillon
295fd2da346SMatthew Dillon if ((vp->v_flag & VOBJDIRTY) == 0) {
296cf6a53caSMatthew Dillon ctx = vp->v_mount->mnt_syncer_ctx;
297fd2da346SMatthew Dillon vsetflags(vp, VOBJDIRTY);
298fd2da346SMatthew Dillon lwkt_gettoken(&ctx->sc_token);
299fd2da346SMatthew Dillon if ((vp->v_flag & VONWORKLST) == 0)
300fd2da346SMatthew Dillon vn_syncer_add(vp, syncdelay);
301fd2da346SMatthew Dillon lwkt_reltoken(&ctx->sc_token);
3020f79f6b2SMatthew Dillon }
3030f79f6b2SMatthew Dillon }
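
/*
 * Hypothetical usage sketch (not from the original sources): a VFS that
 * tracks dirty inodes separately from dirty buffers would typically mark
 * the vnode when it dirties the inode, so the syncer picks it up even
 * when no dirty buffers exist, and clear it once the inode is flushed:
 *
 *	vsetisdirty(vp);	(vnode must be stable; queues to the syncer)
 *	...
 *	vclrisdirty(vp);	(vnode must be locked; dequeues when clean)
 */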

/*
 * Create per-filesystem syncer process
 */
void
vn_syncer_thr_create(struct mount *mp)
{
	struct syncer_ctx *ctx;
	static int syncalloc = 0;

	ctx = kmalloc(sizeof(struct syncer_ctx), M_TEMP, M_WAITOK | M_ZERO);
	ctx->sc_mp = mp;
	ctx->sc_flags = 0;
	ctx->syncer_workitem_pending = hashinit(SYNCER_MAXDELAY, M_DEVBUF,
						&ctx->syncer_mask);
	ctx->syncer_delayno = 0;
	lwkt_token_init(&ctx->sc_token, "syncer");
	mp->mnt_syncer_ctx = ctx;
	kthread_create(syncer_thread, ctx, &ctx->sc_thread,
		       "syncer%d", ++syncalloc & 0x7FFFFFFF);
}
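
/*
 * Lifecycle sketch (assumed, not from the original sources): a mount
 * that wants a dedicated syncer brackets the life of mp->mnt_syncer_ctx
 * with the create/stop pair:
 *
 *	vn_syncer_thr_create(mp);	(mount path)
 *	...filesystem operates...
 *	vn_syncer_thr_stop(mp);		(unmount path)
 */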

/*
 * Stop per-filesystem syncer process
 */
void
vn_syncer_thr_stop(struct mount *mp)
{
	struct syncer_ctx *ctx;

	ctx = mp->mnt_syncer_ctx;
	if (ctx == NULL)
		return;

	lwkt_gettoken(&ctx->sc_token);

	/* Signal the syncer process to exit */
	ctx->sc_flags |= SC_FLAG_EXIT;
	wakeup(ctx);

	/* Wait until the syncer process exits */
	while ((ctx->sc_flags & SC_FLAG_DONE) == 0) {
		tsleep_interlock(&ctx->sc_flags, 0);
		lwkt_reltoken(&ctx->sc_token);
		tsleep(&ctx->sc_flags, PINTERLOCKED, "syncexit", hz);
		lwkt_gettoken(&ctx->sc_token);
	}

	mp->mnt_syncer_ctx = NULL;
	lwkt_reltoken(&ctx->sc_token);

	hashdestroy(ctx->syncer_workitem_pending, M_DEVBUF, ctx->syncer_mask);
	kfree(ctx, M_TEMP);
}

struct thread *updatethread;

/*
 * System filesystem synchronizer daemon.
 */
static void
syncer_thread(void *_ctx)
{
	struct syncer_ctx *ctx = _ctx;
	struct synclist *slp;
	struct vnode *vp;
	long starttime;
	int *sc_flagsp;
	int sc_flags;
	int vnodes_synced = 0;
	int delta;
	int dummy = 0;

	for (;;) {
		kproc_suspend_loop();

		starttime = time_uptime;
		lwkt_gettoken(&ctx->sc_token);

		/*
		 * Push files whose dirty time has expired.  Be careful
		 * of interrupt race on slp queue.
		 *
		 * Note that vsyncscan() and vn_syncer_one() can pull items
		 * off the same list, so we shift vp's position in the
		 * list immediately.
		 */
		slp = &ctx->syncer_workitem_pending[ctx->syncer_delayno];

		/*
		 * If syncer_trigger is set (from trigger_syncer(mp)),
		 * immediately do a full filesystem sync and set up the
		 * following full filesystem sync to occur in 1 second.
		 *
		 * The normal syncer_trigger bit 0 is automatically reset.
		 * If other bits are set, they remain set and cause the
		 * syncer to keep running.
		 */
		if (ctx->syncer_trigger) {
			if (ctx->sc_mp && ctx->sc_mp->mnt_syncer) {
				vp = ctx->sc_mp->mnt_syncer;
				if (vp->v_flag & VONWORKLST) {
					vn_syncer_add(vp, retrydelay);
					if (vget(vp, LK_EXCLUSIVE) == 0) {
						atomic_clear_int(&ctx->syncer_trigger, 1);
						VOP_FSYNC(vp, MNT_LAZY, 0);
						vput(vp);
						vnodes_synced++;
					}
				}
			}
		}

		/*
		 * FSYNC items in this bucket
		 */
		while ((vp = LIST_FIRST(slp)) != NULL) {
			vn_syncer_add(vp, retrydelay);
			if (ctx->syncer_forced) {
				if (vget(vp, LK_EXCLUSIVE) == 0) {
					VOP_FSYNC(vp, MNT_NOWAIT, 0);
					vput(vp);
					vnodes_synced++;
				}
			} else {
				if (vget(vp, LK_EXCLUSIVE | LK_NOWAIT) == 0) {
					VOP_FSYNC(vp, MNT_LAZY, 0);
					vput(vp);
					vnodes_synced++;
				}
			}
		}

		/*
		 * Increment the slot upon completion.  This typically
		 * happens once per second, but may happen faster if
		 * the syncer is triggered.
		 */
		ctx->syncer_delayno = (ctx->syncer_delayno + 1) &
				      ctx->syncer_mask;

		sc_flags = ctx->sc_flags;

		/* Exit on unmount */
		if (sc_flags & SC_FLAG_EXIT)
			break;

		lwkt_reltoken(&ctx->sc_token);

		/*
		 * Do sync processing for each mount.
		 */
		if (ctx->sc_mp)
			bio_ops_sync(ctx->sc_mp);

		/*
		 * The variable rushjob allows the kernel to speed up the
		 * processing of the filesystem syncer process.  A rushjob
		 * value of N tells the filesystem syncer to process the next
		 * N seconds worth of work on its queue ASAP.  Currently rushjob
		 * is used by the soft update code to speed up the filesystem
		 * syncer process when the incore state is getting so far
		 * ahead of the disk that the kernel memory pool is being
		 * threatened with exhaustion.
		 */
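		/*
		 * Worked example: with the default syncdelay of 30, a
		 * pending rushjob backlog makes this loop advance one
		 * bucket per tick (via the 1-tick tsleeps below) instead
		 * of one per second, and any backlog greater than
		 * syncdelay/2 == 15 is clamped to 15.
		 */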
		delta = rushjob - ctx->syncer_rushjob;
		if ((u_int)delta > syncdelay / 2) {
			ctx->syncer_rushjob = rushjob - syncdelay / 2;
			tsleep(&dummy, 0, "rush", 1);
			continue;
		}
		if (delta) {
			++ctx->syncer_rushjob;
			tsleep(&dummy, 0, "rush", 1);
			continue;
		}

		/*
		 * Normal syncer operation iterates once a second, unless
		 * specifically triggered.
		 */
		if (time_uptime == starttime &&
		    ctx->syncer_trigger == 0) {
			tsleep_interlock(ctx, 0);
			if (time_uptime == starttime &&
			    ctx->syncer_trigger == 0 &&
			    (ctx->sc_flags & SC_FLAG_EXIT) == 0) {
				tsleep(ctx, PINTERLOCKED, "syncer", hz);
			}
		}
	}

	/*
	 * Unmount/exit path for per-filesystem syncers; sc_token held
	 */
	ctx->sc_flags |= SC_FLAG_DONE;
	sc_flagsp = &ctx->sc_flags;
	lwkt_reltoken(&ctx->sc_token);
	wakeup(sc_flagsp);

	kthread_exit();
}

/*
 * This allows a filesystem to pro-actively request that a dirty
 * vnode be fsync()d.  This routine does not guarantee that one
 * will actually be fsynced.
 */
void
vn_syncer_one(struct mount *mp)
{
	struct syncer_ctx *ctx;
	struct synclist *slp;
	struct vnode *vp;
	int i;
	int n = syncdelay;

	ctx = mp->mnt_syncer_ctx;
	i = ctx->syncer_delayno & ctx->syncer_mask;
	cpu_ccfence();

	if (lwkt_trytoken(&ctx->sc_token) == 0)
		return;

	/*
	 * Look ahead on our syncer time array.
	 */
	do {
		slp = &ctx->syncer_workitem_pending[i];
		vp = LIST_FIRST(slp);
		if (vp && vp->v_type == VNON)
			vp = LIST_NEXT(vp, v_synclist);
		if (vp)
			break;
		i = (i + 1) & ctx->syncer_mask;
		/* i will be wrong if we stop here but vp is NULL so ok */
	} while (--n);

	/*
	 * Process one vnode, skipping the syncer vnode, but also stop
	 * if the syncer vnode is the only thing on this list.
	 */
	if (vp) {
		vn_syncer_add(vp, retrydelay);
		if (vget(vp, LK_EXCLUSIVE | LK_NOWAIT) == 0) {
			VOP_FSYNC(vp, MNT_LAZY, 0);
			vput(vp);
		}
	}
	lwkt_reltoken(&ctx->sc_token);
}
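
/*
 * Hypothetical usage sketch (not from the original sources): a
 * filesystem flusher waiting for dirty vnodes to drain could call
 * vn_syncer_one(mp) on each iteration to opportunistically push one
 * timed vnode through VOP_FSYNC() instead of waiting for the syncer's
 * next one-second sweep.
 */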

/*
 * Request that the syncer daemon for a specific mount speed up its work.
 * If mp is NULL the caller generally wants to speed up all syncers.
 */
void
speedup_syncer(struct mount *mp)
{
	/*
	 * Don't bother protecting the test.  The wakeup() only does
	 * something real if the thread is in the right state.
	 */
	atomic_add_int(&rushjob, 1);
	++stat_rush_requests;
	if (mp && mp->mnt_syncer_ctx)
		wakeup(mp->mnt_syncer_ctx);
}

/*
 * Force continuous full syncs until stopped.  This may be used by
 * filesystems waiting on dirty data to be flushed to avoid syncer/tsleep
 * races.
 */
void
trigger_syncer_start(struct mount *mp)
{
	struct syncer_ctx *ctx;

	if (mp && (ctx = mp->mnt_syncer_ctx) != NULL) {
		if (atomic_fetchadd_int(&ctx->syncer_trigger, 2) <= 1)
			wakeup(ctx);
	}
}

void
trigger_syncer_stop(struct mount *mp)
{
	struct syncer_ctx *ctx;

	if (mp && (ctx = mp->mnt_syncer_ctx) != NULL) {
		atomic_add_int(&ctx->syncer_trigger, -2);
	}
}
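
/*
 * Usage sketch (assumed, not from the original sources): a filesystem
 * waiting for dirty data to drain might bracket its wait loop with the
 * start/stop pair so the syncer keeps running full syncs and cannot
 * race the waiter into a missed wakeup:
 *
 *	trigger_syncer_start(mp);
 *	while (fs_has_dirty_data(mp))	(hypothetical predicate)
 *		tsleep(mp, 0, "fswait", hz);
 *	trigger_syncer_stop(mp);
 */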

/*
 * trigger a full sync (auto-reset)
 */
void
trigger_syncer(struct mount *mp)
{
	struct syncer_ctx *ctx;

	if (mp && (ctx = mp->mnt_syncer_ctx) != NULL) {
		if ((ctx->syncer_trigger & 1) == 0) {
			atomic_set_int(&ctx->syncer_trigger, 1);
			wakeup(ctx);
		}
	}
}

/*
 * Routine to create and manage a filesystem syncer vnode.
 */
static int sync_close(struct vop_close_args *);
static int sync_fsync(struct vop_fsync_args *);
static int sync_inactive(struct vop_inactive_args *);
static int sync_reclaim(struct vop_reclaim_args *);
static int sync_print(struct vop_print_args *);

static struct vop_ops sync_vnode_vops = {
	.vop_default =	vop_eopnotsupp,
	.vop_close =	sync_close,
	.vop_fsync =	sync_fsync,
	.vop_inactive =	sync_inactive,
	.vop_reclaim =	sync_reclaim,
	.vop_print =	sync_print,
};

static struct vop_ops *sync_vnode_vops_p = &sync_vnode_vops;

VNODEOP_SET(sync_vnode_vops);

/*
 * Create a new filesystem syncer vnode for the specified mount point.
 * This vnode is placed on the worklist and is responsible for sync'ing
 * the filesystem.
 *
 * NOTE: read-only mounts are also placed on the worklist.  The filesystem
 * sync code is also responsible for cleaning up vnodes.
 */
int
vfs_allocate_syncvnode(struct mount *mp)
{
	struct vnode *vp;
	static long start, incr, next;
	int error;

	/* Allocate a new vnode */
	error = getspecialvnode(VT_VFS, mp, &sync_vnode_vops_p, &vp, 0, 0);
	if (error) {
		mp->mnt_syncer = NULL;
		return (error);
	}
	vp->v_type = VNON;

	/*
	 * Place the vnode onto the syncer worklist.  We attempt to
	 * scatter them about on the list so that they will go off
	 * at evenly distributed times even if all the filesystems
	 * are mounted at once.
	 */
	next += incr;
	if (next == 0 || next > SYNCER_MAXDELAY) {
		start /= 2;
		incr /= 2;
		if (start == 0) {
			start = SYNCER_MAXDELAY / 2;
			incr = SYNCER_MAXDELAY;
		}
		next = start;
	}

	/*
	 * Only put the syncer vnode onto the syncer list if we have a
	 * syncer thread.  Some VFS's (aka NULLFS) don't need a syncer
	 * thread.
	 */
	if (mp->mnt_syncer_ctx)
		vn_syncer_add(vp, syncdelay > 0 ? next % syncdelay : 0);

	/*
	 * The mnt_syncer field inherits the vnode reference, which is
	 * held until later decommissioning.
	 */
	mp->mnt_syncer = vp;
	vx_unlock(vp);
	return (0);
}

static int
sync_close(struct vop_close_args *ap)
{
	return (0);
}

/*
 * Do a lazy sync of the filesystem.
 *
 * sync_fsync { struct vnode *a_vp, int a_waitfor }
 */
static int
sync_fsync(struct vop_fsync_args *ap)
{
	struct vnode *syncvp = ap->a_vp;
	struct mount *mp = syncvp->v_mount;
	int asyncflag;

	/*
	 * We only need to do something if this is a lazy evaluation.
	 */
	if ((ap->a_waitfor & MNT_LAZY) == 0)
		return (0);

	/*
	 * Move ourselves to the back of the sync list.
	 */
	vn_syncer_add(syncvp, syncdelay);

	/*
	 * Walk the list of vnodes pushing all that are dirty and
	 * not already on the sync list, and freeing vnodes which have
	 * no refs and whose VM objects are empty.  vfs_msync() handles
	 * the VM issues and must be called whether the mount is readonly
	 * or not.
	 */
	if (vfs_busy(mp, LK_NOWAIT) != 0)
		return (0);
	if (mp->mnt_flag & MNT_RDONLY) {
		vfs_msync(mp, MNT_NOWAIT);
	} else {
		asyncflag = mp->mnt_flag & MNT_ASYNC;
		mp->mnt_flag &= ~MNT_ASYNC;	/* ZZZ hack */
		vfs_msync(mp, MNT_NOWAIT);
		VFS_SYNC(mp, MNT_NOWAIT | MNT_LAZY);
		if (asyncflag)
			mp->mnt_flag |= MNT_ASYNC;
	}
	vfs_unbusy(mp);
	return (0);
}

/*
 * The syncer vnode is no longer referenced.
 *
 * sync_inactive { struct vnode *a_vp, struct proc *a_p }
 */
static int
sync_inactive(struct vop_inactive_args *ap)
{
	vgone_vxlocked(ap->a_vp);
	return (0);
}

/*
 * The syncer vnode is no longer needed and is being decommissioned.
 * This can only occur when the last reference has been released on
 * mp->mnt_syncer, so mp->mnt_syncer had better be NULL.
 *
 * Modifications to the worklist must be protected with a critical
 * section.
 *
 * sync_reclaim { struct vnode *a_vp }
 */
static int
sync_reclaim(struct vop_reclaim_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct syncer_ctx *ctx;

	ctx = vp->v_mount->mnt_syncer_ctx;
	if (ctx) {
		lwkt_gettoken(&ctx->sc_token);
		KKASSERT(vp->v_mount->mnt_syncer != vp);
		if (vp->v_flag & VONWORKLST) {
			LIST_REMOVE(vp, v_synclist);
			vclrflags(vp, VONWORKLST);
			--ctx->syncer_count;
		}
		lwkt_reltoken(&ctx->sc_token);
	} else {
		KKASSERT((vp->v_flag & VONWORKLST) == 0);
	}

	return (0);
}

/*
 * This is very similar to vmntvnodescan() but it only scans the
 * vnodes on the syncer list.  VFS's which support faster VFS_SYNC
 * operations use the VISDIRTY flag on the vnode to ensure that vnodes
 * with dirty inodes are added to the syncer in addition to vnodes
 * with dirty buffers, and can use this function instead of
 * vmntvnodescan().
 *
 * This scan does not issue VOP_FSYNC()s.  The supplied callback is
 * intended to synchronize the file in the manner intended by the VFS
 * using it.
 *
 * This is important when a system has millions of vnodes.
 */
int
vsyncscan(
    struct mount *mp,
    int vmsc_flags,
    int (*slowfunc)(struct mount *mp, struct vnode *vp, void *data),
    void *data
) {
	struct syncer_ctx *ctx;
	struct synclist *slp;
	struct vnode *vp;
	int i;
	int count;
	int lkflags;

	if (vmsc_flags & VMSC_NOWAIT)
		lkflags = LK_NOWAIT;
	else
		lkflags = 0;

	/*
	 * Syncer list context.  This API requires a dedicated syncer
	 * thread (MNTK_THR_SYNC).
	 */
	KKASSERT(mp->mnt_kern_flag & MNTK_THR_SYNC);
	ctx = mp->mnt_syncer_ctx;
	lwkt_gettoken(&ctx->sc_token);

	/*
	 * Setup for loop.  Allow races against the syncer thread but
	 * require that the syncer thread not be lazy if we were told
	 * not to be lazy.
	 */
	i = ctx->syncer_delayno & ctx->syncer_mask;
	if ((vmsc_flags & VMSC_NOWAIT) == 0)
		++ctx->syncer_forced;
	for (count = 0; count <= ctx->syncer_mask; ++count) {
		slp = &ctx->syncer_workitem_pending[i];

		while ((vp = LIST_FIRST(slp)) != NULL) {
			KKASSERT(vp->v_mount == mp);
			if (vmsc_flags & VMSC_GETVP) {
				if (vget(vp, LK_EXCLUSIVE | lkflags) == 0) {
					slowfunc(mp, vp, data);
					vput(vp);
				}
			} else if (vmsc_flags & VMSC_GETVX) {
				vx_get(vp);
				slowfunc(mp, vp, data);
				vx_put(vp);
			} else {
				vhold(vp);
				slowfunc(mp, vp, data);
				vdrop(vp);
			}

			/*
			 * vp could be invalid.  However, if vp is still at
			 * the head of the list it is clearly valid and we
			 * can safely move it.
			 */
			if (LIST_FIRST(slp) == vp)
				vn_syncer_add(vp, -(i + syncdelay));
		}
		i = (i + 1) & ctx->syncer_mask;
	}

	if ((vmsc_flags & VMSC_NOWAIT) == 0)
		--ctx->syncer_forced;
	lwkt_reltoken(&ctx->sc_token);
	return(0);
}
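
/*
 * Hypothetical callback sketch (not from the original sources): a VFS
 * might drive its VFS_SYNC scan with vsyncscan(mp, VMSC_GETVP,
 * example_sync_callback, NULL) and a callback along these lines:
 *
 *	static int
 *	example_sync_callback(struct mount *mp, struct vnode *vp, void *data)
 *	{
 *		if (!RB_EMPTY(&vp->v_rbdirty_tree))
 *			VOP_FSYNC(vp, MNT_NOWAIT, 0);
 *		return (0);
 *	}
 */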

/*
 * Print out a syncer vnode.
 *
 * sync_print { struct vnode *a_vp }
 */
static int
sync_print(struct vop_print_args *ap)
{
	struct vnode *vp = ap->a_vp;

	kprintf("syncer vnode");
	lockmgr_printinfo(&vp->v_lock);
	kprintf("\n");
	return (0);
}