/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
 * $FreeBSD: src/sys/kern/vfs_subr.c,v 1.249.2.30 2003/04/04 20:35:57 tegge Exp $
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/dirent.h>
#include <sys/domain.h>
#include <sys/eventhandler.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/reboot.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <machine/limits.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

#include <sys/buf2.h>

/*
 * The workitem queue.
 */
#define SYNCER_MAXDELAY		32
static int sysctl_kern_syncdelay(SYSCTL_HANDLER_ARGS);
time_t syncdelay = 30;		/* max time to delay syncing data */
SYSCTL_PROC(_kern, OID_AUTO, syncdelay, CTLTYPE_INT | CTLFLAG_RW, 0, 0,
		sysctl_kern_syncdelay, "I", "VFS data synchronization delay");
time_t filedelay = 30;		/* time to delay syncing files */
SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW,
		&filedelay, 0, "File synchronization delay");
time_t dirdelay = 29;		/* time to delay syncing directories */
SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW,
		&dirdelay, 0, "Directory synchronization delay");
time_t metadelay = 28;		/* time to delay syncing metadata */
SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW,
		&metadelay, 0, "VFS metadata synchronization delay");
time_t retrydelay = 1;		/* retry delay after failure */
SYSCTL_INT(_kern, OID_AUTO, retrydelay, CTLFLAG_RW,
		&retrydelay, 0, "VFS retry synchronization delay");
static int rushjob;		/* number of slots to run ASAP */
static int stat_rush_requests;	/* number of times I/O sped up */
SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW,
		&stat_rush_requests, 0, "");

LIST_HEAD(synclist, vnode);

#define	SC_FLAG_EXIT		(0x1)		/* request syncer exit */
#define	SC_FLAG_DONE		(0x2)		/* syncer confirm exit */

struct syncer_ctx {
	struct mount		*sc_mp;
	struct lwkt_token	sc_token;
	struct thread		*sc_thread;
	int			sc_flags;
	struct synclist		*syncer_workitem_pending;
	long			syncer_mask;
	int			syncer_delayno;
	int			syncer_forced;
	int			syncer_rushjob;	/* sequence vnodes faster */
	int			syncer_trigger;	/* trigger full sync */
	long			syncer_count;
};

static void syncer_thread(void *);

static int
sysctl_kern_syncdelay(SYSCTL_HANDLER_ARGS)
{
	int error;
	int v = syncdelay;

	error = sysctl_handle_int(oidp, &v, 0, req);
	if (error || !req->newptr)
		return (error);
	if (v < 1)
		v = 1;
	if (v > SYNCER_MAXDELAY)
		v = SYNCER_MAXDELAY;
	syncdelay = v;

	return(0);
}

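/*
 * For illustration (a sketch; the sysctl(8) session shown is assumed, not
 * taken from this file): the handler above backs the kern.syncdelay sysctl
 * declared earlier and clamps new values to the range [1, SYNCER_MAXDELAY]:
 *
 *	# sysctl kern.syncdelay=60
 *	kern.syncdelay: 30 -> 32		(clamped to SYNCER_MAXDELAY)
 */
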
/*
 * The workitem queue.
 *
 * It is useful to delay writes of file data and filesystem metadata
 * for tens of seconds so that quickly created and deleted files need
 * not waste disk bandwidth being created and removed. To realize this,
 * we append vnodes to a "workitem" queue. When running with a soft
 * updates implementation, most pending metadata dependencies should
 * not wait for more than a few seconds. Thus, mounted block devices
 * are delayed only about half the time that file data is delayed.
 * Similarly, directory updates are more critical, so they are delayed
 * only about a third of the time that file data is delayed. Thus, there
 * are SYNCER_MAXDELAY queues that are processed round-robin at a rate
 * of one each second (driven off the filesystem syncer thread). The
 * syncer_delayno variable indicates the next queue that is to be processed.
 * Items that need to be processed soon are placed in this queue:
 *
 *	syncer_workitem_pending[syncer_delayno]
 *
 * A delay of fifteen seconds is done by placing the request fifteen
 * entries later in the queue:
 *
 *	syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
 *
 */

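/*
 * A worked example of the slot arithmetic above (a sketch, assuming
 * hashinit() rounds the SYNCER_MAXDELAY (32) buckets to a power-of-two
 * table, giving syncer_mask == 31):
 *
 *	delay = 15;
 *	slot = (syncer_delayno + delay) & syncer_mask;
 *
 * With syncer_delayno == 25, slot == (25 + 15) & 31 == 8; since the
 * round-robin index advances once per second, the vnode is picked up
 * 15 seconds later when syncer_delayno wraps around to 8.
 */
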
/*
 * Return the number of vnodes on the syncer's timed list.  The count
 * includes the syncer vnode itself (mp->mnt_syncer), so when one is in
 * use a minimum value of 1 will be returned.
 */
long
vn_syncer_count(struct mount *mp)
{
	struct syncer_ctx *ctx;

	ctx = mp->mnt_syncer_ctx;
	if (ctx)
		return (ctx->syncer_count);
	return 0;
}

/*
 * Add an item to the syncer work queue.
 *
 * WARNING: Cannot acquire vp->v_token here if it is not already held;
 *	    we must depend on the syncer_token (which might already be
 *	    held by the caller) to protect v_synclist and VONWORKLST.
 *
 * WARNING: The syncer depends on this function not blocking if the caller
 *	    already holds the syncer token.
 */
void
vn_syncer_add(struct vnode *vp, int delay)
{
	struct syncer_ctx *ctx;
	int slot;

	ctx = vp->v_mount->mnt_syncer_ctx;
	lwkt_gettoken(&ctx->sc_token);

	if (vp->v_flag & VONWORKLST) {
		LIST_REMOVE(vp, v_synclist);
		--ctx->syncer_count;
	}
	if (delay <= 0) {
		slot = -delay & ctx->syncer_mask;
	} else {
		if (delay > SYNCER_MAXDELAY - 2)
			delay = SYNCER_MAXDELAY - 2;
		slot = (ctx->syncer_delayno + delay) & ctx->syncer_mask;
	}

	LIST_INSERT_HEAD(&ctx->syncer_workitem_pending[slot], vp, v_synclist);
	vsetflags(vp, VONWORKLST);
	++ctx->syncer_count;

	lwkt_reltoken(&ctx->sc_token);
}

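/*
 * Illustrative usage (a sketch; the callers shown are hypothetical): a
 * VFS that has just dirtied a vnode queues it the standard distance out,
 * while a non-positive delay selects an absolute bucket:
 *
 *	vn_syncer_add(vp, syncdelay);	(flush ~syncdelay seconds out)
 *	vn_syncer_add(vp, -slot);	(park in bucket slot & syncer_mask)
 *
 * vsyncscan() below uses the absolute form, -(i + syncdelay), to move
 * rescanned vnodes a full syncdelay ahead of the bucket being scanned.
 * LWKT tokens may be acquired recursively, which is why a caller already
 * holding sc_token can call this without blocking (see warnings above).
 */
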
/*
 * Removes the vnode from the syncer list.  Since we might block while
 * acquiring the syncer_token, we have to [re]check conditions to determine
 * that it is ok to remove the vnode.
 *
 * Force removal if force != 0.  This can only occur during a forced unmount.
 *
 * vp->v_token held on call
 */
void
vn_syncer_remove(struct vnode *vp, int force)
{
	struct syncer_ctx *ctx;

	ctx = vp->v_mount->mnt_syncer_ctx;
	lwkt_gettoken(&ctx->sc_token);

	if ((vp->v_flag & (VISDIRTY | VONWORKLST | VOBJDIRTY)) == VONWORKLST &&
	    RB_EMPTY(&vp->v_rbdirty_tree)) {
		vclrflags(vp, VONWORKLST);
		LIST_REMOVE(vp, v_synclist);
		--ctx->syncer_count;
	} else if (force && (vp->v_flag & VONWORKLST)) {
		vclrflags(vp, VONWORKLST);
		LIST_REMOVE(vp, v_synclist);
		--ctx->syncer_count;
	}

	lwkt_reltoken(&ctx->sc_token);
}

/*
 * vnode must be locked
 */
void
vclrisdirty(struct vnode *vp)
{
	vclrflags(vp, VISDIRTY);
	if (vp->v_flag & VONWORKLST)
		vn_syncer_remove(vp, 0);
}

void
vclrobjdirty(struct vnode *vp)
{
	vclrflags(vp, VOBJDIRTY);
	if (vp->v_flag & VONWORKLST)
		vn_syncer_remove(vp, 0);
}

/*
 * vnode must be stable
 */
void
vsetisdirty(struct vnode *vp)
{
	struct syncer_ctx *ctx;

	if ((vp->v_flag & VISDIRTY) == 0) {
		ctx = vp->v_mount->mnt_syncer_ctx;
		vsetflags(vp, VISDIRTY);
		lwkt_gettoken(&ctx->sc_token);
		if ((vp->v_flag & VONWORKLST) == 0)
			vn_syncer_add(vp, syncdelay);
		lwkt_reltoken(&ctx->sc_token);
	}
}

void
vsetobjdirty(struct vnode *vp)
{
	struct syncer_ctx *ctx;

	if ((vp->v_flag & VOBJDIRTY) == 0) {
		ctx = vp->v_mount->mnt_syncer_ctx;
		vsetflags(vp, VOBJDIRTY);
		lwkt_gettoken(&ctx->sc_token);
		if ((vp->v_flag & VONWORKLST) == 0)
			vn_syncer_add(vp, syncdelay);
		lwkt_reltoken(&ctx->sc_token);
	}
}

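/*
 * Illustrative sketch (hypothetical VFS code; the myfs_* names and the
 * i_vnode field are assumptions): a filesystem that tracks dirty inodes
 * with VISDIRTY marks the vnode after modifying in-core inode state so
 * the syncer will get to it within syncdelay seconds:
 *
 *	myfs_modify_inode(ip);		(assumed helper)
 *	vsetisdirty(ip->i_vnode);	(assumed field)
 *
 * The (v_flag & VISDIRTY) pre-test above makes repeated calls cheap, so
 * hot write paths may call this on every modification without re-queueing
 * the vnode.
 */
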
30650e4012aSVenkatesh Srinivas  * Create per-filesystem syncer process
30750e4012aSVenkatesh Srinivas  */
30850e4012aSVenkatesh Srinivas void
vn_syncer_thr_create(struct mount * mp)30950e4012aSVenkatesh Srinivas vn_syncer_thr_create(struct mount *mp)
31050e4012aSVenkatesh Srinivas {
31150e4012aSVenkatesh Srinivas 	struct syncer_ctx *ctx;
31250e4012aSVenkatesh Srinivas 	static int syncalloc = 0;
31350e4012aSVenkatesh Srinivas 
314cf6a53caSMatthew Dillon 	ctx = kmalloc(sizeof(struct syncer_ctx), M_TEMP, M_WAITOK | M_ZERO);
315cf6a53caSMatthew Dillon 	ctx->sc_mp = mp;
316cf6a53caSMatthew Dillon 	ctx->sc_flags = 0;
317cf6a53caSMatthew Dillon 	ctx->syncer_workitem_pending = hashinit(SYNCER_MAXDELAY, M_DEVBUF,
318cf6a53caSMatthew Dillon 						&ctx->syncer_mask);
319cf6a53caSMatthew Dillon 	ctx->syncer_delayno = 0;
320cf6a53caSMatthew Dillon 	lwkt_token_init(&ctx->sc_token, "syncer");
32150e4012aSVenkatesh Srinivas 	mp->mnt_syncer_ctx = ctx;
322cf6a53caSMatthew Dillon 	kthread_create(syncer_thread, ctx, &ctx->sc_thread,
323cf6a53caSMatthew Dillon 		       "syncer%d", ++syncalloc & 0x7FFFFFFF);
3242f05c7ffSVenkatesh Srinivas }
3252f05c7ffSVenkatesh Srinivas 
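/*
 * Sketch of the expected lifecycle (assumptions, not taken from this
 * file): the mount path pairs thread creation with a syncer vnode, and
 * unmount reverses both steps:
 *
 *	vn_syncer_thr_create(mp);		(start syncer thread)
 *	error = vfs_allocate_syncvnode(mp);	(see further below)
 *	...
 *	vn_syncer_thr_stop(mp);			(at unmount)
 *
 * vsyncscan() further below asserts MNTK_THR_SYNC, the mount flag
 * advertising that such a dedicated syncer thread exists.
 */
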
/*
 * Stop the per-filesystem syncer thread
 */
void
vn_syncer_thr_stop(struct mount *mp)
{
	struct syncer_ctx *ctx;

	ctx = mp->mnt_syncer_ctx;
	if (ctx == NULL)
		return;

	lwkt_gettoken(&ctx->sc_token);

	/* Signal the syncer thread to exit */
	ctx->sc_flags |= SC_FLAG_EXIT;
	wakeup(ctx);

	/* Wait until the syncer thread exits */
	while ((ctx->sc_flags & SC_FLAG_DONE) == 0) {
		tsleep_interlock(&ctx->sc_flags, 0);
		lwkt_reltoken(&ctx->sc_token);
		tsleep(&ctx->sc_flags, PINTERLOCKED, "syncexit", hz);
		lwkt_gettoken(&ctx->sc_token);
	}

	mp->mnt_syncer_ctx = NULL;
	lwkt_reltoken(&ctx->sc_token);

	hashdestroy(ctx->syncer_workitem_pending, M_DEVBUF, ctx->syncer_mask);
	kfree(ctx, M_TEMP);
}

struct thread *updatethread;

/*
 * System filesystem synchronizer daemon.
 */
static void
syncer_thread(void *_ctx)
{
	struct syncer_ctx *ctx = _ctx;
	struct synclist *slp;
	struct vnode *vp;
	long starttime;
	int *sc_flagsp;
	int sc_flags;
	int vnodes_synced = 0;
	int delta;
	int dummy = 0;

	for (;;) {
		kproc_suspend_loop();

		starttime = time_uptime;
		lwkt_gettoken(&ctx->sc_token);

		/*
		 * Push files whose dirty time has expired.  Be careful
		 * of interrupt race on slp queue.
		 *
		 * Note that vsyncscan() and vn_syncer_one() can pull items
		 * off the same list, so we shift vp's position in the
		 * list immediately.
		 */
		slp = &ctx->syncer_workitem_pending[ctx->syncer_delayno];

		/*
		 * If syncer_trigger is set (from trigger_syncer(mp)),
		 * immediately do a full filesystem sync and arrange for
		 * the following full filesystem sync to occur in 1 second.
		 *
		 * The normal syncer_trigger bit 0 is automatically reset.
		 * If other bits are set, they remain set and cause the
		 * syncer to keep running.
		 */
		if (ctx->syncer_trigger) {
			if (ctx->sc_mp && ctx->sc_mp->mnt_syncer) {
				vp = ctx->sc_mp->mnt_syncer;
				if (vp->v_flag & VONWORKLST) {
					vn_syncer_add(vp, retrydelay);
					if (vget(vp, LK_EXCLUSIVE) == 0) {
						atomic_clear_int(&ctx->syncer_trigger, 1);
						VOP_FSYNC(vp, MNT_LAZY, 0);
						vput(vp);
						vnodes_synced++;
					}
				}
			}
		}

		/*
		 * FSYNC items in this bucket
		 */
		while ((vp = LIST_FIRST(slp)) != NULL) {
			vn_syncer_add(vp, retrydelay);
			if (ctx->syncer_forced) {
				if (vget(vp, LK_EXCLUSIVE) == 0) {
					VOP_FSYNC(vp, MNT_NOWAIT, 0);
					vput(vp);
					vnodes_synced++;
				}
			} else {
				if (vget(vp, LK_EXCLUSIVE | LK_NOWAIT) == 0) {
					VOP_FSYNC(vp, MNT_LAZY, 0);
					vput(vp);
					vnodes_synced++;
				}
			}
		}

		/*
		 * Increment the slot upon completion.  This typically
		 * happens once per second but may be faster if the
		 * syncer is triggered.
		 */
		ctx->syncer_delayno = (ctx->syncer_delayno + 1) &
				      ctx->syncer_mask;

		sc_flags = ctx->sc_flags;

		/* Exit on unmount */
		if (sc_flags & SC_FLAG_EXIT)
			break;

		lwkt_reltoken(&ctx->sc_token);

		/*
		 * Do sync processing for this mount.
		 */
		if (ctx->sc_mp)
			bio_ops_sync(ctx->sc_mp);

		/*
		 * The variable rushjob allows the kernel to speed up the
		 * processing of the filesystem syncer thread. A rushjob
		 * value of N tells the filesystem syncer to process the next
		 * N seconds worth of work on its queue ASAP. Currently rushjob
		 * is used by the soft update code to speed up the filesystem
		 * syncer thread when the incore state is getting so far
		 * ahead of the disk that the kernel memory pool is being
		 * threatened with exhaustion.
		 */
		delta = rushjob - ctx->syncer_rushjob;
		if ((u_int)delta > syncdelay / 2) {
			ctx->syncer_rushjob = rushjob - syncdelay / 2;
			tsleep(&dummy, 0, "rush", 1);
			continue;
		}
		if (delta) {
			++ctx->syncer_rushjob;
			tsleep(&dummy, 0, "rush", 1);
			continue;
		}

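		/*
		 * Worked example of the catch-up logic above (illustrative
		 * numbers): with syncdelay == 30 the threshold is
		 * syncdelay / 2 == 15.  If speedup_syncer() has pushed
		 * rushjob 20 ahead of syncer_rushjob, then delta == 20 > 15,
		 * so syncer_rushjob snaps to rushjob - 15 and the loop
		 * restarts after only one tick; the remaining 15 buckets
		 * are then drained at one bucket per tick via the
		 * ++syncer_rushjob path instead of one bucket per second.
		 */
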
		/*
		 * Normal syncer operation iterates once a second, unless
		 * specifically triggered.
		 */
		if (time_uptime == starttime &&
		    ctx->syncer_trigger == 0) {
			tsleep_interlock(ctx, 0);
			if (time_uptime == starttime &&
			    ctx->syncer_trigger == 0 &&
			    (ctx->sc_flags & SC_FLAG_EXIT) == 0)
			{
				tsleep(ctx, PINTERLOCKED, "syncer", hz);
			}
		}
	}

	/*
	 * Unmount/exit path for per-filesystem syncers; sc_token held
	 */
	ctx->sc_flags |= SC_FLAG_DONE;
	sc_flagsp = &ctx->sc_flags;
	lwkt_reltoken(&ctx->sc_token);
	wakeup(sc_flagsp);

	kthread_exit();
}

/*
 * This allows a filesystem to pro-actively request that a dirty
 * vnode be fsync()d.  This routine does not guarantee that one
 * will actually be fsynced.
 */
void
vn_syncer_one(struct mount *mp)
{
	struct syncer_ctx *ctx;
	struct synclist *slp;
	struct vnode *vp;
	int i;
	int n = syncdelay;

	ctx = mp->mnt_syncer_ctx;
	i = ctx->syncer_delayno & ctx->syncer_mask;
	cpu_ccfence();

	if (lwkt_trytoken(&ctx->sc_token) == 0)
		return;

	/*
	 * Look ahead in our syncer time array.
	 */
	do {
		slp = &ctx->syncer_workitem_pending[i];
		vp = LIST_FIRST(slp);
		if (vp && vp->v_type == VNON)
			vp = LIST_NEXT(vp, v_synclist);
		if (vp)
			break;
		i = (i + 1) & ctx->syncer_mask;
		/* i will be wrong if we stop here, but vp is NULL so ok */
	} while(--n);

	/*
	 * Process one vnode.  Skip the syncer vnode, but also stop
	 * if the syncer vnode is the only thing on this list.
	 */
	if (vp) {
		vn_syncer_add(vp, retrydelay);
		if (vget(vp, LK_EXCLUSIVE | LK_NOWAIT) == 0) {
			VOP_FSYNC(vp, MNT_LAZY, 0);
			vput(vp);
		}
	}
	lwkt_reltoken(&ctx->sc_token);
}

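/*
 * Illustrative usage (a sketch; the myfs_* names are hypothetical): a
 * filesystem throttling on dirty buffers might nudge a single flush
 * ahead of schedule rather than waiting out the full syncdelay:
 *
 *	while (myfs_dirty_count(mp) > myfs_hiwater)	(assumed helpers)
 *		vn_syncer_one(mp);
 *
 * Because vn_syncer_one() only try-acquires the syncer token and may
 * find nothing to flush, callers cannot rely on it making progress.
 */
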
/*
 * Request that the syncer daemon for a specific mount speed up its work.
 * If mp is NULL the caller generally wants to speed up all syncers.
 */
void
speedup_syncer(struct mount *mp)
{
	/*
	 * Don't bother protecting the test.  wakeup() will only do
	 * something real if the thread is in the right state.
	 */
	atomic_add_int(&rushjob, 1);
	++stat_rush_requests;
	if (mp && mp->mnt_syncer_ctx)
		wakeup(mp->mnt_syncer_ctx);
}

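/*
 * Illustrative sketch (the caller and predicate are hypothetical): code
 * detecting that in-core state is outrunning the disk can ask for extra
 * seconds of queue to be processed ASAP:
 *
 *	if (myfs_incore_pressure(mp))		(assumed predicate)
 *		speedup_syncer(mp);
 *
 * Each call bumps rushjob by one, i.e. one extra second of the syncer's
 * work queue is processed at the accelerated one-tick rate described in
 * syncer_thread() above.
 */
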
/*
 * Force continuous full syncs until stopped.  This may be used by
 * filesystems waiting on dirty data to be flushed to avoid syncer/tsleep
 * races.
 */
void
trigger_syncer_start(struct mount *mp)
{
	struct syncer_ctx *ctx;

	if (mp && (ctx = mp->mnt_syncer_ctx) != NULL) {
		if (atomic_fetchadd_int(&ctx->syncer_trigger, 2) <= 1)
			wakeup(ctx);
	}
}

void
trigger_syncer_stop(struct mount *mp)
{
	struct syncer_ctx *ctx;

	if (mp && (ctx = mp->mnt_syncer_ctx) != NULL) {
		atomic_add_int(&ctx->syncer_trigger, -2);
	}
}

/*
 * Trigger a full sync (auto-reset).
 */
void
trigger_syncer(struct mount *mp)
{
	struct syncer_ctx *ctx;

	if (mp && (ctx = mp->mnt_syncer_ctx) != NULL) {
		if ((ctx->syncer_trigger & 1) == 0) {
			atomic_set_int(&ctx->syncer_trigger, 1);
			wakeup(ctx);
		}
	}
}

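/*
 * Sketch of the syncer_trigger encoding (derived from the code above,
 * restated for illustration): bit 0 is the one-shot trigger set by
 * trigger_syncer() and cleared by the syncer thread once the full sync
 * runs; the remaining bits form a counter bumped by 2 per outstanding
 * trigger_syncer_start().  A hypothetical waiter (myfs_* assumed):
 *
 *	trigger_syncer_start(mp);		(syncer_trigger += 2)
 *	while (myfs_dirty_count(mp) > 0)	(assumed predicate)
 *		tsleep(mp, 0, "myfswt", hz / 10);
 *	trigger_syncer_stop(mp);		(syncer_trigger -= 2)
 */
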
/*
 * Routine to create and manage a filesystem syncer vnode.
 */
static int sync_close(struct vop_close_args *);
static int sync_fsync(struct vop_fsync_args *);
static int sync_inactive(struct vop_inactive_args *);
static int sync_reclaim (struct vop_reclaim_args *);
static int sync_print(struct vop_print_args *);

static struct vop_ops sync_vnode_vops = {
	.vop_default =	vop_eopnotsupp,
	.vop_close =	sync_close,
	.vop_fsync =	sync_fsync,
	.vop_inactive =	sync_inactive,
	.vop_reclaim =	sync_reclaim,
	.vop_print =	sync_print,
};

static struct vop_ops *sync_vnode_vops_p = &sync_vnode_vops;

VNODEOP_SET(sync_vnode_vops);

/*
 * Create a new filesystem syncer vnode for the specified mount point.
 * This vnode is placed on the worklist and is responsible for sync'ing
 * the filesystem.
 *
 * NOTE: read-only mounts are also placed on the worklist.  The filesystem
 * sync code is also responsible for cleaning up vnodes.
 */
int
vfs_allocate_syncvnode(struct mount *mp)
{
	struct vnode *vp;
	static long start, incr, next;
	int error;

	/* Allocate a new vnode */
	error = getspecialvnode(VT_VFS, mp, &sync_vnode_vops_p, &vp, 0, 0);
	if (error) {
		mp->mnt_syncer = NULL;
		return (error);
	}
	vp->v_type = VNON;

	/*
	 * Place the vnode onto the syncer worklist. We attempt to
	 * scatter them about on the list so that they will go off
	 * at evenly distributed times even if all the filesystems
	 * are mounted at once.
	 */
	next += incr;
	if (next == 0 || next > SYNCER_MAXDELAY) {
		start /= 2;
		incr /= 2;
		if (start == 0) {
			start = SYNCER_MAXDELAY / 2;
			incr = SYNCER_MAXDELAY;
		}
		next = start;
	}

	/*
	 * Only put the syncer vnode onto the syncer list if we have a
	 * syncer thread.  Some VFS's (aka NULLFS) don't need a syncer
	 * thread.
	 */
	if (mp->mnt_syncer_ctx)
		vn_syncer_add(vp, syncdelay > 0 ? next % syncdelay : 0);

	/*
	 * The mnt_syncer field inherits the vnode reference, which is
	 * held until later decommissioning.
	 */
	mp->mnt_syncer = vp;
	vx_unlock(vp);
	return (0);
}

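/*
 * Worked example of the scattering above (illustrative, tracing the
 * first mounts after boot): start, incr and next all begin at 0, so
 * successive calls produce next = 16, then 48 (reset to 8), 24,
 * 40 (reset to 4), 12, 20, 28, ... a bisection pattern over
 * [1, SYNCER_MAXDELAY] that spreads the per-mount syncer vnodes across
 * different one-second buckets even when many filesystems are mounted
 * back-to-back.
 */
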
static int
sync_close(struct vop_close_args *ap)
{
	return (0);
}

/*
 * Do a lazy sync of the filesystem.
 *
 * sync_fsync { struct vnode *a_vp, int a_waitfor }
 */
static int
sync_fsync(struct vop_fsync_args *ap)
{
	struct vnode *syncvp = ap->a_vp;
	struct mount *mp = syncvp->v_mount;
	int asyncflag;

	/*
	 * We only need to do something if this is a lazy evaluation.
	 */
	if ((ap->a_waitfor & MNT_LAZY) == 0)
		return (0);

	/*
	 * Move ourselves to the back of the sync list.
	 */
	vn_syncer_add(syncvp, syncdelay);

	/*
	 * Walk the list of vnodes pushing all that are dirty and
	 * not already on the sync list, and freeing vnodes which have
	 * no refs and whose VM objects are empty.  vfs_msync() handles
	 * the VM issues and must be called whether the mount is readonly
	 * or not.
	 */
	if (vfs_busy(mp, LK_NOWAIT) != 0)
		return (0);
	if (mp->mnt_flag & MNT_RDONLY) {
		vfs_msync(mp, MNT_NOWAIT);
	} else {
		asyncflag = mp->mnt_flag & MNT_ASYNC;
		mp->mnt_flag &= ~MNT_ASYNC;	/* ZZZ hack */
		vfs_msync(mp, MNT_NOWAIT);
		VFS_SYNC(mp, MNT_NOWAIT | MNT_LAZY);
		if (asyncflag)
			mp->mnt_flag |= MNT_ASYNC;
	}
	vfs_unbusy(mp);
	return (0);
}

/*
 * The syncer vnode is no longer referenced.
 *
 * sync_inactive { struct vnode *a_vp, struct proc *a_p }
 */
static int
sync_inactive(struct vop_inactive_args *ap)
{
	vgone_vxlocked(ap->a_vp);
	return (0);
}

/*
 * The syncer vnode is no longer needed and is being decommissioned.
 * This can only occur when the last reference has been released on
 * mp->mnt_syncer, so mp->mnt_syncer had better be NULL.
 *
 * Modifications to the worklist are protected by the syncer's sc_token.
 *
 *	sync_reclaim { struct vnode *a_vp }
 */
static int
sync_reclaim(struct vop_reclaim_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct syncer_ctx *ctx;

	ctx = vp->v_mount->mnt_syncer_ctx;
	if (ctx) {
		lwkt_gettoken(&ctx->sc_token);
		KKASSERT(vp->v_mount->mnt_syncer != vp);
		if (vp->v_flag & VONWORKLST) {
			LIST_REMOVE(vp, v_synclist);
			vclrflags(vp, VONWORKLST);
			--ctx->syncer_count;
		}
		lwkt_reltoken(&ctx->sc_token);
	} else {
		KKASSERT((vp->v_flag & VONWORKLST) == 0);
	}

	return (0);
}

/*
 * This is very similar to vmntvnodescan() but it only scans the
 * vnodes on the syncer list.  VFS's which support faster VFS_SYNC
 * operations use the VISDIRTY flag on the vnode to ensure that vnodes
 * with dirty inodes are added to the syncer in addition to vnodes
 * with dirty buffers, and can use this function instead of
 * vmntvnodescan().
 *
 * This scan does not issue VOP_FSYNC()s.  The supplied callback is
 * expected to synchronize the file in the manner required by the VFS
 * using it.
 *
 * This is important when a system has millions of vnodes.
 */
int
vsyncscan(
    struct mount *mp,
    int vmsc_flags,
    int (*slowfunc)(struct mount *mp, struct vnode *vp, void *data),
    void *data
) {
	struct syncer_ctx *ctx;
	struct synclist *slp;
	struct vnode *vp;
	int i;
	int count;
	int lkflags;

	if (vmsc_flags & VMSC_NOWAIT)
		lkflags = LK_NOWAIT;
	else
		lkflags = 0;

	/*
	 * Syncer list context.  This API requires a dedicated syncer
	 * thread (MNTK_THR_SYNC).
	 */
	KKASSERT(mp->mnt_kern_flag & MNTK_THR_SYNC);
	ctx = mp->mnt_syncer_ctx;
	lwkt_gettoken(&ctx->sc_token);

	/*
	 * Setup for the loop.  Allow races against the syncer thread but
	 * require that the syncer thread not be lazy if we were told
	 * not to be lazy.
	 */
	i = ctx->syncer_delayno & ctx->syncer_mask;
	if ((vmsc_flags & VMSC_NOWAIT) == 0)
		++ctx->syncer_forced;
	for (count = 0; count <= ctx->syncer_mask; ++count) {
		slp = &ctx->syncer_workitem_pending[i];

		while ((vp = LIST_FIRST(slp)) != NULL) {
			KKASSERT(vp->v_mount == mp);
			if (vmsc_flags & VMSC_GETVP) {
				if (vget(vp, LK_EXCLUSIVE | lkflags) == 0) {
					slowfunc(mp, vp, data);
					vput(vp);
				}
			} else if (vmsc_flags & VMSC_GETVX) {
				vx_get(vp);
				slowfunc(mp, vp, data);
				vx_put(vp);
			} else {
				vhold(vp);
				slowfunc(mp, vp, data);
				vdrop(vp);
			}

			/*
			 * vp could be invalid.  However, if vp is still at
			 * the head of the list it is clearly valid and we
			 * can safely move it.
			 */
			if (LIST_FIRST(slp) == vp)
				vn_syncer_add(vp, -(i + syncdelay));
		}
		i = (i + 1) & ctx->syncer_mask;
	}

	if ((vmsc_flags & VMSC_NOWAIT) == 0)
		--ctx->syncer_forced;
	lwkt_reltoken(&ctx->sc_token);
	return(0);
}

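/*
 * Illustrative sketch (hypothetical VFS, not part of this file): a
 * filesystem's VFS_SYNC might walk only its syncer-listed vnodes with
 * a callback of the required shape:
 *
 *	static int
 *	myfs_sync_scan(struct mount *mp, struct vnode *vp, void *data)
 *	{
 *		int *errorp = data;
 *
 *		if (myfs_flush_inode(vp) != 0)	(assumed helper)
 *			*errorp = EIO;
 *		return (0);
 *	}
 *
 *	int error = 0;
 *	vsyncscan(mp, VMSC_GETVP | VMSC_NOWAIT, myfs_sync_scan, &error);
 */
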
/*
 * Print out a syncer vnode.
 *
 *	sync_print { struct vnode *a_vp }
 */
static int
sync_print(struct vop_print_args *ap)
{
	struct vnode *vp = ap->a_vp;

	kprintf("syncer vnode");
	lockmgr_printinfo(&vp->v_lock);
	kprintf("\n");
	return (0);
}