xref: /freebsd-src/sys/contrib/openzfs/module/zfs/dsl_synctask.c (revision eda14cbc264d6969b02f2b1994cef11148e914f1)
1*eda14cbcSMatt Macy /*
2*eda14cbcSMatt Macy  * CDDL HEADER START
3*eda14cbcSMatt Macy  *
4*eda14cbcSMatt Macy  * The contents of this file are subject to the terms of the
5*eda14cbcSMatt Macy  * Common Development and Distribution License (the "License").
6*eda14cbcSMatt Macy  * You may not use this file except in compliance with the License.
7*eda14cbcSMatt Macy  *
8*eda14cbcSMatt Macy  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*eda14cbcSMatt Macy  * or http://www.opensolaris.org/os/licensing.
10*eda14cbcSMatt Macy  * See the License for the specific language governing permissions
11*eda14cbcSMatt Macy  * and limitations under the License.
12*eda14cbcSMatt Macy  *
13*eda14cbcSMatt Macy  * When distributing Covered Code, include this CDDL HEADER in each
14*eda14cbcSMatt Macy  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*eda14cbcSMatt Macy  * If applicable, add the following below this CDDL HEADER, with the
16*eda14cbcSMatt Macy  * fields enclosed by brackets "[]" replaced with your own identifying
17*eda14cbcSMatt Macy  * information: Portions Copyright [yyyy] [name of copyright owner]
18*eda14cbcSMatt Macy  *
19*eda14cbcSMatt Macy  * CDDL HEADER END
20*eda14cbcSMatt Macy  */
21*eda14cbcSMatt Macy /*
22*eda14cbcSMatt Macy  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23*eda14cbcSMatt Macy  * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
24*eda14cbcSMatt Macy  */
25*eda14cbcSMatt Macy 
26*eda14cbcSMatt Macy #include <sys/dmu.h>
27*eda14cbcSMatt Macy #include <sys/dmu_tx.h>
28*eda14cbcSMatt Macy #include <sys/dsl_pool.h>
29*eda14cbcSMatt Macy #include <sys/dsl_dir.h>
30*eda14cbcSMatt Macy #include <sys/dsl_synctask.h>
31*eda14cbcSMatt Macy #include <sys/metaslab.h>
32*eda14cbcSMatt Macy 
/*
 * Shift applied to a synctask's blocks_modified estimate to obtain the
 * byte count stored in dst_space, i.e. each modified block is budgeted
 * as 1 << 14 bytes (16 KiB).
 */
#define	DST_AVG_BLKSHIFT 14
34*eda14cbcSMatt Macy 
35*eda14cbcSMatt Macy /* ARGSUSED */
36*eda14cbcSMatt Macy static int
37*eda14cbcSMatt Macy dsl_null_checkfunc(void *arg, dmu_tx_t *tx)
38*eda14cbcSMatt Macy {
39*eda14cbcSMatt Macy 	return (0);
40*eda14cbcSMatt Macy }
41*eda14cbcSMatt Macy 
42*eda14cbcSMatt Macy static int
43*eda14cbcSMatt Macy dsl_sync_task_common(const char *pool, dsl_checkfunc_t *checkfunc,
44*eda14cbcSMatt Macy     dsl_syncfunc_t *syncfunc, dsl_sigfunc_t *sigfunc, void *arg,
45*eda14cbcSMatt Macy     int blocks_modified, zfs_space_check_t space_check, boolean_t early)
46*eda14cbcSMatt Macy {
47*eda14cbcSMatt Macy 	spa_t *spa;
48*eda14cbcSMatt Macy 	dmu_tx_t *tx;
49*eda14cbcSMatt Macy 	int err;
50*eda14cbcSMatt Macy 	dsl_sync_task_t dst = { { { NULL } } };
51*eda14cbcSMatt Macy 	dsl_pool_t *dp;
52*eda14cbcSMatt Macy 
53*eda14cbcSMatt Macy 	err = spa_open(pool, &spa, FTAG);
54*eda14cbcSMatt Macy 	if (err != 0)
55*eda14cbcSMatt Macy 		return (err);
56*eda14cbcSMatt Macy 	dp = spa_get_dsl(spa);
57*eda14cbcSMatt Macy 
58*eda14cbcSMatt Macy top:
59*eda14cbcSMatt Macy 	tx = dmu_tx_create_dd(dp->dp_mos_dir);
60*eda14cbcSMatt Macy 	VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
61*eda14cbcSMatt Macy 
62*eda14cbcSMatt Macy 	dst.dst_pool = dp;
63*eda14cbcSMatt Macy 	dst.dst_txg = dmu_tx_get_txg(tx);
64*eda14cbcSMatt Macy 	dst.dst_space = blocks_modified << DST_AVG_BLKSHIFT;
65*eda14cbcSMatt Macy 	dst.dst_space_check = space_check;
66*eda14cbcSMatt Macy 	dst.dst_checkfunc = checkfunc != NULL ? checkfunc : dsl_null_checkfunc;
67*eda14cbcSMatt Macy 	dst.dst_syncfunc = syncfunc;
68*eda14cbcSMatt Macy 	dst.dst_arg = arg;
69*eda14cbcSMatt Macy 	dst.dst_error = 0;
70*eda14cbcSMatt Macy 	dst.dst_nowaiter = B_FALSE;
71*eda14cbcSMatt Macy 
72*eda14cbcSMatt Macy 	dsl_pool_config_enter(dp, FTAG);
73*eda14cbcSMatt Macy 	err = dst.dst_checkfunc(arg, tx);
74*eda14cbcSMatt Macy 	dsl_pool_config_exit(dp, FTAG);
75*eda14cbcSMatt Macy 
76*eda14cbcSMatt Macy 	if (err != 0) {
77*eda14cbcSMatt Macy 		dmu_tx_commit(tx);
78*eda14cbcSMatt Macy 		spa_close(spa, FTAG);
79*eda14cbcSMatt Macy 		return (err);
80*eda14cbcSMatt Macy 	}
81*eda14cbcSMatt Macy 
82*eda14cbcSMatt Macy 	txg_list_t *task_list = (early) ?
83*eda14cbcSMatt Macy 	    &dp->dp_early_sync_tasks : &dp->dp_sync_tasks;
84*eda14cbcSMatt Macy 	VERIFY(txg_list_add_tail(task_list, &dst, dst.dst_txg));
85*eda14cbcSMatt Macy 
86*eda14cbcSMatt Macy 	dmu_tx_commit(tx);
87*eda14cbcSMatt Macy 
88*eda14cbcSMatt Macy 	if (sigfunc != NULL && txg_wait_synced_sig(dp, dst.dst_txg)) {
89*eda14cbcSMatt Macy 		/* current contract is to call func once */
90*eda14cbcSMatt Macy 		sigfunc(arg, tx);
91*eda14cbcSMatt Macy 		sigfunc = NULL;	/* in case we're performing an EAGAIN retry */
92*eda14cbcSMatt Macy 	}
93*eda14cbcSMatt Macy 	txg_wait_synced(dp, dst.dst_txg);
94*eda14cbcSMatt Macy 
95*eda14cbcSMatt Macy 	if (dst.dst_error == EAGAIN) {
96*eda14cbcSMatt Macy 		txg_wait_synced(dp, dst.dst_txg + TXG_DEFER_SIZE);
97*eda14cbcSMatt Macy 		goto top;
98*eda14cbcSMatt Macy 	}
99*eda14cbcSMatt Macy 
100*eda14cbcSMatt Macy 	spa_close(spa, FTAG);
101*eda14cbcSMatt Macy 	return (dst.dst_error);
102*eda14cbcSMatt Macy }
103*eda14cbcSMatt Macy 
104*eda14cbcSMatt Macy /*
105*eda14cbcSMatt Macy  * Called from open context to perform a callback in syncing context.  Waits
106*eda14cbcSMatt Macy  * for the operation to complete.
107*eda14cbcSMatt Macy  *
108*eda14cbcSMatt Macy  * The checkfunc will be called from open context as a preliminary check
109*eda14cbcSMatt Macy  * which can quickly fail.  If it succeeds, it will be called again from
110*eda14cbcSMatt Macy  * syncing context.  The checkfunc should generally be designed to work
111*eda14cbcSMatt Macy  * properly in either context, but if necessary it can check
112*eda14cbcSMatt Macy  * dmu_tx_is_syncing(tx).
113*eda14cbcSMatt Macy  *
114*eda14cbcSMatt Macy  * The synctask infrastructure enforces proper locking strategy with respect
115*eda14cbcSMatt Macy  * to the dp_config_rwlock -- the lock will always be held when the callbacks
116*eda14cbcSMatt Macy  * are called.  It will be held for read during the open-context (preliminary)
117*eda14cbcSMatt Macy  * call to the checkfunc, and then held for write from syncing context during
118*eda14cbcSMatt Macy  * the calls to the check and sync funcs.
119*eda14cbcSMatt Macy  *
120*eda14cbcSMatt Macy  * A dataset or pool name can be passed as the first argument.  Typically,
121*eda14cbcSMatt Macy  * the check func will hold, check the return value of the hold, and then
 * release the dataset.  The sync func will VERIFY0(hold()) the dataset.
123*eda14cbcSMatt Macy  * This is safe because no changes can be made between the check and sync funcs,
124*eda14cbcSMatt Macy  * and the sync func will only be called if the check func successfully opened
125*eda14cbcSMatt Macy  * the dataset.
126*eda14cbcSMatt Macy  */
127*eda14cbcSMatt Macy int
128*eda14cbcSMatt Macy dsl_sync_task(const char *pool, dsl_checkfunc_t *checkfunc,
129*eda14cbcSMatt Macy     dsl_syncfunc_t *syncfunc, void *arg,
130*eda14cbcSMatt Macy     int blocks_modified, zfs_space_check_t space_check)
131*eda14cbcSMatt Macy {
132*eda14cbcSMatt Macy 	return (dsl_sync_task_common(pool, checkfunc, syncfunc, NULL, arg,
133*eda14cbcSMatt Macy 	    blocks_modified, space_check, B_FALSE));
134*eda14cbcSMatt Macy }
135*eda14cbcSMatt Macy 
136*eda14cbcSMatt Macy /*
137*eda14cbcSMatt Macy  * An early synctask works exactly as a standard synctask with one important
138*eda14cbcSMatt Macy  * difference on the way it is handled during syncing context. Standard
139*eda14cbcSMatt Macy  * synctasks run after we've written out all the dirty blocks of dirty
140*eda14cbcSMatt Macy  * datasets. Early synctasks are executed before writing out any dirty data,
141*eda14cbcSMatt Macy  * and thus before standard synctasks.
142*eda14cbcSMatt Macy  *
143*eda14cbcSMatt Macy  * For that reason, early synctasks can affect the process of writing dirty
144*eda14cbcSMatt Macy  * changes to disk for the txg that they run and should be used with caution.
145*eda14cbcSMatt Macy  * In addition, early synctasks should not dirty any metaslabs as this would
146*eda14cbcSMatt Macy  * invalidate the precondition/invariant for subsequent early synctasks.
147*eda14cbcSMatt Macy  * [see dsl_pool_sync() and dsl_early_sync_task_verify()]
148*eda14cbcSMatt Macy  */
149*eda14cbcSMatt Macy int
150*eda14cbcSMatt Macy dsl_early_sync_task(const char *pool, dsl_checkfunc_t *checkfunc,
151*eda14cbcSMatt Macy     dsl_syncfunc_t *syncfunc, void *arg,
152*eda14cbcSMatt Macy     int blocks_modified, zfs_space_check_t space_check)
153*eda14cbcSMatt Macy {
154*eda14cbcSMatt Macy 	return (dsl_sync_task_common(pool, checkfunc, syncfunc, NULL, arg,
155*eda14cbcSMatt Macy 	    blocks_modified, space_check, B_TRUE));
156*eda14cbcSMatt Macy }
157*eda14cbcSMatt Macy 
158*eda14cbcSMatt Macy /*
159*eda14cbcSMatt Macy  * A standard synctask that can be interrupted from a signal. The sigfunc
160*eda14cbcSMatt Macy  * is called once if a signal occurred while waiting for the task to sync.
161*eda14cbcSMatt Macy  */
162*eda14cbcSMatt Macy int
163*eda14cbcSMatt Macy dsl_sync_task_sig(const char *pool, dsl_checkfunc_t *checkfunc,
164*eda14cbcSMatt Macy     dsl_syncfunc_t *syncfunc, dsl_sigfunc_t *sigfunc, void *arg,
165*eda14cbcSMatt Macy     int blocks_modified, zfs_space_check_t space_check)
166*eda14cbcSMatt Macy {
167*eda14cbcSMatt Macy 	return (dsl_sync_task_common(pool, checkfunc, syncfunc, sigfunc, arg,
168*eda14cbcSMatt Macy 	    blocks_modified, space_check, B_FALSE));
169*eda14cbcSMatt Macy }
170*eda14cbcSMatt Macy 
171*eda14cbcSMatt Macy static void
172*eda14cbcSMatt Macy dsl_sync_task_nowait_common(dsl_pool_t *dp, dsl_syncfunc_t *syncfunc, void *arg,
173*eda14cbcSMatt Macy     int blocks_modified, zfs_space_check_t space_check, dmu_tx_t *tx,
174*eda14cbcSMatt Macy     boolean_t early)
175*eda14cbcSMatt Macy {
176*eda14cbcSMatt Macy 	dsl_sync_task_t *dst = kmem_zalloc(sizeof (*dst), KM_SLEEP);
177*eda14cbcSMatt Macy 
178*eda14cbcSMatt Macy 	dst->dst_pool = dp;
179*eda14cbcSMatt Macy 	dst->dst_txg = dmu_tx_get_txg(tx);
180*eda14cbcSMatt Macy 	dst->dst_space = blocks_modified << DST_AVG_BLKSHIFT;
181*eda14cbcSMatt Macy 	dst->dst_space_check = space_check;
182*eda14cbcSMatt Macy 	dst->dst_checkfunc = dsl_null_checkfunc;
183*eda14cbcSMatt Macy 	dst->dst_syncfunc = syncfunc;
184*eda14cbcSMatt Macy 	dst->dst_arg = arg;
185*eda14cbcSMatt Macy 	dst->dst_error = 0;
186*eda14cbcSMatt Macy 	dst->dst_nowaiter = B_TRUE;
187*eda14cbcSMatt Macy 
188*eda14cbcSMatt Macy 	txg_list_t *task_list = (early) ?
189*eda14cbcSMatt Macy 	    &dp->dp_early_sync_tasks : &dp->dp_sync_tasks;
190*eda14cbcSMatt Macy 	VERIFY(txg_list_add_tail(task_list, dst, dst->dst_txg));
191*eda14cbcSMatt Macy }
192*eda14cbcSMatt Macy 
193*eda14cbcSMatt Macy void
194*eda14cbcSMatt Macy dsl_sync_task_nowait(dsl_pool_t *dp, dsl_syncfunc_t *syncfunc, void *arg,
195*eda14cbcSMatt Macy     int blocks_modified, zfs_space_check_t space_check, dmu_tx_t *tx)
196*eda14cbcSMatt Macy {
197*eda14cbcSMatt Macy 	dsl_sync_task_nowait_common(dp, syncfunc, arg,
198*eda14cbcSMatt Macy 	    blocks_modified, space_check, tx, B_FALSE);
199*eda14cbcSMatt Macy }
200*eda14cbcSMatt Macy 
201*eda14cbcSMatt Macy void
202*eda14cbcSMatt Macy dsl_early_sync_task_nowait(dsl_pool_t *dp, dsl_syncfunc_t *syncfunc, void *arg,
203*eda14cbcSMatt Macy     int blocks_modified, zfs_space_check_t space_check, dmu_tx_t *tx)
204*eda14cbcSMatt Macy {
205*eda14cbcSMatt Macy 	dsl_sync_task_nowait_common(dp, syncfunc, arg,
206*eda14cbcSMatt Macy 	    blocks_modified, space_check, tx, B_TRUE);
207*eda14cbcSMatt Macy }
208*eda14cbcSMatt Macy 
209*eda14cbcSMatt Macy /*
210*eda14cbcSMatt Macy  * Called in syncing context to execute the synctask.
211*eda14cbcSMatt Macy  */
212*eda14cbcSMatt Macy void
213*eda14cbcSMatt Macy dsl_sync_task_sync(dsl_sync_task_t *dst, dmu_tx_t *tx)
214*eda14cbcSMatt Macy {
215*eda14cbcSMatt Macy 	dsl_pool_t *dp = dst->dst_pool;
216*eda14cbcSMatt Macy 
217*eda14cbcSMatt Macy 	ASSERT0(dst->dst_error);
218*eda14cbcSMatt Macy 
219*eda14cbcSMatt Macy 	/*
220*eda14cbcSMatt Macy 	 * Check for sufficient space.
221*eda14cbcSMatt Macy 	 *
222*eda14cbcSMatt Macy 	 * When the sync task was created, the caller specified the
223*eda14cbcSMatt Macy 	 * type of space checking required.  See the comment in
224*eda14cbcSMatt Macy 	 * zfs_space_check_t for details on the semantics of each
225*eda14cbcSMatt Macy 	 * type of space checking.
226*eda14cbcSMatt Macy 	 *
227*eda14cbcSMatt Macy 	 * We just check against what's on-disk; we don't want any
228*eda14cbcSMatt Macy 	 * in-flight accounting to get in our way, because open context
229*eda14cbcSMatt Macy 	 * may have already used up various in-core limits
230*eda14cbcSMatt Macy 	 * (arc_tempreserve, dsl_pool_tempreserve).
231*eda14cbcSMatt Macy 	 */
232*eda14cbcSMatt Macy 	if (dst->dst_space_check != ZFS_SPACE_CHECK_NONE) {
233*eda14cbcSMatt Macy 		uint64_t quota = dsl_pool_unreserved_space(dp,
234*eda14cbcSMatt Macy 		    dst->dst_space_check);
235*eda14cbcSMatt Macy 		uint64_t used = dsl_dir_phys(dp->dp_root_dir)->dd_used_bytes;
236*eda14cbcSMatt Macy 
237*eda14cbcSMatt Macy 		/* MOS space is triple-dittoed, so we multiply by 3. */
238*eda14cbcSMatt Macy 		if (used + dst->dst_space * 3 > quota) {
239*eda14cbcSMatt Macy 			dst->dst_error = SET_ERROR(ENOSPC);
240*eda14cbcSMatt Macy 			if (dst->dst_nowaiter)
241*eda14cbcSMatt Macy 				kmem_free(dst, sizeof (*dst));
242*eda14cbcSMatt Macy 			return;
243*eda14cbcSMatt Macy 		}
244*eda14cbcSMatt Macy 	}
245*eda14cbcSMatt Macy 
246*eda14cbcSMatt Macy 	/*
247*eda14cbcSMatt Macy 	 * Check for errors by calling checkfunc.
248*eda14cbcSMatt Macy 	 */
249*eda14cbcSMatt Macy 	rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);
250*eda14cbcSMatt Macy 	dst->dst_error = dst->dst_checkfunc(dst->dst_arg, tx);
251*eda14cbcSMatt Macy 	if (dst->dst_error == 0)
252*eda14cbcSMatt Macy 		dst->dst_syncfunc(dst->dst_arg, tx);
253*eda14cbcSMatt Macy 	rrw_exit(&dp->dp_config_rwlock, FTAG);
254*eda14cbcSMatt Macy 	if (dst->dst_nowaiter)
255*eda14cbcSMatt Macy 		kmem_free(dst, sizeof (*dst));
256*eda14cbcSMatt Macy }
257*eda14cbcSMatt Macy 
/*
 * Symbol exports, compiled only for kernel builds (_KERNEL).  Of the
 * entry points in this file, only the standard blocking and no-wait
 * variants are listed.
 */
#if defined(_KERNEL)
EXPORT_SYMBOL(dsl_sync_task);
EXPORT_SYMBOL(dsl_sync_task_nowait);
#endif
262