1*eda14cbcSMatt Macy /* 2*eda14cbcSMatt Macy * CDDL HEADER START 3*eda14cbcSMatt Macy * 4*eda14cbcSMatt Macy * The contents of this file are subject to the terms of the 5*eda14cbcSMatt Macy * Common Development and Distribution License (the "License"). 6*eda14cbcSMatt Macy * You may not use this file except in compliance with the License. 7*eda14cbcSMatt Macy * 8*eda14cbcSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*eda14cbcSMatt Macy * or http://www.opensolaris.org/os/licensing. 10*eda14cbcSMatt Macy * See the License for the specific language governing permissions 11*eda14cbcSMatt Macy * and limitations under the License. 12*eda14cbcSMatt Macy * 13*eda14cbcSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each 14*eda14cbcSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*eda14cbcSMatt Macy * If applicable, add the following below this CDDL HEADER, with the 16*eda14cbcSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying 17*eda14cbcSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner] 18*eda14cbcSMatt Macy * 19*eda14cbcSMatt Macy * CDDL HEADER END 20*eda14cbcSMatt Macy */ 21*eda14cbcSMatt Macy /* 22*eda14cbcSMatt Macy * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23*eda14cbcSMatt Macy * Copyright (c) 2012, 2017 by Delphix. All rights reserved. 24*eda14cbcSMatt Macy */ 25*eda14cbcSMatt Macy 26*eda14cbcSMatt Macy #include <sys/dmu.h> 27*eda14cbcSMatt Macy #include <sys/dmu_tx.h> 28*eda14cbcSMatt Macy #include <sys/dsl_pool.h> 29*eda14cbcSMatt Macy #include <sys/dsl_dir.h> 30*eda14cbcSMatt Macy #include <sys/dsl_synctask.h> 31*eda14cbcSMatt Macy #include <sys/metaslab.h> 32*eda14cbcSMatt Macy 33*eda14cbcSMatt Macy #define DST_AVG_BLKSHIFT 14 34*eda14cbcSMatt Macy 35*eda14cbcSMatt Macy /* ARGSUSED */ 36*eda14cbcSMatt Macy static int 37*eda14cbcSMatt Macy dsl_null_checkfunc(void *arg, dmu_tx_t *tx) 38*eda14cbcSMatt Macy { 39*eda14cbcSMatt Macy return (0); 40*eda14cbcSMatt Macy } 41*eda14cbcSMatt Macy 42*eda14cbcSMatt Macy static int 43*eda14cbcSMatt Macy dsl_sync_task_common(const char *pool, dsl_checkfunc_t *checkfunc, 44*eda14cbcSMatt Macy dsl_syncfunc_t *syncfunc, dsl_sigfunc_t *sigfunc, void *arg, 45*eda14cbcSMatt Macy int blocks_modified, zfs_space_check_t space_check, boolean_t early) 46*eda14cbcSMatt Macy { 47*eda14cbcSMatt Macy spa_t *spa; 48*eda14cbcSMatt Macy dmu_tx_t *tx; 49*eda14cbcSMatt Macy int err; 50*eda14cbcSMatt Macy dsl_sync_task_t dst = { { { NULL } } }; 51*eda14cbcSMatt Macy dsl_pool_t *dp; 52*eda14cbcSMatt Macy 53*eda14cbcSMatt Macy err = spa_open(pool, &spa, FTAG); 54*eda14cbcSMatt Macy if (err != 0) 55*eda14cbcSMatt Macy return (err); 56*eda14cbcSMatt Macy dp = spa_get_dsl(spa); 57*eda14cbcSMatt Macy 58*eda14cbcSMatt Macy top: 59*eda14cbcSMatt Macy tx = dmu_tx_create_dd(dp->dp_mos_dir); 60*eda14cbcSMatt Macy VERIFY0(dmu_tx_assign(tx, TXG_WAIT)); 61*eda14cbcSMatt Macy 62*eda14cbcSMatt Macy dst.dst_pool = dp; 63*eda14cbcSMatt Macy dst.dst_txg = dmu_tx_get_txg(tx); 64*eda14cbcSMatt Macy dst.dst_space = blocks_modified << DST_AVG_BLKSHIFT; 65*eda14cbcSMatt Macy dst.dst_space_check = space_check; 66*eda14cbcSMatt Macy dst.dst_checkfunc = checkfunc != NULL ? checkfunc : dsl_null_checkfunc; 67*eda14cbcSMatt Macy dst.dst_syncfunc = syncfunc; 68*eda14cbcSMatt Macy dst.dst_arg = arg; 69*eda14cbcSMatt Macy dst.dst_error = 0; 70*eda14cbcSMatt Macy dst.dst_nowaiter = B_FALSE; 71*eda14cbcSMatt Macy 72*eda14cbcSMatt Macy dsl_pool_config_enter(dp, FTAG); 73*eda14cbcSMatt Macy err = dst.dst_checkfunc(arg, tx); 74*eda14cbcSMatt Macy dsl_pool_config_exit(dp, FTAG); 75*eda14cbcSMatt Macy 76*eda14cbcSMatt Macy if (err != 0) { 77*eda14cbcSMatt Macy dmu_tx_commit(tx); 78*eda14cbcSMatt Macy spa_close(spa, FTAG); 79*eda14cbcSMatt Macy return (err); 80*eda14cbcSMatt Macy } 81*eda14cbcSMatt Macy 82*eda14cbcSMatt Macy txg_list_t *task_list = (early) ? 83*eda14cbcSMatt Macy &dp->dp_early_sync_tasks : &dp->dp_sync_tasks; 84*eda14cbcSMatt Macy VERIFY(txg_list_add_tail(task_list, &dst, dst.dst_txg)); 85*eda14cbcSMatt Macy 86*eda14cbcSMatt Macy dmu_tx_commit(tx); 87*eda14cbcSMatt Macy 88*eda14cbcSMatt Macy if (sigfunc != NULL && txg_wait_synced_sig(dp, dst.dst_txg)) { 89*eda14cbcSMatt Macy /* current contract is to call func once */ 90*eda14cbcSMatt Macy sigfunc(arg, tx); 91*eda14cbcSMatt Macy sigfunc = NULL; /* in case we're performing an EAGAIN retry */ 92*eda14cbcSMatt Macy } 93*eda14cbcSMatt Macy txg_wait_synced(dp, dst.dst_txg); 94*eda14cbcSMatt Macy 95*eda14cbcSMatt Macy if (dst.dst_error == EAGAIN) { 96*eda14cbcSMatt Macy txg_wait_synced(dp, dst.dst_txg + TXG_DEFER_SIZE); 97*eda14cbcSMatt Macy goto top; 98*eda14cbcSMatt Macy } 99*eda14cbcSMatt Macy 100*eda14cbcSMatt Macy spa_close(spa, FTAG); 101*eda14cbcSMatt Macy return (dst.dst_error); 102*eda14cbcSMatt Macy } 103*eda14cbcSMatt Macy 104*eda14cbcSMatt Macy /* 105*eda14cbcSMatt Macy * Called from open context to perform a callback in syncing context. Waits 106*eda14cbcSMatt Macy * for the operation to complete. 107*eda14cbcSMatt Macy * 108*eda14cbcSMatt Macy * The checkfunc will be called from open context as a preliminary check 109*eda14cbcSMatt Macy * which can quickly fail. If it succeeds, it will be called again from 110*eda14cbcSMatt Macy * syncing context. The checkfunc should generally be designed to work 111*eda14cbcSMatt Macy * properly in either context, but if necessary it can check 112*eda14cbcSMatt Macy * dmu_tx_is_syncing(tx). 113*eda14cbcSMatt Macy * 114*eda14cbcSMatt Macy * The synctask infrastructure enforces proper locking strategy with respect 115*eda14cbcSMatt Macy * to the dp_config_rwlock -- the lock will always be held when the callbacks 116*eda14cbcSMatt Macy * are called. It will be held for read during the open-context (preliminary) 117*eda14cbcSMatt Macy * call to the checkfunc, and then held for write from syncing context during 118*eda14cbcSMatt Macy * the calls to the check and sync funcs. 119*eda14cbcSMatt Macy * 120*eda14cbcSMatt Macy * A dataset or pool name can be passed as the first argument. Typically, 121*eda14cbcSMatt Macy * the check func will hold, check the return value of the hold, and then 122*eda14cbcSMatt Macy * release the dataset. The sync func will VERIFYO(hold()) the dataset. 123*eda14cbcSMatt Macy * This is safe because no changes can be made between the check and sync funcs, 124*eda14cbcSMatt Macy * and the sync func will only be called if the check func successfully opened 125*eda14cbcSMatt Macy * the dataset. 126*eda14cbcSMatt Macy */ 127*eda14cbcSMatt Macy int 128*eda14cbcSMatt Macy dsl_sync_task(const char *pool, dsl_checkfunc_t *checkfunc, 129*eda14cbcSMatt Macy dsl_syncfunc_t *syncfunc, void *arg, 130*eda14cbcSMatt Macy int blocks_modified, zfs_space_check_t space_check) 131*eda14cbcSMatt Macy { 132*eda14cbcSMatt Macy return (dsl_sync_task_common(pool, checkfunc, syncfunc, NULL, arg, 133*eda14cbcSMatt Macy blocks_modified, space_check, B_FALSE)); 134*eda14cbcSMatt Macy } 135*eda14cbcSMatt Macy 136*eda14cbcSMatt Macy /* 137*eda14cbcSMatt Macy * An early synctask works exactly as a standard synctask with one important 138*eda14cbcSMatt Macy * difference on the way it is handled during syncing context. Standard 139*eda14cbcSMatt Macy * synctasks run after we've written out all the dirty blocks of dirty 140*eda14cbcSMatt Macy * datasets. Early synctasks are executed before writing out any dirty data, 141*eda14cbcSMatt Macy * and thus before standard synctasks. 142*eda14cbcSMatt Macy * 143*eda14cbcSMatt Macy * For that reason, early synctasks can affect the process of writing dirty 144*eda14cbcSMatt Macy * changes to disk for the txg that they run and should be used with caution. 145*eda14cbcSMatt Macy * In addition, early synctasks should not dirty any metaslabs as this would 146*eda14cbcSMatt Macy * invalidate the precondition/invariant for subsequent early synctasks. 147*eda14cbcSMatt Macy * [see dsl_pool_sync() and dsl_early_sync_task_verify()] 148*eda14cbcSMatt Macy */ 149*eda14cbcSMatt Macy int 150*eda14cbcSMatt Macy dsl_early_sync_task(const char *pool, dsl_checkfunc_t *checkfunc, 151*eda14cbcSMatt Macy dsl_syncfunc_t *syncfunc, void *arg, 152*eda14cbcSMatt Macy int blocks_modified, zfs_space_check_t space_check) 153*eda14cbcSMatt Macy { 154*eda14cbcSMatt Macy return (dsl_sync_task_common(pool, checkfunc, syncfunc, NULL, arg, 155*eda14cbcSMatt Macy blocks_modified, space_check, B_TRUE)); 156*eda14cbcSMatt Macy } 157*eda14cbcSMatt Macy 158*eda14cbcSMatt Macy /* 159*eda14cbcSMatt Macy * A standard synctask that can be interrupted from a signal. The sigfunc 160*eda14cbcSMatt Macy * is called once if a signal occurred while waiting for the task to sync. 161*eda14cbcSMatt Macy */ 162*eda14cbcSMatt Macy int 163*eda14cbcSMatt Macy dsl_sync_task_sig(const char *pool, dsl_checkfunc_t *checkfunc, 164*eda14cbcSMatt Macy dsl_syncfunc_t *syncfunc, dsl_sigfunc_t *sigfunc, void *arg, 165*eda14cbcSMatt Macy int blocks_modified, zfs_space_check_t space_check) 166*eda14cbcSMatt Macy { 167*eda14cbcSMatt Macy return (dsl_sync_task_common(pool, checkfunc, syncfunc, sigfunc, arg, 168*eda14cbcSMatt Macy blocks_modified, space_check, B_FALSE)); 169*eda14cbcSMatt Macy } 170*eda14cbcSMatt Macy 171*eda14cbcSMatt Macy static void 172*eda14cbcSMatt Macy dsl_sync_task_nowait_common(dsl_pool_t *dp, dsl_syncfunc_t *syncfunc, void *arg, 173*eda14cbcSMatt Macy int blocks_modified, zfs_space_check_t space_check, dmu_tx_t *tx, 174*eda14cbcSMatt Macy boolean_t early) 175*eda14cbcSMatt Macy { 176*eda14cbcSMatt Macy dsl_sync_task_t *dst = kmem_zalloc(sizeof (*dst), KM_SLEEP); 177*eda14cbcSMatt Macy 178*eda14cbcSMatt Macy dst->dst_pool = dp; 179*eda14cbcSMatt Macy dst->dst_txg = dmu_tx_get_txg(tx); 180*eda14cbcSMatt Macy dst->dst_space = blocks_modified << DST_AVG_BLKSHIFT; 181*eda14cbcSMatt Macy dst->dst_space_check = space_check; 182*eda14cbcSMatt Macy dst->dst_checkfunc = dsl_null_checkfunc; 183*eda14cbcSMatt Macy dst->dst_syncfunc = syncfunc; 184*eda14cbcSMatt Macy dst->dst_arg = arg; 185*eda14cbcSMatt Macy dst->dst_error = 0; 186*eda14cbcSMatt Macy dst->dst_nowaiter = B_TRUE; 187*eda14cbcSMatt Macy 188*eda14cbcSMatt Macy txg_list_t *task_list = (early) ? 189*eda14cbcSMatt Macy &dp->dp_early_sync_tasks : &dp->dp_sync_tasks; 190*eda14cbcSMatt Macy VERIFY(txg_list_add_tail(task_list, dst, dst->dst_txg)); 191*eda14cbcSMatt Macy } 192*eda14cbcSMatt Macy 193*eda14cbcSMatt Macy void 194*eda14cbcSMatt Macy dsl_sync_task_nowait(dsl_pool_t *dp, dsl_syncfunc_t *syncfunc, void *arg, 195*eda14cbcSMatt Macy int blocks_modified, zfs_space_check_t space_check, dmu_tx_t *tx) 196*eda14cbcSMatt Macy { 197*eda14cbcSMatt Macy dsl_sync_task_nowait_common(dp, syncfunc, arg, 198*eda14cbcSMatt Macy blocks_modified, space_check, tx, B_FALSE); 199*eda14cbcSMatt Macy } 200*eda14cbcSMatt Macy 201*eda14cbcSMatt Macy void 202*eda14cbcSMatt Macy dsl_early_sync_task_nowait(dsl_pool_t *dp, dsl_syncfunc_t *syncfunc, void *arg, 203*eda14cbcSMatt Macy int blocks_modified, zfs_space_check_t space_check, dmu_tx_t *tx) 204*eda14cbcSMatt Macy { 205*eda14cbcSMatt Macy dsl_sync_task_nowait_common(dp, syncfunc, arg, 206*eda14cbcSMatt Macy blocks_modified, space_check, tx, B_TRUE); 207*eda14cbcSMatt Macy } 208*eda14cbcSMatt Macy 209*eda14cbcSMatt Macy /* 210*eda14cbcSMatt Macy * Called in syncing context to execute the synctask. 211*eda14cbcSMatt Macy */ 212*eda14cbcSMatt Macy void 213*eda14cbcSMatt Macy dsl_sync_task_sync(dsl_sync_task_t *dst, dmu_tx_t *tx) 214*eda14cbcSMatt Macy { 215*eda14cbcSMatt Macy dsl_pool_t *dp = dst->dst_pool; 216*eda14cbcSMatt Macy 217*eda14cbcSMatt Macy ASSERT0(dst->dst_error); 218*eda14cbcSMatt Macy 219*eda14cbcSMatt Macy /* 220*eda14cbcSMatt Macy * Check for sufficient space. 221*eda14cbcSMatt Macy * 222*eda14cbcSMatt Macy * When the sync task was created, the caller specified the 223*eda14cbcSMatt Macy * type of space checking required. See the comment in 224*eda14cbcSMatt Macy * zfs_space_check_t for details on the semantics of each 225*eda14cbcSMatt Macy * type of space checking. 226*eda14cbcSMatt Macy * 227*eda14cbcSMatt Macy * We just check against what's on-disk; we don't want any 228*eda14cbcSMatt Macy * in-flight accounting to get in our way, because open context 229*eda14cbcSMatt Macy * may have already used up various in-core limits 230*eda14cbcSMatt Macy * (arc_tempreserve, dsl_pool_tempreserve). 231*eda14cbcSMatt Macy */ 232*eda14cbcSMatt Macy if (dst->dst_space_check != ZFS_SPACE_CHECK_NONE) { 233*eda14cbcSMatt Macy uint64_t quota = dsl_pool_unreserved_space(dp, 234*eda14cbcSMatt Macy dst->dst_space_check); 235*eda14cbcSMatt Macy uint64_t used = dsl_dir_phys(dp->dp_root_dir)->dd_used_bytes; 236*eda14cbcSMatt Macy 237*eda14cbcSMatt Macy /* MOS space is triple-dittoed, so we multiply by 3. */ 238*eda14cbcSMatt Macy if (used + dst->dst_space * 3 > quota) { 239*eda14cbcSMatt Macy dst->dst_error = SET_ERROR(ENOSPC); 240*eda14cbcSMatt Macy if (dst->dst_nowaiter) 241*eda14cbcSMatt Macy kmem_free(dst, sizeof (*dst)); 242*eda14cbcSMatt Macy return; 243*eda14cbcSMatt Macy } 244*eda14cbcSMatt Macy } 245*eda14cbcSMatt Macy 246*eda14cbcSMatt Macy /* 247*eda14cbcSMatt Macy * Check for errors by calling checkfunc. 248*eda14cbcSMatt Macy */ 249*eda14cbcSMatt Macy rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); 250*eda14cbcSMatt Macy dst->dst_error = dst->dst_checkfunc(dst->dst_arg, tx); 251*eda14cbcSMatt Macy if (dst->dst_error == 0) 252*eda14cbcSMatt Macy dst->dst_syncfunc(dst->dst_arg, tx); 253*eda14cbcSMatt Macy rrw_exit(&dp->dp_config_rwlock, FTAG); 254*eda14cbcSMatt Macy if (dst->dst_nowaiter) 255*eda14cbcSMatt Macy kmem_free(dst, sizeof (*dst)); 256*eda14cbcSMatt Macy } 257*eda14cbcSMatt Macy 258*eda14cbcSMatt Macy #if defined(_KERNEL) 259*eda14cbcSMatt Macy EXPORT_SYMBOL(dsl_sync_task); 260*eda14cbcSMatt Macy EXPORT_SYMBOL(dsl_sync_task_nowait); 261*eda14cbcSMatt Macy #endif 262