1eda14cbcSMatt Macy /*
2eda14cbcSMatt Macy * CDDL HEADER START
3eda14cbcSMatt Macy *
4eda14cbcSMatt Macy * The contents of this file are subject to the terms of the
5eda14cbcSMatt Macy * Common Development and Distribution License (the "License").
6eda14cbcSMatt Macy * You may not use this file except in compliance with the License.
7eda14cbcSMatt Macy *
8eda14cbcSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9271171e0SMartin Matuska * or https://opensource.org/licenses/CDDL-1.0.
10eda14cbcSMatt Macy * See the License for the specific language governing permissions
11eda14cbcSMatt Macy * and limitations under the License.
12eda14cbcSMatt Macy *
13eda14cbcSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each
14eda14cbcSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15eda14cbcSMatt Macy * If applicable, add the following below this CDDL HEADER, with the
16eda14cbcSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying
17eda14cbcSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner]
18eda14cbcSMatt Macy *
19eda14cbcSMatt Macy * CDDL HEADER END
20eda14cbcSMatt Macy */
21eda14cbcSMatt Macy /*
22eda14cbcSMatt Macy * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23eda14cbcSMatt Macy * Portions Copyright 2011 Martin Matuska
24eda14cbcSMatt Macy * Copyright (c) 2012, 2019 by Delphix. All rights reserved.
25eda14cbcSMatt Macy */
26eda14cbcSMatt Macy
27eda14cbcSMatt Macy #include <sys/zfs_context.h>
28eda14cbcSMatt Macy #include <sys/txg_impl.h>
29eda14cbcSMatt Macy #include <sys/dmu_impl.h>
30eda14cbcSMatt Macy #include <sys/spa_impl.h>
31eda14cbcSMatt Macy #include <sys/dmu_tx.h>
32eda14cbcSMatt Macy #include <sys/dsl_pool.h>
33eda14cbcSMatt Macy #include <sys/dsl_scan.h>
34eda14cbcSMatt Macy #include <sys/zil.h>
35eda14cbcSMatt Macy #include <sys/callb.h>
36eda14cbcSMatt Macy #include <sys/trace_zfs.h>
37eda14cbcSMatt Macy
38eda14cbcSMatt Macy /*
39eda14cbcSMatt Macy * ZFS Transaction Groups
40eda14cbcSMatt Macy * ----------------------
41eda14cbcSMatt Macy *
42eda14cbcSMatt Macy * ZFS transaction groups are, as the name implies, groups of transactions
43eda14cbcSMatt Macy * that act on persistent state. ZFS asserts consistency at the granularity of
44eda14cbcSMatt Macy * these transaction groups. Each successive transaction group (txg) is
45eda14cbcSMatt Macy * assigned a 64-bit consecutive identifier. There are three active
46eda14cbcSMatt Macy * transaction group states: open, quiescing, or syncing. At any given time,
47eda14cbcSMatt Macy * there may be an active txg associated with each state; each active txg may
48eda14cbcSMatt Macy * either be processing, or blocked waiting to enter the next state. There may
49eda14cbcSMatt Macy * be up to three active txgs, and there is always a txg in the open state
50eda14cbcSMatt Macy * (though it may be blocked waiting to enter the quiescing state). In broad
51eda14cbcSMatt Macy * strokes, transactions -- operations that change in-memory structures -- are
52eda14cbcSMatt Macy * accepted into the txg in the open state, and are completed while the txg is
53eda14cbcSMatt Macy * in the open or quiescing states. The accumulated changes are written to
54eda14cbcSMatt Macy * disk in the syncing state.
55eda14cbcSMatt Macy *
56eda14cbcSMatt Macy * Open
57eda14cbcSMatt Macy *
58eda14cbcSMatt Macy * When a new txg becomes active, it first enters the open state. New
59eda14cbcSMatt Macy * transactions -- updates to in-memory structures -- are assigned to the
60eda14cbcSMatt Macy * currently open txg. There is always a txg in the open state so that ZFS can
61eda14cbcSMatt Macy * accept new changes (though the txg may refuse new changes if it has hit
62eda14cbcSMatt Macy * some limit). ZFS advances the open txg to the next state for a variety of
63eda14cbcSMatt Macy * reasons such as it hitting a time or size threshold, or the execution of an
64eda14cbcSMatt Macy * administrative action that must be completed in the syncing state.
65eda14cbcSMatt Macy *
66eda14cbcSMatt Macy * Quiescing
67eda14cbcSMatt Macy *
68eda14cbcSMatt Macy * After a txg exits the open state, it enters the quiescing state. The
69eda14cbcSMatt Macy * quiescing state is intended to provide a buffer between accepting new
70eda14cbcSMatt Macy * transactions in the open state and writing them out to stable storage in
71eda14cbcSMatt Macy * the syncing state. While quiescing, transactions can continue their
72eda14cbcSMatt Macy * operation without delaying either of the other states. Typically, a txg is
73eda14cbcSMatt Macy * in the quiescing state very briefly since the operations are bounded by
74eda14cbcSMatt Macy * software latencies rather than, say, slower I/O latencies. After all
75eda14cbcSMatt Macy * transactions complete, the txg is ready to enter the next state.
76eda14cbcSMatt Macy *
77eda14cbcSMatt Macy * Syncing
78eda14cbcSMatt Macy *
79eda14cbcSMatt Macy * In the syncing state, the in-memory state built up during the open and (to
80eda14cbcSMatt Macy * a lesser degree) the quiescing states is written to stable storage. The
81eda14cbcSMatt Macy * process of writing out modified data can, in turn modify more data. For
82eda14cbcSMatt Macy * example when we write new blocks, we need to allocate space for them; those
83eda14cbcSMatt Macy * allocations modify metadata (space maps)... which themselves must be
84eda14cbcSMatt Macy * written to stable storage. During the sync state, ZFS iterates, writing out
85eda14cbcSMatt Macy * data until it converges and all in-memory changes have been written out.
86eda14cbcSMatt Macy * The first such pass is the largest as it encompasses all the modified user
87eda14cbcSMatt Macy * data (as opposed to filesystem metadata). Subsequent passes typically have
88eda14cbcSMatt Macy * far less data to write as they consist exclusively of filesystem metadata.
89eda14cbcSMatt Macy *
90eda14cbcSMatt Macy * To ensure convergence, after a certain number of passes ZFS begins
91eda14cbcSMatt Macy * overwriting locations on stable storage that had been allocated earlier in
92eda14cbcSMatt Macy * the syncing state (and subsequently freed). ZFS usually allocates new
93eda14cbcSMatt Macy * blocks to optimize for large, continuous, writes. For the syncing state to
94eda14cbcSMatt Macy * converge however it must complete a pass where no new blocks are allocated
95eda14cbcSMatt Macy * since each allocation requires a modification of persistent metadata.
96eda14cbcSMatt Macy * Further, to hasten convergence, after a prescribed number of passes, ZFS
97eda14cbcSMatt Macy * also defers frees, and stops compressing.
98eda14cbcSMatt Macy *
99eda14cbcSMatt Macy * In addition to writing out user data, we must also execute synctasks during
100eda14cbcSMatt Macy * the syncing context. A synctask is the mechanism by which some
101eda14cbcSMatt Macy * administrative activities work such as creating and destroying snapshots or
102eda14cbcSMatt Macy * datasets. Note that when a synctask is initiated it enters the open txg,
103eda14cbcSMatt Macy * and ZFS then pushes that txg as quickly as possible to completion of the
104eda14cbcSMatt Macy * syncing state in order to reduce the latency of the administrative
105eda14cbcSMatt Macy * activity. To complete the syncing state, ZFS writes out a new uberblock,
106eda14cbcSMatt Macy * the root of the tree of blocks that comprise all state stored on the ZFS
107eda14cbcSMatt Macy * pool. Finally, if there is a quiesced txg waiting, we signal that it can
108eda14cbcSMatt Macy * now transition to the syncing state.
109eda14cbcSMatt Macy */
110eda14cbcSMatt Macy
111da5137abSMartin Matuska static __attribute__((noreturn)) void txg_sync_thread(void *arg);
112da5137abSMartin Matuska static __attribute__((noreturn)) void txg_quiesce_thread(void *arg);
113eda14cbcSMatt Macy
114be181ee2SMartin Matuska uint_t zfs_txg_timeout = 5; /* max seconds worth of delta per txg */
115eda14cbcSMatt Macy
116eda14cbcSMatt Macy /*
117eda14cbcSMatt Macy * Prepare the txg subsystem.
118eda14cbcSMatt Macy */
119eda14cbcSMatt Macy void
txg_init(dsl_pool_t * dp,uint64_t txg)120eda14cbcSMatt Macy txg_init(dsl_pool_t *dp, uint64_t txg)
121eda14cbcSMatt Macy {
122eda14cbcSMatt Macy tx_state_t *tx = &dp->dp_tx;
123eda14cbcSMatt Macy int c;
124da5137abSMartin Matuska memset(tx, 0, sizeof (tx_state_t));
125eda14cbcSMatt Macy
126eda14cbcSMatt Macy tx->tx_cpu = vmem_zalloc(max_ncpus * sizeof (tx_cpu_t), KM_SLEEP);
127eda14cbcSMatt Macy
128eda14cbcSMatt Macy for (c = 0; c < max_ncpus; c++) {
129eda14cbcSMatt Macy int i;
130eda14cbcSMatt Macy
131eda14cbcSMatt Macy mutex_init(&tx->tx_cpu[c].tc_lock, NULL, MUTEX_DEFAULT, NULL);
132eda14cbcSMatt Macy mutex_init(&tx->tx_cpu[c].tc_open_lock, NULL, MUTEX_NOLOCKDEP,
133eda14cbcSMatt Macy NULL);
134eda14cbcSMatt Macy for (i = 0; i < TXG_SIZE; i++) {
135eda14cbcSMatt Macy cv_init(&tx->tx_cpu[c].tc_cv[i], NULL, CV_DEFAULT,
136eda14cbcSMatt Macy NULL);
137eda14cbcSMatt Macy list_create(&tx->tx_cpu[c].tc_callbacks[i],
138eda14cbcSMatt Macy sizeof (dmu_tx_callback_t),
139eda14cbcSMatt Macy offsetof(dmu_tx_callback_t, dcb_node));
140eda14cbcSMatt Macy }
141eda14cbcSMatt Macy }
142eda14cbcSMatt Macy
143eda14cbcSMatt Macy mutex_init(&tx->tx_sync_lock, NULL, MUTEX_DEFAULT, NULL);
144eda14cbcSMatt Macy
145eda14cbcSMatt Macy cv_init(&tx->tx_sync_more_cv, NULL, CV_DEFAULT, NULL);
146eda14cbcSMatt Macy cv_init(&tx->tx_sync_done_cv, NULL, CV_DEFAULT, NULL);
147eda14cbcSMatt Macy cv_init(&tx->tx_quiesce_more_cv, NULL, CV_DEFAULT, NULL);
148eda14cbcSMatt Macy cv_init(&tx->tx_quiesce_done_cv, NULL, CV_DEFAULT, NULL);
149eda14cbcSMatt Macy cv_init(&tx->tx_exit_cv, NULL, CV_DEFAULT, NULL);
150eda14cbcSMatt Macy
151eda14cbcSMatt Macy tx->tx_open_txg = txg;
152eda14cbcSMatt Macy }
153eda14cbcSMatt Macy
154eda14cbcSMatt Macy /*
155eda14cbcSMatt Macy * Close down the txg subsystem.
156eda14cbcSMatt Macy */
157eda14cbcSMatt Macy void
txg_fini(dsl_pool_t * dp)158eda14cbcSMatt Macy txg_fini(dsl_pool_t *dp)
159eda14cbcSMatt Macy {
160eda14cbcSMatt Macy tx_state_t *tx = &dp->dp_tx;
161eda14cbcSMatt Macy int c;
162eda14cbcSMatt Macy
163eda14cbcSMatt Macy ASSERT0(tx->tx_threads);
164eda14cbcSMatt Macy
165eda14cbcSMatt Macy mutex_destroy(&tx->tx_sync_lock);
166eda14cbcSMatt Macy
167eda14cbcSMatt Macy cv_destroy(&tx->tx_sync_more_cv);
168eda14cbcSMatt Macy cv_destroy(&tx->tx_sync_done_cv);
169eda14cbcSMatt Macy cv_destroy(&tx->tx_quiesce_more_cv);
170eda14cbcSMatt Macy cv_destroy(&tx->tx_quiesce_done_cv);
171eda14cbcSMatt Macy cv_destroy(&tx->tx_exit_cv);
172eda14cbcSMatt Macy
173eda14cbcSMatt Macy for (c = 0; c < max_ncpus; c++) {
174eda14cbcSMatt Macy int i;
175eda14cbcSMatt Macy
176eda14cbcSMatt Macy mutex_destroy(&tx->tx_cpu[c].tc_open_lock);
177eda14cbcSMatt Macy mutex_destroy(&tx->tx_cpu[c].tc_lock);
178eda14cbcSMatt Macy for (i = 0; i < TXG_SIZE; i++) {
179eda14cbcSMatt Macy cv_destroy(&tx->tx_cpu[c].tc_cv[i]);
180eda14cbcSMatt Macy list_destroy(&tx->tx_cpu[c].tc_callbacks[i]);
181eda14cbcSMatt Macy }
182eda14cbcSMatt Macy }
183eda14cbcSMatt Macy
184eda14cbcSMatt Macy if (tx->tx_commit_cb_taskq != NULL)
185eda14cbcSMatt Macy taskq_destroy(tx->tx_commit_cb_taskq);
186eda14cbcSMatt Macy
187eda14cbcSMatt Macy vmem_free(tx->tx_cpu, max_ncpus * sizeof (tx_cpu_t));
188eda14cbcSMatt Macy
189da5137abSMartin Matuska memset(tx, 0, sizeof (tx_state_t));
190eda14cbcSMatt Macy }
191eda14cbcSMatt Macy
/*
 * Start syncing transaction groups: spawn the quiesce and sync worker
 * threads for this pool.  The reverse of txg_sync_stop().
 */
void
txg_sync_start(dsl_pool_t *dp)
{
	tx_state_t *tx = &dp->dp_tx;

	mutex_enter(&tx->tx_sync_lock);

	dprintf("pool %p\n", dp);

	ASSERT0(tx->tx_threads);

	/* One quiesce thread plus one sync thread; see txg_sync_stop(). */
	tx->tx_threads = 2;

	tx->tx_quiesce_thread = thread_create(NULL, 0, txg_quiesce_thread,
	    dp, 0, &p0, TS_RUN, defclsyspri);

	/*
	 * The sync thread can need a larger-than-default stack size on
	 * 32-bit x86. This is due in part to nested pools and
	 * scrub_visitbp() recursion.
	 */
	tx->tx_sync_thread = thread_create(NULL, 0, txg_sync_thread,
	    dp, 0, &p0, TS_RUN, defclsyspri);

	mutex_exit(&tx->tx_sync_lock);
}
221eda14cbcSMatt Macy
/*
 * Common entry for both txg worker threads: register with the CPR
 * (suspend/resume) framework and take tx_sync_lock, which the threads
 * hold except while waiting or doing real work.
 */
static void
txg_thread_enter(tx_state_t *tx, callb_cpr_t *cpr)
{
	CALLB_CPR_INIT(cpr, &tx->tx_sync_lock, callb_generic_cpr, FTAG);
	mutex_enter(&tx->tx_sync_lock);
}
228eda14cbcSMatt Macy
/*
 * Common exit path for both txg worker threads.  Clears the caller's
 * thread pointer (*tpp, e.g. tx_sync_thread), decrements the live-thread
 * count, and wakes txg_sync_stop(), which waits on tx_exit_cv.
 * Called with tx_sync_lock held; CALLB_CPR_EXIT() releases it.
 * Does not return.
 */
static void
txg_thread_exit(tx_state_t *tx, callb_cpr_t *cpr, kthread_t **tpp)
{
	ASSERT(*tpp != NULL);
	*tpp = NULL;
	tx->tx_threads--;
	cv_broadcast(&tx->tx_exit_cv);	/* wake txg_sync_stop() */
	CALLB_CPR_EXIT(cpr);		/* drops &tx->tx_sync_lock */
	thread_exit();
}
239eda14cbcSMatt Macy
240eda14cbcSMatt Macy static void
txg_thread_wait(tx_state_t * tx,callb_cpr_t * cpr,kcondvar_t * cv,clock_t time)241eda14cbcSMatt Macy txg_thread_wait(tx_state_t *tx, callb_cpr_t *cpr, kcondvar_t *cv, clock_t time)
242eda14cbcSMatt Macy {
243eda14cbcSMatt Macy CALLB_CPR_SAFE_BEGIN(cpr);
244eda14cbcSMatt Macy
245eda14cbcSMatt Macy if (time) {
2462c48331dSMatt Macy (void) cv_timedwait_idle(cv, &tx->tx_sync_lock,
247eda14cbcSMatt Macy ddi_get_lbolt() + time);
248eda14cbcSMatt Macy } else {
2492c48331dSMatt Macy cv_wait_idle(cv, &tx->tx_sync_lock);
250eda14cbcSMatt Macy }
251eda14cbcSMatt Macy
252eda14cbcSMatt Macy CALLB_CPR_SAFE_END(cpr, &tx->tx_sync_lock);
253eda14cbcSMatt Macy }
254eda14cbcSMatt Macy
/*
 * Stop syncing transaction groups: flush outstanding work, then ask both
 * worker threads to exit and wait until they have.  The reverse of
 * txg_sync_start().
 */
void
txg_sync_stop(dsl_pool_t *dp)
{
	tx_state_t *tx = &dp->dp_tx;

	dprintf("pool %p\n", dp);
	/*
	 * Finish off any work in progress.
	 */
	ASSERT3U(tx->tx_threads, ==, 2);

	/*
	 * We need to ensure that we've vacated the deferred metaslab trees.
	 * Syncing TXG_DEFER_SIZE txgs beyond the current open txg pushes
	 * all deferred frees through.
	 */
	txg_wait_synced(dp, tx->tx_open_txg + TXG_DEFER_SIZE);

	/*
	 * Wake all sync threads and wait for them to die.
	 */
	mutex_enter(&tx->tx_sync_lock);

	ASSERT3U(tx->tx_threads, ==, 2);

	/* Both threads poll tx_exiting in their wait loops. */
	tx->tx_exiting = 1;

	cv_broadcast(&tx->tx_quiesce_more_cv);
	cv_broadcast(&tx->tx_quiesce_done_cv);
	cv_broadcast(&tx->tx_sync_more_cv);

	/* Each exiting thread decrements tx_threads and signals tx_exit_cv. */
	while (tx->tx_threads != 0)
		cv_wait(&tx->tx_exit_cv, &tx->tx_sync_lock);

	tx->tx_exiting = 0;

	mutex_exit(&tx->tx_sync_lock);
}
294eda14cbcSMatt Macy
295184c1b94SMartin Matuska /*
296184c1b94SMartin Matuska * Get a handle on the currently open txg and keep it open.
297184c1b94SMartin Matuska *
298184c1b94SMartin Matuska * The txg is guaranteed to stay open until txg_rele_to_quiesce() is called for
299184c1b94SMartin Matuska * the handle. Once txg_rele_to_quiesce() has been called, the txg stays
300184c1b94SMartin Matuska * in quiescing state until txg_rele_to_sync() is called for the handle.
301184c1b94SMartin Matuska *
302184c1b94SMartin Matuska * It is guaranteed that subsequent calls return monotonically increasing
303184c1b94SMartin Matuska * txgs for the same dsl_pool_t. Of course this is not strong monotonicity,
304184c1b94SMartin Matuska * because the same txg can be returned multiple times in a row. This
305184c1b94SMartin Matuska * guarantee holds both for subsequent calls from one thread and for multiple
306184c1b94SMartin Matuska * threads. For example, it is impossible to observe the following sequence
307184c1b94SMartin Matuska * of events:
308184c1b94SMartin Matuska *
309184c1b94SMartin Matuska * Thread 1 Thread 2
310184c1b94SMartin Matuska *
311184c1b94SMartin Matuska * 1 <- txg_hold_open(P, ...)
312184c1b94SMartin Matuska * 2 <- txg_hold_open(P, ...)
313184c1b94SMartin Matuska * 1 <- txg_hold_open(P, ...)
314184c1b94SMartin Matuska *
315184c1b94SMartin Matuska */
316eda14cbcSMatt Macy uint64_t
txg_hold_open(dsl_pool_t * dp,txg_handle_t * th)317eda14cbcSMatt Macy txg_hold_open(dsl_pool_t *dp, txg_handle_t *th)
318eda14cbcSMatt Macy {
319eda14cbcSMatt Macy tx_state_t *tx = &dp->dp_tx;
320eda14cbcSMatt Macy tx_cpu_t *tc;
321eda14cbcSMatt Macy uint64_t txg;
322eda14cbcSMatt Macy
323eda14cbcSMatt Macy /*
324eda14cbcSMatt Macy * It appears the processor id is simply used as a "random"
325eda14cbcSMatt Macy * number to index into the array, and there isn't any other
326eda14cbcSMatt Macy * significance to the chosen tx_cpu. Because.. Why not use
327eda14cbcSMatt Macy * the current cpu to index into the array?
328eda14cbcSMatt Macy */
3297877fdebSMatt Macy tc = &tx->tx_cpu[CPU_SEQID_UNSTABLE];
330eda14cbcSMatt Macy
331eda14cbcSMatt Macy mutex_enter(&tc->tc_open_lock);
332eda14cbcSMatt Macy txg = tx->tx_open_txg;
333eda14cbcSMatt Macy
334eda14cbcSMatt Macy mutex_enter(&tc->tc_lock);
335eda14cbcSMatt Macy tc->tc_count[txg & TXG_MASK]++;
336eda14cbcSMatt Macy mutex_exit(&tc->tc_lock);
337eda14cbcSMatt Macy
338eda14cbcSMatt Macy th->th_cpu = tc;
339eda14cbcSMatt Macy th->th_txg = txg;
340eda14cbcSMatt Macy
341eda14cbcSMatt Macy return (txg);
342eda14cbcSMatt Macy }
343eda14cbcSMatt Macy
/*
 * Drop the tc_open_lock acquired in txg_hold_open(), allowing the handle's
 * txg to advance toward quiescing.  The hold itself (tc_count) remains
 * until txg_rele_to_sync() is called.
 */
void
txg_rele_to_quiesce(txg_handle_t *th)
{
	tx_cpu_t *tc = th->th_cpu;

	/* tc_open_lock must be dropped before tc_lock is ever taken here. */
	ASSERT(!MUTEX_HELD(&tc->tc_lock));
	mutex_exit(&tc->tc_open_lock);
}
352eda14cbcSMatt Macy
/*
 * Append the commit callbacks on tx_callbacks to the handle's per-CPU
 * callback list for its txg.  They will be dispatched by
 * txg_dispatch_callbacks() once that txg has synced.
 */
void
txg_register_callbacks(txg_handle_t *th, list_t *tx_callbacks)
{
	tx_cpu_t *tc = th->th_cpu;
	int g = th->th_txg & TXG_MASK;

	mutex_enter(&tc->tc_lock);
	list_move_tail(&tc->tc_callbacks[g], tx_callbacks);
	mutex_exit(&tc->tc_lock);
}
363eda14cbcSMatt Macy
364eda14cbcSMatt Macy void
txg_rele_to_sync(txg_handle_t * th)365eda14cbcSMatt Macy txg_rele_to_sync(txg_handle_t *th)
366eda14cbcSMatt Macy {
367eda14cbcSMatt Macy tx_cpu_t *tc = th->th_cpu;
368eda14cbcSMatt Macy int g = th->th_txg & TXG_MASK;
369eda14cbcSMatt Macy
370eda14cbcSMatt Macy mutex_enter(&tc->tc_lock);
371eda14cbcSMatt Macy ASSERT(tc->tc_count[g] != 0);
372eda14cbcSMatt Macy if (--tc->tc_count[g] == 0)
373eda14cbcSMatt Macy cv_broadcast(&tc->tc_cv[g]);
374eda14cbcSMatt Macy mutex_exit(&tc->tc_lock);
375eda14cbcSMatt Macy
376eda14cbcSMatt Macy th->th_cpu = NULL; /* defensive */
377eda14cbcSMatt Macy }
378eda14cbcSMatt Macy
/*
 * Blocks until all transactions in the group are committed.
 *
 * On return, the transaction group has reached a stable state in which it can
 * then be passed off to the syncing context.
 *
 * Called by the quiesce thread with tx_sync_lock dropped; advances
 * tx_open_txg so new holds land in the next txg, then waits for every
 * outstanding hold on 'txg' to be released via txg_rele_to_sync().
 */
static void
txg_quiesce(dsl_pool_t *dp, uint64_t txg)
{
	tx_state_t *tx = &dp->dp_tx;
	uint64_t tx_open_time;
	int g = txg & TXG_MASK;
	int c;

	/*
	 * Grab all tc_open_locks so nobody else can get into this txg.
	 */
	for (c = 0; c < max_ncpus; c++)
		mutex_enter(&tx->tx_cpu[c].tc_open_lock);

	ASSERT(txg == tx->tx_open_txg);
	tx->tx_open_txg++;
	tx->tx_open_time = tx_open_time = gethrtime();

	DTRACE_PROBE2(txg__quiescing, dsl_pool_t *, dp, uint64_t, txg);
	DTRACE_PROBE2(txg__opened, dsl_pool_t *, dp, uint64_t, tx->tx_open_txg);

	/*
	 * Now that we've incremented tx_open_txg, we can let threads
	 * enter the next transaction group.
	 */
	for (c = 0; c < max_ncpus; c++)
		mutex_exit(&tx->tx_cpu[c].tc_open_lock);

	spa_txg_history_set(dp->dp_spa, txg, TXG_STATE_OPEN, tx_open_time);
	spa_txg_history_add(dp->dp_spa, txg + 1, tx_open_time);

	/*
	 * Quiesce the transaction group by waiting for everyone to
	 * call txg_rele_to_sync() for their open transaction handles.
	 */
	for (c = 0; c < max_ncpus; c++) {
		tx_cpu_t *tc = &tx->tx_cpu[c];
		mutex_enter(&tc->tc_lock);
		while (tc->tc_count[g] != 0)
			cv_wait(&tc->tc_cv[g], &tc->tc_lock);
		mutex_exit(&tc->tc_lock);
	}

	spa_txg_history_set(dp->dp_spa, txg, TXG_STATE_QUIESCED, gethrtime());
}
430eda14cbcSMatt Macy
/*
 * Taskq worker: run every commit callback on cb_list with error code 0
 * (txg synced successfully), then dispose of the list itself, which was
 * allocated and handed off by txg_dispatch_callbacks().
 */
static void
txg_do_callbacks(void *cb_list)
{
	dmu_tx_do_callbacks(cb_list, 0);

	list_destroy(cb_list);

	kmem_free(cb_list, sizeof (list_t));
}
440eda14cbcSMatt Macy
/*
 * Dispatch the commit callbacks registered on this txg to worker threads.
 *
 * If no callbacks are registered for a given TXG, nothing happens.
 * This function creates a taskq for the associated pool, if needed.
 */
static void
txg_dispatch_callbacks(dsl_pool_t *dp, uint64_t txg)
{
	int c;
	tx_state_t *tx = &dp->dp_tx;
	list_t *cb_list;

	for (c = 0; c < max_ncpus; c++) {
		tx_cpu_t *tc = &tx->tx_cpu[c];
		/*
		 * No need to lock tx_cpu_t at this point, since this can
		 * only be called once a txg has been synced.
		 */

		int g = txg & TXG_MASK;

		if (list_is_empty(&tc->tc_callbacks[g]))
			continue;

		if (tx->tx_commit_cb_taskq == NULL) {
			/*
			 * Commit callback taskq hasn't been created yet.
			 * Created lazily so pools that never register
			 * callbacks pay nothing; destroyed in txg_fini().
			 */
			tx->tx_commit_cb_taskq = taskq_create("tx_commit_cb",
			    100, defclsyspri, boot_ncpus, boot_ncpus * 2,
			    TASKQ_PREPOPULATE | TASKQ_DYNAMIC |
			    TASKQ_THREADS_CPU_PCT);
		}

		/*
		 * Move this CPU's callbacks onto a freshly allocated list;
		 * ownership of cb_list passes to txg_do_callbacks(), which
		 * frees it after running the callbacks.
		 */
		cb_list = kmem_alloc(sizeof (list_t), KM_SLEEP);
		list_create(cb_list, sizeof (dmu_tx_callback_t),
		    offsetof(dmu_tx_callback_t, dcb_node));

		list_move_tail(cb_list, &tc->tc_callbacks[g]);

		(void) taskq_dispatch(tx->tx_commit_cb_taskq,
		    txg_do_callbacks, cb_list, TQ_SLEEP);
	}
}
486eda14cbcSMatt Macy
487eda14cbcSMatt Macy /*
488eda14cbcSMatt Macy * Wait for pending commit callbacks of already-synced transactions to finish
489eda14cbcSMatt Macy * processing.
490eda14cbcSMatt Macy * Calling this function from within a commit callback will deadlock.
491eda14cbcSMatt Macy */
492eda14cbcSMatt Macy void
txg_wait_callbacks(dsl_pool_t * dp)493eda14cbcSMatt Macy txg_wait_callbacks(dsl_pool_t *dp)
494eda14cbcSMatt Macy {
495eda14cbcSMatt Macy tx_state_t *tx = &dp->dp_tx;
496eda14cbcSMatt Macy
497eda14cbcSMatt Macy if (tx->tx_commit_cb_taskq != NULL)
498eda14cbcSMatt Macy taskq_wait_outstanding(tx->tx_commit_cb_taskq, 0);
499eda14cbcSMatt Macy }
500eda14cbcSMatt Macy
/*
 * True while the quiesce thread is actively quiescing a txg (it sets
 * tx_quiescing_txg around its call to txg_quiesce()).
 * Caller must hold tx_sync_lock.
 */
static boolean_t
txg_is_quiescing(dsl_pool_t *dp)
{
	tx_state_t *tx = &dp->dp_tx;
	ASSERT(MUTEX_HELD(&tx->tx_sync_lock));
	return (tx->tx_quiescing_txg != 0);
}
508eda14cbcSMatt Macy
/*
 * True when a fully quiesced txg is waiting to be consumed by the sync
 * thread (tx_quiesced_txg is set at hand-off and cleared on consumption).
 * Caller must hold tx_sync_lock.
 */
static boolean_t
txg_has_quiesced_to_sync(dsl_pool_t *dp)
{
	tx_state_t *tx = &dp->dp_tx;
	ASSERT(MUTEX_HELD(&tx->tx_sync_lock));
	return (tx->tx_quiesced_txg != 0);
}
516eda14cbcSMatt Macy
/*
 * Per-pool sync worker.  Loops forever (until txg_sync_stop()): waits for
 * a reason to sync, takes the quiesced txg handed off by the quiesce
 * thread, calls spa_sync() with tx_sync_lock dropped, then publishes the
 * result and dispatches commit callbacks.  Runs with tx_sync_lock held
 * except while waiting or inside spa_sync().
 */
static __attribute__((noreturn)) void
txg_sync_thread(void *arg)
{
	dsl_pool_t *dp = arg;
	spa_t *spa = dp->dp_spa;
	tx_state_t *tx = &dp->dp_tx;
	callb_cpr_t cpr;
	clock_t start, delta;	/* lbolt at last sync start, and its duration */

	(void) spl_fstrans_mark();
	txg_thread_enter(tx, &cpr);

	start = delta = 0;
	for (;;) {
		clock_t timeout = zfs_txg_timeout * hz;
		clock_t timer;
		uint64_t txg;

		/*
		 * We sync when we're scanning, there's someone waiting
		 * on us, or the quiesce thread has handed off a txg to
		 * us, or we have reached our timeout.
		 */
		timer = (delta >= timeout ? 0 : timeout - delta);
		while (!dsl_scan_active(dp->dp_scan) &&
		    !tx->tx_exiting && timer > 0 &&
		    tx->tx_synced_txg >= tx->tx_sync_txg_waiting &&
		    !txg_has_quiesced_to_sync(dp)) {
			dprintf("waiting; tx_synced=%llu waiting=%llu dp=%p\n",
			    (u_longlong_t)tx->tx_synced_txg,
			    (u_longlong_t)tx->tx_sync_txg_waiting, dp);
			txg_thread_wait(tx, &cpr, &tx->tx_sync_more_cv, timer);
			delta = ddi_get_lbolt() - start;
			timer = (delta > timeout ? 0 : timeout - delta);
		}

		/*
		 * When we're suspended, nothing should be changing and for
		 * MMP we don't want to bump anything that would make it
		 * harder to detect if another host is changing it when
		 * resuming after a MMP suspend.
		 *
		 * NOTE(review): once the timeout has elapsed, timer is 0 and
		 * the wait loop above exits immediately, so a suspended pool
		 * appears to spin through this continue — confirm intended.
		 */
		if (spa_suspended(spa))
			continue;

		/*
		 * Wait until the quiesce thread hands off a txg to us,
		 * prompting it to do so if necessary.
		 */
		while (!tx->tx_exiting && !txg_has_quiesced_to_sync(dp)) {
			if (txg_is_quiescing(dp)) {
				/* A hand-off is already in progress. */
				txg_thread_wait(tx, &cpr,
				    &tx->tx_quiesce_done_cv, 0);
				continue;
			}
			if (tx->tx_quiesce_txg_waiting < tx->tx_open_txg+1)
				tx->tx_quiesce_txg_waiting = tx->tx_open_txg+1;
			cv_broadcast(&tx->tx_quiesce_more_cv);
			txg_thread_wait(tx, &cpr, &tx->tx_quiesce_done_cv, 0);
		}

		if (tx->tx_exiting)
			txg_thread_exit(tx, &cpr, &tx->tx_sync_thread);

		/*
		 * Consume the quiesced txg which has been handed off to
		 * us. This may cause the quiescing thread to now be
		 * able to quiesce another txg, so we must signal it.
		 */
		ASSERT(tx->tx_quiesced_txg != 0);
		txg = tx->tx_quiesced_txg;
		tx->tx_quiesced_txg = 0;
		tx->tx_syncing_txg = txg;
		DTRACE_PROBE2(txg__syncing, dsl_pool_t *, dp, uint64_t, txg);
		cv_broadcast(&tx->tx_quiesce_more_cv);

		dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n",
		    (u_longlong_t)txg, (u_longlong_t)tx->tx_quiesce_txg_waiting,
		    (u_longlong_t)tx->tx_sync_txg_waiting);
		/* Drop the lock across the (potentially long) sync itself. */
		mutex_exit(&tx->tx_sync_lock);

		txg_stat_t *ts = spa_txg_history_init_io(spa, txg, dp);
		start = ddi_get_lbolt();
		spa_sync(spa, txg);
		delta = ddi_get_lbolt() - start;
		spa_txg_history_fini_io(spa, ts);

		mutex_enter(&tx->tx_sync_lock);
		tx->tx_synced_txg = txg;
		tx->tx_syncing_txg = 0;
		DTRACE_PROBE2(txg__synced, dsl_pool_t *, dp, uint64_t, txg);
		/* Wake txg_wait_synced() callers. */
		cv_broadcast(&tx->tx_sync_done_cv);

		/*
		 * Dispatch commit callbacks to worker threads.
		 */
		txg_dispatch_callbacks(dp, txg);
	}
}
616eda14cbcSMatt Macy
/*
 * Quiesce-thread main loop: wait until someone needs the open txg
 * quiesced (and the previously quiesced txg has been consumed by the
 * sync thread), quiesce it, and hand it off to the sync thread.
 * Runs until tx_exiting is set; never returns.
 */
static __attribute__((noreturn)) void
txg_quiesce_thread(void *arg)
{
	dsl_pool_t *dp = arg;
	tx_state_t *tx = &dp->dp_tx;
	callb_cpr_t cpr;

	txg_thread_enter(tx, &cpr);

	for (;;) {
		uint64_t txg;

		/*
		 * We quiesce when there's someone waiting on us.
		 * However, we can only have one txg in "quiescing" or
		 * "quiesced, waiting to sync" state. So we wait until
		 * the "quiesced, waiting to sync" txg has been consumed
		 * by the sync thread.
		 */
		while (!tx->tx_exiting &&
		    (tx->tx_open_txg >= tx->tx_quiesce_txg_waiting ||
		    txg_has_quiesced_to_sync(dp)))
			txg_thread_wait(tx, &cpr, &tx->tx_quiesce_more_cv, 0);

		if (tx->tx_exiting)
			txg_thread_exit(tx, &cpr, &tx->tx_quiesce_thread);

		txg = tx->tx_open_txg;
		dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n",
		    (u_longlong_t)txg,
		    (u_longlong_t)tx->tx_quiesce_txg_waiting,
		    (u_longlong_t)tx->tx_sync_txg_waiting);
		tx->tx_quiescing_txg = txg;

		/*
		 * Drop tx_sync_lock across txg_quiesce(), which may
		 * block for a while waiting on open transactions.
		 */
		mutex_exit(&tx->tx_sync_lock);
		txg_quiesce(dp, txg);
		mutex_enter(&tx->tx_sync_lock);

		/*
		 * Hand this txg off to the sync thread.
		 */
		dprintf("quiesce done, handing off txg %llu\n",
		    (u_longlong_t)txg);
		tx->tx_quiescing_txg = 0;
		tx->tx_quiesced_txg = txg;
		DTRACE_PROBE2(txg__quiesced, dsl_pool_t *, dp, uint64_t, txg);
		/* Wake the sync thread and anyone blocked in txg_wait_open(). */
		cv_broadcast(&tx->tx_sync_more_cv);
		cv_broadcast(&tx->tx_quiesce_done_cv);
	}
}
667eda14cbcSMatt Macy
/*
 * Delay this thread by delay nanoseconds if we are still in the open
 * transaction group and there is already a waiting txg quiescing or quiesced.
 * Abort the delay if this txg stalls or enters the quiescing state.
 */
void
txg_delay(dsl_pool_t *dp, uint64_t txg, hrtime_t delay, hrtime_t resolution)
{
	tx_state_t *tx = &dp->dp_tx;
	hrtime_t start = gethrtime();

	/* don't delay if this txg could transition to quiescing immediately */
	if (tx->tx_open_txg > txg ||
	    tx->tx_syncing_txg == txg-1 || tx->tx_synced_txg == txg-1)
		return;

	/* Re-check the fast-path conditions under the lock before waiting. */
	mutex_enter(&tx->tx_sync_lock);
	if (tx->tx_open_txg > txg || tx->tx_synced_txg == txg-1) {
		mutex_exit(&tx->tx_sync_lock);
		return;
	}

	/*
	 * Sleep until the requested delay elapses, txg-1 starts syncing,
	 * or the pool stalls; tx_quiesce_more_cv wakes us on state changes.
	 */
	while (gethrtime() - start < delay &&
	    tx->tx_syncing_txg < txg-1 && !txg_stalled(dp)) {
		(void) cv_timedwait_hires(&tx->tx_quiesce_more_cv,
		    &tx->tx_sync_lock, delay, resolution, 0);
	}

	DMU_TX_STAT_BUMP(dmu_tx_delay);

	mutex_exit(&tx->tx_sync_lock);
}
700eda14cbcSMatt Macy
/*
 * Block until tx_synced_txg reaches the given txg.  If txg is 0, wait
 * for tx_open_txg + TXG_DEFER_SIZE instead.  When wait_sig is set the
 * wait is interruptible: returns B_TRUE if a signal was received before
 * the txg synced, otherwise B_FALSE.
 */
static boolean_t
txg_wait_synced_impl(dsl_pool_t *dp, uint64_t txg, boolean_t wait_sig)
{
	tx_state_t *tx = &dp->dp_tx;

	ASSERT(!dsl_pool_config_held(dp));

	mutex_enter(&tx->tx_sync_lock);
	/* Both the sync and quiesce threads must be running. */
	ASSERT3U(tx->tx_threads, ==, 2);
	if (txg == 0)
		txg = tx->tx_open_txg + TXG_DEFER_SIZE;
	/* Raise the sync target if it is below the txg we need. */
	if (tx->tx_sync_txg_waiting < txg)
		tx->tx_sync_txg_waiting = txg;
	dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n",
	    (u_longlong_t)txg, (u_longlong_t)tx->tx_quiesce_txg_waiting,
	    (u_longlong_t)tx->tx_sync_txg_waiting);
	while (tx->tx_synced_txg < txg) {
		dprintf("broadcasting sync more "
		    "tx_synced=%llu waiting=%llu dp=%px\n",
		    (u_longlong_t)tx->tx_synced_txg,
		    (u_longlong_t)tx->tx_sync_txg_waiting, dp);
		/* Nudge the sync thread in case it is idle. */
		cv_broadcast(&tx->tx_sync_more_cv);
		if (wait_sig) {
			/*
			 * Condition wait here but stop if the thread receives a
			 * signal. The caller may call txg_wait_synced*() again
			 * to resume waiting for this txg.
			 */
			if (cv_wait_io_sig(&tx->tx_sync_done_cv,
			    &tx->tx_sync_lock) == 0) {
				mutex_exit(&tx->tx_sync_lock);
				return (B_TRUE);
			}
		} else {
			cv_wait_io(&tx->tx_sync_done_cv, &tx->tx_sync_lock);
		}
	}
	mutex_exit(&tx->tx_sync_lock);
	return (B_FALSE);
}
741eda14cbcSMatt Macy
/*
 * Uninterruptible wait for the given txg to sync; the VERIFY confirms
 * the wait was not terminated by a signal.
 */
void
txg_wait_synced(dsl_pool_t *dp, uint64_t txg)
{
	VERIFY0(txg_wait_synced_impl(dp, txg, B_FALSE));
}
747eda14cbcSMatt Macy
/*
 * Similar to a txg_wait_synced but it can be interrupted from a signal.
 * Returns B_TRUE if the thread was signaled while waiting.
 */
boolean_t
txg_wait_synced_sig(dsl_pool_t *dp, uint64_t txg)
{
	return (txg_wait_synced_impl(dp, txg, B_TRUE));
}
757eda14cbcSMatt Macy
/*
 * Wait for the specified open transaction group. Set should_quiesce
 * when the current open txg should be quiesced immediately.
 */
void
txg_wait_open(dsl_pool_t *dp, uint64_t txg, boolean_t should_quiesce)
{
	tx_state_t *tx = &dp->dp_tx;

	ASSERT(!dsl_pool_config_held(dp));

	mutex_enter(&tx->tx_sync_lock);
	/* Both the sync and quiesce threads must be running. */
	ASSERT3U(tx->tx_threads, ==, 2);
	if (txg == 0)
		txg = tx->tx_open_txg + 1;
	if (tx->tx_quiesce_txg_waiting < txg && should_quiesce)
		tx->tx_quiesce_txg_waiting = txg;
	dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n",
	    (u_longlong_t)txg, (u_longlong_t)tx->tx_quiesce_txg_waiting,
	    (u_longlong_t)tx->tx_sync_txg_waiting);
	while (tx->tx_open_txg < txg) {
		cv_broadcast(&tx->tx_quiesce_more_cv);
		/*
		 * Callers setting should_quiesce will use cv_wait_io() and
		 * be accounted for as iowait time. Otherwise, the caller is
		 * understood to be idle and cv_wait_idle() is used to prevent
		 * incorrectly inflating the system load average.
		 */
		if (should_quiesce == B_TRUE) {
			cv_wait_io(&tx->tx_quiesce_done_cv, &tx->tx_sync_lock);
		} else {
			cv_wait_idle(&tx->tx_quiesce_done_cv,
			    &tx->tx_sync_lock);
		}
	}
	mutex_exit(&tx->tx_sync_lock);
}
795eda14cbcSMatt Macy
/*
 * Pass in the txg number that should be synced.
 */
void
txg_kick(dsl_pool_t *dp, uint64_t txg)
{
	tx_state_t *tx = &dp->dp_tx;

	ASSERT(!dsl_pool_config_held(dp));

	/* Lockless fast path: someone has already requested this txg. */
	if (tx->tx_sync_txg_waiting >= txg)
		return;

	mutex_enter(&tx->tx_sync_lock);
	/* Re-check under the lock before raising the sync target. */
	if (tx->tx_sync_txg_waiting < txg) {
		tx->tx_sync_txg_waiting = txg;
		cv_broadcast(&tx->tx_sync_more_cv);
	}
	mutex_exit(&tx->tx_sync_lock);
}
816eda14cbcSMatt Macy
817eda14cbcSMatt Macy boolean_t
txg_stalled(dsl_pool_t * dp)818eda14cbcSMatt Macy txg_stalled(dsl_pool_t *dp)
819eda14cbcSMatt Macy {
820eda14cbcSMatt Macy tx_state_t *tx = &dp->dp_tx;
821eda14cbcSMatt Macy return (tx->tx_quiesce_txg_waiting > tx->tx_open_txg);
822eda14cbcSMatt Macy }
823eda14cbcSMatt Macy
824eda14cbcSMatt Macy boolean_t
txg_sync_waiting(dsl_pool_t * dp)825eda14cbcSMatt Macy txg_sync_waiting(dsl_pool_t *dp)
826eda14cbcSMatt Macy {
827eda14cbcSMatt Macy tx_state_t *tx = &dp->dp_tx;
828eda14cbcSMatt Macy
829eda14cbcSMatt Macy return (tx->tx_syncing_txg <= tx->tx_sync_txg_waiting ||
830eda14cbcSMatt Macy tx->tx_quiesced_txg != 0);
831eda14cbcSMatt Macy }
832eda14cbcSMatt Macy
/*
 * Verify that this txg is active (open, quiescing, syncing). Non-active
 * txg's should not be manipulated.
 */
#ifdef ZFS_DEBUG
void
txg_verify(spa_t *spa, uint64_t txg)
{
	dsl_pool_t *dp __maybe_unused = spa_get_dsl(spa);
	/* Early txgs and the ZIL-test txg are exempt from these checks. */
	if (txg <= TXG_INITIAL || txg == ZILTEST_TXG)
		return;
	ASSERT3U(txg, <=, dp->dp_tx.tx_open_txg);
	ASSERT3U(txg, >=, dp->dp_tx.tx_synced_txg);
	ASSERT3U(txg, >=, dp->dp_tx.tx_open_txg - TXG_CONCURRENT_STATES);
}
#endif
849eda14cbcSMatt Macy
850eda14cbcSMatt Macy /*
851eda14cbcSMatt Macy * Per-txg object lists.
852eda14cbcSMatt Macy */
853eda14cbcSMatt Macy void
txg_list_create(txg_list_t * tl,spa_t * spa,size_t offset)854eda14cbcSMatt Macy txg_list_create(txg_list_t *tl, spa_t *spa, size_t offset)
855eda14cbcSMatt Macy {
856eda14cbcSMatt Macy int t;
857eda14cbcSMatt Macy
858eda14cbcSMatt Macy mutex_init(&tl->tl_lock, NULL, MUTEX_DEFAULT, NULL);
859eda14cbcSMatt Macy
860eda14cbcSMatt Macy tl->tl_offset = offset;
861eda14cbcSMatt Macy tl->tl_spa = spa;
862eda14cbcSMatt Macy
863eda14cbcSMatt Macy for (t = 0; t < TXG_SIZE; t++)
864eda14cbcSMatt Macy tl->tl_head[t] = NULL;
865eda14cbcSMatt Macy }
866eda14cbcSMatt Macy
867eda14cbcSMatt Macy static boolean_t
txg_list_empty_impl(txg_list_t * tl,uint64_t txg)868eda14cbcSMatt Macy txg_list_empty_impl(txg_list_t *tl, uint64_t txg)
869eda14cbcSMatt Macy {
870eda14cbcSMatt Macy ASSERT(MUTEX_HELD(&tl->tl_lock));
871eda14cbcSMatt Macy TXG_VERIFY(tl->tl_spa, txg);
872eda14cbcSMatt Macy return (tl->tl_head[txg & TXG_MASK] == NULL);
873eda14cbcSMatt Macy }
874eda14cbcSMatt Macy
875eda14cbcSMatt Macy boolean_t
txg_list_empty(txg_list_t * tl,uint64_t txg)876eda14cbcSMatt Macy txg_list_empty(txg_list_t *tl, uint64_t txg)
877eda14cbcSMatt Macy {
878eda14cbcSMatt Macy mutex_enter(&tl->tl_lock);
879eda14cbcSMatt Macy boolean_t ret = txg_list_empty_impl(tl, txg);
880eda14cbcSMatt Macy mutex_exit(&tl->tl_lock);
881eda14cbcSMatt Macy
882eda14cbcSMatt Macy return (ret);
883eda14cbcSMatt Macy }
884eda14cbcSMatt Macy
885eda14cbcSMatt Macy void
txg_list_destroy(txg_list_t * tl)886eda14cbcSMatt Macy txg_list_destroy(txg_list_t *tl)
887eda14cbcSMatt Macy {
888eda14cbcSMatt Macy int t;
889eda14cbcSMatt Macy
890eda14cbcSMatt Macy mutex_enter(&tl->tl_lock);
891eda14cbcSMatt Macy for (t = 0; t < TXG_SIZE; t++)
892eda14cbcSMatt Macy ASSERT(txg_list_empty_impl(tl, t));
893eda14cbcSMatt Macy mutex_exit(&tl->tl_lock);
894eda14cbcSMatt Macy
895eda14cbcSMatt Macy mutex_destroy(&tl->tl_lock);
896eda14cbcSMatt Macy }
897eda14cbcSMatt Macy
/*
 * Returns true if all txg lists are empty.
 *
 * Warning: this is inherently racy (an item could be added immediately
 * after this function returns).
 */
boolean_t
txg_all_lists_empty(txg_list_t *tl)
{
	boolean_t res = B_TRUE;
	/*
	 * Lockless scan; every head is read unconditionally (no early
	 * exit) and the results are and-ed together.
	 */
	for (int i = 0; i < TXG_SIZE; i++)
		res &= (tl->tl_head[i] == NULL);
	return (res);
}
912eda14cbcSMatt Macy
913eda14cbcSMatt Macy /*
914eda14cbcSMatt Macy * Add an entry to the list (unless it's already on the list).
915eda14cbcSMatt Macy * Returns B_TRUE if it was actually added.
916eda14cbcSMatt Macy */
917eda14cbcSMatt Macy boolean_t
txg_list_add(txg_list_t * tl,void * p,uint64_t txg)918eda14cbcSMatt Macy txg_list_add(txg_list_t *tl, void *p, uint64_t txg)
919eda14cbcSMatt Macy {
920eda14cbcSMatt Macy int t = txg & TXG_MASK;
921eda14cbcSMatt Macy txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
922eda14cbcSMatt Macy boolean_t add;
923eda14cbcSMatt Macy
924eda14cbcSMatt Macy TXG_VERIFY(tl->tl_spa, txg);
925eda14cbcSMatt Macy mutex_enter(&tl->tl_lock);
926eda14cbcSMatt Macy add = (tn->tn_member[t] == 0);
927eda14cbcSMatt Macy if (add) {
928eda14cbcSMatt Macy tn->tn_member[t] = 1;
929eda14cbcSMatt Macy tn->tn_next[t] = tl->tl_head[t];
930eda14cbcSMatt Macy tl->tl_head[t] = tn;
931eda14cbcSMatt Macy }
932eda14cbcSMatt Macy mutex_exit(&tl->tl_lock);
933eda14cbcSMatt Macy
934eda14cbcSMatt Macy return (add);
935eda14cbcSMatt Macy }
936eda14cbcSMatt Macy
937eda14cbcSMatt Macy /*
938eda14cbcSMatt Macy * Add an entry to the end of the list, unless it's already on the list.
939eda14cbcSMatt Macy * (walks list to find end)
940eda14cbcSMatt Macy * Returns B_TRUE if it was actually added.
941eda14cbcSMatt Macy */
942eda14cbcSMatt Macy boolean_t
txg_list_add_tail(txg_list_t * tl,void * p,uint64_t txg)943eda14cbcSMatt Macy txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg)
944eda14cbcSMatt Macy {
945eda14cbcSMatt Macy int t = txg & TXG_MASK;
946eda14cbcSMatt Macy txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
947eda14cbcSMatt Macy boolean_t add;
948eda14cbcSMatt Macy
949eda14cbcSMatt Macy TXG_VERIFY(tl->tl_spa, txg);
950eda14cbcSMatt Macy mutex_enter(&tl->tl_lock);
951eda14cbcSMatt Macy add = (tn->tn_member[t] == 0);
952eda14cbcSMatt Macy if (add) {
953eda14cbcSMatt Macy txg_node_t **tp;
954eda14cbcSMatt Macy
955eda14cbcSMatt Macy for (tp = &tl->tl_head[t]; *tp != NULL; tp = &(*tp)->tn_next[t])
956eda14cbcSMatt Macy continue;
957eda14cbcSMatt Macy
958eda14cbcSMatt Macy tn->tn_member[t] = 1;
959eda14cbcSMatt Macy tn->tn_next[t] = NULL;
960eda14cbcSMatt Macy *tp = tn;
961eda14cbcSMatt Macy }
962eda14cbcSMatt Macy mutex_exit(&tl->tl_lock);
963eda14cbcSMatt Macy
964eda14cbcSMatt Macy return (add);
965eda14cbcSMatt Macy }
966eda14cbcSMatt Macy
967eda14cbcSMatt Macy /*
968eda14cbcSMatt Macy * Remove the head of the list and return it.
969eda14cbcSMatt Macy */
970eda14cbcSMatt Macy void *
txg_list_remove(txg_list_t * tl,uint64_t txg)971eda14cbcSMatt Macy txg_list_remove(txg_list_t *tl, uint64_t txg)
972eda14cbcSMatt Macy {
973eda14cbcSMatt Macy int t = txg & TXG_MASK;
974eda14cbcSMatt Macy txg_node_t *tn;
975eda14cbcSMatt Macy void *p = NULL;
976eda14cbcSMatt Macy
977eda14cbcSMatt Macy TXG_VERIFY(tl->tl_spa, txg);
978eda14cbcSMatt Macy mutex_enter(&tl->tl_lock);
979eda14cbcSMatt Macy if ((tn = tl->tl_head[t]) != NULL) {
980eda14cbcSMatt Macy ASSERT(tn->tn_member[t]);
981eda14cbcSMatt Macy ASSERT(tn->tn_next[t] == NULL || tn->tn_next[t]->tn_member[t]);
982eda14cbcSMatt Macy p = (char *)tn - tl->tl_offset;
983eda14cbcSMatt Macy tl->tl_head[t] = tn->tn_next[t];
984eda14cbcSMatt Macy tn->tn_next[t] = NULL;
985eda14cbcSMatt Macy tn->tn_member[t] = 0;
986eda14cbcSMatt Macy }
987eda14cbcSMatt Macy mutex_exit(&tl->tl_lock);
988eda14cbcSMatt Macy
989eda14cbcSMatt Macy return (p);
990eda14cbcSMatt Macy }
991eda14cbcSMatt Macy
992eda14cbcSMatt Macy /*
993eda14cbcSMatt Macy * Remove a specific item from the list and return it.
994eda14cbcSMatt Macy */
995eda14cbcSMatt Macy void *
txg_list_remove_this(txg_list_t * tl,void * p,uint64_t txg)996eda14cbcSMatt Macy txg_list_remove_this(txg_list_t *tl, void *p, uint64_t txg)
997eda14cbcSMatt Macy {
998eda14cbcSMatt Macy int t = txg & TXG_MASK;
999eda14cbcSMatt Macy txg_node_t *tn, **tp;
1000eda14cbcSMatt Macy
1001eda14cbcSMatt Macy TXG_VERIFY(tl->tl_spa, txg);
1002eda14cbcSMatt Macy mutex_enter(&tl->tl_lock);
1003eda14cbcSMatt Macy
1004eda14cbcSMatt Macy for (tp = &tl->tl_head[t]; (tn = *tp) != NULL; tp = &tn->tn_next[t]) {
1005eda14cbcSMatt Macy if ((char *)tn - tl->tl_offset == p) {
1006eda14cbcSMatt Macy *tp = tn->tn_next[t];
1007eda14cbcSMatt Macy tn->tn_next[t] = NULL;
1008eda14cbcSMatt Macy tn->tn_member[t] = 0;
1009eda14cbcSMatt Macy mutex_exit(&tl->tl_lock);
1010eda14cbcSMatt Macy return (p);
1011eda14cbcSMatt Macy }
1012eda14cbcSMatt Macy }
1013eda14cbcSMatt Macy
1014eda14cbcSMatt Macy mutex_exit(&tl->tl_lock);
1015eda14cbcSMatt Macy
1016eda14cbcSMatt Macy return (NULL);
1017eda14cbcSMatt Macy }
1018eda14cbcSMatt Macy
1019eda14cbcSMatt Macy boolean_t
txg_list_member(txg_list_t * tl,void * p,uint64_t txg)1020eda14cbcSMatt Macy txg_list_member(txg_list_t *tl, void *p, uint64_t txg)
1021eda14cbcSMatt Macy {
1022eda14cbcSMatt Macy int t = txg & TXG_MASK;
1023eda14cbcSMatt Macy txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
1024eda14cbcSMatt Macy
1025eda14cbcSMatt Macy TXG_VERIFY(tl->tl_spa, txg);
1026eda14cbcSMatt Macy return (tn->tn_member[t] != 0);
1027eda14cbcSMatt Macy }
1028eda14cbcSMatt Macy
1029eda14cbcSMatt Macy /*
1030eda14cbcSMatt Macy * Walk a txg list
1031eda14cbcSMatt Macy */
1032eda14cbcSMatt Macy void *
txg_list_head(txg_list_t * tl,uint64_t txg)1033eda14cbcSMatt Macy txg_list_head(txg_list_t *tl, uint64_t txg)
1034eda14cbcSMatt Macy {
1035eda14cbcSMatt Macy int t = txg & TXG_MASK;
1036eda14cbcSMatt Macy txg_node_t *tn;
1037eda14cbcSMatt Macy
1038eda14cbcSMatt Macy mutex_enter(&tl->tl_lock);
1039eda14cbcSMatt Macy tn = tl->tl_head[t];
1040eda14cbcSMatt Macy mutex_exit(&tl->tl_lock);
1041eda14cbcSMatt Macy
1042eda14cbcSMatt Macy TXG_VERIFY(tl->tl_spa, txg);
1043eda14cbcSMatt Macy return (tn == NULL ? NULL : (char *)tn - tl->tl_offset);
1044eda14cbcSMatt Macy }
1045eda14cbcSMatt Macy
1046eda14cbcSMatt Macy void *
txg_list_next(txg_list_t * tl,void * p,uint64_t txg)1047eda14cbcSMatt Macy txg_list_next(txg_list_t *tl, void *p, uint64_t txg)
1048eda14cbcSMatt Macy {
1049eda14cbcSMatt Macy int t = txg & TXG_MASK;
1050eda14cbcSMatt Macy txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
1051eda14cbcSMatt Macy
1052eda14cbcSMatt Macy TXG_VERIFY(tl->tl_spa, txg);
1053eda14cbcSMatt Macy
1054eda14cbcSMatt Macy mutex_enter(&tl->tl_lock);
1055eda14cbcSMatt Macy tn = tn->tn_next[t];
1056eda14cbcSMatt Macy mutex_exit(&tl->tl_lock);
1057eda14cbcSMatt Macy
1058eda14cbcSMatt Macy return (tn == NULL ? NULL : (char *)tn - tl->tl_offset);
1059eda14cbcSMatt Macy }
1060eda14cbcSMatt Macy
/* Kernel-module symbol exports for the public txg interfaces. */
EXPORT_SYMBOL(txg_init);
EXPORT_SYMBOL(txg_fini);
EXPORT_SYMBOL(txg_sync_start);
EXPORT_SYMBOL(txg_sync_stop);
EXPORT_SYMBOL(txg_hold_open);
EXPORT_SYMBOL(txg_rele_to_quiesce);
EXPORT_SYMBOL(txg_rele_to_sync);
EXPORT_SYMBOL(txg_register_callbacks);
EXPORT_SYMBOL(txg_delay);
EXPORT_SYMBOL(txg_wait_synced);
EXPORT_SYMBOL(txg_wait_open);
EXPORT_SYMBOL(txg_wait_callbacks);
EXPORT_SYMBOL(txg_stalled);
EXPORT_SYMBOL(txg_sync_waiting);

/* Runtime-writable tunable: zfs_txg_timeout. */
ZFS_MODULE_PARAM(zfs_txg, zfs_txg_, timeout, UINT, ZMOD_RW,
	"Max seconds worth of delta per txg");
1078