/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
 * All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/spa_impl.h>
#include <sys/vdev_impl.h>
#include <sys/trim_map.h>
#include <sys/time.h>

/*
 * Calculate the zio end, rounding the size up to the vdev's ashift as
 * zio_vdev_io_start would do.
 *
 * This makes free range consolidation much more effective than it would
 * otherwise be, and ensures that entire blocks are invalidated by writes.
 */
#define	TRIM_ZIO_END(vd, offset, size)	(offset +		\
	P2ROUNDUP(size, 1ULL << vd->vdev_top->vdev_ashift))

/* Maximum segment size for an ATA TRIM command. */
#define TRIM_MAP_SIZE_FACTOR	(512 << 16)

#define TRIM_MAP_SEGS(size)	(1 + (size) / TRIM_MAP_SIZE_FACTOR)

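/*
 * For instance, with TRIM_MAP_SIZE_FACTOR = 32MB (512 << 16 bytes), a
 * 100MB free range counts as 1 + 100MB / 32MB = 4 pending segments,
 * approximating the number of TRIM commands needed to cover it.
 */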
#define TRIM_MAP_ADD(tm, ts)	do {				\
	list_insert_tail(&(tm)->tm_head, (ts));			\
	(tm)->tm_pending += TRIM_MAP_SEGS((ts)->ts_end - (ts)->ts_start); \
} while (0)

#define TRIM_MAP_REM(tm, ts)	do {				\
	list_remove(&(tm)->tm_head, (ts));			\
	(tm)->tm_pending -= TRIM_MAP_SEGS((ts)->ts_end - (ts)->ts_start); \
} while (0)

typedef struct trim_map {
	list_t		tm_head;		/* List of segments sorted by txg. */
	avl_tree_t	tm_queued_frees;	/* AVL tree of segments waiting for TRIM. */
	avl_tree_t	tm_inflight_frees;	/* AVL tree of in-flight TRIMs. */
	avl_tree_t	tm_inflight_writes;	/* AVL tree of in-flight writes. */
	list_t		tm_pending_writes;	/* Writes blocked on in-flight frees. */
	kmutex_t	tm_lock;
	uint64_t	tm_pending;		/* Count of pending TRIM segments. */
} trim_map_t;

typedef struct trim_seg {
	avl_node_t	ts_node;	/* AVL node. */
	list_node_t	ts_next;	/* List element. */
	uint64_t	ts_start;	/* Starting offset of this segment. */
	uint64_t	ts_end;		/* Ending offset (non-inclusive). */
	uint64_t	ts_txg;		/* Segment creation txg. */
	hrtime_t	ts_time;	/* Segment creation time. */
} trim_seg_t;

extern boolean_t zfs_trim_enabled;

static u_int trim_txg_delay = 32;	/* Keep deleted data for up to 32 TXGs. */
static u_int trim_timeout = 30;		/* Keep deleted data for up to 30s. */
static u_int trim_max_interval = 1;	/* At most 1s between TRIM runs. */
static u_int trim_vdev_max_pending = 10000; /* Keep up to 10K segments. */

SYSCTL_DECL(_vfs_zfs);
SYSCTL_NODE(_vfs_zfs, OID_AUTO, trim, CTLFLAG_RD, 0, "ZFS TRIM");

SYSCTL_UINT(_vfs_zfs_trim, OID_AUTO, txg_delay, CTLFLAG_RWTUN, &trim_txg_delay,
    0, "Delay TRIMs by up to this many TXGs");
SYSCTL_UINT(_vfs_zfs_trim, OID_AUTO, timeout, CTLFLAG_RWTUN, &trim_timeout, 0,
    "Delay TRIMs by up to this many seconds");
SYSCTL_UINT(_vfs_zfs_trim, OID_AUTO, max_interval, CTLFLAG_RWTUN,
    &trim_max_interval, 0,
    "Maximum interval between TRIM queue processing (seconds)");

SYSCTL_DECL(_vfs_zfs_vdev);
SYSCTL_UINT(_vfs_zfs_vdev, OID_AUTO, trim_max_pending, CTLFLAG_RWTUN,
    &trim_vdev_max_pending, 0,
    "Maximum pending TRIM segments for a vdev");

static void trim_map_vdev_commit_done(spa_t *spa, vdev_t *vd);

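/*
 * Comparator for the segment AVL trees.  Overlapping segments compare
 * equal, so an avl_find() with a search key returns some segment that
 * overlaps it, or NULL if none does.
 */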
static int
trim_map_seg_compare(const void *x1, const void *x2)
{
	const trim_seg_t *s1 = x1;
	const trim_seg_t *s2 = x2;

	if (s1->ts_start < s2->ts_start) {
		if (s1->ts_end > s2->ts_start)
			return (0);
		return (-1);
	}
	if (s1->ts_start > s2->ts_start) {
		if (s1->ts_start < s2->ts_end)
			return (0);
		return (1);
	}
	return (0);
}

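/*
 * The analogous comparator for zios: a zio compares equal to any zio
 * whose [io_offset, io_offset + io_size) range it overlaps.
 */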
static int
trim_map_zio_compare(const void *x1, const void *x2)
{
	const zio_t *z1 = x1;
	const zio_t *z2 = x2;

	if (z1->io_offset < z2->io_offset) {
		if (z1->io_offset + z1->io_size > z2->io_offset)
			return (0);
		return (-1);
	}
	if (z1->io_offset > z2->io_offset) {
		if (z1->io_offset < z2->io_offset + z2->io_size)
			return (0);
		return (1);
	}
	return (0);
}

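/*
 * Allocate and initialize the trim map for a TRIM-capable leaf vdev.
 */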
void
trim_map_create(vdev_t *vd)
{
	trim_map_t *tm;

	ASSERT(zfs_trim_enabled && !vd->vdev_notrim &&
		vd->vdev_ops->vdev_op_leaf);

	tm = kmem_zalloc(sizeof (*tm), KM_SLEEP);
	mutex_init(&tm->tm_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&tm->tm_head, sizeof (trim_seg_t),
	    offsetof(trim_seg_t, ts_next));
	list_create(&tm->tm_pending_writes, sizeof (zio_t),
	    offsetof(zio_t, io_trim_link));
	avl_create(&tm->tm_queued_frees, trim_map_seg_compare,
	    sizeof (trim_seg_t), offsetof(trim_seg_t, ts_node));
	avl_create(&tm->tm_inflight_frees, trim_map_seg_compare,
	    sizeof (trim_seg_t), offsetof(trim_seg_t, ts_node));
	avl_create(&tm->tm_inflight_writes, trim_map_zio_compare,
	    sizeof (zio_t), offsetof(zio_t, io_trim_node));
	vd->vdev_trimmap = tm;
}

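/*
 * Tear down the vdev's trim map, discarding any segments still queued
 * or in flight without trimming them.
 */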
void
trim_map_destroy(vdev_t *vd)
{
	trim_map_t *tm;
	trim_seg_t *ts;

	ASSERT(vd->vdev_ops->vdev_op_leaf);

	if (!zfs_trim_enabled)
		return;

	tm = vd->vdev_trimmap;
	if (tm == NULL)
		return;

	/*
	 * We may have been called before trim_map_vdev_commit_done()
	 * had a chance to run, so do it now to prune the remaining
	 * inflight frees.
	 */
	trim_map_vdev_commit_done(vd->vdev_spa, vd);

	mutex_enter(&tm->tm_lock);
	while ((ts = list_head(&tm->tm_head)) != NULL) {
		avl_remove(&tm->tm_queued_frees, ts);
		TRIM_MAP_REM(tm, ts);
		kmem_free(ts, sizeof (*ts));
	}
	mutex_exit(&tm->tm_lock);

	avl_destroy(&tm->tm_queued_frees);
	avl_destroy(&tm->tm_inflight_frees);
	avl_destroy(&tm->tm_inflight_writes);
	list_destroy(&tm->tm_pending_writes);
	list_destroy(&tm->tm_head);
	mutex_destroy(&tm->tm_lock);
	kmem_free(tm, sizeof (*tm));
	vd->vdev_trimmap = NULL;
}

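/*
 * Queue [start, end) for TRIM.  Overlaps with an existing segment are
 * handled by recursing on the uncovered pieces; otherwise the range is
 * merged with adjacent segments (refreshing their txg and timestamp) or
 * inserted as a new segment.
 */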
static void
trim_map_segment_add(trim_map_t *tm, uint64_t start, uint64_t end, uint64_t txg)
{
	avl_index_t where;
	trim_seg_t tsearch, *ts_before, *ts_after, *ts;
	boolean_t merge_before, merge_after;
	hrtime_t time;

	ASSERT(MUTEX_HELD(&tm->tm_lock));
	VERIFY(start < end);

	time = gethrtime();
	tsearch.ts_start = start;
	tsearch.ts_end = end;

	ts = avl_find(&tm->tm_queued_frees, &tsearch, &where);
	if (ts != NULL) {
		if (start < ts->ts_start)
			trim_map_segment_add(tm, start, ts->ts_start, txg);
		if (end > ts->ts_end)
			trim_map_segment_add(tm, ts->ts_end, end, txg);
		return;
	}

	ts_before = avl_nearest(&tm->tm_queued_frees, where, AVL_BEFORE);
	ts_after = avl_nearest(&tm->tm_queued_frees, where, AVL_AFTER);

	merge_before = (ts_before != NULL && ts_before->ts_end == start);
	merge_after = (ts_after != NULL && ts_after->ts_start == end);

	if (merge_before && merge_after) {
		avl_remove(&tm->tm_queued_frees, ts_before);
		TRIM_MAP_REM(tm, ts_before);
		TRIM_MAP_REM(tm, ts_after);
		ts_after->ts_start = ts_before->ts_start;
		ts_after->ts_txg = txg;
		ts_after->ts_time = time;
		TRIM_MAP_ADD(tm, ts_after);
		kmem_free(ts_before, sizeof (*ts_before));
	} else if (merge_before) {
		TRIM_MAP_REM(tm, ts_before);
		ts_before->ts_end = end;
		ts_before->ts_txg = txg;
		ts_before->ts_time = time;
		TRIM_MAP_ADD(tm, ts_before);
	} else if (merge_after) {
		TRIM_MAP_REM(tm, ts_after);
		ts_after->ts_start = start;
		ts_after->ts_txg = txg;
		ts_after->ts_time = time;
		TRIM_MAP_ADD(tm, ts_after);
	} else {
		ts = kmem_alloc(sizeof (*ts), KM_SLEEP);
		ts->ts_start = start;
		ts->ts_end = end;
		ts->ts_txg = txg;
		ts->ts_time = time;
		avl_insert(&tm->tm_queued_frees, ts, where);
		TRIM_MAP_ADD(tm, ts);
	}
}

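/*
 * Remove the overlap with [start, end) from queued segment ts: shrink it
 * on the left and/or right, split it in two if the hole is interior, or
 * drop it entirely.
 */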
static void
trim_map_segment_remove(trim_map_t *tm, trim_seg_t *ts, uint64_t start,
    uint64_t end)
{
	trim_seg_t *nts;
	boolean_t left_over, right_over;

	ASSERT(MUTEX_HELD(&tm->tm_lock));

	left_over = (ts->ts_start < start);
	right_over = (ts->ts_end > end);

	TRIM_MAP_REM(tm, ts);
	if (left_over && right_over) {
		nts = kmem_alloc(sizeof (*nts), KM_SLEEP);
		nts->ts_start = end;
		nts->ts_end = ts->ts_end;
		nts->ts_txg = ts->ts_txg;
		nts->ts_time = ts->ts_time;
		ts->ts_end = start;
		avl_insert_here(&tm->tm_queued_frees, nts, ts, AVL_AFTER);
		TRIM_MAP_ADD(tm, ts);
		TRIM_MAP_ADD(tm, nts);
	} else if (left_over) {
		ts->ts_end = start;
		TRIM_MAP_ADD(tm, ts);
	} else if (right_over) {
		ts->ts_start = end;
		TRIM_MAP_ADD(tm, ts);
	} else {
		avl_remove(&tm->tm_queued_frees, ts);
		kmem_free(ts, sizeof (*ts));
	}
}

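/*
 * Add [start, end) to the queued frees, recursing to skip over any
 * in-flight writes so their blocks are never trimmed out from under
 * them.
 */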
static void
trim_map_free_locked(trim_map_t *tm, uint64_t start, uint64_t end, uint64_t txg)
{
	zio_t zsearch, *zs;

	ASSERT(MUTEX_HELD(&tm->tm_lock));

	zsearch.io_offset = start;
	zsearch.io_size = end - start;

	zs = avl_find(&tm->tm_inflight_writes, &zsearch, NULL);
	if (zs == NULL) {
		trim_map_segment_add(tm, start, end, txg);
		return;
	}
	if (start < zs->io_offset)
		trim_map_free_locked(tm, start, zs->io_offset, txg);
	if (zs->io_offset + zs->io_size < end)
		trim_map_free_locked(tm, zs->io_offset + zs->io_size, end, txg);
}

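/*
 * Record a freed extent on the vdev, with the end rounded up to the
 * vdev's ashift via TRIM_ZIO_END().
 */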
void
trim_map_free(vdev_t *vd, uint64_t offset, uint64_t size, uint64_t txg)
{
	trim_map_t *tm = vd->vdev_trimmap;

	if (!zfs_trim_enabled || vd->vdev_notrim || tm == NULL)
		return;

	mutex_enter(&tm->tm_lock);
	trim_map_free_locked(tm, offset, TRIM_ZIO_END(vd, offset, size), txg);
	mutex_exit(&tm->tm_lock);
}

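/*
 * Called before a write is issued to the vdev.  If the write collides
 * with an in-flight free, park it on tm_pending_writes and return
 * B_FALSE so the caller defers it; otherwise drop any overlapping
 * queued frees, record the write in tm_inflight_writes and return
 * B_TRUE.
 */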
boolean_t
trim_map_write_start(zio_t *zio)
{
	vdev_t *vd = zio->io_vd;
	trim_map_t *tm = vd->vdev_trimmap;
	trim_seg_t tsearch, *ts;
	boolean_t left_over, right_over;
	uint64_t start, end;

	if (!zfs_trim_enabled || vd->vdev_notrim || tm == NULL)
		return (B_TRUE);

	start = zio->io_offset;
	end = TRIM_ZIO_END(zio->io_vd, start, zio->io_size);
	tsearch.ts_start = start;
	tsearch.ts_end = end;

	mutex_enter(&tm->tm_lock);

	/*
	 * Check for colliding in-flight frees.
	 */
	ts = avl_find(&tm->tm_inflight_frees, &tsearch, NULL);
	if (ts != NULL) {
		list_insert_tail(&tm->tm_pending_writes, zio);
		mutex_exit(&tm->tm_lock);
		return (B_FALSE);
	}

	ts = avl_find(&tm->tm_queued_frees, &tsearch, NULL);
	if (ts != NULL) {
		/*
		 * Loop until all overlapping segments are removed.
		 */
		do {
			trim_map_segment_remove(tm, ts, start, end);
			ts = avl_find(&tm->tm_queued_frees, &tsearch, NULL);
		} while (ts != NULL);
	}
	avl_add(&tm->tm_inflight_writes, zio);

	mutex_exit(&tm->tm_lock);

	return (B_TRUE);
}

void
trim_map_write_done(zio_t *zio)
{
	vdev_t *vd = zio->io_vd;
	trim_map_t *tm = vd->vdev_trimmap;

	/*
	 * Don't check for vdev_notrim, since the write could have
	 * started before vdev_notrim was set.
	 */
	if (!zfs_trim_enabled || tm == NULL)
		return;

	mutex_enter(&tm->tm_lock);
	/*
	 * Don't fail if the write isn't in the tree, since the write
	 * could have started after vdev_notrim was set.
	 */
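	/*
	 * The node is linked into the tree iff it has a child, a parent,
	 * or is the tree's root.
	 */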
	if (zio->io_trim_node.avl_child[0] ||
	    zio->io_trim_node.avl_child[1] ||
	    AVL_XPARENT(&zio->io_trim_node) ||
	    tm->tm_inflight_writes.avl_root == &zio->io_trim_node)
		avl_remove(&tm->tm_inflight_writes, zio);
	mutex_exit(&tm->tm_lock);
}

/*
 * Return the oldest segment (the one with the lowest txg / time) or NULL if:
 * 1. The list is empty
 * 2. The first element's txg is greater than txgsafe
 * 3. force is not set, and the first element's txg is greater than the txg
 *    argument and its time is greater than the time argument
 */
static trim_seg_t *
trim_map_first(trim_map_t *tm, uint64_t txg, uint64_t txgsafe, hrtime_t time,
    boolean_t force)
{
	trim_seg_t *ts;

	ASSERT(MUTEX_HELD(&tm->tm_lock));
	VERIFY(txgsafe >= txg);

	ts = list_head(&tm->tm_head);
	if (ts != NULL && ts->ts_txg <= txgsafe &&
	    (ts->ts_txg <= txg || ts->ts_time <= time || force))
		return (ts);
	return (NULL);
}

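/*
 * Issue TRIMs for segments that are old enough (by txg and time), moving
 * them from tm_queued_frees to tm_inflight_frees.  "soft" caps how many
 * segments one pass may issue (roughly tm_pending / trim_timeout, so the
 * queue drains within the timeout); "hard" is nonzero when tm_pending
 * exceeds trim_vdev_max_pending, forcing out segments regardless of age.
 */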
static void
trim_map_vdev_commit(spa_t *spa, zio_t *zio, vdev_t *vd)
{
	trim_map_t *tm = vd->vdev_trimmap;
	trim_seg_t *ts;
	uint64_t size, offset, txgtarget, txgsafe;
	int64_t hard, soft;
	hrtime_t timelimit;

	ASSERT(vd->vdev_ops->vdev_op_leaf);

	if (tm == NULL)
		return;

	timelimit = gethrtime() - (hrtime_t)trim_timeout * NANOSEC;
	if (vd->vdev_isl2cache) {
		txgsafe = UINT64_MAX;
		txgtarget = UINT64_MAX;
	} else {
		txgsafe = MIN(spa_last_synced_txg(spa), spa_freeze_txg(spa));
		if (txgsafe > trim_txg_delay)
			txgtarget = txgsafe - trim_txg_delay;
		else
			txgtarget = 0;
	}

	mutex_enter(&tm->tm_lock);
	hard = 0;
	if (tm->tm_pending > trim_vdev_max_pending)
		hard = (tm->tm_pending - trim_vdev_max_pending) / 4;
	soft = P2ROUNDUP(hard + tm->tm_pending / trim_timeout + 1, 64);
	/* Loop until we have sent all outstanding frees. */
	while (soft > 0 &&
	    (ts = trim_map_first(tm, txgtarget, txgsafe, timelimit, hard > 0))
	    != NULL) {
		TRIM_MAP_REM(tm, ts);
		avl_remove(&tm->tm_queued_frees, ts);
		avl_add(&tm->tm_inflight_frees, ts);
		size = ts->ts_end - ts->ts_start;
		offset = ts->ts_start;
		/*
		 * We drop the lock while we call zio_nowait, as the IO
		 * scheduler can result in a different IO being run, e.g.
		 * a write, which would result in a recursive lock.
		 */
		mutex_exit(&tm->tm_lock);

		zio_nowait(zio_trim(zio, spa, vd, offset, size));

		soft -= TRIM_MAP_SEGS(size);
		hard -= TRIM_MAP_SEGS(size);
		mutex_enter(&tm->tm_lock);
	}
	mutex_exit(&tm->tm_lock);
}

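/*
 * Called after the TRIM zios issued by trim_map_vdev_commit() have
 * completed: free the in-flight segments and reissue the writes that
 * were blocked on them.
 */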
static void
trim_map_vdev_commit_done(spa_t *spa, vdev_t *vd)
{
	trim_map_t *tm = vd->vdev_trimmap;
	trim_seg_t *ts;
	list_t pending_writes;
	zio_t *zio;
	uint64_t start, size;
	void *cookie;

	ASSERT(vd->vdev_ops->vdev_op_leaf);

	if (tm == NULL)
		return;

	mutex_enter(&tm->tm_lock);
	if (!avl_is_empty(&tm->tm_inflight_frees)) {
		cookie = NULL;
		while ((ts = avl_destroy_nodes(&tm->tm_inflight_frees,
		    &cookie)) != NULL) {
			kmem_free(ts, sizeof (*ts));
		}
	}
	list_create(&pending_writes, sizeof (zio_t), offsetof(zio_t,
	    io_trim_link));
	list_move_tail(&pending_writes, &tm->tm_pending_writes);
	mutex_exit(&tm->tm_lock);

	while ((zio = list_remove_head(&pending_writes)) != NULL) {
		zio_vdev_io_reissue(zio);
		zio_execute(zio);
	}
	list_destroy(&pending_writes);
}

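/* Recursively commit queued frees on every leaf vdev under vd. */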
static void
trim_map_commit(spa_t *spa, zio_t *zio, vdev_t *vd)
{
	int c;

	if (vd == NULL)
		return;

	if (vd->vdev_ops->vdev_op_leaf) {
		trim_map_vdev_commit(spa, zio, vd);
	} else {
		for (c = 0; c < vd->vdev_children; c++)
			trim_map_commit(spa, zio, vd->vdev_child[c]);
	}
}

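/* As above, but running the completion step on every leaf vdev. */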
static void
trim_map_commit_done(spa_t *spa, vdev_t *vd)
{
	int c;

	if (vd == NULL)
		return;

	if (vd->vdev_ops->vdev_op_leaf) {
		trim_map_vdev_commit_done(spa, vd);
	} else {
		for (c = 0; c < vd->vdev_children; c++)
			trim_map_commit_done(spa, vd->vdev_child[c]);
	}
}

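/*
 * Per-pool TRIM thread: wakes up at least every trim_max_interval
 * seconds (or when signalled), commits eligible frees as TRIM zios,
 * waits for them to complete, then reissues any blocked writes.  The
 * thread exits once trim_thread_destroy() clears spa_trim_thread.
 */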
static void
trim_thread(void *arg)
{
	spa_t *spa = arg;
	zio_t *zio;

#ifdef __FreeBSD__
#ifdef _KERNEL
	(void) snprintf(curthread->td_name, sizeof(curthread->td_name),
	    "trim %s", spa_name(spa));
#endif
#endif
#ifdef __NetBSD__
#ifdef _KERNEL
	char *name, *oname;
	struct lwp *l = curlwp;

	name = kmem_alloc(MAXCOMLEN, KM_SLEEP);
	snprintf(name, MAXCOMLEN, "trim %s", spa_name(spa));
	name[MAXCOMLEN - 1] = 0;

	lwp_lock(l);
	oname = l->l_name;
	l->l_name = name;
	lwp_unlock(l);

	if (oname != NULL)
		kmem_free(oname, MAXCOMLEN);
#endif
#endif

	for (;;) {
		mutex_enter(&spa->spa_trim_lock);
		if (spa->spa_trim_thread == NULL) {
			spa->spa_trim_thread = curthread;
			cv_signal(&spa->spa_trim_cv);
			mutex_exit(&spa->spa_trim_lock);
			thread_exit();
		}

		(void) cv_timedwait(&spa->spa_trim_cv, &spa->spa_trim_lock,
		    hz * trim_max_interval);
		mutex_exit(&spa->spa_trim_lock);

		zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);

		spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
		trim_map_commit(spa, zio, spa->spa_root_vdev);
		(void) zio_wait(zio);
		trim_map_commit_done(spa, spa->spa_root_vdev);
		spa_config_exit(spa, SCL_STATE, FTAG);
	}
}

void
trim_thread_create(spa_t *spa)
{

	if (!zfs_trim_enabled)
		return;

	mutex_init(&spa->spa_trim_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&spa->spa_trim_cv, NULL, CV_DEFAULT, NULL);
	mutex_enter(&spa->spa_trim_lock);
	spa->spa_trim_thread = thread_create(NULL, 0, trim_thread, spa, 0, &p0,
	    TS_RUN, minclsyspri);
	mutex_exit(&spa->spa_trim_lock);
}

void
trim_thread_destroy(spa_t *spa)
{

	if (!zfs_trim_enabled)
		return;
	if (spa->spa_trim_thread == NULL)
		return;

	mutex_enter(&spa->spa_trim_lock);
	/* Setting spa_trim_thread to NULL tells the thread to stop. */
	spa->spa_trim_thread = NULL;
	cv_signal(&spa->spa_trim_cv);
	/* The thread will set it back to != NULL on exit. */
	while (spa->spa_trim_thread == NULL)
		cv_wait(&spa->spa_trim_cv, &spa->spa_trim_lock);
	spa->spa_trim_thread = NULL;
	mutex_exit(&spa->spa_trim_lock);

	cv_destroy(&spa->spa_trim_cv);
	mutex_destroy(&spa->spa_trim_lock);
}

void
trim_thread_wakeup(spa_t *spa)
{

	if (!zfs_trim_enabled)
		return;
	if (spa->spa_trim_thread == NULL)
		return;

	mutex_enter(&spa->spa_trim_lock);
	cv_signal(&spa->spa_trim_cv);
	mutex_exit(&spa->spa_trim_lock);
}