xref: /freebsd-src/sys/contrib/openzfs/module/os/freebsd/zfs/abd_os.c (revision 87bf66d4a7488c496af110d4d05cc0273d49f82e)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright (c) 2014 by Chunwei Chen. All rights reserved.
14  * Copyright (c) 2016 by Delphix. All rights reserved.
15  */
16 
17 /*
18  * See abd.c for a general overview of the arc buffered data (ABD).
19  *
20  * Using a large proportion of scattered ABDs decreases ARC fragmentation since
21  * when we are at the limit of allocatable space, using equal-size chunks will
22  * allow us to quickly reclaim enough space for a new large allocation (assuming
23  * it is also scattered).
24  *
25  * ABDs are allocated scattered by default unless the caller uses
26  * abd_alloc_linear() or zfs_abd_scatter_enabled is disabled.
27  */
28 
29 #include <sys/abd_impl.h>
30 #include <sys/param.h>
31 #include <sys/types.h>
32 #include <sys/zio.h>
33 #include <sys/zfs_context.h>
34 #include <sys/zfs_znode.h>
35 #include <sys/vm.h>
36 
/*
 * Named kstat entries exported for ABD accounting, one per counter.
 * abd_kstats_update() refreshes each value from the matching wmsum
 * in abd_sums.
 */
typedef struct abd_stats {
	kstat_named_t abdstat_struct_size;
	kstat_named_t abdstat_scatter_cnt;
	kstat_named_t abdstat_scatter_data_size;
	kstat_named_t abdstat_scatter_chunk_waste;
	kstat_named_t abdstat_linear_cnt;
	kstat_named_t abdstat_linear_data_size;
} abd_stats_t;
45 
/*
 * Name and data type of every exported ABD statistic, listed in the same
 * order as the fields of abd_stats_t. abd_init() installs this object as
 * the ks_data of the "abdstats" kstat.
 */
static abd_stats_t abd_stats = {
	/* Amount of memory occupied by all of the abd_t struct allocations */
	{ "struct_size",			KSTAT_DATA_UINT64 },
	/*
	 * The number of scatter ABDs which are currently allocated, excluding
	 * ABDs which don't own their data (for instance the ones which were
	 * allocated through abd_get_offset()).
	 */
	{ "scatter_cnt",			KSTAT_DATA_UINT64 },
	/* Amount of data stored in all scatter ABDs tracked by scatter_cnt */
	{ "scatter_data_size",			KSTAT_DATA_UINT64 },
	/*
	 * The amount of space wasted at the end of the last chunk across all
	 * scatter ABDs tracked by scatter_cnt.
	 */
	{ "scatter_chunk_waste",		KSTAT_DATA_UINT64 },
	/*
	 * The number of linear ABDs which are currently allocated, excluding
	 * ABDs which don't own their data (for instance the ones which were
	 * allocated through abd_get_offset() and abd_get_from_buf()). If an
	 * ABD takes ownership of its buf then it will become tracked.
	 */
	{ "linear_cnt",				KSTAT_DATA_UINT64 },
	/* Amount of data stored in all linear ABDs tracked by linear_cnt */
	{ "linear_data_size",			KSTAT_DATA_UINT64 },
};
72 
/*
 * Live counters behind the ABD kstats, one wmsum per statistic. They are
 * set up in abd_init(), torn down in abd_fini(), and sampled by
 * abd_kstats_update() whenever the kstat is read.
 */
struct {
	wmsum_t abdstat_struct_size;
	wmsum_t abdstat_scatter_cnt;
	wmsum_t abdstat_scatter_data_size;
	wmsum_t abdstat_scatter_chunk_waste;
	wmsum_t abdstat_linear_cnt;
	wmsum_t abdstat_linear_data_size;
} abd_sums;
81 
/*
 * zfs_abd_scatter_min_size is the minimum allocation size for which scatter
 * ABDs are used.  Smaller allocations use linear ABDs, which are backed by
 * zio_[data_]buf_alloc().
 *
 * Scatter ABDs use at least one page each, so sub-page allocations waste
 * some space when allocated as scatter (e.g. a 2KB scatter allocation wastes
 * half of its page).  Using linear ABDs for small allocations means that
 * they will be put on slabs which contain many allocations.
 *
 * Linear ABDs for multi-page allocations are easier to use, and in some
 * cases they make it possible to avoid copying buffers.  But allocating and
 * especially freeing multi-page linear ABDs is expensive because of KVA
 * mapping and unmapping, and over time it causes KVA fragmentation.
 *
 * The default of PAGE_SIZE + 1 therefore keeps every allocation of up to one
 * page linear (see abd_size_alloc_linear()).
 */
static size_t zfs_abd_scatter_min_size = PAGE_SIZE + 1;
98 
SYSCTL_DECL(_vfs_zfs);

/*
 * Publish the two scatter tunables as sysctl nodes below _vfs_zfs so that
 * zfs_abd_scatter_enabled and zfs_abd_scatter_min_size can be inspected and
 * changed by the administrator.
 */
SYSCTL_INT(_vfs_zfs, OID_AUTO, abd_scatter_enabled, CTLFLAG_RWTUN,
	&zfs_abd_scatter_enabled, 0, "Enable scattered ARC data buffers");
SYSCTL_ULONG(_vfs_zfs, OID_AUTO, abd_scatter_min_size, CTLFLAG_RWTUN,
	&zfs_abd_scatter_min_size, 0, "Minimum size of scatter allocations.");

/*
 * Cache of PAGE_SIZE buffers that back the chunks of scatter ABDs. It is
 * created in abd_init() and destroyed in abd_fini().
 */
kmem_cache_t *abd_chunk_cache;
/* The "abdstats" kstat; stays NULL if kstat_create() fails in abd_init(). */
static kstat_t *abd_ksp;

/*
 * We use a scattered SPA_MAXBLOCKSIZE sized ABD whose chunks are
 * just a single zero'd page-sized buffer. This allows us to conserve
 * memory by only using a single zero buffer for the scatter chunks.
 */
abd_t *abd_zero_scatter = NULL;
115 
116 static uint_t
117 abd_chunkcnt_for_bytes(size_t size)
118 {
119 	return ((size + PAGE_MASK) >> PAGE_SHIFT);
120 }
121 
122 static inline uint_t
123 abd_scatter_chunkcnt(abd_t *abd)
124 {
125 	ASSERT(!abd_is_linear(abd));
126 	return (abd_chunkcnt_for_bytes(
127 	    ABD_SCATTER(abd).abd_offset + abd->abd_size));
128 }
129 
130 boolean_t
131 abd_size_alloc_linear(size_t size)
132 {
133 	return (!zfs_abd_scatter_enabled || size < zfs_abd_scatter_min_size);
134 }
135 
136 void
137 abd_update_scatter_stats(abd_t *abd, abd_stats_op_t op)
138 {
139 	uint_t n;
140 
141 	n = abd_scatter_chunkcnt(abd);
142 	ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
143 	int waste = (n << PAGE_SHIFT) - abd->abd_size;
144 	if (op == ABDSTAT_INCR) {
145 		ABDSTAT_BUMP(abdstat_scatter_cnt);
146 		ABDSTAT_INCR(abdstat_scatter_data_size, abd->abd_size);
147 		ABDSTAT_INCR(abdstat_scatter_chunk_waste, waste);
148 		arc_space_consume(waste, ARC_SPACE_ABD_CHUNK_WASTE);
149 	} else {
150 		ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
151 		ABDSTAT_INCR(abdstat_scatter_data_size, -(int)abd->abd_size);
152 		ABDSTAT_INCR(abdstat_scatter_chunk_waste, -waste);
153 		arc_space_return(waste, ARC_SPACE_ABD_CHUNK_WASTE);
154 	}
155 }
156 
157 void
158 abd_update_linear_stats(abd_t *abd, abd_stats_op_t op)
159 {
160 	ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
161 	if (op == ABDSTAT_INCR) {
162 		ABDSTAT_BUMP(abdstat_linear_cnt);
163 		ABDSTAT_INCR(abdstat_linear_data_size, abd->abd_size);
164 	} else {
165 		ABDSTAT_BUMPDOWN(abdstat_linear_cnt);
166 		ABDSTAT_INCR(abdstat_linear_data_size, -(int)abd->abd_size);
167 	}
168 }
169 
170 void
171 abd_verify_scatter(abd_t *abd)
172 {
173 	uint_t i, n;
174 
175 	/*
176 	 * There is no scatter linear pages in FreeBSD so there is
177 	 * an error if the ABD has been marked as a linear page.
178 	 */
179 	ASSERT(!abd_is_linear_page(abd));
180 	ASSERT3U(ABD_SCATTER(abd).abd_offset, <, PAGE_SIZE);
181 	n = abd_scatter_chunkcnt(abd);
182 	for (i = 0; i < n; i++) {
183 		ASSERT3P(ABD_SCATTER(abd).abd_chunks[i], !=, NULL);
184 	}
185 }
186 
187 void
188 abd_alloc_chunks(abd_t *abd, size_t size)
189 {
190 	uint_t i, n;
191 
192 	n = abd_chunkcnt_for_bytes(size);
193 	for (i = 0; i < n; i++) {
194 		ABD_SCATTER(abd).abd_chunks[i] =
195 		    kmem_cache_alloc(abd_chunk_cache, KM_PUSHPAGE);
196 	}
197 }
198 
199 void
200 abd_free_chunks(abd_t *abd)
201 {
202 	uint_t i, n;
203 
204 	/*
205 	 * Scatter ABDs may be constructed by abd_alloc_from_pages() from
206 	 * an array of pages. In which case they should not be freed.
207 	 */
208 	if (!abd_is_from_pages(abd)) {
209 		n = abd_scatter_chunkcnt(abd);
210 		for (i = 0; i < n; i++) {
211 			kmem_cache_free(abd_chunk_cache,
212 			    ABD_SCATTER(abd).abd_chunks[i]);
213 		}
214 	}
215 }
216 
217 abd_t *
218 abd_alloc_struct_impl(size_t size)
219 {
220 	uint_t chunkcnt = abd_chunkcnt_for_bytes(size);
221 	/*
222 	 * In the event we are allocating a gang ABD, the size passed in
223 	 * will be 0. We must make sure to set abd_size to the size of an
224 	 * ABD struct as opposed to an ABD scatter with 0 chunks. The gang
225 	 * ABD struct allocation accounts for an additional 24 bytes over
226 	 * a scatter ABD with 0 chunks.
227 	 */
228 	size_t abd_size = MAX(sizeof (abd_t),
229 	    offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]));
230 	abd_t *abd = kmem_alloc(abd_size, KM_PUSHPAGE);
231 	ASSERT3P(abd, !=, NULL);
232 	ABDSTAT_INCR(abdstat_struct_size, abd_size);
233 
234 	return (abd);
235 }
236 
237 void
238 abd_free_struct_impl(abd_t *abd)
239 {
240 	uint_t chunkcnt = abd_is_linear(abd) || abd_is_gang(abd) ? 0 :
241 	    abd_scatter_chunkcnt(abd);
242 	ssize_t size = MAX(sizeof (abd_t),
243 	    offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]));
244 	kmem_free(abd, size);
245 	ABDSTAT_INCR(abdstat_struct_size, -size);
246 }
247 
248 /*
249  * Allocate scatter ABD of size SPA_MAXBLOCKSIZE, where
250  * each chunk in the scatterlist will be set to the same area.
251  */
252 _Static_assert(ZERO_REGION_SIZE >= PAGE_SIZE, "zero_region too small");
253 static void
254 abd_alloc_zero_scatter(void)
255 {
256 	uint_t i, n;
257 
258 	n = abd_chunkcnt_for_bytes(SPA_MAXBLOCKSIZE);
259 	abd_zero_scatter = abd_alloc_struct(SPA_MAXBLOCKSIZE);
260 	abd_zero_scatter->abd_flags |= ABD_FLAG_OWNER;
261 	abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE;
262 
263 	ABD_SCATTER(abd_zero_scatter).abd_offset = 0;
264 
265 	for (i = 0; i < n; i++) {
266 		ABD_SCATTER(abd_zero_scatter).abd_chunks[i] =
267 		    __DECONST(void *, zero_region);
268 	}
269 
270 	ABDSTAT_BUMP(abdstat_scatter_cnt);
271 	ABDSTAT_INCR(abdstat_scatter_data_size, PAGE_SIZE);
272 }
273 
274 static void
275 abd_free_zero_scatter(void)
276 {
277 	ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
278 	ABDSTAT_INCR(abdstat_scatter_data_size, -(int)PAGE_SIZE);
279 
280 	abd_free_struct(abd_zero_scatter);
281 	abd_zero_scatter = NULL;
282 }
283 
284 static int
285 abd_kstats_update(kstat_t *ksp, int rw)
286 {
287 	abd_stats_t *as = ksp->ks_data;
288 
289 	if (rw == KSTAT_WRITE)
290 		return (EACCES);
291 	as->abdstat_struct_size.value.ui64 =
292 	    wmsum_value(&abd_sums.abdstat_struct_size);
293 	as->abdstat_scatter_cnt.value.ui64 =
294 	    wmsum_value(&abd_sums.abdstat_scatter_cnt);
295 	as->abdstat_scatter_data_size.value.ui64 =
296 	    wmsum_value(&abd_sums.abdstat_scatter_data_size);
297 	as->abdstat_scatter_chunk_waste.value.ui64 =
298 	    wmsum_value(&abd_sums.abdstat_scatter_chunk_waste);
299 	as->abdstat_linear_cnt.value.ui64 =
300 	    wmsum_value(&abd_sums.abdstat_linear_cnt);
301 	as->abdstat_linear_data_size.value.ui64 =
302 	    wmsum_value(&abd_sums.abdstat_linear_data_size);
303 	return (0);
304 }
305 
306 void
307 abd_init(void)
308 {
309 	abd_chunk_cache = kmem_cache_create("abd_chunk", PAGE_SIZE, 0,
310 	    NULL, NULL, NULL, NULL, 0, KMC_NODEBUG | KMC_RECLAIMABLE);
311 
312 	wmsum_init(&abd_sums.abdstat_struct_size, 0);
313 	wmsum_init(&abd_sums.abdstat_scatter_cnt, 0);
314 	wmsum_init(&abd_sums.abdstat_scatter_data_size, 0);
315 	wmsum_init(&abd_sums.abdstat_scatter_chunk_waste, 0);
316 	wmsum_init(&abd_sums.abdstat_linear_cnt, 0);
317 	wmsum_init(&abd_sums.abdstat_linear_data_size, 0);
318 
319 	abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED,
320 	    sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
321 	if (abd_ksp != NULL) {
322 		abd_ksp->ks_data = &abd_stats;
323 		abd_ksp->ks_update = abd_kstats_update;
324 		kstat_install(abd_ksp);
325 	}
326 
327 	abd_alloc_zero_scatter();
328 }
329 
330 void
331 abd_fini(void)
332 {
333 	abd_free_zero_scatter();
334 
335 	if (abd_ksp != NULL) {
336 		kstat_delete(abd_ksp);
337 		abd_ksp = NULL;
338 	}
339 
340 	wmsum_fini(&abd_sums.abdstat_struct_size);
341 	wmsum_fini(&abd_sums.abdstat_scatter_cnt);
342 	wmsum_fini(&abd_sums.abdstat_scatter_data_size);
343 	wmsum_fini(&abd_sums.abdstat_scatter_chunk_waste);
344 	wmsum_fini(&abd_sums.abdstat_linear_cnt);
345 	wmsum_fini(&abd_sums.abdstat_linear_data_size);
346 
347 	kmem_cache_destroy(abd_chunk_cache);
348 	abd_chunk_cache = NULL;
349 }
350 
351 void
352 abd_free_linear_page(abd_t *abd)
353 {
354 	ASSERT3P(abd->abd_u.abd_linear.sf, !=, NULL);
355 	zfs_unmap_page(abd->abd_u.abd_linear.sf);
356 }
357 
358 /*
359  * If we're going to use this ABD for doing I/O using the block layer, the
360  * consumer of the ABD data doesn't care if it's scattered or not, and we don't
361  * plan to store this ABD in memory for a long period of time, we should
362  * allocate the ABD type that requires the least data copying to do the I/O.
363  *
364  * Currently this is linear ABDs, however if ldi_strategy() can ever issue I/Os
365  * using a scatter/gather list we should switch to that and replace this call
366  * with vanilla abd_alloc().
367  */
368 abd_t *
369 abd_alloc_for_io(size_t size, boolean_t is_metadata)
370 {
371 	return (abd_alloc_linear(size, is_metadata));
372 }
373 
374 static abd_t *
375 abd_get_offset_from_pages(abd_t *abd, abd_t *sabd, size_t chunkcnt,
376     size_t new_offset)
377 {
378 	ASSERT(abd_is_from_pages(sabd));
379 
380 	/*
381 	 * Set the child child chunks to point at the parent chunks as
382 	 * the chunks are just pages and we don't want to copy them.
383 	 */
384 	size_t parent_offset = new_offset / PAGE_SIZE;
385 	ASSERT3U(parent_offset, <, abd_scatter_chunkcnt(sabd));
386 	for (int i = 0; i < chunkcnt; i++)
387 		ABD_SCATTER(abd).abd_chunks[i] =
388 		    ABD_SCATTER(sabd).abd_chunks[parent_offset + i];
389 
390 	abd->abd_flags |= ABD_FLAG_FROM_PAGES;
391 	return (abd);
392 }
393 
394 abd_t *
395 abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off,
396     size_t size)
397 {
398 	abd_verify(sabd);
399 	ASSERT3U(off, <=, sabd->abd_size);
400 
401 	size_t new_offset = ABD_SCATTER(sabd).abd_offset + off;
402 	size_t chunkcnt = abd_chunkcnt_for_bytes(
403 	    (new_offset & PAGE_MASK) + size);
404 
405 	ASSERT3U(chunkcnt, <=, abd_scatter_chunkcnt(sabd));
406 
407 	/*
408 	 * If an abd struct is provided, it is only the minimum size.  If we
409 	 * need additional chunks, we need to allocate a new struct.
410 	 */
411 	if (abd != NULL &&
412 	    offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]) >
413 	    sizeof (abd_t)) {
414 		abd = NULL;
415 	}
416 
417 	if (abd == NULL)
418 		abd = abd_alloc_struct(chunkcnt << PAGE_SHIFT);
419 
420 	/*
421 	 * Even if this buf is filesystem metadata, we only track that
422 	 * if we own the underlying data buffer, which is not true in
423 	 * this case. Therefore, we don't ever use ABD_FLAG_META here.
424 	 */
425 
426 	ABD_SCATTER(abd).abd_offset = new_offset & PAGE_MASK;
427 
428 	if (abd_is_from_pages(sabd)) {
429 		return (abd_get_offset_from_pages(abd, sabd, chunkcnt,
430 		    new_offset));
431 	}
432 
433 	/* Copy the scatterlist starting at the correct offset */
434 	(void) memcpy(&ABD_SCATTER(abd).abd_chunks,
435 	    &ABD_SCATTER(sabd).abd_chunks[new_offset >> PAGE_SHIFT],
436 	    chunkcnt * sizeof (void *));
437 
438 	return (abd);
439 }
440 
441 /*
442  * Allocate a scatter ABD structure from user pages.
443  */
444 abd_t *
445 abd_alloc_from_pages(vm_page_t *pages, unsigned long offset, uint64_t size)
446 {
447 	VERIFY3U(size, <=, DMU_MAX_ACCESS);
448 	ASSERT3U(offset, <, PAGE_SIZE);
449 	ASSERT3P(pages, !=, NULL);
450 
451 	abd_t *abd = abd_alloc_struct(size);
452 	abd->abd_flags |= ABD_FLAG_OWNER | ABD_FLAG_FROM_PAGES;
453 	abd->abd_size = size;
454 
455 	if ((offset + size) <= PAGE_SIZE) {
456 		/*
457 		 * There is only a single page worth of data, so we will just
458 		 * use  a linear ABD. We have to make sure to take into account
459 		 * the offset though. In all other cases our offset will be 0
460 		 * as we are always PAGE_SIZE aligned.
461 		 */
462 		abd->abd_flags |= ABD_FLAG_LINEAR | ABD_FLAG_LINEAR_PAGE;
463 		ABD_LINEAR_BUF(abd) = (char *)zfs_map_page(pages[0],
464 		    &abd->abd_u.abd_linear.sf) + offset;
465 	} else {
466 		ABD_SCATTER(abd).abd_offset = offset;
467 		ASSERT0(ABD_SCATTER(abd).abd_offset);
468 
469 		/*
470 		 * Setting the ABD's abd_chunks to point to the user pages.
471 		 */
472 		for (int i = 0; i < abd_chunkcnt_for_bytes(size); i++)
473 			ABD_SCATTER(abd).abd_chunks[i] = pages[i];
474 	}
475 
476 	return (abd);
477 }
478 
479 /*
480  * Initialize the abd_iter.
481  */
482 void
483 abd_iter_init(struct abd_iter *aiter, abd_t *abd)
484 {
485 	ASSERT(!abd_is_gang(abd));
486 	abd_verify(abd);
487 	memset(aiter, 0, sizeof (struct abd_iter));
488 	aiter->iter_abd = abd;
489 }
490 
491 /*
492  * This is just a helper function to see if we have exhausted the
493  * abd_iter and reached the end.
494  */
495 boolean_t
496 abd_iter_at_end(struct abd_iter *aiter)
497 {
498 	return (aiter->iter_pos == aiter->iter_abd->abd_size);
499 }
500 
501 /*
502  * Advance the iterator by a certain amount. Cannot be called when a chunk is
503  * in use. This can be safely called when the aiter has already exhausted, in
504  * which case this does nothing.
505  */
506 void
507 abd_iter_advance(struct abd_iter *aiter, size_t amount)
508 {
509 	ASSERT3P(aiter->iter_mapaddr, ==, NULL);
510 	ASSERT0(aiter->iter_mapsize);
511 
512 	/* There's nothing left to advance to, so do nothing */
513 	if (abd_iter_at_end(aiter))
514 		return;
515 
516 	aiter->iter_pos += amount;
517 }
518 
519 /*
520  * Map the current chunk into aiter. This can be safely called when the aiter
521  * has already exhausted, in which case this does nothing.
522  */
523 void
524 abd_iter_map(struct abd_iter *aiter)
525 {
526 	void *paddr;
527 
528 	ASSERT3P(aiter->iter_mapaddr, ==, NULL);
529 	ASSERT0(aiter->iter_mapsize);
530 
531 	/* There's nothing left to iterate over, so do nothing */
532 	if (abd_iter_at_end(aiter))
533 		return;
534 
535 	abd_t *abd = aiter->iter_abd;
536 	size_t offset = aiter->iter_pos;
537 	if (abd_is_linear(abd)) {
538 		aiter->iter_mapsize = abd->abd_size - offset;
539 		paddr = ABD_LINEAR_BUF(abd);
540 	} else if (abd_is_from_pages(abd)) {
541 		aiter->sf = NULL;
542 		offset += ABD_SCATTER(abd).abd_offset;
543 		size_t index = offset / PAGE_SIZE;
544 		offset &= PAGE_MASK;
545 		aiter->iter_mapsize = MIN(PAGE_SIZE - offset,
546 		    abd->abd_size - aiter->iter_pos);
547 		paddr = zfs_map_page(
548 		    ABD_SCATTER(aiter->iter_abd).abd_chunks[index],
549 		    &aiter->sf);
550 	} else {
551 		offset += ABD_SCATTER(abd).abd_offset;
552 		paddr = ABD_SCATTER(abd).abd_chunks[offset >> PAGE_SHIFT];
553 		offset &= PAGE_MASK;
554 		aiter->iter_mapsize = MIN(PAGE_SIZE - offset,
555 		    abd->abd_size - aiter->iter_pos);
556 	}
557 	aiter->iter_mapaddr = (char *)paddr + offset;
558 }
559 
560 /*
561  * Unmap the current chunk from aiter. This can be safely called when the aiter
562  * has already exhausted, in which case this does nothing.
563  */
564 void
565 abd_iter_unmap(struct abd_iter *aiter)
566 {
567 	if (!abd_iter_at_end(aiter)) {
568 		ASSERT3P(aiter->iter_mapaddr, !=, NULL);
569 		ASSERT3U(aiter->iter_mapsize, >, 0);
570 	}
571 
572 	if (abd_is_from_pages(aiter->iter_abd) &&
573 	    !abd_is_linear_page(aiter->iter_abd)) {
574 		ASSERT3P(aiter->sf, !=, NULL);
575 		zfs_unmap_page(aiter->sf);
576 	}
577 
578 	aiter->iter_mapaddr = NULL;
579 	aiter->iter_mapsize = 0;
580 }
581 
582 void
583 abd_cache_reap_now(void)
584 {
585 	kmem_cache_reap_soon(abd_chunk_cache);
586 }
587 
588 /*
589  * Borrow a raw buffer from an ABD without copying the contents of the ABD
590  * into the buffer. If the ABD is scattered, this will alloate a raw buffer
591  * whose contents are undefined. To copy over the existing data in the ABD, use
592  * abd_borrow_buf_copy() instead.
593  */
594 void *
595 abd_borrow_buf(abd_t *abd, size_t n)
596 {
597 	void *buf;
598 	abd_verify(abd);
599 	ASSERT3U(abd->abd_size, >=, 0);
600 	if (abd_is_linear(abd)) {
601 		buf = abd_to_buf(abd);
602 	} else {
603 		buf = zio_buf_alloc(n);
604 	}
605 #ifdef ZFS_DEBUG
606 	(void) zfs_refcount_add_many(&abd->abd_children, n, buf);
607 #endif
608 	return (buf);
609 }
610 
611 void *
612 abd_borrow_buf_copy(abd_t *abd, size_t n)
613 {
614 	void *buf = abd_borrow_buf(abd, n);
615 	if (!abd_is_linear(abd)) {
616 		abd_copy_to_buf(buf, abd, n);
617 	}
618 	return (buf);
619 }
620 
621 /*
622  * Return a borrowed raw buffer to an ABD. If the ABD is scattered, this will
623  * not change the contents of the ABD. If you want any changes you made to
624  * buf to be copied back to abd, use abd_return_buf_copy() instead. If the
625  * ABD is not constructed from user pages from Direct I/O then an ASSERT
626  * checks to make sure the contents of the buffer have not changed since it was
627  * borrowed. We can not ASSERT the contents of the buffer have not changed if
628  * it is composed of user pages. While Direct I/O write pages are placed under
629  * write protection and can not be changed, this is not the case for Direct I/O
630  * reads. The pages of a Direct I/O read could be manipulated at any time.
631  * Checksum verifications in the ZIO pipeline check for this issue and handle
632  * it by returning an error on checksum verification failure.
633  */
634 void
635 abd_return_buf(abd_t *abd, void *buf, size_t n)
636 {
637 	abd_verify(abd);
638 	ASSERT3U(abd->abd_size, >=, n);
639 #ifdef ZFS_DEBUG
640 	(void) zfs_refcount_remove_many(&abd->abd_children, n, buf);
641 #endif
642 	if (abd_is_from_pages(abd)) {
643 		if (!abd_is_linear_page(abd))
644 			zio_buf_free(buf, n);
645 	} else if (abd_is_linear(abd)) {
646 		ASSERT3P(buf, ==, abd_to_buf(abd));
647 	} else if (abd_is_gang(abd)) {
648 #ifdef ZFS_DEBUG
649 		/*
650 		 * We have to be careful with gang ABD's that we do not ASSERT
651 		 * for any ABD's that contain user pages from Direct I/O. See
652 		 * the comment above about Direct I/O read buffers possibly
653 		 * being manipulated. In order to handle this, we jsut iterate
654 		 * through the gang ABD and only verify ABD's that are not from
655 		 * user pages.
656 		 */
657 		void *cmp_buf = buf;
658 
659 		for (abd_t *cabd = list_head(&ABD_GANG(abd).abd_gang_chain);
660 		    cabd != NULL;
661 		    cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd)) {
662 			if (!abd_is_from_pages(cabd)) {
663 				ASSERT0(abd_cmp_buf(cabd, cmp_buf,
664 				    cabd->abd_size));
665 			}
666 			cmp_buf = (char *)cmp_buf + cabd->abd_size;
667 		}
668 #endif
669 		zio_buf_free(buf, n);
670 	} else {
671 		ASSERT0(abd_cmp_buf(abd, buf, n));
672 		zio_buf_free(buf, n);
673 	}
674 }
675 
676 void
677 abd_return_buf_copy(abd_t *abd, void *buf, size_t n)
678 {
679 	if (!abd_is_linear(abd)) {
680 		abd_copy_from_buf(abd, buf, n);
681 	}
682 	abd_return_buf(abd, buf, n);
683 }
684