xref: /freebsd-src/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c (revision dd21556857e8d40f66bf5ad54754d9d52669ebf7)
1184c1b94SMartin Matuska /*
2184c1b94SMartin Matuska  * CDDL HEADER START
3184c1b94SMartin Matuska  *
4184c1b94SMartin Matuska  * The contents of this file are subject to the terms of the
5184c1b94SMartin Matuska  * Common Development and Distribution License (the "License").
6184c1b94SMartin Matuska  * You may not use this file except in compliance with the License.
7184c1b94SMartin Matuska  *
8184c1b94SMartin Matuska  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9271171e0SMartin Matuska  * or https://opensource.org/licenses/CDDL-1.0.
10184c1b94SMartin Matuska  * See the License for the specific language governing permissions
11184c1b94SMartin Matuska  * and limitations under the License.
12184c1b94SMartin Matuska  *
13184c1b94SMartin Matuska  * When distributing Covered Code, include this CDDL HEADER in each
14184c1b94SMartin Matuska  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15184c1b94SMartin Matuska  * If applicable, add the following below this CDDL HEADER, with the
16184c1b94SMartin Matuska  * fields enclosed by brackets "[]" replaced with your own identifying
17184c1b94SMartin Matuska  * information: Portions Copyright [yyyy] [name of copyright owner]
18184c1b94SMartin Matuska  *
19184c1b94SMartin Matuska  * CDDL HEADER END
20184c1b94SMartin Matuska  */
21184c1b94SMartin Matuska /*
22184c1b94SMartin Matuska  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23184c1b94SMartin Matuska  * Use is subject to license terms.
24184c1b94SMartin Matuska  */
25184c1b94SMartin Matuska 
26184c1b94SMartin Matuska /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27184c1b94SMartin Matuska /*	  All Rights Reserved	*/
28184c1b94SMartin Matuska 
29184c1b94SMartin Matuska /*
30184c1b94SMartin Matuska  * University Copyright- Copyright (c) 1982, 1986, 1988
31184c1b94SMartin Matuska  * The Regents of the University of California
32184c1b94SMartin Matuska  * All Rights Reserved
33184c1b94SMartin Matuska  *
34184c1b94SMartin Matuska  * University Acknowledgment- Portions of this document are derived from
35184c1b94SMartin Matuska  * software developed by the University of California, Berkeley, and its
36184c1b94SMartin Matuska  * contributors.
37184c1b94SMartin Matuska  */
38184c1b94SMartin Matuska /*
39184c1b94SMartin Matuska  * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
40184c1b94SMartin Matuska  */
41184c1b94SMartin Matuska 
42184c1b94SMartin Matuska #ifdef _KERNEL
43184c1b94SMartin Matuska 
447a7741afSMartin Matuska #include <sys/errno.h>
457a7741afSMartin Matuska #include <sys/vmem.h>
467a7741afSMartin Matuska #include <sys/sysmacros.h>
47184c1b94SMartin Matuska #include <sys/types.h>
48184c1b94SMartin Matuska #include <sys/uio_impl.h>
49184c1b94SMartin Matuska #include <sys/sysmacros.h>
50da5137abSMartin Matuska #include <sys/string.h>
517a7741afSMartin Matuska #include <sys/zfs_refcount.h>
527a7741afSMartin Matuska #include <sys/zfs_debug.h>
53184c1b94SMartin Matuska #include <linux/kmap_compat.h>
54184c1b94SMartin Matuska #include <linux/uaccess.h>
557a7741afSMartin Matuska #include <linux/pagemap.h>
567a7741afSMartin Matuska #include <linux/mman.h>
57184c1b94SMartin Matuska 
58184c1b94SMartin Matuska /*
59184c1b94SMartin Matuska  * Move "n" bytes at byte address "p"; "rw" indicates the direction
60184c1b94SMartin Matuska  * of the move, and the I/O parameters are provided in "uio", which is
61184c1b94SMartin Matuska  * updated to reflect the data which was moved.  Returns 0 on success or
62184c1b94SMartin Matuska  * a non-zero errno on failure.
63184c1b94SMartin Matuska  */
64184c1b94SMartin Matuska static int
65184c1b94SMartin Matuska zfs_uiomove_iov(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
66184c1b94SMartin Matuska {
67184c1b94SMartin Matuska 	const struct iovec *iov = uio->uio_iov;
68184c1b94SMartin Matuska 	size_t skip = uio->uio_skip;
69184c1b94SMartin Matuska 	ulong_t cnt;
70184c1b94SMartin Matuska 
71*dd215568SMartin Matuska 	ASSERT3S(uio->uio_segflg, ==, UIO_SYSSPACE);
72184c1b94SMartin Matuska 	while (n && uio->uio_resid) {
73184c1b94SMartin Matuska 		cnt = MIN(iov->iov_len - skip, n);
74184c1b94SMartin Matuska 		if (rw == UIO_READ)
75da5137abSMartin Matuska 			memcpy(iov->iov_base + skip, p, cnt);
76184c1b94SMartin Matuska 		else
77da5137abSMartin Matuska 			memcpy(p, iov->iov_base + skip, cnt);
78184c1b94SMartin Matuska 		skip += cnt;
79184c1b94SMartin Matuska 		if (skip == iov->iov_len) {
80184c1b94SMartin Matuska 			skip = 0;
81184c1b94SMartin Matuska 			uio->uio_iov = (++iov);
82184c1b94SMartin Matuska 			uio->uio_iovcnt--;
83184c1b94SMartin Matuska 		}
84184c1b94SMartin Matuska 		uio->uio_skip = skip;
85184c1b94SMartin Matuska 		uio->uio_resid -= cnt;
86184c1b94SMartin Matuska 		uio->uio_loffset += cnt;
87184c1b94SMartin Matuska 		p = (caddr_t)p + cnt;
88184c1b94SMartin Matuska 		n -= cnt;
89184c1b94SMartin Matuska 	}
90184c1b94SMartin Matuska 	return (0);
91184c1b94SMartin Matuska }
92184c1b94SMartin Matuska 
93184c1b94SMartin Matuska static int
941f1e2261SMartin Matuska zfs_uiomove_bvec_impl(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
95184c1b94SMartin Matuska {
96184c1b94SMartin Matuska 	const struct bio_vec *bv = uio->uio_bvec;
97184c1b94SMartin Matuska 	size_t skip = uio->uio_skip;
98184c1b94SMartin Matuska 	ulong_t cnt;
99184c1b94SMartin Matuska 
100184c1b94SMartin Matuska 	while (n && uio->uio_resid) {
101184c1b94SMartin Matuska 		void *paddr;
102184c1b94SMartin Matuska 		cnt = MIN(bv->bv_len - skip, n);
103184c1b94SMartin Matuska 
10475e1fea6SMartin Matuska 		paddr = zfs_kmap_local(bv->bv_page);
1051f1e2261SMartin Matuska 		if (rw == UIO_READ) {
1061f1e2261SMartin Matuska 			/* Copy from buffer 'p' to the bvec data */
107da5137abSMartin Matuska 			memcpy(paddr + bv->bv_offset + skip, p, cnt);
1081f1e2261SMartin Matuska 		} else {
1091f1e2261SMartin Matuska 			/* Copy from bvec data to buffer 'p' */
110da5137abSMartin Matuska 			memcpy(p, paddr + bv->bv_offset + skip, cnt);
1111f1e2261SMartin Matuska 		}
11275e1fea6SMartin Matuska 		zfs_kunmap_local(paddr);
113184c1b94SMartin Matuska 
114184c1b94SMartin Matuska 		skip += cnt;
115184c1b94SMartin Matuska 		if (skip == bv->bv_len) {
116184c1b94SMartin Matuska 			skip = 0;
117184c1b94SMartin Matuska 			uio->uio_bvec = (++bv);
118184c1b94SMartin Matuska 			uio->uio_iovcnt--;
119184c1b94SMartin Matuska 		}
120184c1b94SMartin Matuska 		uio->uio_skip = skip;
121184c1b94SMartin Matuska 		uio->uio_resid -= cnt;
122184c1b94SMartin Matuska 		uio->uio_loffset += cnt;
123184c1b94SMartin Matuska 		p = (caddr_t)p + cnt;
124184c1b94SMartin Matuska 		n -= cnt;
125184c1b94SMartin Matuska 	}
126184c1b94SMartin Matuska 	return (0);
127184c1b94SMartin Matuska }
128184c1b94SMartin Matuska 
1291f1e2261SMartin Matuska static void
1301f1e2261SMartin Matuska zfs_copy_bvec(void *p, size_t skip, size_t cnt, zfs_uio_rw_t rw,
1311f1e2261SMartin Matuska     struct bio_vec *bv)
1321f1e2261SMartin Matuska {
1331f1e2261SMartin Matuska 	void *paddr;
1341f1e2261SMartin Matuska 
13575e1fea6SMartin Matuska 	paddr = zfs_kmap_local(bv->bv_page);
1361f1e2261SMartin Matuska 	if (rw == UIO_READ) {
1371f1e2261SMartin Matuska 		/* Copy from buffer 'p' to the bvec data */
1381f1e2261SMartin Matuska 		memcpy(paddr + bv->bv_offset + skip, p, cnt);
1391f1e2261SMartin Matuska 	} else {
1401f1e2261SMartin Matuska 		/* Copy from bvec data to buffer 'p' */
1411f1e2261SMartin Matuska 		memcpy(p, paddr + bv->bv_offset + skip, cnt);
1421f1e2261SMartin Matuska 	}
14375e1fea6SMartin Matuska 	zfs_kunmap_local(paddr);
1441f1e2261SMartin Matuska }
1451f1e2261SMartin Matuska 
/*
 * Copy 'n' bytes of data between the buffer p[] and the data represented
 * by the request in the uio.  uio->uio_loffset tracks the next logical
 * byte to copy within the request; segments before that offset are
 * skipped, and uio_bvec/uio_iovcnt are not used on this path.
 */
static int
zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
	struct request *rq = uio->rq;
	struct bio_vec bv;
	struct req_iterator iter;
	size_t this_seg_start;	/* logical offset */
	size_t this_seg_end;		/* logical offset */
	size_t skip_in_seg;
	size_t copy_from_seg;
	size_t orig_loffset;
	int copied = 0;		/* set once any bytes have been copied */

	/*
	 * Get the original logical offset of this entire request (because
	 * uio->uio_loffset will be modified over time).
	 */
	orig_loffset = io_offset(NULL, rq);
	this_seg_start = orig_loffset;

	rq_for_each_segment(bv, rq, iter) {
		/*
		 * Lookup what the logical offset of the last byte of this
		 * segment is.
		 */
		this_seg_end = this_seg_start + bv.bv_len - 1;

		/*
		 * We only need to operate on segments that have data we're
		 * copying.
		 */
		if (uio->uio_loffset >= this_seg_start &&
		    uio->uio_loffset <= this_seg_end) {
			/*
			 * Some, or all, of the data in this segment needs to be
			 * copied.
			 */

			/*
			 * We may be not be copying from the first byte in the
			 * segment.  Figure out how many bytes to skip copying
			 * from the beginning of this segment.
			 */
			skip_in_seg = uio->uio_loffset - this_seg_start;

			/*
			 * Calculate the total number of bytes from this
			 * segment that we will be copying.
			 */
			copy_from_seg = MIN(bv.bv_len - skip_in_seg, n);

			/* Copy the bytes */
			zfs_copy_bvec(p, skip_in_seg, copy_from_seg, rw, &bv);
			p = ((char *)p) + copy_from_seg;

			n -= copy_from_seg;
			uio->uio_resid -= copy_from_seg;
			uio->uio_loffset += copy_from_seg;
			copied = 1;	/* We copied some data */
		}

		/* The next segment begins right after this one ends. */
		this_seg_start = this_seg_end + 1;
	}

	if (!copied) {
		/* Didn't copy anything */
		uio->uio_resid = 0;
	}
	return (0);
}
2201f1e2261SMartin Matuska 
2211f1e2261SMartin Matuska static int
2221f1e2261SMartin Matuska zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
2231f1e2261SMartin Matuska {
2241f1e2261SMartin Matuska 	if (uio->rq != NULL)
2251f1e2261SMartin Matuska 		return (zfs_uiomove_bvec_rq(p, n, rw, uio));
2261f1e2261SMartin Matuska 	return (zfs_uiomove_bvec_impl(p, n, rw, uio));
2271f1e2261SMartin Matuska }
2281f1e2261SMartin Matuska 
/*
 * Move "n" bytes between buffer "p" and the iov_iter attached to the
 * uio.  When "revert" is set the iter position is rewound after the
 * copy so the caller's iov_iter is not consumed (used by zfs_uiocopy()).
 */
static int
zfs_uiomove_iter(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio,
    boolean_t revert)
{
	size_t cnt = MIN(n, uio->uio_resid);

	if (uio->uio_skip)
		iov_iter_advance(uio->uio_iter, uio->uio_skip);

	/* cnt is reused to hold the number of bytes actually copied. */
	if (rw == UIO_READ)
		cnt = copy_to_iter(p, cnt, uio->uio_iter);
	else
		cnt = copy_from_iter(p, cnt, uio->uio_iter);

	/*
	 * When operating on a full pipe no bytes are processed.
	 * In which case return EFAULT which is converted to EAGAIN
	 * by the kernel's generic_file_splice_read() function.
	 */
	if (cnt == 0)
		return (EFAULT);

	/*
	 * Revert advancing the uio_iter.  This is set by zfs_uiocopy()
	 * to avoid consuming the uio and its iov_iter structure.
	 */
	if (revert)
		iov_iter_revert(uio->uio_iter, cnt);

	uio->uio_resid -= cnt;
	uio->uio_loffset += cnt;

	return (0);
}
263184c1b94SMartin Matuska 
264184c1b94SMartin Matuska int
265184c1b94SMartin Matuska zfs_uiomove(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
266184c1b94SMartin Matuska {
267184c1b94SMartin Matuska 	if (uio->uio_segflg == UIO_BVEC)
268184c1b94SMartin Matuska 		return (zfs_uiomove_bvec(p, n, rw, uio));
269184c1b94SMartin Matuska 	else if (uio->uio_segflg == UIO_ITER)
270184c1b94SMartin Matuska 		return (zfs_uiomove_iter(p, n, rw, uio, B_FALSE));
271184c1b94SMartin Matuska 	else
272184c1b94SMartin Matuska 		return (zfs_uiomove_iov(p, n, rw, uio));
273184c1b94SMartin Matuska }
274184c1b94SMartin Matuska EXPORT_SYMBOL(zfs_uiomove);
275184c1b94SMartin Matuska 
/*
 * Fault in the pages of the first n bytes specified by the uio structure.
 * 1 byte in each page is touched and the uio struct is unmodified.  Any
 * error will terminate the process as this is only a best attempt to get
 * the pages resident.
 */
int
zfs_uio_prefaultpages(ssize_t n, zfs_uio_t *uio)
{
	if (uio->uio_segflg == UIO_SYSSPACE || uio->uio_segflg == UIO_BVEC ||
	    (uio->uio_extflg & UIO_DIRECT)) {
		/*
		 * There is never a need to fault in kernel pages or Direct
		 * I/O write pages.  Direct I/O write pages have been pinned,
		 * so a fault can never occur on them.
		 */
		return (0);
	} else  {
		ASSERT3S(uio->uio_segflg, ==, UIO_ITER);
		/*
		 * Since at least the Linux 4.18 kernel,
		 * iov_iter_fault_in_readable() can be relied on to fault in
		 * user pages when referenced.
		 */
		if (iov_iter_fault_in_readable(uio->uio_iter, n))
			return (EFAULT);
	}

	return (0);
}
305184c1b94SMartin Matuska EXPORT_SYMBOL(zfs_uio_prefaultpages);
306184c1b94SMartin Matuska 
307184c1b94SMartin Matuska /*
308184c1b94SMartin Matuska  * The same as zfs_uiomove() but doesn't modify uio structure.
309184c1b94SMartin Matuska  * return in cbytes how many bytes were copied.
310184c1b94SMartin Matuska  */
311184c1b94SMartin Matuska int
312184c1b94SMartin Matuska zfs_uiocopy(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, size_t *cbytes)
313184c1b94SMartin Matuska {
314184c1b94SMartin Matuska 	zfs_uio_t uio_copy;
315184c1b94SMartin Matuska 	int ret;
316184c1b94SMartin Matuska 
317da5137abSMartin Matuska 	memcpy(&uio_copy, uio, sizeof (zfs_uio_t));
318184c1b94SMartin Matuska 
319184c1b94SMartin Matuska 	if (uio->uio_segflg == UIO_BVEC)
320184c1b94SMartin Matuska 		ret = zfs_uiomove_bvec(p, n, rw, &uio_copy);
321184c1b94SMartin Matuska 	else if (uio->uio_segflg == UIO_ITER)
322184c1b94SMartin Matuska 		ret = zfs_uiomove_iter(p, n, rw, &uio_copy, B_TRUE);
323184c1b94SMartin Matuska 	else
324184c1b94SMartin Matuska 		ret = zfs_uiomove_iov(p, n, rw, &uio_copy);
325184c1b94SMartin Matuska 
326184c1b94SMartin Matuska 	*cbytes = uio->uio_resid - uio_copy.uio_resid;
327184c1b94SMartin Matuska 
328184c1b94SMartin Matuska 	return (ret);
329184c1b94SMartin Matuska }
330184c1b94SMartin Matuska EXPORT_SYMBOL(zfs_uiocopy);
331184c1b94SMartin Matuska 
/*
 * Drop the next n chars out of *uio.  A no-op if n exceeds the
 * remaining residual.
 */
void
zfs_uioskip(zfs_uio_t *uio, size_t n)
{
	if (n > uio->uio_resid)
		return;
	/*
	 * When using a uio with a struct request, we simply
	 * use uio_loffset as a pointer to the next logical byte to
	 * copy in the request.  We don't have to do any fancy
	 * accounting with uio_bvec/uio_iovcnt since we don't use
	 * them.
	 */
	if (uio->uio_segflg == UIO_BVEC && uio->rq == NULL) {
		uio->uio_skip += n;
		while (uio->uio_iovcnt &&
		    uio->uio_skip >= uio->uio_bvec->bv_len) {
			uio->uio_skip -= uio->uio_bvec->bv_len;
			uio->uio_bvec++;
			uio->uio_iovcnt--;
		}
	} else if (uio->uio_segflg == UIO_ITER) {
		/* The iov_iter tracks its own position; just advance it. */
		iov_iter_advance(uio->uio_iter, n);
	} else {
		ASSERT3S(uio->uio_segflg, ==, UIO_SYSSPACE);
		uio->uio_skip += n;
		while (uio->uio_iovcnt &&
		    uio->uio_skip >= uio->uio_iov->iov_len) {
			uio->uio_skip -= uio->uio_iov->iov_len;
			uio->uio_iov++;
			uio->uio_iovcnt--;
		}
	}

	uio->uio_loffset += n;
	uio->uio_resid -= n;
}
371184c1b94SMartin Matuska EXPORT_SYMBOL(zfs_uioskip);
372184c1b94SMartin Matuska 
3737a7741afSMartin Matuska /*
3747a7741afSMartin Matuska  * Check if the uio is page-aligned in memory.
3757a7741afSMartin Matuska  */
3767a7741afSMartin Matuska boolean_t
3777a7741afSMartin Matuska zfs_uio_page_aligned(zfs_uio_t *uio)
3787a7741afSMartin Matuska {
3797a7741afSMartin Matuska 	boolean_t aligned = B_TRUE;
3807a7741afSMartin Matuska 
381*dd215568SMartin Matuska 	if (uio->uio_segflg == UIO_SYSSPACE) {
3827a7741afSMartin Matuska 		const struct iovec *iov = uio->uio_iov;
3837a7741afSMartin Matuska 		size_t skip = uio->uio_skip;
3847a7741afSMartin Matuska 
3857a7741afSMartin Matuska 		for (int i = uio->uio_iovcnt; i > 0; iov++, i--) {
3867a7741afSMartin Matuska 			uintptr_t addr = (uintptr_t)(iov->iov_base + skip);
3877a7741afSMartin Matuska 			size_t size = iov->iov_len - skip;
3887a7741afSMartin Matuska 			if ((addr & (PAGE_SIZE - 1)) ||
3897a7741afSMartin Matuska 			    (size & (PAGE_SIZE - 1))) {
3907a7741afSMartin Matuska 				aligned = B_FALSE;
3917a7741afSMartin Matuska 				break;
3927a7741afSMartin Matuska 			}
3937a7741afSMartin Matuska 			skip = 0;
3947a7741afSMartin Matuska 		}
3957a7741afSMartin Matuska 	} else if (uio->uio_segflg == UIO_ITER) {
3967a7741afSMartin Matuska 		unsigned long alignment =
3977a7741afSMartin Matuska 		    iov_iter_alignment(uio->uio_iter);
3987a7741afSMartin Matuska 		aligned = IS_P2ALIGNED(alignment, PAGE_SIZE);
3997a7741afSMartin Matuska 	} else {
4007a7741afSMartin Matuska 		/* Currently not supported */
4017a7741afSMartin Matuska 		aligned = B_FALSE;
4027a7741afSMartin Matuska 	}
4037a7741afSMartin Matuska 
4047a7741afSMartin Matuska 	return (aligned);
4057a7741afSMartin Matuska }
4067a7741afSMartin Matuska 
4077a7741afSMartin Matuska 
4087a7741afSMartin Matuska #if defined(HAVE_ZERO_PAGE_GPL_ONLY) || !defined(_LP64)
4097a7741afSMartin Matuska #define	ZFS_MARKEED_PAGE	0x0
4107a7741afSMartin Matuska #define	IS_ZFS_MARKED_PAGE(_p)	0
4117a7741afSMartin Matuska #define	zfs_mark_page(_p)
4127a7741afSMartin Matuska #define	zfs_unmark_page(_p)
4137a7741afSMartin Matuska #define	IS_ZERO_PAGE(_p)	0
4147a7741afSMartin Matuska 
4157a7741afSMartin Matuska #else
4167a7741afSMartin Matuska /*
4177a7741afSMartin Matuska  * Mark pages to know if they were allocated to replace ZERO_PAGE() for
4187a7741afSMartin Matuska  * Direct I/O writes.
4197a7741afSMartin Matuska  */
4207a7741afSMartin Matuska #define	ZFS_MARKED_PAGE		0x5a465350414745 /* ASCII: ZFSPAGE */
4217a7741afSMartin Matuska #define	IS_ZFS_MARKED_PAGE(_p) \
4227a7741afSMartin Matuska 	(page_private(_p) == (unsigned long)ZFS_MARKED_PAGE)
4237a7741afSMartin Matuska #define	IS_ZERO_PAGE(_p) ((_p) == ZERO_PAGE(0))
4247a7741afSMartin Matuska 
4257a7741afSMartin Matuska static inline void
4267a7741afSMartin Matuska zfs_mark_page(struct page *page)
4277a7741afSMartin Matuska {
4287a7741afSMartin Matuska 	ASSERT3P(page, !=, NULL);
4297a7741afSMartin Matuska 	get_page(page);
4307a7741afSMartin Matuska 	SetPagePrivate(page);
4317a7741afSMartin Matuska 	set_page_private(page, ZFS_MARKED_PAGE);
4327a7741afSMartin Matuska }
4337a7741afSMartin Matuska 
4347a7741afSMartin Matuska static inline void
4357a7741afSMartin Matuska zfs_unmark_page(struct page *page)
4367a7741afSMartin Matuska {
4377a7741afSMartin Matuska 	ASSERT3P(page, !=, NULL);
4387a7741afSMartin Matuska 	set_page_private(page, 0UL);
4397a7741afSMartin Matuska 	ClearPagePrivate(page);
4407a7741afSMartin Matuska 	put_page(page);
4417a7741afSMartin Matuska }
4427a7741afSMartin Matuska #endif /* HAVE_ZERO_PAGE_GPL_ONLY || !_LP64 */
4437a7741afSMartin Matuska 
444*dd215568SMartin Matuska #if !defined(HAVE_PIN_USER_PAGES_UNLOCKED)
/*
 * Replace any pinned user page that maps the kernel's shared ZERO_PAGE()
 * with a freshly allocated zero-filled page, so the contents cannot
 * change underneath a Direct I/O write.  Replacement pages are tagged
 * with zfs_mark_page() so zfs_uio_free_dio_pages() frees them correctly.
 */
static void
zfs_uio_dio_check_for_zero_page(zfs_uio_t *uio)
{
	ASSERT3P(uio->uio_dio.pages, !=, NULL);

	for (long i = 0; i < uio->uio_dio.npages; i++) {
		struct page *p = uio->uio_dio.pages[i];
		lock_page(p);

		if (IS_ZERO_PAGE(p)) {
			/*
			 * If the user page points the kernels ZERO_PAGE() a
			 * new zero filled page will just be allocated so the
			 * contents of the page can not be changed by the user
			 * while a Direct I/O write is taking place.
			 */
			gfp_t gfp_zero_page  = __GFP_NOWARN | GFP_NOIO |
			    __GFP_ZERO | GFP_KERNEL;

			ASSERT0(IS_ZFS_MARKED_PAGE(p));
			unlock_page(p);
			put_page(p);

			/*
			 * NOTE(review): __page_cache_alloc() may return NULL
			 * on allocation failure and that is not checked here;
			 * confirm whether GFP_KERNEL retry semantics are
			 * relied upon.
			 */
			uio->uio_dio.pages[i] =
			    __page_cache_alloc(gfp_zero_page);
			zfs_mark_page(uio->uio_dio.pages[i]);
		} else {
			unlock_page(p);
		}
	}
}
476*dd215568SMartin Matuska #endif
4777a7741afSMartin Matuska 
/*
 * Release all pages backing a completed Direct I/O request and free the
 * page-pointer array.  "rw" is currently unused on this platform.
 */
void
zfs_uio_free_dio_pages(zfs_uio_t *uio, zfs_uio_rw_t rw)
{

	ASSERT(uio->uio_extflg & UIO_DIRECT);
	ASSERT3P(uio->uio_dio.pages, !=, NULL);

#if defined(HAVE_PIN_USER_PAGES_UNLOCKED)
	unpin_user_pages(uio->uio_dio.pages, uio->uio_dio.npages);
#else
	for (long i = 0; i < uio->uio_dio.npages; i++) {
		struct page *p = uio->uio_dio.pages[i];

		/* Pages substituted for ZERO_PAGE() were allocated by us. */
		if (IS_ZFS_MARKED_PAGE(p)) {
			zfs_unmark_page(p);
			__free_page(p);
			continue;
		}

		put_page(p);
	}
#endif
	vmem_free(uio->uio_dio.pages,
	    uio->uio_dio.npages * sizeof (struct page *));
}
5037a7741afSMartin Matuska 
504*dd215568SMartin Matuska #if defined(HAVE_PIN_USER_PAGES_UNLOCKED)
5057a7741afSMartin Matuska static int
506*dd215568SMartin Matuska zfs_uio_pin_user_pages(zfs_uio_t *uio, zfs_uio_rw_t rw)
5077a7741afSMartin Matuska {
508*dd215568SMartin Matuska 	long res;
509*dd215568SMartin Matuska 	size_t skip = uio->uio_skip;
510*dd215568SMartin Matuska 	size_t len = uio->uio_resid - skip;
511*dd215568SMartin Matuska 	unsigned int gup_flags = 0;
512*dd215568SMartin Matuska 	unsigned long addr;
513*dd215568SMartin Matuska 	unsigned long nr_pages;
5147a7741afSMartin Matuska 
5157a7741afSMartin Matuska 	/*
516*dd215568SMartin Matuska 	 * Kernel 6.2 introduced the FOLL_PCI_P2PDMA flag. This flag could
517*dd215568SMartin Matuska 	 * possibly be used here in the future to allow for P2P operations with
518*dd215568SMartin Matuska 	 * user pages.
5197a7741afSMartin Matuska 	 */
520*dd215568SMartin Matuska 	if (rw == UIO_READ)
521*dd215568SMartin Matuska 		gup_flags = FOLL_WRITE;
522*dd215568SMartin Matuska 
523*dd215568SMartin Matuska 	if (len == 0)
524*dd215568SMartin Matuska 		return (0);
525*dd215568SMartin Matuska 
526*dd215568SMartin Matuska #if defined(HAVE_ITER_IS_UBUF)
527*dd215568SMartin Matuska 	if (iter_is_ubuf(uio->uio_iter)) {
528*dd215568SMartin Matuska 		nr_pages = DIV_ROUND_UP(len, PAGE_SIZE);
529*dd215568SMartin Matuska 		addr = (unsigned long)uio->uio_iter->ubuf + skip;
530*dd215568SMartin Matuska 		res = pin_user_pages_unlocked(addr, nr_pages,
531*dd215568SMartin Matuska 		    &uio->uio_dio.pages[uio->uio_dio.npages], gup_flags);
5327a7741afSMartin Matuska 		if (res < 0) {
5337a7741afSMartin Matuska 			return (SET_ERROR(-res));
5347a7741afSMartin Matuska 		} else if (len != (res * PAGE_SIZE)) {
535*dd215568SMartin Matuska 			uio->uio_dio.npages += res;
5367a7741afSMartin Matuska 			return (SET_ERROR(EFAULT));
5377a7741afSMartin Matuska 		}
538*dd215568SMartin Matuska 		uio->uio_dio.npages += res;
5397a7741afSMartin Matuska 		return (0);
5407a7741afSMartin Matuska 	}
541*dd215568SMartin Matuska #endif
542*dd215568SMartin Matuska 	const struct iovec *iovp = zfs_uio_iter_iov(uio->uio_iter);
5437a7741afSMartin Matuska 	for (int i = 0; i < uio->uio_iovcnt; i++) {
544*dd215568SMartin Matuska 		size_t amt = iovp->iov_len - skip;
545*dd215568SMartin Matuska 		if (amt == 0) {
5467a7741afSMartin Matuska 			iovp++;
5477a7741afSMartin Matuska 			skip = 0;
5487a7741afSMartin Matuska 			continue;
5497a7741afSMartin Matuska 		}
5507a7741afSMartin Matuska 
551*dd215568SMartin Matuska 		addr = (unsigned long)iovp->iov_base + skip;
552*dd215568SMartin Matuska 		nr_pages = DIV_ROUND_UP(amt, PAGE_SIZE);
553*dd215568SMartin Matuska 		res = pin_user_pages_unlocked(addr, nr_pages,
554*dd215568SMartin Matuska 		    &uio->uio_dio.pages[uio->uio_dio.npages], gup_flags);
555*dd215568SMartin Matuska 		if (res < 0) {
556*dd215568SMartin Matuska 			return (SET_ERROR(-res));
557*dd215568SMartin Matuska 		} else if (amt != (res * PAGE_SIZE)) {
558*dd215568SMartin Matuska 			uio->uio_dio.npages += res;
559*dd215568SMartin Matuska 			return (SET_ERROR(EFAULT));
560*dd215568SMartin Matuska 		}
5617a7741afSMartin Matuska 
562*dd215568SMartin Matuska 		len -= amt;
563*dd215568SMartin Matuska 		uio->uio_dio.npages += res;
5647a7741afSMartin Matuska 		skip = 0;
5657a7741afSMartin Matuska 		iovp++;
566*dd215568SMartin Matuska 	};
5677a7741afSMartin Matuska 
5687a7741afSMartin Matuska 	ASSERT0(len);
5697a7741afSMartin Matuska 
5707a7741afSMartin Matuska 	return (0);
5717a7741afSMartin Matuska }
5727a7741afSMartin Matuska 
573*dd215568SMartin Matuska #else
/*
 * Pin the pages backing the uio's iov_iter via iov_iter_get_pages()
 * (kernels without pin_user_pages_unlocked()).  The iter is advanced
 * while pages are collected, then reverted so the caller's position is
 * unchanged.  Returns 0 on success or a positive errno.
 */
static int
zfs_uio_get_dio_pages_iov_iter(zfs_uio_t *uio, zfs_uio_rw_t rw)
{
	size_t start;
	size_t wanted = uio->uio_resid - uio->uio_skip;
	ssize_t rollback = 0;	/* total bytes to revert on the iter */
	ssize_t cnt;
	unsigned maxpages = DIV_ROUND_UP(wanted, PAGE_SIZE);

	while (wanted) {
		cnt = iov_iter_get_pages(uio->uio_iter,
		    &uio->uio_dio.pages[uio->uio_dio.npages],
		    wanted, maxpages, &start);
		if (cnt < 0) {
			iov_iter_revert(uio->uio_iter, rollback);
			return (SET_ERROR(-cnt));
		}
		/*
		 * All Direct I/O operations must be page aligned.
		 */
		ASSERT(IS_P2ALIGNED(start, PAGE_SIZE));
		uio->uio_dio.npages += DIV_ROUND_UP(cnt, PAGE_SIZE);
		rollback += cnt;
		wanted -= cnt;
		iov_iter_advance(uio->uio_iter, cnt);

	}
	ASSERT3U(rollback, ==, uio->uio_resid - uio->uio_skip);
	iov_iter_revert(uio->uio_iter, rollback);

	return (0);
}
606*dd215568SMartin Matuska #endif /* HAVE_PIN_USER_PAGES_UNLOCKED */
6077a7741afSMartin Matuska 
/*
 * This function pins user pages. In the event that the user pages were not
 * successfully pinned an error value is returned.
 *
 * On success, 0 is returned and UIO_DIRECT is set on the uio.  Only
 * UIO_ITER uios are supported; anything else returns EOPNOTSUPP.
 */
int
zfs_uio_get_dio_pages_alloc(zfs_uio_t *uio, zfs_uio_rw_t rw)
{
	int error = 0;
	long npages = DIV_ROUND_UP(uio->uio_resid, PAGE_SIZE);
	size_t size = npages * sizeof (struct page *);

	if (uio->uio_segflg == UIO_ITER) {
		/* Array to hold the pinned page pointers; freed on error. */
		uio->uio_dio.pages = vmem_alloc(size, KM_SLEEP);
#if defined(HAVE_PIN_USER_PAGES_UNLOCKED)
		error = zfs_uio_pin_user_pages(uio, rw);
#else
		error = zfs_uio_get_dio_pages_iov_iter(uio, rw);
#endif
	} else {
		return (SET_ERROR(EOPNOTSUPP));
	}

	ASSERT3S(uio->uio_dio.npages, >=, 0);

	if (error) {
		/* Undo any partial pinning before freeing the array. */
#if defined(HAVE_PIN_USER_PAGES_UNLOCKED)
		unpin_user_pages(uio->uio_dio.pages, uio->uio_dio.npages);
#else
		for (long i = 0; i < uio->uio_dio.npages; i++)
			put_page(uio->uio_dio.pages[i]);
#endif
		vmem_free(uio->uio_dio.pages, size);
		return (error);
	} else {
		ASSERT3S(uio->uio_dio.npages, ==, npages);
	}

#if !defined(HAVE_PIN_USER_PAGES_UNLOCKED)
	/* Writes must not alias the shared ZERO_PAGE(); swap it out. */
	if (rw == UIO_WRITE)
		zfs_uio_dio_check_for_zero_page(uio);
#endif

	uio->uio_extflg |= UIO_DIRECT;

	return (0);
}
6567a7741afSMartin Matuska 
657184c1b94SMartin Matuska #endif /* _KERNEL */
658