/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */
/*
 * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
 */

#ifdef _KERNEL

#include <sys/errno.h>
#include <sys/vmem.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/uio_impl.h>
#include <sys/string.h>
#include <sys/zfs_refcount.h>
#include <sys/zfs_debug.h>
#include <linux/kmap_compat.h>
#include <linux/uaccess.h>
#include <linux/pagemap.h>
#include <linux/mman.h>

/*
 * Move "n" bytes at byte address "p"; "rw" indicates the direction
 * of the move, and the I/O parameters are provided in "uio", which is
 * updated to reflect the data which was moved.  Returns 0 on success or
 * a non-zero errno on failure.
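 *
 * An illustrative call from a read path (not taken from this file):
 * after data has been staged in a kernel buffer "buf", it is copied
 * out to the memory described by the uio with
 *
 *	error = zfs_uiomove(buf, len, UIO_READ, uio);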
 */
static int
zfs_uiomove_iov(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
	const struct iovec *iov = uio->uio_iov;
	size_t skip = uio->uio_skip;
	ulong_t cnt;

	ASSERT3S(uio->uio_segflg, ==, UIO_SYSSPACE);
	while (n && uio->uio_resid) {
		cnt = MIN(iov->iov_len - skip, n);
		if (rw == UIO_READ)
			memcpy(iov->iov_base + skip, p, cnt);
		else
			memcpy(p, iov->iov_base + skip, cnt);
		skip += cnt;
		if (skip == iov->iov_len) {
			skip = 0;
			uio->uio_iov = (++iov);
			uio->uio_iovcnt--;
		}
		uio->uio_skip = skip;
		uio->uio_resid -= cnt;
		uio->uio_loffset += cnt;
		p = (caddr_t)p + cnt;
		n -= cnt;
	}
	return (0);
}

static int
zfs_uiomove_bvec_impl(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
	const struct bio_vec *bv = uio->uio_bvec;
	size_t skip = uio->uio_skip;
	ulong_t cnt;

	while (n && uio->uio_resid) {
		void *paddr;
		cnt = MIN(bv->bv_len - skip, n);

		paddr = zfs_kmap_local(bv->bv_page);
		if (rw == UIO_READ) {
			/* Copy from buffer 'p' to the bvec data */
			memcpy(paddr + bv->bv_offset + skip, p, cnt);
		} else {
			/* Copy from bvec data to buffer 'p' */
			memcpy(p, paddr + bv->bv_offset + skip, cnt);
		}
		zfs_kunmap_local(paddr);

		skip += cnt;
		if (skip == bv->bv_len) {
			skip = 0;
			uio->uio_bvec = (++bv);
			uio->uio_iovcnt--;
		}
		uio->uio_skip = skip;
		uio->uio_resid -= cnt;
		uio->uio_loffset += cnt;
		p = (caddr_t)p + cnt;
		n -= cnt;
	}
	return (0);
}

static void
zfs_copy_bvec(void *p, size_t skip, size_t cnt, zfs_uio_rw_t rw,
    struct bio_vec *bv)
{
	void *paddr;

	paddr = zfs_kmap_local(bv->bv_page);
	if (rw == UIO_READ) {
		/* Copy from buffer 'p' to the bvec data */
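		/*
		 * Note: paddr is a short-lived CPU-local mapping of
		 * bv_page and is only valid until the zfs_kunmap_local()
		 * call below.
		 */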
		memcpy(paddr + bv->bv_offset + skip, p, cnt);
	} else {
		/* Copy from bvec data to buffer 'p' */
		memcpy(p, paddr + bv->bv_offset + skip, cnt);
	}
	zfs_kunmap_local(paddr);
}

/*
 * Copy 'n' bytes of data between the buffer p[] and the data represented
 * by the request in the uio.
 */
static int
zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
	struct request *rq = uio->rq;
	struct bio_vec bv;
	struct req_iterator iter;
	size_t this_seg_start;	/* logical offset */
	size_t this_seg_end;	/* logical offset */
	size_t skip_in_seg;
	size_t copy_from_seg;
	size_t orig_loffset;
	int copied = 0;

	/*
	 * Get the original logical offset of this entire request (because
	 * uio->uio_loffset will be modified over time).
	 */
	orig_loffset = io_offset(NULL, rq);
	this_seg_start = orig_loffset;

	rq_for_each_segment(bv, rq, iter) {
		/*
		 * Look up what the logical offset of the last byte of this
		 * segment is.
		 */
		this_seg_end = this_seg_start + bv.bv_len - 1;

		/*
		 * We only need to operate on segments that have data we're
		 * copying.
		 */
		if (uio->uio_loffset >= this_seg_start &&
		    uio->uio_loffset <= this_seg_end) {
			/*
			 * Some, or all, of the data in this segment needs
			 * to be copied.
			 */

			/*
			 * We may not be copying from the first byte in the
			 * segment.  Figure out how many bytes to skip copying
			 * from the beginning of this segment.
			 */
			skip_in_seg = uio->uio_loffset - this_seg_start;

			/*
			 * Calculate the total number of bytes from this
			 * segment that we will be copying.
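			 * This is bounded both by the bytes remaining in
			 * the segment past skip_in_seg and by the bytes,
			 * n, still left to move.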
			 */
			copy_from_seg = MIN(bv.bv_len - skip_in_seg, n);

			/* Copy the bytes */
			zfs_copy_bvec(p, skip_in_seg, copy_from_seg, rw, &bv);
			p = ((char *)p) + copy_from_seg;

			n -= copy_from_seg;
			uio->uio_resid -= copy_from_seg;
			uio->uio_loffset += copy_from_seg;
			copied = 1;	/* We copied some data */
		}

		this_seg_start = this_seg_end + 1;
	}

	if (!copied) {
		/* Didn't copy anything */
		uio->uio_resid = 0;
	}
	return (0);
}

static int
zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
	if (uio->rq != NULL)
		return (zfs_uiomove_bvec_rq(p, n, rw, uio));
	return (zfs_uiomove_bvec_impl(p, n, rw, uio));
}

static int
zfs_uiomove_iter(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio,
    boolean_t revert)
{
	size_t cnt = MIN(n, uio->uio_resid);

	if (uio->uio_skip)
		iov_iter_advance(uio->uio_iter, uio->uio_skip);

	if (rw == UIO_READ)
		cnt = copy_to_iter(p, cnt, uio->uio_iter);
	else
		cnt = copy_from_iter(p, cnt, uio->uio_iter);

	/*
	 * When operating on a full pipe no bytes are processed, in which
	 * case we return EFAULT, which is converted to EAGAIN by the
	 * kernel's generic_file_splice_read() function.
	 */
	if (cnt == 0)
		return (EFAULT);

	/*
	 * Revert advancing the uio_iter.  This is set by zfs_uiocopy()
	 * to avoid consuming the uio and its iov_iter structure.
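	 * iov_iter_revert() rewinds the iterator by the cnt bytes just
	 * copied, leaving the caller's iov_iter position untouched.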
	 */
	if (revert)
		iov_iter_revert(uio->uio_iter, cnt);

	uio->uio_resid -= cnt;
	uio->uio_loffset += cnt;

	return (0);
}

int
zfs_uiomove(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
	if (uio->uio_segflg == UIO_BVEC)
		return (zfs_uiomove_bvec(p, n, rw, uio));
	else if (uio->uio_segflg == UIO_ITER)
		return (zfs_uiomove_iter(p, n, rw, uio, B_FALSE));
	else
		return (zfs_uiomove_iov(p, n, rw, uio));
}
EXPORT_SYMBOL(zfs_uiomove);

/*
 * Fault in the pages of the first n bytes specified by the uio structure.
 * 1 byte in each page is touched and the uio struct is unmodified.  Any
 * error terminates the attempt early, as this is only a best effort to
 * get the pages resident.
 */
int
zfs_uio_prefaultpages(ssize_t n, zfs_uio_t *uio)
{
	if (uio->uio_segflg == UIO_SYSSPACE || uio->uio_segflg == UIO_BVEC ||
	    (uio->uio_extflg & UIO_DIRECT)) {
		/*
		 * There is never a need to fault in kernel pages or Direct
		 * I/O write pages.  Direct I/O write pages have already been
		 * pinned, so a fault can never occur on them.
		 */
		return (0);
	} else {
		ASSERT3S(uio->uio_segflg, ==, UIO_ITER);
		/*
		 * Since at least the Linux 4.18 kernel,
		 * iov_iter_fault_in_readable() can be relied on to fault in
		 * user pages when referenced.
		 */
		if (iov_iter_fault_in_readable(uio->uio_iter, n))
			return (EFAULT);
	}

	return (0);
}
EXPORT_SYMBOL(zfs_uio_prefaultpages);

/*
 * The same as zfs_uiomove() but doesn't modify the uio structure.
 * Returns in cbytes how many bytes were copied.
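 * This works by operating on a local copy of the uio, so the caller's
 * uio (and its iov_iter, when one is in use) is left unconsumed.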
 */
int
zfs_uiocopy(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, size_t *cbytes)
{
	zfs_uio_t uio_copy;
	int ret;

	memcpy(&uio_copy, uio, sizeof (zfs_uio_t));

	if (uio->uio_segflg == UIO_BVEC)
		ret = zfs_uiomove_bvec(p, n, rw, &uio_copy);
	else if (uio->uio_segflg == UIO_ITER)
		ret = zfs_uiomove_iter(p, n, rw, &uio_copy, B_TRUE);
	else
		ret = zfs_uiomove_iov(p, n, rw, &uio_copy);

	*cbytes = uio->uio_resid - uio_copy.uio_resid;

	return (ret);
}
EXPORT_SYMBOL(zfs_uiocopy);

/*
 * Drop the next n chars out of *uio.
 */
void
zfs_uioskip(zfs_uio_t *uio, size_t n)
{
	if (n > uio->uio_resid)
		return;
	/*
	 * When using a uio with a struct request, we simply
	 * use uio_loffset as a pointer to the next logical byte to
	 * copy in the request.  We don't have to do any fancy
	 * accounting with uio_bvec/uio_iovcnt since we don't use
	 * them.
	 */
	if (uio->uio_segflg == UIO_BVEC && uio->rq == NULL) {
		uio->uio_skip += n;
		while (uio->uio_iovcnt &&
		    uio->uio_skip >= uio->uio_bvec->bv_len) {
			uio->uio_skip -= uio->uio_bvec->bv_len;
			uio->uio_bvec++;
			uio->uio_iovcnt--;
		}
	} else if (uio->uio_segflg == UIO_ITER) {
		iov_iter_advance(uio->uio_iter, n);
	} else {
		ASSERT3S(uio->uio_segflg, ==, UIO_SYSSPACE);
		uio->uio_skip += n;
		while (uio->uio_iovcnt &&
		    uio->uio_skip >= uio->uio_iov->iov_len) {
			uio->uio_skip -= uio->uio_iov->iov_len;
			uio->uio_iov++;
			uio->uio_iovcnt--;
		}
	}

	uio->uio_loffset += n;
	uio->uio_resid -= n;
}
EXPORT_SYMBOL(zfs_uioskip);

/*
 * Check if the uio is page-aligned in memory.
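 * Both the base address and the length of every segment must be
 * multiples of PAGE_SIZE for the uio to qualify.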
 */
boolean_t
zfs_uio_page_aligned(zfs_uio_t *uio)
{
	boolean_t aligned = B_TRUE;

	if (uio->uio_segflg == UIO_SYSSPACE) {
		const struct iovec *iov = uio->uio_iov;
		size_t skip = uio->uio_skip;

		for (int i = uio->uio_iovcnt; i > 0; iov++, i--) {
			uintptr_t addr = (uintptr_t)(iov->iov_base + skip);
			size_t size = iov->iov_len - skip;
			if ((addr & (PAGE_SIZE - 1)) ||
			    (size & (PAGE_SIZE - 1))) {
				aligned = B_FALSE;
				break;
			}
			skip = 0;
		}
	} else if (uio->uio_segflg == UIO_ITER) {
		unsigned long alignment =
		    iov_iter_alignment(uio->uio_iter);
		aligned = IS_P2ALIGNED(alignment, PAGE_SIZE);
	} else {
		/* Currently not supported */
		aligned = B_FALSE;
	}

	return (aligned);
}

#if defined(HAVE_ZERO_PAGE_GPL_ONLY) || !defined(_LP64)
#define	ZFS_MARKED_PAGE		0x0
#define	IS_ZFS_MARKED_PAGE(_p)	0
#define	zfs_mark_page(_p)
#define	zfs_unmark_page(_p)
#define	IS_ZERO_PAGE(_p)	0

#else
/*
 * Mark pages to know if they were allocated to replace ZERO_PAGE() for
 * Direct I/O writes.
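 * A marked page stores ZFS_MARKED_PAGE in its private field so that
 * zfs_uio_free_dio_pages() can later recognize the replacement pages
 * it allocated and release them with __free_page().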
 */
#define	ZFS_MARKED_PAGE		0x5a465350414745 /* ASCII: ZFSPAGE */
#define	IS_ZFS_MARKED_PAGE(_p) \
	(page_private(_p) == (unsigned long)ZFS_MARKED_PAGE)
#define	IS_ZERO_PAGE(_p) ((_p) == ZERO_PAGE(0))

static inline void
zfs_mark_page(struct page *page)
{
	ASSERT3P(page, !=, NULL);
	get_page(page);
	SetPagePrivate(page);
	set_page_private(page, ZFS_MARKED_PAGE);
}

static inline void
zfs_unmark_page(struct page *page)
{
	ASSERT3P(page, !=, NULL);
	set_page_private(page, 0UL);
	ClearPagePrivate(page);
	put_page(page);
}
#endif /* HAVE_ZERO_PAGE_GPL_ONLY || !_LP64 */

#if !defined(HAVE_PIN_USER_PAGES_UNLOCKED)
static void
zfs_uio_dio_check_for_zero_page(zfs_uio_t *uio)
{
	ASSERT3P(uio->uio_dio.pages, !=, NULL);

	for (long i = 0; i < uio->uio_dio.npages; i++) {
		struct page *p = uio->uio_dio.pages[i];
		lock_page(p);

		if (IS_ZERO_PAGE(p)) {
			/*
			 * If the user page points to the kernel's
			 * ZERO_PAGE(), a new zero-filled page will just be
			 * allocated so the contents of the page cannot be
			 * changed by the user while a Direct I/O write is
			 * taking place.
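			 * The replacement page is allocated below and
			 * marked so that zfs_uio_free_dio_pages() can
			 * identify it and free it once the I/O has
			 * completed.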
			 */
			gfp_t gfp_zero_page = __GFP_NOWARN | GFP_NOIO |
			    __GFP_ZERO | GFP_KERNEL;

			ASSERT0(IS_ZFS_MARKED_PAGE(p));
			unlock_page(p);
			put_page(p);

			uio->uio_dio.pages[i] =
			    __page_cache_alloc(gfp_zero_page);
			zfs_mark_page(uio->uio_dio.pages[i]);
		} else {
			unlock_page(p);
		}
	}
}
#endif

void
zfs_uio_free_dio_pages(zfs_uio_t *uio, zfs_uio_rw_t rw)
{
	ASSERT(uio->uio_extflg & UIO_DIRECT);
	ASSERT3P(uio->uio_dio.pages, !=, NULL);

#if defined(HAVE_PIN_USER_PAGES_UNLOCKED)
	unpin_user_pages(uio->uio_dio.pages, uio->uio_dio.npages);
#else
	for (long i = 0; i < uio->uio_dio.npages; i++) {
		struct page *p = uio->uio_dio.pages[i];

		if (IS_ZFS_MARKED_PAGE(p)) {
			zfs_unmark_page(p);
			__free_page(p);
			continue;
		}

		put_page(p);
	}
#endif
	vmem_free(uio->uio_dio.pages,
	    uio->uio_dio.npages * sizeof (struct page *));
}

#if defined(HAVE_PIN_USER_PAGES_UNLOCKED)
static int
zfs_uio_pin_user_pages(zfs_uio_t *uio, zfs_uio_rw_t rw)
{
	long res;
	size_t skip = uio->uio_skip;
	size_t len = uio->uio_resid - skip;
	unsigned int gup_flags = 0;
	unsigned long addr;
	unsigned long nr_pages;

	/*
	 * Kernel 6.2 introduced the FOLL_PCI_P2PDMA flag.  This flag could
	 * possibly be used here in the future to allow for P2P operations
	 * with user pages.
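	 *
	 * For a read, the device will be writing into the pinned user
	 * pages, so FOLL_WRITE must be requested; a write only reads
	 * from the pages and needs no additional flags.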
	 */
	if (rw == UIO_READ)
		gup_flags = FOLL_WRITE;

	if (len == 0)
		return (0);

#if defined(HAVE_ITER_IS_UBUF)
	if (iter_is_ubuf(uio->uio_iter)) {
		nr_pages = DIV_ROUND_UP(len, PAGE_SIZE);
		addr = (unsigned long)uio->uio_iter->ubuf + skip;
		res = pin_user_pages_unlocked(addr, nr_pages,
		    &uio->uio_dio.pages[uio->uio_dio.npages], gup_flags);
		if (res < 0) {
			return (SET_ERROR(-res));
		} else if (len != (res * PAGE_SIZE)) {
			uio->uio_dio.npages += res;
			return (SET_ERROR(EFAULT));
		}
		uio->uio_dio.npages += res;
		return (0);
	}
#endif
	const struct iovec *iovp = zfs_uio_iter_iov(uio->uio_iter);
	for (int i = 0; i < uio->uio_iovcnt; i++) {
		size_t amt = iovp->iov_len - skip;
		if (amt == 0) {
			iovp++;
			skip = 0;
			continue;
		}

		addr = (unsigned long)iovp->iov_base + skip;
		nr_pages = DIV_ROUND_UP(amt, PAGE_SIZE);
		res = pin_user_pages_unlocked(addr, nr_pages,
		    &uio->uio_dio.pages[uio->uio_dio.npages], gup_flags);
		if (res < 0) {
			return (SET_ERROR(-res));
		} else if (amt != (res * PAGE_SIZE)) {
			uio->uio_dio.npages += res;
			return (SET_ERROR(EFAULT));
		}

		len -= amt;
		uio->uio_dio.npages += res;
		skip = 0;
		iovp++;
	}

	ASSERT0(len);

	return (0);
}

#else
static int
zfs_uio_get_dio_pages_iov_iter(zfs_uio_t *uio, zfs_uio_rw_t rw)
{
	size_t start;
	size_t wanted = uio->uio_resid - uio->uio_skip;
	ssize_t rollback = 0;
	ssize_t cnt;
	unsigned maxpages = DIV_ROUND_UP(wanted, PAGE_SIZE);

	while (wanted) {
		cnt = iov_iter_get_pages(uio->uio_iter,
		    &uio->uio_dio.pages[uio->uio_dio.npages],
		    wanted, maxpages, &start);
		if (cnt < 0) {
			iov_iter_revert(uio->uio_iter, rollback);
			return (SET_ERROR(-cnt));
		}
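		/*
		 * iov_iter_get_pages() returns the number of bytes it
		 * pinned, and "start" holds the offset of the data
		 * within the first pinned page.
		 */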
		/*
		 * All Direct I/O operations must be page aligned.
		 */
		ASSERT(IS_P2ALIGNED(start, PAGE_SIZE));
		uio->uio_dio.npages += DIV_ROUND_UP(cnt, PAGE_SIZE);
		rollback += cnt;
		wanted -= cnt;
		iov_iter_advance(uio->uio_iter, cnt);
	}
	ASSERT3U(rollback, ==, uio->uio_resid - uio->uio_skip);
	iov_iter_revert(uio->uio_iter, rollback);

	return (0);
}
#endif /* HAVE_PIN_USER_PAGES_UNLOCKED */

/*
 * This function pins user pages.  In the event that the user pages are not
 * successfully pinned, an error value is returned.
 *
 * On success, 0 is returned.
 */
int
zfs_uio_get_dio_pages_alloc(zfs_uio_t *uio, zfs_uio_rw_t rw)
{
	int error = 0;
	long npages = DIV_ROUND_UP(uio->uio_resid, PAGE_SIZE);
	size_t size = npages * sizeof (struct page *);

	if (uio->uio_segflg == UIO_ITER) {
		uio->uio_dio.pages = vmem_alloc(size, KM_SLEEP);
#if defined(HAVE_PIN_USER_PAGES_UNLOCKED)
		error = zfs_uio_pin_user_pages(uio, rw);
#else
		error = zfs_uio_get_dio_pages_iov_iter(uio, rw);
#endif
	} else {
		return (SET_ERROR(EOPNOTSUPP));
	}

	ASSERT3S(uio->uio_dio.npages, >=, 0);

	if (error) {
#if defined(HAVE_PIN_USER_PAGES_UNLOCKED)
		unpin_user_pages(uio->uio_dio.pages, uio->uio_dio.npages);
#else
		for (long i = 0; i < uio->uio_dio.npages; i++)
			put_page(uio->uio_dio.pages[i]);
#endif
		vmem_free(uio->uio_dio.pages, size);
		return (error);
	} else {
		ASSERT3S(uio->uio_dio.npages, ==, npages);
	}

#if !defined(HAVE_PIN_USER_PAGES_UNLOCKED)
	if (rw == UIO_WRITE)
		zfs_uio_dio_check_for_zero_page(uio);
#endif

	uio->uio_extflg |= UIO_DIRECT;

	return (0);
}

#endif /* _KERNEL */