/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
24eda14cbcSMatt Macy */ 25eda14cbcSMatt Macy 26eda14cbcSMatt Macy /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 27eda14cbcSMatt Macy /* All Rights Reserved */ 28eda14cbcSMatt Macy 29eda14cbcSMatt Macy /* 30eda14cbcSMatt Macy * University Copyright- Copyright (c) 1982, 1986, 1988 31eda14cbcSMatt Macy * The Regents of the University of California 32eda14cbcSMatt Macy * All Rights Reserved 33eda14cbcSMatt Macy * 34eda14cbcSMatt Macy * University Acknowledgment- Portions of this document are derived from 35eda14cbcSMatt Macy * software developed by the University of California, Berkeley, and its 36eda14cbcSMatt Macy * contributors. 37eda14cbcSMatt Macy */ 38eda14cbcSMatt Macy 39eda14cbcSMatt Macy /* 40eda14cbcSMatt Macy * $FreeBSD$ 41eda14cbcSMatt Macy */ 42eda14cbcSMatt Macy 43eda14cbcSMatt Macy #include <sys/param.h> 44ba27dd8bSMartin Matuska #include <sys/uio_impl.h> 45eda14cbcSMatt Macy #include <sys/vnode.h> 46184c1b94SMartin Matuska #include <sys/zfs_znode.h> 47*7a7741afSMartin Matuska #include <sys/byteorder.h> 48*7a7741afSMartin Matuska #include <sys/lock.h> 49*7a7741afSMartin Matuska #include <sys/vm.h> 50*7a7741afSMartin Matuska #include <vm/vm_map.h> 51eda14cbcSMatt Macy 5275e1fea6SMartin Matuska static void 5375e1fea6SMartin Matuska zfs_freeuio(struct uio *uio) 5475e1fea6SMartin Matuska { 5575e1fea6SMartin Matuska #if __FreeBSD_version > 1500013 5675e1fea6SMartin Matuska freeuio(uio); 5775e1fea6SMartin Matuska #else 5875e1fea6SMartin Matuska free(uio, M_IOV); 5975e1fea6SMartin Matuska #endif 6075e1fea6SMartin Matuska } 6175e1fea6SMartin Matuska 62ba27dd8bSMartin Matuska int 63ba27dd8bSMartin Matuska zfs_uiomove(void *cp, size_t n, zfs_uio_rw_t dir, zfs_uio_t *uio) 64ba27dd8bSMartin Matuska { 6516038816SMartin Matuska ASSERT3U(zfs_uio_rw(uio), ==, dir); 66ba27dd8bSMartin Matuska return (uiomove(cp, (int)n, GET_UIO_STRUCT(uio))); 67ba27dd8bSMartin Matuska } 68ba27dd8bSMartin Matuska 69eda14cbcSMatt Macy /* 70184c1b94SMartin Matuska * same as 
zfs_uiomove() but doesn't modify uio structure. 71eda14cbcSMatt Macy * return in cbytes how many bytes were copied. 72eda14cbcSMatt Macy */ 73eda14cbcSMatt Macy int 74184c1b94SMartin Matuska zfs_uiocopy(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, size_t *cbytes) 75eda14cbcSMatt Macy { 76eda14cbcSMatt Macy struct iovec small_iovec[1]; 77eda14cbcSMatt Macy struct uio small_uio_clone; 78eda14cbcSMatt Macy struct uio *uio_clone; 79eda14cbcSMatt Macy int error; 80eda14cbcSMatt Macy 81184c1b94SMartin Matuska ASSERT3U(zfs_uio_rw(uio), ==, rw); 82184c1b94SMartin Matuska if (zfs_uio_iovcnt(uio) == 1) { 83184c1b94SMartin Matuska small_uio_clone = *(GET_UIO_STRUCT(uio)); 84184c1b94SMartin Matuska small_iovec[0] = *(GET_UIO_STRUCT(uio)->uio_iov); 85eda14cbcSMatt Macy small_uio_clone.uio_iov = small_iovec; 86eda14cbcSMatt Macy uio_clone = &small_uio_clone; 87eda14cbcSMatt Macy } else { 88184c1b94SMartin Matuska uio_clone = cloneuio(GET_UIO_STRUCT(uio)); 89eda14cbcSMatt Macy } 90eda14cbcSMatt Macy 91eda14cbcSMatt Macy error = vn_io_fault_uiomove(p, n, uio_clone); 92184c1b94SMartin Matuska *cbytes = zfs_uio_resid(uio) - uio_clone->uio_resid; 93eda14cbcSMatt Macy if (uio_clone != &small_uio_clone) 9475e1fea6SMartin Matuska zfs_freeuio(uio_clone); 95eda14cbcSMatt Macy return (error); 96eda14cbcSMatt Macy } 97eda14cbcSMatt Macy 98eda14cbcSMatt Macy /* 99eda14cbcSMatt Macy * Drop the next n chars out of *uiop. 100eda14cbcSMatt Macy */ 101eda14cbcSMatt Macy void 102184c1b94SMartin Matuska zfs_uioskip(zfs_uio_t *uio, size_t n) 103eda14cbcSMatt Macy { 104184c1b94SMartin Matuska zfs_uio_seg_t segflg; 105eda14cbcSMatt Macy 106eda14cbcSMatt Macy /* For the full compatibility with illumos. 
*/ 107184c1b94SMartin Matuska if (n > zfs_uio_resid(uio)) 108eda14cbcSMatt Macy return; 109eda14cbcSMatt Macy 110184c1b94SMartin Matuska segflg = zfs_uio_segflg(uio); 111184c1b94SMartin Matuska zfs_uio_segflg(uio) = UIO_NOCOPY; 112184c1b94SMartin Matuska zfs_uiomove(NULL, n, zfs_uio_rw(uio), uio); 113184c1b94SMartin Matuska zfs_uio_segflg(uio) = segflg; 114184c1b94SMartin Matuska } 115184c1b94SMartin Matuska 116184c1b94SMartin Matuska int 117184c1b94SMartin Matuska zfs_uio_fault_move(void *p, size_t n, zfs_uio_rw_t dir, zfs_uio_t *uio) 118184c1b94SMartin Matuska { 11916038816SMartin Matuska ASSERT3U(zfs_uio_rw(uio), ==, dir); 120184c1b94SMartin Matuska return (vn_io_fault_uiomove(p, n, GET_UIO_STRUCT(uio))); 121eda14cbcSMatt Macy } 122*7a7741afSMartin Matuska 123*7a7741afSMartin Matuska /* 124*7a7741afSMartin Matuska * Check if the uio is page-aligned in memory. 125*7a7741afSMartin Matuska */ 126*7a7741afSMartin Matuska boolean_t 127*7a7741afSMartin Matuska zfs_uio_page_aligned(zfs_uio_t *uio) 128*7a7741afSMartin Matuska { 129*7a7741afSMartin Matuska const struct iovec *iov = GET_UIO_STRUCT(uio)->uio_iov; 130*7a7741afSMartin Matuska 131*7a7741afSMartin Matuska for (int i = zfs_uio_iovcnt(uio); i > 0; iov++, i--) { 132*7a7741afSMartin Matuska uintptr_t addr = (uintptr_t)iov->iov_base; 133*7a7741afSMartin Matuska size_t size = iov->iov_len; 134*7a7741afSMartin Matuska if ((addr & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { 135*7a7741afSMartin Matuska return (B_FALSE); 136*7a7741afSMartin Matuska } 137*7a7741afSMartin Matuska } 138*7a7741afSMartin Matuska 139*7a7741afSMartin Matuska return (B_TRUE); 140*7a7741afSMartin Matuska } 141*7a7741afSMartin Matuska 142*7a7741afSMartin Matuska static void 143*7a7741afSMartin Matuska zfs_uio_set_pages_to_stable(zfs_uio_t *uio) 144*7a7741afSMartin Matuska { 145*7a7741afSMartin Matuska ASSERT3P(uio->uio_dio.pages, !=, NULL); 146*7a7741afSMartin Matuska ASSERT3S(uio->uio_dio.npages, >, 0); 147*7a7741afSMartin Matuska 
148*7a7741afSMartin Matuska for (int i = 0; i < uio->uio_dio.npages; i++) { 149*7a7741afSMartin Matuska vm_page_t page = uio->uio_dio.pages[i]; 150*7a7741afSMartin Matuska ASSERT3P(page, !=, NULL); 151*7a7741afSMartin Matuska 152*7a7741afSMartin Matuska MPASS(page == PHYS_TO_VM_PAGE(VM_PAGE_TO_PHYS(page))); 153*7a7741afSMartin Matuska vm_page_busy_acquire(page, VM_ALLOC_SBUSY); 154*7a7741afSMartin Matuska pmap_remove_write(page); 155*7a7741afSMartin Matuska } 156*7a7741afSMartin Matuska } 157*7a7741afSMartin Matuska 158*7a7741afSMartin Matuska static void 159*7a7741afSMartin Matuska zfs_uio_release_stable_pages(zfs_uio_t *uio) 160*7a7741afSMartin Matuska { 161*7a7741afSMartin Matuska ASSERT3P(uio->uio_dio.pages, !=, NULL); 162*7a7741afSMartin Matuska for (int i = 0; i < uio->uio_dio.npages; i++) { 163*7a7741afSMartin Matuska vm_page_t page = uio->uio_dio.pages[i]; 164*7a7741afSMartin Matuska 165*7a7741afSMartin Matuska ASSERT3P(page, !=, NULL); 166*7a7741afSMartin Matuska vm_page_sunbusy(page); 167*7a7741afSMartin Matuska } 168*7a7741afSMartin Matuska } 169*7a7741afSMartin Matuska 170*7a7741afSMartin Matuska /* 171*7a7741afSMartin Matuska * If the operation is marked as read, then we are stating the pages will be 172*7a7741afSMartin Matuska * written to and must be given write access. 173*7a7741afSMartin Matuska */ 174*7a7741afSMartin Matuska static int 175*7a7741afSMartin Matuska zfs_uio_hold_pages(unsigned long start, size_t len, int nr_pages, 176*7a7741afSMartin Matuska zfs_uio_rw_t rw, vm_page_t *pages) 177*7a7741afSMartin Matuska { 178*7a7741afSMartin Matuska vm_map_t map; 179*7a7741afSMartin Matuska vm_prot_t prot; 180*7a7741afSMartin Matuska int count; 181*7a7741afSMartin Matuska 182*7a7741afSMartin Matuska map = &curthread->td_proc->p_vmspace->vm_map; 183*7a7741afSMartin Matuska ASSERT3S(len, >, 0); 184*7a7741afSMartin Matuska 185*7a7741afSMartin Matuska prot = rw == UIO_READ ? 
(VM_PROT_READ | VM_PROT_WRITE) : VM_PROT_READ; 186*7a7741afSMartin Matuska count = vm_fault_quick_hold_pages(map, start, len, prot, pages, 187*7a7741afSMartin Matuska nr_pages); 188*7a7741afSMartin Matuska 189*7a7741afSMartin Matuska return (count); 190*7a7741afSMartin Matuska } 191*7a7741afSMartin Matuska 192*7a7741afSMartin Matuska void 193*7a7741afSMartin Matuska zfs_uio_free_dio_pages(zfs_uio_t *uio, zfs_uio_rw_t rw) 194*7a7741afSMartin Matuska { 195*7a7741afSMartin Matuska ASSERT(uio->uio_extflg & UIO_DIRECT); 196*7a7741afSMartin Matuska ASSERT3P(uio->uio_dio.pages, !=, NULL); 197*7a7741afSMartin Matuska ASSERT(zfs_uio_rw(uio) == rw); 198*7a7741afSMartin Matuska 199*7a7741afSMartin Matuska if (rw == UIO_WRITE) 200*7a7741afSMartin Matuska zfs_uio_release_stable_pages(uio); 201*7a7741afSMartin Matuska 202*7a7741afSMartin Matuska vm_page_unhold_pages(&uio->uio_dio.pages[0], 203*7a7741afSMartin Matuska uio->uio_dio.npages); 204*7a7741afSMartin Matuska 205*7a7741afSMartin Matuska kmem_free(uio->uio_dio.pages, 206*7a7741afSMartin Matuska uio->uio_dio.npages * sizeof (vm_page_t)); 207*7a7741afSMartin Matuska } 208*7a7741afSMartin Matuska 209*7a7741afSMartin Matuska static int 210*7a7741afSMartin Matuska zfs_uio_get_user_pages(unsigned long start, int nr_pages, 211*7a7741afSMartin Matuska size_t len, zfs_uio_rw_t rw, vm_page_t *pages) 212*7a7741afSMartin Matuska { 213*7a7741afSMartin Matuska int count; 214*7a7741afSMartin Matuska 215*7a7741afSMartin Matuska count = zfs_uio_hold_pages(start, len, nr_pages, rw, pages); 216*7a7741afSMartin Matuska 217*7a7741afSMartin Matuska if (count != nr_pages) { 218*7a7741afSMartin Matuska if (count > 0) 219*7a7741afSMartin Matuska vm_page_unhold_pages(pages, count); 220*7a7741afSMartin Matuska return (0); 221*7a7741afSMartin Matuska } 222*7a7741afSMartin Matuska 223*7a7741afSMartin Matuska ASSERT3S(count, ==, nr_pages); 224*7a7741afSMartin Matuska 225*7a7741afSMartin Matuska return (count); 226*7a7741afSMartin Matuska } 
227*7a7741afSMartin Matuska 228*7a7741afSMartin Matuska static int 229*7a7741afSMartin Matuska zfs_uio_iov_step(struct iovec v, zfs_uio_t *uio, int *numpages) 230*7a7741afSMartin Matuska { 231*7a7741afSMartin Matuska unsigned long addr = (unsigned long)(v.iov_base); 232*7a7741afSMartin Matuska size_t len = v.iov_len; 233*7a7741afSMartin Matuska int n = DIV_ROUND_UP(len, PAGE_SIZE); 234*7a7741afSMartin Matuska 235*7a7741afSMartin Matuska int res = zfs_uio_get_user_pages( 236*7a7741afSMartin Matuska P2ALIGN_TYPED(addr, PAGE_SIZE, unsigned long), n, len, 237*7a7741afSMartin Matuska zfs_uio_rw(uio), &uio->uio_dio.pages[uio->uio_dio.npages]); 238*7a7741afSMartin Matuska 239*7a7741afSMartin Matuska if (res != n) 240*7a7741afSMartin Matuska return (SET_ERROR(EFAULT)); 241*7a7741afSMartin Matuska 242*7a7741afSMartin Matuska ASSERT3U(len, ==, res * PAGE_SIZE); 243*7a7741afSMartin Matuska *numpages = res; 244*7a7741afSMartin Matuska return (0); 245*7a7741afSMartin Matuska } 246*7a7741afSMartin Matuska 247*7a7741afSMartin Matuska static int 248*7a7741afSMartin Matuska zfs_uio_get_dio_pages_impl(zfs_uio_t *uio) 249*7a7741afSMartin Matuska { 250*7a7741afSMartin Matuska const struct iovec *iovp = GET_UIO_STRUCT(uio)->uio_iov; 251*7a7741afSMartin Matuska size_t len = zfs_uio_resid(uio); 252*7a7741afSMartin Matuska 253*7a7741afSMartin Matuska for (int i = 0; i < zfs_uio_iovcnt(uio); i++) { 254*7a7741afSMartin Matuska struct iovec iov; 255*7a7741afSMartin Matuska int numpages = 0; 256*7a7741afSMartin Matuska 257*7a7741afSMartin Matuska if (iovp->iov_len == 0) { 258*7a7741afSMartin Matuska iovp++; 259*7a7741afSMartin Matuska continue; 260*7a7741afSMartin Matuska } 261*7a7741afSMartin Matuska iov.iov_len = MIN(len, iovp->iov_len); 262*7a7741afSMartin Matuska iov.iov_base = iovp->iov_base; 263*7a7741afSMartin Matuska int error = zfs_uio_iov_step(iov, uio, &numpages); 264*7a7741afSMartin Matuska 265*7a7741afSMartin Matuska if (error) 266*7a7741afSMartin Matuska return (error); 
267*7a7741afSMartin Matuska 268*7a7741afSMartin Matuska uio->uio_dio.npages += numpages; 269*7a7741afSMartin Matuska len -= iov.iov_len; 270*7a7741afSMartin Matuska iovp++; 271*7a7741afSMartin Matuska } 272*7a7741afSMartin Matuska 273*7a7741afSMartin Matuska ASSERT0(len); 274*7a7741afSMartin Matuska 275*7a7741afSMartin Matuska return (0); 276*7a7741afSMartin Matuska } 277*7a7741afSMartin Matuska 278*7a7741afSMartin Matuska /* 279*7a7741afSMartin Matuska * This function holds user pages into the kernel. In the event that the user 280*7a7741afSMartin Matuska * pages are not successfully held an error value is returned. 281*7a7741afSMartin Matuska * 282*7a7741afSMartin Matuska * On success, 0 is returned. 283*7a7741afSMartin Matuska */ 284*7a7741afSMartin Matuska int 285*7a7741afSMartin Matuska zfs_uio_get_dio_pages_alloc(zfs_uio_t *uio, zfs_uio_rw_t rw) 286*7a7741afSMartin Matuska { 287*7a7741afSMartin Matuska int error = 0; 288*7a7741afSMartin Matuska int npages = DIV_ROUND_UP(zfs_uio_resid(uio), PAGE_SIZE); 289*7a7741afSMartin Matuska size_t size = npages * sizeof (vm_page_t); 290*7a7741afSMartin Matuska 291*7a7741afSMartin Matuska ASSERT(zfs_uio_rw(uio) == rw); 292*7a7741afSMartin Matuska 293*7a7741afSMartin Matuska uio->uio_dio.pages = kmem_alloc(size, KM_SLEEP); 294*7a7741afSMartin Matuska 295*7a7741afSMartin Matuska error = zfs_uio_get_dio_pages_impl(uio); 296*7a7741afSMartin Matuska 297*7a7741afSMartin Matuska if (error) { 298*7a7741afSMartin Matuska vm_page_unhold_pages(&uio->uio_dio.pages[0], 299*7a7741afSMartin Matuska uio->uio_dio.npages); 300*7a7741afSMartin Matuska kmem_free(uio->uio_dio.pages, size); 301*7a7741afSMartin Matuska return (error); 302*7a7741afSMartin Matuska } 303*7a7741afSMartin Matuska 304*7a7741afSMartin Matuska ASSERT3S(uio->uio_dio.npages, >, 0); 305*7a7741afSMartin Matuska 306*7a7741afSMartin Matuska /* 307*7a7741afSMartin Matuska * Since we will be writing the user pages we must make sure that 308*7a7741afSMartin Matuska * they are 
stable. That way the contents of the pages can not change 309*7a7741afSMartin Matuska * while we are doing: compression, checksumming, encryption, parity 310*7a7741afSMartin Matuska * calculations or deduplication. 311*7a7741afSMartin Matuska */ 312*7a7741afSMartin Matuska if (zfs_uio_rw(uio) == UIO_WRITE) 313*7a7741afSMartin Matuska zfs_uio_set_pages_to_stable(uio); 314*7a7741afSMartin Matuska 315*7a7741afSMartin Matuska uio->uio_extflg |= UIO_DIRECT; 316*7a7741afSMartin Matuska 317*7a7741afSMartin Matuska return (0); 318*7a7741afSMartin Matuska } 319