1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 51544Seschrock * Common Development and Distribution License (the "License"). 61544Seschrock * You may not use this file except in compliance with the License. 7789Sahrens * 8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9789Sahrens * or http://www.opensolaris.org/os/licensing. 10789Sahrens * See the License for the specific language governing permissions 11789Sahrens * and limitations under the License. 12789Sahrens * 13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15789Sahrens * If applicable, add the following below this CDDL HEADER, with the 16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18789Sahrens * 19789Sahrens * CDDL HEADER END 20789Sahrens */ 21789Sahrens /* 225329Sgw25295 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23789Sahrens * Use is subject to license terms. 24789Sahrens */ 25789Sahrens 26789Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 27789Sahrens 28789Sahrens #include <sys/zfs_context.h> 29789Sahrens #include <sys/spa.h> 30789Sahrens #include <sys/vdev_file.h> 31789Sahrens #include <sys/vdev_impl.h> 32789Sahrens #include <sys/zio.h> 33789Sahrens #include <sys/fs/zfs.h> 34789Sahrens 35789Sahrens /* 36789Sahrens * Virtual device vector for files. 37789Sahrens */ 38789Sahrens 39789Sahrens static int 405329Sgw25295 vdev_file_open_common(vdev_t *vd) 41789Sahrens { 42789Sahrens vdev_file_t *vf; 43789Sahrens vnode_t *vp; 44789Sahrens int error; 45789Sahrens 46789Sahrens /* 47789Sahrens * We must have a pathname, and it must be absolute. 48789Sahrens */ 49789Sahrens if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { 50789Sahrens vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; 51789Sahrens return (EINVAL); 52789Sahrens } 53789Sahrens 54789Sahrens vf = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_t), KM_SLEEP); 55789Sahrens 56789Sahrens /* 57789Sahrens * We always open the files from the root of the global zone, even if 58789Sahrens * we're in a local zone. If the user has gotten to this point, the 59789Sahrens * administrator has already decided that the pool should be available 60789Sahrens * to local zone users, so the underlying devices should be as well. 61789Sahrens */ 62789Sahrens ASSERT(vd->vdev_path != NULL && vd->vdev_path[0] == '/'); 635329Sgw25295 error = vn_openat(vd->vdev_path + 1, UIO_SYSSPACE, 64*5331Samw spa_mode | FOFFMAX, 0, &vp, 0, 0, rootdir, -1); 65789Sahrens 66789Sahrens if (error) { 67789Sahrens vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 68789Sahrens return (error); 69789Sahrens } 70789Sahrens 71789Sahrens vf->vf_vnode = vp; 72789Sahrens 73789Sahrens #ifdef _KERNEL 74789Sahrens /* 75789Sahrens * Make sure it's a regular file. 76789Sahrens */ 77789Sahrens if (vp->v_type != VREG) { 78789Sahrens vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 79789Sahrens return (ENODEV); 80789Sahrens } 81789Sahrens #endif 82789Sahrens 835329Sgw25295 return (0); 845329Sgw25295 } 855329Sgw25295 865329Sgw25295 static int 875329Sgw25295 vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift) 885329Sgw25295 { 895329Sgw25295 vdev_file_t *vf; 905329Sgw25295 vattr_t vattr; 915329Sgw25295 int error; 925329Sgw25295 935329Sgw25295 if ((error = vdev_file_open_common(vd)) != 0) 945329Sgw25295 return (error); 955329Sgw25295 965329Sgw25295 vf = vd->vdev_tsd; 975329Sgw25295 98789Sahrens /* 99789Sahrens * Determine the physical size of the file. 100789Sahrens */ 101789Sahrens vattr.va_mask = AT_SIZE; 102*5331Samw error = VOP_GETATTR(vf->vf_vnode, &vattr, 0, kcred, NULL); 103789Sahrens if (error) { 104789Sahrens vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 105789Sahrens return (error); 106789Sahrens } 107789Sahrens 108789Sahrens *psize = vattr.va_size; 109789Sahrens *ashift = SPA_MINBLOCKSHIFT; 110789Sahrens 111789Sahrens return (0); 112789Sahrens } 113789Sahrens 114789Sahrens static void 115789Sahrens vdev_file_close(vdev_t *vd) 116789Sahrens { 117789Sahrens vdev_file_t *vf = vd->vdev_tsd; 118789Sahrens 119789Sahrens if (vf == NULL) 120789Sahrens return; 121789Sahrens 122789Sahrens if (vf->vf_vnode != NULL) { 123*5331Samw (void) VOP_PUTPAGE(vf->vf_vnode, 0, 0, B_INVAL, kcred, NULL); 124*5331Samw (void) VOP_CLOSE(vf->vf_vnode, spa_mode, 1, 0, kcred, NULL); 125789Sahrens VN_RELE(vf->vf_vnode); 126789Sahrens } 127789Sahrens 128789Sahrens kmem_free(vf, sizeof (vdev_file_t)); 129789Sahrens vd->vdev_tsd = NULL; 130789Sahrens } 131789Sahrens 1325329Sgw25295 static int 1335329Sgw25295 vdev_file_probe_io(vdev_t *vd, caddr_t data, size_t size, uint64_t offset, 1345329Sgw25295 enum uio_rw rw) 1355329Sgw25295 { 1365329Sgw25295 vdev_file_t *vf = vd->vdev_tsd; 1375329Sgw25295 ssize_t resid; 1385329Sgw25295 int error = 0; 1395329Sgw25295 1405329Sgw25295 if (vd == NULL || vf == NULL || vf->vf_vnode == NULL) 1415329Sgw25295 return (EINVAL); 1425329Sgw25295 1435329Sgw25295 ASSERT(rw == UIO_READ || rw == UIO_WRITE); 1445329Sgw25295 1455329Sgw25295 error = vn_rdwr(rw, vf->vf_vnode, data, size, offset, UIO_SYSSPACE, 1465329Sgw25295 0, RLIM64_INFINITY, kcred, &resid); 1475329Sgw25295 if (error || resid != 0) 1485329Sgw25295 return (EIO); 1495329Sgw25295 return (0); 1505329Sgw25295 } 1515329Sgw25295 1525329Sgw25295 static int 1535329Sgw25295 vdev_file_probe(vdev_t *vd) 1545329Sgw25295 { 1555329Sgw25295 vdev_t *nvd; 1565329Sgw25295 char *vl_boot; 1575329Sgw25295 uint64_t offset; 1585329Sgw25295 int l, error = 0, retries = 0; 1595329Sgw25295 1605329Sgw25295 if (vd == NULL) 1615329Sgw25295 return (EINVAL); 1625329Sgw25295 1635329Sgw25295 /* Hijack the current vdev */ 1645329Sgw25295 nvd = vd; 1655329Sgw25295 1665329Sgw25295 /* 1675329Sgw25295 * Pick a random label to rewrite. 1685329Sgw25295 */ 1695329Sgw25295 l = spa_get_random(VDEV_LABELS); 1705329Sgw25295 ASSERT(l < VDEV_LABELS); 1715329Sgw25295 1725329Sgw25295 offset = vdev_label_offset(vd->vdev_psize, l, 1735329Sgw25295 offsetof(vdev_label_t, vl_boot_header)); 1745329Sgw25295 1755329Sgw25295 vl_boot = kmem_alloc(VDEV_BOOT_HEADER_SIZE, KM_SLEEP); 1765329Sgw25295 1775329Sgw25295 while ((error = vdev_file_probe_io(nvd, vl_boot, VDEV_BOOT_HEADER_SIZE, 1785329Sgw25295 offset, UIO_READ)) != 0 && retries == 0) { 1795329Sgw25295 1805329Sgw25295 /* 1815329Sgw25295 * If we failed with the vdev that was passed in then 1825329Sgw25295 * try allocating a new one and try again. 1835329Sgw25295 */ 1845329Sgw25295 nvd = kmem_zalloc(sizeof (vdev_t), KM_SLEEP); 1855329Sgw25295 if (vd->vdev_path) 1865329Sgw25295 nvd->vdev_path = spa_strdup(vd->vdev_path); 1875329Sgw25295 error = vdev_file_open_common(nvd); 1885329Sgw25295 if (error) { 1895329Sgw25295 vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, 1905329Sgw25295 nvd->vdev_stat.vs_aux); 1915329Sgw25295 break; 1925329Sgw25295 } 1935329Sgw25295 retries++; 1945329Sgw25295 } 1955329Sgw25295 1965329Sgw25295 if ((spa_mode & FWRITE) && !error) { 1975329Sgw25295 error = vdev_file_probe_io(nvd, vl_boot, VDEV_BOOT_HEADER_SIZE, 1985329Sgw25295 offset, UIO_WRITE); 1995329Sgw25295 } 2005329Sgw25295 2015329Sgw25295 if (retries) { 2025329Sgw25295 vdev_file_close(nvd); 2035329Sgw25295 if (nvd->vdev_path) 2045329Sgw25295 spa_strfree(nvd->vdev_path); 2055329Sgw25295 kmem_free(nvd, sizeof (vdev_t)); 2065329Sgw25295 } 2075329Sgw25295 kmem_free(vl_boot, VDEV_BOOT_HEADER_SIZE); 2085329Sgw25295 2095329Sgw25295 if (!error) 2105329Sgw25295 vd->vdev_is_failing = B_FALSE; 2115329Sgw25295 2125329Sgw25295 return (error); 2135329Sgw25295 } 2145329Sgw25295 215789Sahrens static void 216789Sahrens vdev_file_io_start(zio_t *zio) 217789Sahrens { 218789Sahrens vdev_t *vd = zio->io_vd; 219789Sahrens vdev_file_t *vf = vd->vdev_tsd; 220789Sahrens ssize_t resid; 221789Sahrens int error; 222789Sahrens 223789Sahrens if (zio->io_type == ZIO_TYPE_IOCTL) { 224789Sahrens zio_vdev_io_bypass(zio); 225789Sahrens 226789Sahrens /* XXPOLICY */ 2275329Sgw25295 if (!vdev_readable(vd)) { 228789Sahrens zio->io_error = ENXIO; 229789Sahrens zio_next_stage_async(zio); 230789Sahrens return; 231789Sahrens } 232789Sahrens 233789Sahrens switch (zio->io_cmd) { 234789Sahrens case DKIOCFLUSHWRITECACHE: 235789Sahrens zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC, 236*5331Samw kcred, NULL); 237789Sahrens dprintf("fsync(%s) = %d\n", vdev_description(vd), 238789Sahrens zio->io_error); 239789Sahrens break; 240789Sahrens default: 241789Sahrens zio->io_error = ENOTSUP; 242789Sahrens } 243789Sahrens 244789Sahrens zio_next_stage_async(zio); 245789Sahrens return; 246789Sahrens } 247789Sahrens 2483059Sahrens /* 2493059Sahrens * In the kernel, don't bother double-caching, but in userland, 2503059Sahrens * we want to test the vdev_cache code. 2513059Sahrens */ 2523059Sahrens #ifndef _KERNEL 253789Sahrens if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio) == 0) 254789Sahrens return; 2553059Sahrens #endif 256789Sahrens 257789Sahrens if ((zio = vdev_queue_io(zio)) == NULL) 258789Sahrens return; 259789Sahrens 260789Sahrens /* XXPOLICY */ 2615329Sgw25295 if (zio->io_type == ZIO_TYPE_WRITE) 2625329Sgw25295 error = vdev_writeable(vd) ? vdev_error_inject(vd, zio) : ENXIO; 2635329Sgw25295 else 2645329Sgw25295 error = vdev_readable(vd) ? vdev_error_inject(vd, zio) : ENXIO; 2655329Sgw25295 error = (vd->vdev_remove_wanted || vd->vdev_is_failing) ? ENXIO : error; 266789Sahrens if (error) { 267789Sahrens zio->io_error = error; 268789Sahrens zio_next_stage_async(zio); 269789Sahrens return; 270789Sahrens } 271789Sahrens 272789Sahrens zio->io_error = vn_rdwr(zio->io_type == ZIO_TYPE_READ ? 273789Sahrens UIO_READ : UIO_WRITE, vf->vf_vnode, zio->io_data, 274789Sahrens zio->io_size, zio->io_offset, UIO_SYSSPACE, 275789Sahrens 0, RLIM64_INFINITY, kcred, &resid); 276789Sahrens 277789Sahrens if (resid != 0 && zio->io_error == 0) 278789Sahrens zio->io_error = ENOSPC; 279789Sahrens 280789Sahrens zio_next_stage_async(zio); 281789Sahrens } 282789Sahrens 283789Sahrens static void 284789Sahrens vdev_file_io_done(zio_t *zio) 285789Sahrens { 2865329Sgw25295 2875329Sgw25295 if (zio_injection_enabled && zio->io_error == 0) 2885329Sgw25295 zio->io_error = zio_handle_device_injection(zio->io_vd, EIO); 2895329Sgw25295 2905329Sgw25295 /* 2915329Sgw25295 * If this device is truely gone, then attempt to remove it 2925329Sgw25295 * from the configuration. 2935329Sgw25295 */ 2945329Sgw25295 if (zio->io_error == EIO) { 2955329Sgw25295 vdev_t *vd = zio->io_vd; 2965329Sgw25295 2975329Sgw25295 if (vdev_probe(vd) != 0) 2985329Sgw25295 vd->vdev_is_failing = B_TRUE; 2995329Sgw25295 } 3005329Sgw25295 301789Sahrens vdev_queue_io_done(zio); 302789Sahrens 3033059Sahrens #ifndef _KERNEL 304789Sahrens if (zio->io_type == ZIO_TYPE_WRITE) 305789Sahrens vdev_cache_write(zio); 3063059Sahrens #endif 307789Sahrens 308789Sahrens zio_next_stage(zio); 309789Sahrens } 310789Sahrens 311789Sahrens vdev_ops_t vdev_file_ops = { 312789Sahrens vdev_file_open, 313789Sahrens vdev_file_close, 3145329Sgw25295 vdev_file_probe, 315789Sahrens vdev_default_asize, 316789Sahrens vdev_file_io_start, 317789Sahrens vdev_file_io_done, 318789Sahrens NULL, 319789Sahrens VDEV_TYPE_FILE, /* name of this vdev type */ 320789Sahrens B_TRUE /* leaf vdev */ 321789Sahrens }; 322789Sahrens 323789Sahrens /* 324789Sahrens * From userland we access disks just like files. 325789Sahrens */ 326789Sahrens #ifndef _KERNEL 327789Sahrens 328789Sahrens vdev_ops_t vdev_disk_ops = { 329789Sahrens vdev_file_open, 330789Sahrens vdev_file_close, 3315329Sgw25295 vdev_file_probe, 332789Sahrens vdev_default_asize, 333789Sahrens vdev_file_io_start, 334789Sahrens vdev_file_io_done, 335789Sahrens NULL, 336789Sahrens VDEV_TYPE_DISK, /* name of this vdev type */ 337789Sahrens B_TRUE /* leaf vdev */ 338789Sahrens }; 339789Sahrens 340789Sahrens #endif 341