1eda14cbcSMatt Macy /* 2eda14cbcSMatt Macy * CDDL HEADER START 3eda14cbcSMatt Macy * 4eda14cbcSMatt Macy * The contents of this file are subject to the terms of the 5eda14cbcSMatt Macy * Common Development and Distribution License (the "License"). 6eda14cbcSMatt Macy * You may not use this file except in compliance with the License. 7eda14cbcSMatt Macy * 8eda14cbcSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9271171e0SMartin Matuska * or https://opensource.org/licenses/CDDL-1.0. 10eda14cbcSMatt Macy * See the License for the specific language governing permissions 11eda14cbcSMatt Macy * and limitations under the License. 12eda14cbcSMatt Macy * 13eda14cbcSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each 14eda14cbcSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15eda14cbcSMatt Macy * If applicable, add the following below this CDDL HEADER, with the 16eda14cbcSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying 17eda14cbcSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner] 18eda14cbcSMatt Macy * 19eda14cbcSMatt Macy * CDDL HEADER END 20eda14cbcSMatt Macy */ 21eda14cbcSMatt Macy /* 22eda14cbcSMatt Macy * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23eda14cbcSMatt Macy * 24eda14cbcSMatt Macy * Copyright (c) 2006-2010 Pawel Jakub Dawidek <pjd@FreeBSD.org> 25eda14cbcSMatt Macy * All rights reserved. 26eda14cbcSMatt Macy * 27eda14cbcSMatt Macy * Portions Copyright 2010 Robert Milkowski 28eda14cbcSMatt Macy * 29eda14cbcSMatt Macy * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 30eda14cbcSMatt Macy * Copyright (c) 2012, 2017 by Delphix. All rights reserved. 31eda14cbcSMatt Macy * Copyright (c) 2013, Joyent, Inc. All rights reserved. 32eda14cbcSMatt Macy * Copyright (c) 2014 Integros [integros.com] 33ce4dcb97SMartin Matuska * Copyright (c) 2024, Klara, Inc. 34eda14cbcSMatt Macy */ 35eda14cbcSMatt Macy 36eda14cbcSMatt Macy /* Portions Copyright 2011 Martin Matuska <mm@FreeBSD.org> */ 37eda14cbcSMatt Macy 38eda14cbcSMatt Macy /* 39eda14cbcSMatt Macy * ZFS volume emulation driver. 40eda14cbcSMatt Macy * 41eda14cbcSMatt Macy * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes. 42eda14cbcSMatt Macy * Volumes are accessed through the symbolic links named: 43eda14cbcSMatt Macy * 44eda14cbcSMatt Macy * /dev/zvol/<pool_name>/<dataset_name> 45eda14cbcSMatt Macy * 46eda14cbcSMatt Macy * Volumes are persistent through reboot. No user command needs to be 47eda14cbcSMatt Macy * run before opening and using a device. 48eda14cbcSMatt Macy * 49eda14cbcSMatt Macy * On FreeBSD ZVOLs are simply GEOM providers like any other storage device 50eda14cbcSMatt Macy * in the system. Except when they're simply character devices (volmode=dev). 51eda14cbcSMatt Macy */ 52eda14cbcSMatt Macy 53eda14cbcSMatt Macy #include <sys/types.h> 54eda14cbcSMatt Macy #include <sys/param.h> 55eda14cbcSMatt Macy #include <sys/kernel.h> 56eda14cbcSMatt Macy #include <sys/errno.h> 57eda14cbcSMatt Macy #include <sys/uio.h> 58eda14cbcSMatt Macy #include <sys/bio.h> 59eda14cbcSMatt Macy #include <sys/buf.h> 60eda14cbcSMatt Macy #include <sys/kmem.h> 61eda14cbcSMatt Macy #include <sys/conf.h> 62eda14cbcSMatt Macy #include <sys/cmn_err.h> 63eda14cbcSMatt Macy #include <sys/stat.h> 64eda14cbcSMatt Macy #include <sys/proc.h> 65eda14cbcSMatt Macy #include <sys/zap.h> 66eda14cbcSMatt Macy #include <sys/spa.h> 67eda14cbcSMatt Macy #include <sys/spa_impl.h> 68eda14cbcSMatt Macy #include <sys/zio.h> 69eda14cbcSMatt Macy #include <sys/disk.h> 70eda14cbcSMatt Macy #include <sys/dmu_traverse.h> 71eda14cbcSMatt Macy #include <sys/dnode.h> 72eda14cbcSMatt Macy #include <sys/dsl_dataset.h> 73eda14cbcSMatt Macy #include <sys/dsl_prop.h> 74eda14cbcSMatt Macy #include <sys/dsl_dir.h> 75eda14cbcSMatt Macy #include <sys/byteorder.h> 76eda14cbcSMatt Macy #include <sys/sunddi.h> 77eda14cbcSMatt Macy #include <sys/dirent.h> 78eda14cbcSMatt Macy #include <sys/policy.h> 79eda14cbcSMatt Macy #include <sys/queue.h> 80eda14cbcSMatt Macy #include <sys/fs/zfs.h> 81eda14cbcSMatt Macy #include <sys/zfs_ioctl.h> 82eda14cbcSMatt Macy #include <sys/zil.h> 83eda14cbcSMatt Macy #include <sys/zfs_znode.h> 84eda14cbcSMatt Macy #include <sys/zfs_rlock.h> 85eda14cbcSMatt Macy #include <sys/vdev_impl.h> 86eda14cbcSMatt Macy #include <sys/vdev_raidz.h> 87eda14cbcSMatt Macy #include <sys/zvol.h> 88eda14cbcSMatt Macy #include <sys/zil_impl.h> 89eda14cbcSMatt Macy #include <sys/dataset_kstats.h> 90eda14cbcSMatt Macy #include <sys/dbuf.h> 91eda14cbcSMatt Macy #include <sys/dmu_tx.h> 92eda14cbcSMatt Macy #include <sys/zfeature.h> 93eda14cbcSMatt Macy #include <sys/zio_checksum.h> 94eda14cbcSMatt Macy #include <sys/zil_impl.h> 95eda14cbcSMatt Macy #include <sys/filio.h> 96c7046f76SMartin Matuska #include <sys/freebsd_event.h> 97eda14cbcSMatt Macy 98eda14cbcSMatt Macy #include <geom/geom.h> 99eda14cbcSMatt Macy #include <sys/zvol.h> 100eda14cbcSMatt Macy #include <sys/zvol_impl.h> 101eda14cbcSMatt Macy 102eda14cbcSMatt Macy #include "zfs_namecheck.h" 103eda14cbcSMatt Macy 104eda14cbcSMatt Macy #define ZVOL_DUMPSIZE "dumpsize" 105eda14cbcSMatt Macy 106eda14cbcSMatt Macy #ifdef ZVOL_LOCK_DEBUG 107eda14cbcSMatt Macy #define ZVOL_RW_READER RW_WRITER 108eda14cbcSMatt Macy #define ZVOL_RW_READ_HELD RW_WRITE_HELD 109eda14cbcSMatt Macy #else 110eda14cbcSMatt Macy #define ZVOL_RW_READER RW_READER 111eda14cbcSMatt Macy #define ZVOL_RW_READ_HELD RW_READ_HELD 112eda14cbcSMatt Macy #endif 113eda14cbcSMatt Macy 114eda14cbcSMatt Macy enum zvol_geom_state { 115eda14cbcSMatt Macy ZVOL_GEOM_UNINIT, 116eda14cbcSMatt Macy ZVOL_GEOM_STOPPED, 117eda14cbcSMatt Macy ZVOL_GEOM_RUNNING, 118eda14cbcSMatt Macy }; 119eda14cbcSMatt Macy 120eda14cbcSMatt Macy struct zvol_state_os { 121eda14cbcSMatt Macy #define zso_dev _zso_state._zso_dev 122eda14cbcSMatt Macy #define zso_geom _zso_state._zso_geom 123eda14cbcSMatt Macy union { 124eda14cbcSMatt Macy /* volmode=dev */ 125eda14cbcSMatt Macy struct zvol_state_dev { 126eda14cbcSMatt Macy struct cdev *zsd_cdev; 127c7046f76SMartin Matuska struct selinfo zsd_selinfo; 128eda14cbcSMatt Macy } _zso_dev; 129eda14cbcSMatt Macy 130eda14cbcSMatt Macy /* volmode=geom */ 131eda14cbcSMatt Macy struct zvol_state_geom { 132eda14cbcSMatt Macy struct g_provider *zsg_provider; 133eda14cbcSMatt Macy struct bio_queue_head zsg_queue; 134eda14cbcSMatt Macy struct mtx zsg_queue_mtx; 135eda14cbcSMatt Macy enum zvol_geom_state zsg_state; 136eda14cbcSMatt Macy } _zso_geom; 137eda14cbcSMatt Macy } _zso_state; 1387877fdebSMatt Macy int zso_dying; 139eda14cbcSMatt Macy }; 140eda14cbcSMatt Macy 141eda14cbcSMatt Macy static uint32_t zvol_minors; 142eda14cbcSMatt Macy 143eda14cbcSMatt Macy SYSCTL_DECL(_vfs_zfs); 144eda14cbcSMatt Macy SYSCTL_NODE(_vfs_zfs, OID_AUTO, vol, CTLFLAG_RW, 0, "ZFS VOLUME"); 145eda14cbcSMatt Macy SYSCTL_INT(_vfs_zfs_vol, OID_AUTO, mode, CTLFLAG_RWTUN, &zvol_volmode, 0, 146eda14cbcSMatt Macy "Expose as GEOM providers (1), device files (2) or neither"); 147eda14cbcSMatt Macy static boolean_t zpool_on_zvol = B_FALSE; 148eda14cbcSMatt Macy SYSCTL_INT(_vfs_zfs_vol, OID_AUTO, recursive, CTLFLAG_RWTUN, &zpool_on_zvol, 0, 149eda14cbcSMatt Macy "Allow zpools to use zvols as vdevs (DANGEROUS)"); 150eda14cbcSMatt Macy 151eda14cbcSMatt Macy /* 152eda14cbcSMatt Macy * Toggle unmap functionality. 153eda14cbcSMatt Macy */ 154eda14cbcSMatt Macy boolean_t zvol_unmap_enabled = B_TRUE; 155eda14cbcSMatt Macy 156eda14cbcSMatt Macy SYSCTL_INT(_vfs_zfs_vol, OID_AUTO, unmap_enabled, CTLFLAG_RWTUN, 157eda14cbcSMatt Macy &zvol_unmap_enabled, 0, "Enable UNMAP functionality"); 158eda14cbcSMatt Macy 159eda14cbcSMatt Macy /* 160eda14cbcSMatt Macy * zvol maximum transfer in one DMU tx. 161eda14cbcSMatt Macy */ 162eda14cbcSMatt Macy int zvol_maxphys = DMU_MAX_ACCESS / 2; 163eda14cbcSMatt Macy 164eda14cbcSMatt Macy static void zvol_ensure_zilog(zvol_state_t *zv); 165eda14cbcSMatt Macy 166eda14cbcSMatt Macy static d_open_t zvol_cdev_open; 167eda14cbcSMatt Macy static d_close_t zvol_cdev_close; 168eda14cbcSMatt Macy static d_ioctl_t zvol_cdev_ioctl; 169eda14cbcSMatt Macy static d_read_t zvol_cdev_read; 170eda14cbcSMatt Macy static d_write_t zvol_cdev_write; 171eda14cbcSMatt Macy static d_strategy_t zvol_geom_bio_strategy; 172c7046f76SMartin Matuska static d_kqfilter_t zvol_cdev_kqfilter; 173eda14cbcSMatt Macy 174eda14cbcSMatt Macy static struct cdevsw zvol_cdevsw = { 175eda14cbcSMatt Macy .d_name = "zvol", 176eda14cbcSMatt Macy .d_version = D_VERSION, 177eda14cbcSMatt Macy .d_flags = D_DISK | D_TRACKCLOSE, 178eda14cbcSMatt Macy .d_open = zvol_cdev_open, 179eda14cbcSMatt Macy .d_close = zvol_cdev_close, 180eda14cbcSMatt Macy .d_ioctl = zvol_cdev_ioctl, 181eda14cbcSMatt Macy .d_read = zvol_cdev_read, 182eda14cbcSMatt Macy .d_write = zvol_cdev_write, 183eda14cbcSMatt Macy .d_strategy = zvol_geom_bio_strategy, 184c7046f76SMartin Matuska .d_kqfilter = zvol_cdev_kqfilter, 185c7046f76SMartin Matuska }; 186c7046f76SMartin Matuska 187c7046f76SMartin Matuska static void zvol_filter_detach(struct knote *kn); 188c7046f76SMartin Matuska static int zvol_filter_vnode(struct knote *kn, long hint); 189c7046f76SMartin Matuska 190c7046f76SMartin Matuska static struct filterops zvol_filterops_vnode = { 191c7046f76SMartin Matuska .f_isfd = 1, 192c7046f76SMartin Matuska .f_detach = zvol_filter_detach, 193c7046f76SMartin Matuska .f_event = zvol_filter_vnode, 194eda14cbcSMatt Macy }; 195eda14cbcSMatt Macy 196eda14cbcSMatt Macy extern uint_t zfs_geom_probe_vdev_key; 197eda14cbcSMatt Macy 198eda14cbcSMatt Macy struct g_class zfs_zvol_class = { 199eda14cbcSMatt Macy .name = "ZFS::ZVOL", 200eda14cbcSMatt Macy .version = G_VERSION, 201eda14cbcSMatt Macy }; 202eda14cbcSMatt Macy 203eda14cbcSMatt Macy DECLARE_GEOM_CLASS(zfs_zvol_class, zfs_zvol); 204eda14cbcSMatt Macy 205eda14cbcSMatt Macy static int zvol_geom_open(struct g_provider *pp, int flag, int count); 206eda14cbcSMatt Macy static int zvol_geom_close(struct g_provider *pp, int flag, int count); 207eda14cbcSMatt Macy static void zvol_geom_run(zvol_state_t *zv); 208eda14cbcSMatt Macy static void zvol_geom_destroy(zvol_state_t *zv); 209eda14cbcSMatt Macy static int zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace); 210eda14cbcSMatt Macy static void zvol_geom_worker(void *arg); 211eda14cbcSMatt Macy static void zvol_geom_bio_start(struct bio *bp); 212eda14cbcSMatt Macy static int zvol_geom_bio_getattr(struct bio *bp); 213eda14cbcSMatt Macy /* static d_strategy_t zvol_geom_bio_strategy; (declared elsewhere) */ 214eda14cbcSMatt Macy 215eda14cbcSMatt Macy /* 216eda14cbcSMatt Macy * GEOM mode implementation 217eda14cbcSMatt Macy */ 218eda14cbcSMatt Macy 219eda14cbcSMatt Macy static int 220eda14cbcSMatt Macy zvol_geom_open(struct g_provider *pp, int flag, int count) 221eda14cbcSMatt Macy { 222eda14cbcSMatt Macy zvol_state_t *zv; 223eda14cbcSMatt Macy int err = 0; 2247877fdebSMatt Macy boolean_t drop_suspend = B_FALSE; 225eda14cbcSMatt Macy 226eda14cbcSMatt Macy if (!zpool_on_zvol && tsd_get(zfs_geom_probe_vdev_key) != NULL) { 227eda14cbcSMatt Macy /* 228e92ffd9bSMartin Matuska * If zfs_geom_probe_vdev_key is set, that means that zfs is 229eda14cbcSMatt Macy * attempting to probe geom providers while looking for a 230eda14cbcSMatt Macy * replacement for a missing VDEV. In this case, the 231eda14cbcSMatt Macy * spa_namespace_lock will not be held, but it is still illegal 232eda14cbcSMatt Macy * to use a zvol as a vdev. Deadlocks can result if another 233e92ffd9bSMartin Matuska * thread has spa_namespace_lock. 234eda14cbcSMatt Macy */ 235eda14cbcSMatt Macy return (SET_ERROR(EOPNOTSUPP)); 236eda14cbcSMatt Macy } 237eda14cbcSMatt Macy 238eac7052fSMatt Macy retry: 239eda14cbcSMatt Macy rw_enter(&zvol_state_lock, ZVOL_RW_READER); 240e92ffd9bSMartin Matuska /* 241e92ffd9bSMartin Matuska * Obtain a copy of private under zvol_state_lock to make sure either 242e92ffd9bSMartin Matuska * the result of zvol free code setting private to NULL is observed, 243e92ffd9bSMartin Matuska * or the zv is protected from being freed because of the positive 244e92ffd9bSMartin Matuska * zv_open_count. 245e92ffd9bSMartin Matuska */ 246eda14cbcSMatt Macy zv = pp->private; 247eda14cbcSMatt Macy if (zv == NULL) { 248eda14cbcSMatt Macy rw_exit(&zvol_state_lock); 2497877fdebSMatt Macy err = SET_ERROR(ENXIO); 2507877fdebSMatt Macy goto out_locked; 251eda14cbcSMatt Macy } 252eda14cbcSMatt Macy 253eda14cbcSMatt Macy mutex_enter(&zv->zv_state_lock); 254ce4dcb97SMartin Matuska if (zv->zv_zso->zso_dying || zv->zv_flags & ZVOL_REMOVING) { 2557877fdebSMatt Macy rw_exit(&zvol_state_lock); 2567877fdebSMatt Macy err = SET_ERROR(ENXIO); 2577877fdebSMatt Macy goto out_zv_locked; 2587877fdebSMatt Macy } 2597877fdebSMatt Macy ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_GEOM); 260eda14cbcSMatt Macy 261eda14cbcSMatt Macy /* 262e92ffd9bSMartin Matuska * Make sure zvol is not suspended during first open 263eda14cbcSMatt Macy * (hold zv_suspend_lock) and respect proper lock acquisition 264e92ffd9bSMartin Matuska * ordering - zv_suspend_lock before zv_state_lock. 265eda14cbcSMatt Macy */ 266eda14cbcSMatt Macy if (zv->zv_open_count == 0) { 2677877fdebSMatt Macy drop_suspend = B_TRUE; 268eda14cbcSMatt Macy if (!rw_tryenter(&zv->zv_suspend_lock, ZVOL_RW_READER)) { 269eda14cbcSMatt Macy mutex_exit(&zv->zv_state_lock); 270eda14cbcSMatt Macy rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); 271eda14cbcSMatt Macy mutex_enter(&zv->zv_state_lock); 272e92ffd9bSMartin Matuska /* Check to see if zv_suspend_lock is needed. */ 273eda14cbcSMatt Macy if (zv->zv_open_count != 0) { 274eda14cbcSMatt Macy rw_exit(&zv->zv_suspend_lock); 275eda14cbcSMatt Macy drop_suspend = B_FALSE; 276eda14cbcSMatt Macy } 277eda14cbcSMatt Macy } 278eda14cbcSMatt Macy } 279eda14cbcSMatt Macy rw_exit(&zvol_state_lock); 280eda14cbcSMatt Macy 281eda14cbcSMatt Macy ASSERT(MUTEX_HELD(&zv->zv_state_lock)); 282eda14cbcSMatt Macy 283eda14cbcSMatt Macy if (zv->zv_open_count == 0) { 284e92ffd9bSMartin Matuska boolean_t drop_namespace = B_FALSE; 285e92ffd9bSMartin Matuska 286eda14cbcSMatt Macy ASSERT(ZVOL_RW_READ_HELD(&zv->zv_suspend_lock)); 287e92ffd9bSMartin Matuska 288e92ffd9bSMartin Matuska /* 289e92ffd9bSMartin Matuska * Take spa_namespace_lock to prevent lock inversion when 290e92ffd9bSMartin Matuska * zvols from one pool are opened as vdevs in another. 291e92ffd9bSMartin Matuska */ 292e92ffd9bSMartin Matuska if (!mutex_owned(&spa_namespace_lock)) { 293e92ffd9bSMartin Matuska if (!mutex_tryenter(&spa_namespace_lock)) { 294e92ffd9bSMartin Matuska mutex_exit(&zv->zv_state_lock); 295e92ffd9bSMartin Matuska rw_exit(&zv->zv_suspend_lock); 29675e1fea6SMartin Matuska drop_suspend = B_FALSE; 297e92ffd9bSMartin Matuska kern_yield(PRI_USER); 298e92ffd9bSMartin Matuska goto retry; 299e92ffd9bSMartin Matuska } else { 300e92ffd9bSMartin Matuska drop_namespace = B_TRUE; 301e92ffd9bSMartin Matuska } 302e92ffd9bSMartin Matuska } 303eda14cbcSMatt Macy err = zvol_first_open(zv, !(flag & FWRITE)); 304e92ffd9bSMartin Matuska if (drop_namespace) 305e92ffd9bSMartin Matuska mutex_exit(&spa_namespace_lock); 306eda14cbcSMatt Macy if (err) 3077877fdebSMatt Macy goto out_zv_locked; 308eda14cbcSMatt Macy pp->mediasize = zv->zv_volsize; 309eda14cbcSMatt Macy pp->stripeoffset = 0; 310eda14cbcSMatt Macy pp->stripesize = zv->zv_volblocksize; 311eda14cbcSMatt Macy } 312eda14cbcSMatt Macy 313e92ffd9bSMartin Matuska ASSERT(MUTEX_HELD(&zv->zv_state_lock)); 314e92ffd9bSMartin Matuska 315eda14cbcSMatt Macy /* 316eda14cbcSMatt Macy * Check for a bad on-disk format version now since we 317eda14cbcSMatt Macy * lied about owning the dataset readonly before. 318eda14cbcSMatt Macy */ 319eda14cbcSMatt Macy if ((flag & FWRITE) && ((zv->zv_flags & ZVOL_RDONLY) || 320eda14cbcSMatt Macy dmu_objset_incompatible_encryption_version(zv->zv_objset))) { 3217877fdebSMatt Macy err = SET_ERROR(EROFS); 3227877fdebSMatt Macy goto out_opened; 323eda14cbcSMatt Macy } 324eda14cbcSMatt Macy if (zv->zv_flags & ZVOL_EXCL) { 3257877fdebSMatt Macy err = SET_ERROR(EBUSY); 3267877fdebSMatt Macy goto out_opened; 327eda14cbcSMatt Macy } 328716fd348SMartin Matuska if (flag & O_EXCL) { 329eda14cbcSMatt Macy if (zv->zv_open_count != 0) { 3307877fdebSMatt Macy err = SET_ERROR(EBUSY); 3317877fdebSMatt Macy goto out_opened; 332eda14cbcSMatt Macy } 333eda14cbcSMatt Macy zv->zv_flags |= ZVOL_EXCL; 334eda14cbcSMatt Macy } 335eda14cbcSMatt Macy 336eda14cbcSMatt Macy zv->zv_open_count += count; 3377877fdebSMatt Macy out_opened: 3387877fdebSMatt Macy if (zv->zv_open_count == 0) { 339eda14cbcSMatt Macy zvol_last_close(zv); 3407877fdebSMatt Macy wakeup(zv); 3417877fdebSMatt Macy } 3427877fdebSMatt Macy out_zv_locked: 3437877fdebSMatt Macy mutex_exit(&zv->zv_state_lock); 3447877fdebSMatt Macy out_locked: 345eda14cbcSMatt Macy if (drop_suspend) 346eda14cbcSMatt Macy rw_exit(&zv->zv_suspend_lock); 3477877fdebSMatt Macy return (err); 348eda14cbcSMatt Macy } 349eda14cbcSMatt Macy 350eda14cbcSMatt Macy static int 351eda14cbcSMatt Macy zvol_geom_close(struct g_provider *pp, int flag, int count) 352eda14cbcSMatt Macy { 353c03c5b1cSMartin Matuska (void) flag; 354eda14cbcSMatt Macy zvol_state_t *zv; 355eda14cbcSMatt Macy boolean_t drop_suspend = B_TRUE; 3567877fdebSMatt Macy int new_open_count; 357eda14cbcSMatt Macy 358eda14cbcSMatt Macy rw_enter(&zvol_state_lock, ZVOL_RW_READER); 359eda14cbcSMatt Macy zv = pp->private; 360eda14cbcSMatt Macy if (zv == NULL) { 361eda14cbcSMatt Macy rw_exit(&zvol_state_lock); 362eda14cbcSMatt Macy return (SET_ERROR(ENXIO)); 363eda14cbcSMatt Macy } 364eda14cbcSMatt Macy 365eda14cbcSMatt Macy mutex_enter(&zv->zv_state_lock); 366eda14cbcSMatt Macy if (zv->zv_flags & ZVOL_EXCL) { 3677877fdebSMatt Macy ASSERT3U(zv->zv_open_count, ==, 1); 368eda14cbcSMatt Macy zv->zv_flags &= ~ZVOL_EXCL; 369eda14cbcSMatt Macy } 370eda14cbcSMatt Macy 3717877fdebSMatt Macy ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_GEOM); 372eda14cbcSMatt Macy 373eda14cbcSMatt Macy /* 374eda14cbcSMatt Macy * If the open count is zero, this is a spurious close. 375eda14cbcSMatt Macy * That indicates a bug in the kernel / DDI framework. 376eda14cbcSMatt Macy */ 3777877fdebSMatt Macy ASSERT3U(zv->zv_open_count, >, 0); 378eda14cbcSMatt Macy 379eda14cbcSMatt Macy /* 380e92ffd9bSMartin Matuska * Make sure zvol is not suspended during last close 381eda14cbcSMatt Macy * (hold zv_suspend_lock) and respect proper lock acquisition 382e92ffd9bSMartin Matuska * ordering - zv_suspend_lock before zv_state_lock. 383eda14cbcSMatt Macy */ 3847877fdebSMatt Macy new_open_count = zv->zv_open_count - count; 3857877fdebSMatt Macy if (new_open_count == 0) { 386eda14cbcSMatt Macy if (!rw_tryenter(&zv->zv_suspend_lock, ZVOL_RW_READER)) { 387eda14cbcSMatt Macy mutex_exit(&zv->zv_state_lock); 388eda14cbcSMatt Macy rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); 389eda14cbcSMatt Macy mutex_enter(&zv->zv_state_lock); 390e92ffd9bSMartin Matuska /* Check to see if zv_suspend_lock is needed. */ 3917877fdebSMatt Macy new_open_count = zv->zv_open_count - count; 3927877fdebSMatt Macy if (new_open_count != 0) { 393eda14cbcSMatt Macy rw_exit(&zv->zv_suspend_lock); 394eda14cbcSMatt Macy drop_suspend = B_FALSE; 395eda14cbcSMatt Macy } 396eda14cbcSMatt Macy } 397eda14cbcSMatt Macy } else { 398eda14cbcSMatt Macy drop_suspend = B_FALSE; 399eda14cbcSMatt Macy } 400eda14cbcSMatt Macy rw_exit(&zvol_state_lock); 401eda14cbcSMatt Macy 402eda14cbcSMatt Macy ASSERT(MUTEX_HELD(&zv->zv_state_lock)); 403eda14cbcSMatt Macy 404eda14cbcSMatt Macy /* 405eda14cbcSMatt Macy * You may get multiple opens, but only one close. 406eda14cbcSMatt Macy */ 4077877fdebSMatt Macy zv->zv_open_count = new_open_count; 408eda14cbcSMatt Macy if (zv->zv_open_count == 0) { 409eda14cbcSMatt Macy ASSERT(ZVOL_RW_READ_HELD(&zv->zv_suspend_lock)); 410eda14cbcSMatt Macy zvol_last_close(zv); 4117877fdebSMatt Macy wakeup(zv); 412eda14cbcSMatt Macy } 413eda14cbcSMatt Macy 414eda14cbcSMatt Macy mutex_exit(&zv->zv_state_lock); 415eda14cbcSMatt Macy 416eda14cbcSMatt Macy if (drop_suspend) 417eda14cbcSMatt Macy rw_exit(&zv->zv_suspend_lock); 418eda14cbcSMatt Macy return (0); 419eda14cbcSMatt Macy } 420eda14cbcSMatt Macy 421eda14cbcSMatt Macy static void 422eda14cbcSMatt Macy zvol_geom_run(zvol_state_t *zv) 423eda14cbcSMatt Macy { 424eda14cbcSMatt Macy struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom; 425eda14cbcSMatt Macy struct g_provider *pp = zsg->zsg_provider; 426eda14cbcSMatt Macy 4277877fdebSMatt Macy ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_GEOM); 428eda14cbcSMatt Macy 429eda14cbcSMatt Macy g_error_provider(pp, 0); 430eda14cbcSMatt Macy 431eda14cbcSMatt Macy kproc_kthread_add(zvol_geom_worker, zv, &system_proc, NULL, 0, 0, 432eda14cbcSMatt Macy "zfskern", "zvol %s", pp->name + sizeof (ZVOL_DRIVER)); 433eda14cbcSMatt Macy } 434eda14cbcSMatt Macy 435eda14cbcSMatt Macy static void 436eda14cbcSMatt Macy zvol_geom_destroy(zvol_state_t *zv) 437eda14cbcSMatt Macy { 438eda14cbcSMatt Macy struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom; 439eda14cbcSMatt Macy struct g_provider *pp = zsg->zsg_provider; 440eda14cbcSMatt Macy 4417877fdebSMatt Macy ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_GEOM); 442eda14cbcSMatt Macy 443eda14cbcSMatt Macy g_topology_assert(); 444eda14cbcSMatt Macy 445eda14cbcSMatt Macy mutex_enter(&zv->zv_state_lock); 44616038816SMartin Matuska VERIFY3S(zsg->zsg_state, ==, ZVOL_GEOM_RUNNING); 447eda14cbcSMatt Macy mutex_exit(&zv->zv_state_lock); 448eda14cbcSMatt Macy zsg->zsg_provider = NULL; 449eda14cbcSMatt Macy g_wither_geom(pp->geom, ENXIO); 450eda14cbcSMatt Macy } 451eda14cbcSMatt Macy 4527877fdebSMatt Macy void 4537877fdebSMatt Macy zvol_wait_close(zvol_state_t *zv) 4547877fdebSMatt Macy { 4557877fdebSMatt Macy 4567877fdebSMatt Macy if (zv->zv_volmode != ZFS_VOLMODE_GEOM) 4577877fdebSMatt Macy return; 4587877fdebSMatt Macy mutex_enter(&zv->zv_state_lock); 4597877fdebSMatt Macy zv->zv_zso->zso_dying = B_TRUE; 4607877fdebSMatt Macy 4617877fdebSMatt Macy if (zv->zv_open_count) 4627877fdebSMatt Macy msleep(zv, &zv->zv_state_lock, 4637877fdebSMatt Macy PRIBIO, "zvol:dying", 10*hz); 4647877fdebSMatt Macy mutex_exit(&zv->zv_state_lock); 4657877fdebSMatt Macy } 4667877fdebSMatt Macy 4677877fdebSMatt Macy 468eda14cbcSMatt Macy static int 469eda14cbcSMatt Macy zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace) 470eda14cbcSMatt Macy { 471eda14cbcSMatt Macy int count, error, flags; 472eda14cbcSMatt Macy 473eda14cbcSMatt Macy g_topology_assert(); 474eda14cbcSMatt Macy 475eda14cbcSMatt Macy /* 476eda14cbcSMatt Macy * To make it easier we expect either open or close, but not both 477eda14cbcSMatt Macy * at the same time. 478eda14cbcSMatt Macy */ 479eda14cbcSMatt Macy KASSERT((acr >= 0 && acw >= 0 && ace >= 0) || 480eda14cbcSMatt Macy (acr <= 0 && acw <= 0 && ace <= 0), 481eda14cbcSMatt Macy ("Unsupported access request to %s (acr=%d, acw=%d, ace=%d).", 482eda14cbcSMatt Macy pp->name, acr, acw, ace)); 483eda14cbcSMatt Macy 484eda14cbcSMatt Macy if (pp->private == NULL) { 485eda14cbcSMatt Macy if (acr <= 0 && acw <= 0 && ace <= 0) 486eda14cbcSMatt Macy return (0); 487eda14cbcSMatt Macy return (pp->error); 488eda14cbcSMatt Macy } 489eda14cbcSMatt Macy 490eda14cbcSMatt Macy /* 491eda14cbcSMatt Macy * We don't pass FEXCL flag to zvol_geom_open()/zvol_geom_close() if 492eda14cbcSMatt Macy * ace != 0, because GEOM already handles that and handles it a bit 493eda14cbcSMatt Macy * differently. GEOM allows for multiple read/exclusive consumers and 494eda14cbcSMatt Macy * ZFS allows only one exclusive consumer, no matter if it is reader or 495eda14cbcSMatt Macy * writer. I like better the way GEOM works so I'll leave it for GEOM 496eda14cbcSMatt Macy * to decide what to do. 497eda14cbcSMatt Macy */ 498eda14cbcSMatt Macy 499eda14cbcSMatt Macy count = acr + acw + ace; 500eda14cbcSMatt Macy if (count == 0) 501eda14cbcSMatt Macy return (0); 502eda14cbcSMatt Macy 503eda14cbcSMatt Macy flags = 0; 504eda14cbcSMatt Macy if (acr != 0 || ace != 0) 505eda14cbcSMatt Macy flags |= FREAD; 506eda14cbcSMatt Macy if (acw != 0) 507eda14cbcSMatt Macy flags |= FWRITE; 508eda14cbcSMatt Macy 509eda14cbcSMatt Macy g_topology_unlock(); 510eda14cbcSMatt Macy if (count > 0) 511eda14cbcSMatt Macy error = zvol_geom_open(pp, flags, count); 512eda14cbcSMatt Macy else 513eda14cbcSMatt Macy error = zvol_geom_close(pp, flags, -count); 514eda14cbcSMatt Macy g_topology_lock(); 515eda14cbcSMatt Macy return (error); 516eda14cbcSMatt Macy } 517eda14cbcSMatt Macy 518eda14cbcSMatt Macy static void 519eda14cbcSMatt Macy zvol_geom_worker(void *arg) 520eda14cbcSMatt Macy { 521eda14cbcSMatt Macy zvol_state_t *zv = arg; 522eda14cbcSMatt Macy struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom; 523eda14cbcSMatt Macy struct bio *bp; 524eda14cbcSMatt Macy 5257877fdebSMatt Macy ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_GEOM); 526eda14cbcSMatt Macy 527eda14cbcSMatt Macy thread_lock(curthread); 528eda14cbcSMatt Macy sched_prio(curthread, PRIBIO); 529eda14cbcSMatt Macy thread_unlock(curthread); 530eda14cbcSMatt Macy 531eda14cbcSMatt Macy for (;;) { 532eda14cbcSMatt Macy mtx_lock(&zsg->zsg_queue_mtx); 533eda14cbcSMatt Macy bp = bioq_takefirst(&zsg->zsg_queue); 534eda14cbcSMatt Macy if (bp == NULL) { 535eda14cbcSMatt Macy if (zsg->zsg_state == ZVOL_GEOM_STOPPED) { 536eda14cbcSMatt Macy zsg->zsg_state = ZVOL_GEOM_RUNNING; 537eda14cbcSMatt Macy wakeup(&zsg->zsg_state); 538eda14cbcSMatt Macy mtx_unlock(&zsg->zsg_queue_mtx); 539eda14cbcSMatt Macy kthread_exit(); 540eda14cbcSMatt Macy } 541eda14cbcSMatt Macy msleep(&zsg->zsg_queue, &zsg->zsg_queue_mtx, 542eda14cbcSMatt Macy PRIBIO | PDROP, "zvol:io", 0); 543eda14cbcSMatt Macy continue; 544eda14cbcSMatt Macy } 545eda14cbcSMatt Macy mtx_unlock(&zsg->zsg_queue_mtx); 546eda14cbcSMatt Macy zvol_geom_bio_strategy(bp); 547eda14cbcSMatt Macy } 548eda14cbcSMatt Macy } 549eda14cbcSMatt Macy 550eda14cbcSMatt Macy static void 551eda14cbcSMatt Macy zvol_geom_bio_start(struct bio *bp) 552eda14cbcSMatt Macy { 553eda14cbcSMatt Macy zvol_state_t *zv = bp->bio_to->private; 5547877fdebSMatt Macy struct zvol_state_geom *zsg; 555eda14cbcSMatt Macy boolean_t first; 556eda14cbcSMatt Macy 5577877fdebSMatt Macy if (zv == NULL) { 5587877fdebSMatt Macy g_io_deliver(bp, ENXIO); 5597877fdebSMatt Macy return; 5607877fdebSMatt Macy } 561eda14cbcSMatt Macy if (bp->bio_cmd == BIO_GETATTR) { 562eda14cbcSMatt Macy if (zvol_geom_bio_getattr(bp)) 563eda14cbcSMatt Macy g_io_deliver(bp, EOPNOTSUPP); 564eda14cbcSMatt Macy return; 565eda14cbcSMatt Macy } 566eda14cbcSMatt Macy 567eda14cbcSMatt Macy if (!THREAD_CAN_SLEEP()) { 5687877fdebSMatt Macy zsg = &zv->zv_zso->zso_geom; 569eda14cbcSMatt Macy mtx_lock(&zsg->zsg_queue_mtx); 570eda14cbcSMatt Macy first = (bioq_first(&zsg->zsg_queue) == NULL); 571eda14cbcSMatt Macy bioq_insert_tail(&zsg->zsg_queue, bp); 572eda14cbcSMatt Macy mtx_unlock(&zsg->zsg_queue_mtx); 573eda14cbcSMatt Macy if (first) 574eda14cbcSMatt Macy wakeup_one(&zsg->zsg_queue); 575eda14cbcSMatt Macy return; 576eda14cbcSMatt Macy } 577eda14cbcSMatt Macy 578eda14cbcSMatt Macy zvol_geom_bio_strategy(bp); 579eda14cbcSMatt Macy } 580eda14cbcSMatt Macy 581eda14cbcSMatt Macy static int 582eda14cbcSMatt Macy zvol_geom_bio_getattr(struct bio *bp) 583eda14cbcSMatt Macy { 584eda14cbcSMatt Macy zvol_state_t *zv; 585eda14cbcSMatt Macy 586eda14cbcSMatt Macy zv = bp->bio_to->private; 5877877fdebSMatt Macy ASSERT3P(zv, !=, NULL); 588eda14cbcSMatt Macy 589eda14cbcSMatt Macy spa_t *spa = dmu_objset_spa(zv->zv_objset); 590eda14cbcSMatt Macy uint64_t refd, avail, usedobjs, availobjs; 591eda14cbcSMatt Macy 592eda14cbcSMatt Macy if (g_handleattr_int(bp, "GEOM::candelete", 1)) 593eda14cbcSMatt Macy return (0); 594eda14cbcSMatt Macy if (strcmp(bp->bio_attribute, "blocksavail") == 0) { 595eda14cbcSMatt Macy dmu_objset_space(zv->zv_objset, &refd, &avail, 596eda14cbcSMatt Macy &usedobjs, &availobjs); 597eda14cbcSMatt Macy if (g_handleattr_off_t(bp, "blocksavail", avail / DEV_BSIZE)) 598eda14cbcSMatt Macy return (0); 599eda14cbcSMatt Macy } else if (strcmp(bp->bio_attribute, "blocksused") == 0) { 600eda14cbcSMatt Macy dmu_objset_space(zv->zv_objset, &refd, &avail, 601eda14cbcSMatt Macy &usedobjs, &availobjs); 602eda14cbcSMatt Macy if (g_handleattr_off_t(bp, "blocksused", refd / DEV_BSIZE)) 603eda14cbcSMatt Macy return (0); 604eda14cbcSMatt Macy } else if (strcmp(bp->bio_attribute, "poolblocksavail") == 0) { 605eda14cbcSMatt Macy avail = metaslab_class_get_space(spa_normal_class(spa)); 606eda14cbcSMatt Macy avail -= metaslab_class_get_alloc(spa_normal_class(spa)); 607eda14cbcSMatt Macy if (g_handleattr_off_t(bp, "poolblocksavail", 608eda14cbcSMatt Macy avail / DEV_BSIZE)) 609eda14cbcSMatt Macy return (0); 610eda14cbcSMatt Macy } else if (strcmp(bp->bio_attribute, "poolblocksused") == 0) { 611eda14cbcSMatt Macy refd = metaslab_class_get_alloc(spa_normal_class(spa)); 612eda14cbcSMatt Macy if (g_handleattr_off_t(bp, "poolblocksused", refd / DEV_BSIZE)) 613eda14cbcSMatt Macy return (0); 614eda14cbcSMatt Macy } 615eda14cbcSMatt Macy return (1); 616eda14cbcSMatt Macy } 617eda14cbcSMatt Macy 618eda14cbcSMatt Macy static void 619c7046f76SMartin Matuska zvol_filter_detach(struct knote *kn) 620c7046f76SMartin Matuska { 621c7046f76SMartin Matuska zvol_state_t *zv; 622c7046f76SMartin Matuska struct zvol_state_dev *zsd; 623c7046f76SMartin Matuska 624c7046f76SMartin Matuska zv = kn->kn_hook; 625c7046f76SMartin Matuska zsd = &zv->zv_zso->zso_dev; 626c7046f76SMartin Matuska 627c7046f76SMartin Matuska knlist_remove(&zsd->zsd_selinfo.si_note, kn, 0); 628c7046f76SMartin Matuska } 629c7046f76SMartin Matuska 630c7046f76SMartin Matuska static int 631c7046f76SMartin Matuska zvol_filter_vnode(struct knote *kn, long hint) 632c7046f76SMartin Matuska { 633c7046f76SMartin Matuska kn->kn_fflags |= kn->kn_sfflags & hint; 634c7046f76SMartin Matuska 635c7046f76SMartin Matuska return (kn->kn_fflags != 0); 636c7046f76SMartin Matuska } 637c7046f76SMartin Matuska 638c7046f76SMartin Matuska static int 639c7046f76SMartin Matuska zvol_cdev_kqfilter(struct cdev *dev, struct knote *kn) 640c7046f76SMartin Matuska { 641c7046f76SMartin Matuska zvol_state_t *zv; 642c7046f76SMartin Matuska struct zvol_state_dev *zsd; 643c7046f76SMartin Matuska 644c7046f76SMartin Matuska zv = dev->si_drv2; 645c7046f76SMartin Matuska zsd = &zv->zv_zso->zso_dev; 646c7046f76SMartin Matuska 647c7046f76SMartin Matuska if (kn->kn_filter != EVFILT_VNODE) 648c7046f76SMartin Matuska return (EINVAL); 649c7046f76SMartin Matuska 650c7046f76SMartin Matuska /* XXX: extend support for other NOTE_* events */ 651c7046f76SMartin Matuska if (kn->kn_sfflags != NOTE_ATTRIB) 652c7046f76SMartin Matuska return (EINVAL); 653c7046f76SMartin Matuska 654c7046f76SMartin Matuska kn->kn_fop = &zvol_filterops_vnode; 655c7046f76SMartin Matuska kn->kn_hook = zv; 656c7046f76SMartin Matuska knlist_add(&zsd->zsd_selinfo.si_note, kn, 0); 657c7046f76SMartin Matuska 658c7046f76SMartin Matuska return (0); 659c7046f76SMartin Matuska } 660c7046f76SMartin Matuska 661c7046f76SMartin Matuska static void 662eda14cbcSMatt Macy zvol_geom_bio_strategy(struct bio *bp) 663eda14cbcSMatt Macy { 664eda14cbcSMatt Macy zvol_state_t *zv; 665eda14cbcSMatt Macy uint64_t off, volsize; 666eda14cbcSMatt Macy size_t resid; 667eda14cbcSMatt Macy char *addr; 668eda14cbcSMatt Macy objset_t *os; 669eda14cbcSMatt Macy zfs_locked_range_t *lr; 670eda14cbcSMatt Macy int error = 0; 671eda14cbcSMatt Macy boolean_t doread = B_FALSE; 672eda14cbcSMatt Macy boolean_t is_dumpified; 673f8b1db88SMartin Matuska boolean_t commit; 674eda14cbcSMatt Macy 675eda14cbcSMatt Macy if (bp->bio_to) 676eda14cbcSMatt Macy zv = bp->bio_to->private; 677eda14cbcSMatt Macy else 678eda14cbcSMatt Macy zv = bp->bio_dev->si_drv2; 679eda14cbcSMatt Macy 680eda14cbcSMatt Macy if (zv == NULL) { 681eda14cbcSMatt Macy error = SET_ERROR(ENXIO); 682eda14cbcSMatt Macy goto out; 683eda14cbcSMatt Macy } 684eda14cbcSMatt Macy 685eda14cbcSMatt Macy rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); 686eda14cbcSMatt Macy 687ce4dcb97SMartin Matuska if (zv->zv_flags & ZVOL_REMOVING) { 688ce4dcb97SMartin Matuska error = SET_ERROR(ENXIO); 689ce4dcb97SMartin Matuska goto resume; 690ce4dcb97SMartin Matuska } 691ce4dcb97SMartin Matuska 692eda14cbcSMatt Macy switch (bp->bio_cmd) { 693eda14cbcSMatt Macy case BIO_READ: 694eda14cbcSMatt Macy doread = B_TRUE; 695eda14cbcSMatt Macy break; 696eda14cbcSMatt Macy case BIO_WRITE: 697eda14cbcSMatt Macy case BIO_FLUSH: 698eda14cbcSMatt Macy case BIO_DELETE: 699eda14cbcSMatt Macy if (zv->zv_flags & ZVOL_RDONLY) { 700eda14cbcSMatt Macy error = SET_ERROR(EROFS); 701eda14cbcSMatt Macy goto resume; 702eda14cbcSMatt Macy } 703eda14cbcSMatt Macy zvol_ensure_zilog(zv); 704eda14cbcSMatt Macy if (bp->bio_cmd == BIO_FLUSH) 705f8b1db88SMartin Matuska goto commit; 706eda14cbcSMatt Macy break; 707eda14cbcSMatt Macy default: 7087877fdebSMatt Macy error = SET_ERROR(EOPNOTSUPP); 709eda14cbcSMatt Macy goto resume; 710eda14cbcSMatt Macy } 711eda14cbcSMatt Macy 712eda14cbcSMatt Macy off = bp->bio_offset; 713eda14cbcSMatt Macy volsize = zv->zv_volsize; 714eda14cbcSMatt Macy 715eda14cbcSMatt Macy os = zv->zv_objset; 7167877fdebSMatt Macy ASSERT3P(os, !=, NULL); 717eda14cbcSMatt Macy 718eda14cbcSMatt Macy addr = bp->bio_data; 719eda14cbcSMatt Macy resid = bp->bio_length; 720eda14cbcSMatt Macy 721eac7052fSMatt Macy if (resid > 0 && off >= volsize) { 722eda14cbcSMatt Macy error = SET_ERROR(EIO); 723eda14cbcSMatt Macy goto resume; 724eda14cbcSMatt Macy } 725eda14cbcSMatt Macy 726eda14cbcSMatt Macy is_dumpified = B_FALSE; 727f8b1db88SMartin Matuska commit = !doread && !is_dumpified && 728eda14cbcSMatt Macy zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS; 729eda14cbcSMatt Macy 730eda14cbcSMatt Macy /* 731eda14cbcSMatt Macy * There must be no buffer changes when doing a dmu_sync() because 732eda14cbcSMatt Macy * we can't change the data whilst calculating the checksum. 733eda14cbcSMatt Macy */ 734eda14cbcSMatt Macy lr = zfs_rangelock_enter(&zv->zv_rangelock, off, resid, 735eda14cbcSMatt Macy doread ? RL_READER : RL_WRITER); 736eda14cbcSMatt Macy 737eda14cbcSMatt Macy if (bp->bio_cmd == BIO_DELETE) { 738eda14cbcSMatt Macy dmu_tx_t *tx = dmu_tx_create(zv->zv_objset); 739eda14cbcSMatt Macy error = dmu_tx_assign(tx, TXG_WAIT); 740eda14cbcSMatt Macy if (error != 0) { 741eda14cbcSMatt Macy dmu_tx_abort(tx); 742eda14cbcSMatt Macy } else { 743f8b1db88SMartin Matuska zvol_log_truncate(zv, tx, off, resid); 744eda14cbcSMatt Macy dmu_tx_commit(tx); 745eda14cbcSMatt Macy error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, 746eda14cbcSMatt Macy off, resid); 747eda14cbcSMatt Macy resid = 0; 748eda14cbcSMatt Macy } 749eda14cbcSMatt Macy goto unlock; 750eda14cbcSMatt Macy } 751eda14cbcSMatt Macy while (resid != 0 && off < volsize) { 752eda14cbcSMatt Macy size_t size = MIN(resid, zvol_maxphys); 753eda14cbcSMatt Macy if (doread) { 754eda14cbcSMatt Macy error = dmu_read(os, ZVOL_OBJ, off, size, addr, 755eda14cbcSMatt Macy DMU_READ_PREFETCH); 756eda14cbcSMatt Macy } else { 757eda14cbcSMatt Macy dmu_tx_t *tx = dmu_tx_create(os); 758eda14cbcSMatt Macy dmu_tx_hold_write_by_dnode(tx, zv->zv_dn, off, size); 759eda14cbcSMatt Macy error = dmu_tx_assign(tx, TXG_WAIT); 760eda14cbcSMatt Macy if (error) { 761eda14cbcSMatt Macy dmu_tx_abort(tx); 762eda14cbcSMatt Macy } else { 763eda14cbcSMatt Macy dmu_write(os, ZVOL_OBJ, off, size, addr, tx); 764f8b1db88SMartin Matuska zvol_log_write(zv, tx, off, size, commit); 765eda14cbcSMatt Macy dmu_tx_commit(tx); 766eda14cbcSMatt Macy } 767eda14cbcSMatt Macy } 768eda14cbcSMatt Macy if (error) { 769e92ffd9bSMartin Matuska /* Convert checksum errors into IO errors. */ 770eda14cbcSMatt Macy if (error == ECKSUM) 771eda14cbcSMatt Macy error = SET_ERROR(EIO); 772eda14cbcSMatt Macy break; 773eda14cbcSMatt Macy } 774eda14cbcSMatt Macy off += size; 775eda14cbcSMatt Macy addr += size; 776eda14cbcSMatt Macy resid -= size; 777eda14cbcSMatt Macy } 778eda14cbcSMatt Macy unlock: 779eda14cbcSMatt Macy zfs_rangelock_exit(lr); 780eda14cbcSMatt Macy 781eda14cbcSMatt Macy bp->bio_completed = bp->bio_length - resid; 782eda14cbcSMatt Macy if (bp->bio_completed < bp->bio_length && off > volsize) 7837877fdebSMatt Macy error = SET_ERROR(EINVAL); 784eda14cbcSMatt Macy 785eda14cbcSMatt Macy switch (bp->bio_cmd) { 786eda14cbcSMatt Macy case BIO_FLUSH: 787eda14cbcSMatt Macy break; 788eda14cbcSMatt Macy case BIO_READ: 789eda14cbcSMatt Macy dataset_kstats_update_read_kstats(&zv->zv_kstat, 790eda14cbcSMatt Macy bp->bio_completed); 791eda14cbcSMatt Macy break; 792eda14cbcSMatt Macy case BIO_WRITE: 793eda14cbcSMatt Macy dataset_kstats_update_write_kstats(&zv->zv_kstat, 794eda14cbcSMatt Macy bp->bio_completed); 795eda14cbcSMatt Macy break; 796eda14cbcSMatt Macy case BIO_DELETE: 797eda14cbcSMatt Macy break; 798eda14cbcSMatt Macy default: 799eda14cbcSMatt Macy break; 800eda14cbcSMatt Macy } 801eda14cbcSMatt Macy 802f8b1db88SMartin Matuska if (commit) { 803f8b1db88SMartin Matuska commit: 804eda14cbcSMatt Macy zil_commit(zv->zv_zilog, ZVOL_OBJ); 805eda14cbcSMatt Macy } 806eda14cbcSMatt Macy resume: 807eda14cbcSMatt Macy rw_exit(&zv->zv_suspend_lock); 808eda14cbcSMatt Macy out: 809eda14cbcSMatt Macy if (bp->bio_to) 810eda14cbcSMatt Macy g_io_deliver(bp, error); 811eda14cbcSMatt Macy else 812eda14cbcSMatt Macy biofinish(bp, NULL, error); 813eda14cbcSMatt Macy } 814eda14cbcSMatt Macy 815eda14cbcSMatt Macy /* 816eda14cbcSMatt Macy * Character device mode implementation 817eda14cbcSMatt Macy */ 818eda14cbcSMatt Macy 819eda14cbcSMatt Macy static int 820184c1b94SMartin Matuska zvol_cdev_read(struct cdev *dev, struct uio *uio_s, int ioflag) 821eda14cbcSMatt Macy { 822eda14cbcSMatt Macy zvol_state_t *zv; 823eda14cbcSMatt Macy uint64_t volsize; 824eda14cbcSMatt Macy zfs_locked_range_t *lr; 825eda14cbcSMatt Macy int error = 0; 826184c1b94SMartin Matuska zfs_uio_t uio; 827184c1b94SMartin Matuska 828184c1b94SMartin Matuska zfs_uio_init(&uio, uio_s); 829eda14cbcSMatt Macy 830eda14cbcSMatt Macy zv = dev->si_drv2; 831eda14cbcSMatt Macy 832eda14cbcSMatt Macy volsize = zv->zv_volsize; 833eda14cbcSMatt Macy /* 834eda14cbcSMatt Macy * uio_loffset == volsize isn't an error as 83516038816SMartin Matuska * it's required for EOF processing. 836eda14cbcSMatt Macy */ 837184c1b94SMartin Matuska if (zfs_uio_resid(&uio) > 0 && 838184c1b94SMartin Matuska (zfs_uio_offset(&uio) < 0 || zfs_uio_offset(&uio) > volsize)) 839eda14cbcSMatt Macy return (SET_ERROR(EIO)); 840eda14cbcSMatt Macy 841e639e0d2SMartin Matuska rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); 84216038816SMartin Matuska ssize_t start_resid = zfs_uio_resid(&uio); 843184c1b94SMartin Matuska lr = zfs_rangelock_enter(&zv->zv_rangelock, zfs_uio_offset(&uio), 844184c1b94SMartin Matuska zfs_uio_resid(&uio), RL_READER); 845184c1b94SMartin Matuska while (zfs_uio_resid(&uio) > 0 && zfs_uio_offset(&uio) < volsize) { 846184c1b94SMartin Matuska uint64_t bytes = MIN(zfs_uio_resid(&uio), DMU_MAX_ACCESS >> 1); 847eda14cbcSMatt Macy 848e92ffd9bSMartin Matuska /* Don't read past the end. */ 849184c1b94SMartin Matuska if (bytes > volsize - zfs_uio_offset(&uio)) 850184c1b94SMartin Matuska bytes = volsize - zfs_uio_offset(&uio); 851eda14cbcSMatt Macy 852184c1b94SMartin Matuska error = dmu_read_uio_dnode(zv->zv_dn, &uio, bytes); 853eda14cbcSMatt Macy if (error) { 854e92ffd9bSMartin Matuska /* Convert checksum errors into IO errors. */ 855eda14cbcSMatt Macy if (error == ECKSUM) 856eda14cbcSMatt Macy error = SET_ERROR(EIO); 857eda14cbcSMatt Macy break; 858eda14cbcSMatt Macy } 859eda14cbcSMatt Macy } 860eda14cbcSMatt Macy zfs_rangelock_exit(lr); 86116038816SMartin Matuska int64_t nread = start_resid - zfs_uio_resid(&uio); 86216038816SMartin Matuska dataset_kstats_update_read_kstats(&zv->zv_kstat, nread); 863e639e0d2SMartin Matuska rw_exit(&zv->zv_suspend_lock); 864eda14cbcSMatt Macy 865eda14cbcSMatt Macy return (error); 866eda14cbcSMatt Macy } 867eda14cbcSMatt Macy 868eda14cbcSMatt Macy static int 869184c1b94SMartin Matuska zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag) 870eda14cbcSMatt Macy { 871eda14cbcSMatt Macy zvol_state_t *zv; 872eda14cbcSMatt Macy uint64_t volsize; 873eda14cbcSMatt Macy zfs_locked_range_t *lr; 874eda14cbcSMatt Macy int error = 0; 875f8b1db88SMartin Matuska boolean_t commit; 876184c1b94SMartin Matuska zfs_uio_t uio; 877eda14cbcSMatt Macy 878eda14cbcSMatt Macy zv = dev->si_drv2; 879eda14cbcSMatt Macy 880eda14cbcSMatt Macy volsize = zv->zv_volsize; 881eda14cbcSMatt Macy 882184c1b94SMartin Matuska zfs_uio_init(&uio, uio_s); 883184c1b94SMartin Matuska 884184c1b94SMartin Matuska if (zfs_uio_resid(&uio) > 0 && 885184c1b94SMartin Matuska (zfs_uio_offset(&uio) < 0 || zfs_uio_offset(&uio) > volsize)) 886eda14cbcSMatt Macy return (SET_ERROR(EIO)); 887eda14cbcSMatt Macy 88816038816SMartin Matuska ssize_t start_resid = zfs_uio_resid(&uio); 889f8b1db88SMartin Matuska commit = (ioflag & IO_SYNC) || 890eda14cbcSMatt Macy (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS); 891eda14cbcSMatt Macy 892eda14cbcSMatt Macy rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); 893eda14cbcSMatt Macy zvol_ensure_zilog(zv); 894eda14cbcSMatt Macy 895184c1b94SMartin Matuska lr = zfs_rangelock_enter(&zv->zv_rangelock, zfs_uio_offset(&uio), 896184c1b94SMartin Matuska zfs_uio_resid(&uio), RL_WRITER); 897184c1b94SMartin Matuska while (zfs_uio_resid(&uio) > 0 && zfs_uio_offset(&uio) < volsize) { 898184c1b94SMartin Matuska uint64_t bytes = MIN(zfs_uio_resid(&uio), DMU_MAX_ACCESS >> 1); 899184c1b94SMartin Matuska uint64_t off = zfs_uio_offset(&uio); 900eda14cbcSMatt Macy dmu_tx_t *tx = dmu_tx_create(zv->zv_objset); 901eda14cbcSMatt Macy 902e92ffd9bSMartin Matuska if (bytes > volsize - off) /* Don't write past the end. */ 903eda14cbcSMatt Macy bytes = volsize - off; 904eda14cbcSMatt Macy 905eda14cbcSMatt Macy dmu_tx_hold_write_by_dnode(tx, zv->zv_dn, off, bytes); 906eda14cbcSMatt Macy error = dmu_tx_assign(tx, TXG_WAIT); 907eda14cbcSMatt Macy if (error) { 908eda14cbcSMatt Macy dmu_tx_abort(tx); 909eda14cbcSMatt Macy break; 910eda14cbcSMatt Macy } 911184c1b94SMartin Matuska error = dmu_write_uio_dnode(zv->zv_dn, &uio, bytes, tx); 912eda14cbcSMatt Macy if (error == 0) 913f8b1db88SMartin Matuska zvol_log_write(zv, tx, off, bytes, commit); 914eda14cbcSMatt Macy dmu_tx_commit(tx); 915eda14cbcSMatt Macy 916eda14cbcSMatt Macy if (error) 917eda14cbcSMatt Macy break; 918eda14cbcSMatt Macy } 919eda14cbcSMatt Macy zfs_rangelock_exit(lr); 92016038816SMartin Matuska int64_t nwritten = start_resid - zfs_uio_resid(&uio); 92116038816SMartin Matuska dataset_kstats_update_write_kstats(&zv->zv_kstat, nwritten); 922f8b1db88SMartin Matuska if (commit) 923eda14cbcSMatt Macy zil_commit(zv->zv_zilog, ZVOL_OBJ); 924eda14cbcSMatt Macy rw_exit(&zv->zv_suspend_lock); 925*7a7741afSMartin Matuska 926eda14cbcSMatt Macy return (error); 927eda14cbcSMatt Macy } 928eda14cbcSMatt Macy 929eda14cbcSMatt Macy static int 930eda14cbcSMatt Macy zvol_cdev_open(struct cdev *dev, int flags, int fmt, struct thread *td) 931eda14cbcSMatt Macy { 932eda14cbcSMatt Macy zvol_state_t *zv; 933eda14cbcSMatt Macy int err = 0; 9347877fdebSMatt Macy boolean_t drop_suspend = B_FALSE; 935eda14cbcSMatt Macy 9367877fdebSMatt Macy retry: 937eda14cbcSMatt Macy rw_enter(&zvol_state_lock, ZVOL_RW_READER); 938e92ffd9bSMartin Matuska /* 939e92ffd9bSMartin Matuska * Obtain a copy of si_drv2 under zvol_state_lock to make sure either 940e92ffd9bSMartin Matuska * the result of zvol free code setting si_drv2 to NULL is observed, 941e92ffd9bSMartin Matuska * or the zv is protected from being freed because of the positive 942e92ffd9bSMartin Matuska * zv_open_count. 943e92ffd9bSMartin Matuska */ 944eda14cbcSMatt Macy zv = dev->si_drv2; 945eda14cbcSMatt Macy if (zv == NULL) { 946eda14cbcSMatt Macy rw_exit(&zvol_state_lock); 9477877fdebSMatt Macy err = SET_ERROR(ENXIO); 9487877fdebSMatt Macy goto out_locked; 949eda14cbcSMatt Macy } 950eda14cbcSMatt Macy 951eda14cbcSMatt Macy mutex_enter(&zv->zv_state_lock); 952e92ffd9bSMartin Matuska if (zv->zv_zso->zso_dying) { 953e92ffd9bSMartin Matuska rw_exit(&zvol_state_lock); 954e92ffd9bSMartin Matuska err = SET_ERROR(ENXIO); 955e92ffd9bSMartin Matuska goto out_zv_locked; 956e92ffd9bSMartin Matuska } 9577877fdebSMatt Macy ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_DEV); 958eda14cbcSMatt Macy 959eda14cbcSMatt Macy /* 960e92ffd9bSMartin Matuska * Make sure zvol is not suspended during first open 961eda14cbcSMatt Macy * (hold zv_suspend_lock) and respect proper lock acquisition 962e92ffd9bSMartin Matuska * ordering - zv_suspend_lock before zv_state_lock. 963eda14cbcSMatt Macy */ 964eda14cbcSMatt Macy if (zv->zv_open_count == 0) { 9657877fdebSMatt Macy drop_suspend = B_TRUE; 966eda14cbcSMatt Macy if (!rw_tryenter(&zv->zv_suspend_lock, ZVOL_RW_READER)) { 967eda14cbcSMatt Macy mutex_exit(&zv->zv_state_lock); 968eda14cbcSMatt Macy rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); 969eda14cbcSMatt Macy mutex_enter(&zv->zv_state_lock); 970e92ffd9bSMartin Matuska /* Check to see if zv_suspend_lock is needed. */ 971eda14cbcSMatt Macy if (zv->zv_open_count != 0) { 972eda14cbcSMatt Macy rw_exit(&zv->zv_suspend_lock); 973eda14cbcSMatt Macy drop_suspend = B_FALSE; 974eda14cbcSMatt Macy } 975eda14cbcSMatt Macy } 976eda14cbcSMatt Macy } 977eda14cbcSMatt Macy rw_exit(&zvol_state_lock); 978eda14cbcSMatt Macy 979eda14cbcSMatt Macy ASSERT(MUTEX_HELD(&zv->zv_state_lock)); 980eda14cbcSMatt Macy 981eda14cbcSMatt Macy if (zv->zv_open_count == 0) { 982e92ffd9bSMartin Matuska boolean_t drop_namespace = B_FALSE; 983e92ffd9bSMartin Matuska 984eda14cbcSMatt Macy ASSERT(ZVOL_RW_READ_HELD(&zv->zv_suspend_lock)); 985e92ffd9bSMartin Matuska 986e92ffd9bSMartin Matuska /* 987e92ffd9bSMartin Matuska * Take spa_namespace_lock to prevent lock inversion when 988e92ffd9bSMartin Matuska * zvols from one pool are opened as vdevs in another. 989e92ffd9bSMartin Matuska */ 990e92ffd9bSMartin Matuska if (!mutex_owned(&spa_namespace_lock)) { 991e92ffd9bSMartin Matuska if (!mutex_tryenter(&spa_namespace_lock)) { 99247e46b11SRyan Moeller mutex_exit(&zv->zv_state_lock); 99347e46b11SRyan Moeller rw_exit(&zv->zv_suspend_lock); 99475e1fea6SMartin Matuska drop_suspend = B_FALSE; 995e92ffd9bSMartin Matuska kern_yield(PRI_USER); 996e92ffd9bSMartin Matuska goto retry; 997e92ffd9bSMartin Matuska } else { 998e92ffd9bSMartin Matuska drop_namespace = B_TRUE; 999e92ffd9bSMartin Matuska } 1000e92ffd9bSMartin Matuska } 1001eda14cbcSMatt Macy err = zvol_first_open(zv, !(flags & FWRITE)); 1002e92ffd9bSMartin Matuska if (drop_namespace) 1003e92ffd9bSMartin Matuska mutex_exit(&spa_namespace_lock); 1004eda14cbcSMatt Macy if (err) 10057877fdebSMatt Macy goto out_zv_locked; 1006eda14cbcSMatt Macy } 1007eda14cbcSMatt Macy 1008e92ffd9bSMartin Matuska ASSERT(MUTEX_HELD(&zv->zv_state_lock)); 1009e92ffd9bSMartin Matuska 1010eda14cbcSMatt Macy if ((flags & FWRITE) && (zv->zv_flags & ZVOL_RDONLY)) { 10117877fdebSMatt Macy err = SET_ERROR(EROFS); 1012eda14cbcSMatt Macy goto out_opened; 1013eda14cbcSMatt Macy } 1014eda14cbcSMatt Macy if (zv->zv_flags & ZVOL_EXCL) { 10157877fdebSMatt Macy err = SET_ERROR(EBUSY); 1016eda14cbcSMatt Macy goto out_opened; 1017eda14cbcSMatt Macy } 1018716fd348SMartin Matuska if (flags & O_EXCL) { 1019eda14cbcSMatt Macy if (zv->zv_open_count != 0) { 10207877fdebSMatt Macy err = SET_ERROR(EBUSY); 1021eda14cbcSMatt Macy goto out_opened; 1022eda14cbcSMatt Macy } 1023eda14cbcSMatt Macy zv->zv_flags |= ZVOL_EXCL; 1024eda14cbcSMatt Macy } 1025eda14cbcSMatt Macy 1026eda14cbcSMatt Macy zv->zv_open_count++; 1027eda14cbcSMatt Macy out_opened: 10287877fdebSMatt Macy if (zv->zv_open_count == 0) { 1029eda14cbcSMatt Macy zvol_last_close(zv); 10307877fdebSMatt Macy wakeup(zv); 10317877fdebSMatt Macy } 10327877fdebSMatt Macy out_zv_locked: 1033eda14cbcSMatt Macy mutex_exit(&zv->zv_state_lock); 10347877fdebSMatt Macy out_locked: 1035eda14cbcSMatt Macy if (drop_suspend) 1036eda14cbcSMatt Macy rw_exit(&zv->zv_suspend_lock); 10377877fdebSMatt Macy return (err); 1038eda14cbcSMatt Macy } 1039eda14cbcSMatt Macy 1040eda14cbcSMatt Macy static int 1041eda14cbcSMatt Macy zvol_cdev_close(struct cdev *dev, int flags, int fmt, struct thread *td) 1042eda14cbcSMatt Macy { 1043eda14cbcSMatt Macy zvol_state_t *zv; 1044eda14cbcSMatt Macy boolean_t drop_suspend = B_TRUE; 1045eda14cbcSMatt Macy 1046eda14cbcSMatt Macy rw_enter(&zvol_state_lock, ZVOL_RW_READER); 1047eda14cbcSMatt Macy zv = dev->si_drv2; 1048eda14cbcSMatt Macy if (zv == NULL) { 1049eda14cbcSMatt Macy rw_exit(&zvol_state_lock); 1050eda14cbcSMatt Macy return (SET_ERROR(ENXIO)); 1051eda14cbcSMatt Macy } 1052eda14cbcSMatt Macy 1053eda14cbcSMatt Macy mutex_enter(&zv->zv_state_lock); 1054eda14cbcSMatt Macy if (zv->zv_flags & ZVOL_EXCL) { 10557877fdebSMatt Macy ASSERT3U(zv->zv_open_count, ==, 1); 1056eda14cbcSMatt Macy zv->zv_flags &= ~ZVOL_EXCL; 1057eda14cbcSMatt Macy } 1058eda14cbcSMatt Macy 10597877fdebSMatt Macy ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_DEV); 1060eda14cbcSMatt Macy 1061eda14cbcSMatt Macy /* 1062eda14cbcSMatt Macy * If the open count is zero, this is a spurious close. 1063eda14cbcSMatt Macy * That indicates a bug in the kernel / DDI framework. 1064eda14cbcSMatt Macy */ 10657877fdebSMatt Macy ASSERT3U(zv->zv_open_count, >, 0); 1066eda14cbcSMatt Macy /* 1067e92ffd9bSMartin Matuska * Make sure zvol is not suspended during last close 1068eda14cbcSMatt Macy * (hold zv_suspend_lock) and respect proper lock acquisition 1069e92ffd9bSMartin Matuska * ordering - zv_suspend_lock before zv_state_lock. 1070eda14cbcSMatt Macy */ 1071eda14cbcSMatt Macy if (zv->zv_open_count == 1) { 1072eda14cbcSMatt Macy if (!rw_tryenter(&zv->zv_suspend_lock, ZVOL_RW_READER)) { 1073eda14cbcSMatt Macy mutex_exit(&zv->zv_state_lock); 1074eda14cbcSMatt Macy rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); 1075eda14cbcSMatt Macy mutex_enter(&zv->zv_state_lock); 1076e92ffd9bSMartin Matuska /* Check to see if zv_suspend_lock is needed. */ 1077eda14cbcSMatt Macy if (zv->zv_open_count != 1) { 1078eda14cbcSMatt Macy rw_exit(&zv->zv_suspend_lock); 1079eda14cbcSMatt Macy drop_suspend = B_FALSE; 1080eda14cbcSMatt Macy } 1081eda14cbcSMatt Macy } 1082eda14cbcSMatt Macy } else { 1083eda14cbcSMatt Macy drop_suspend = B_FALSE; 1084eda14cbcSMatt Macy } 1085eda14cbcSMatt Macy rw_exit(&zvol_state_lock); 1086eda14cbcSMatt Macy 1087eda14cbcSMatt Macy ASSERT(MUTEX_HELD(&zv->zv_state_lock)); 1088eda14cbcSMatt Macy 1089eda14cbcSMatt Macy /* 1090eda14cbcSMatt Macy * You may get multiple opens, but only one close. 1091eda14cbcSMatt Macy */ 1092eda14cbcSMatt Macy zv->zv_open_count--; 1093eda14cbcSMatt Macy 1094eda14cbcSMatt Macy if (zv->zv_open_count == 0) { 1095eda14cbcSMatt Macy ASSERT(ZVOL_RW_READ_HELD(&zv->zv_suspend_lock)); 1096eda14cbcSMatt Macy zvol_last_close(zv); 10977877fdebSMatt Macy wakeup(zv); 1098eda14cbcSMatt Macy } 1099eda14cbcSMatt Macy 1100eda14cbcSMatt Macy mutex_exit(&zv->zv_state_lock); 1101eda14cbcSMatt Macy 1102eda14cbcSMatt Macy if (drop_suspend) 1103eda14cbcSMatt Macy rw_exit(&zv->zv_suspend_lock); 1104eda14cbcSMatt Macy return (0); 1105eda14cbcSMatt Macy } 1106eda14cbcSMatt Macy 1107eda14cbcSMatt Macy static int 1108eda14cbcSMatt Macy zvol_cdev_ioctl(struct cdev *dev, ulong_t cmd, caddr_t data, 1109eda14cbcSMatt Macy int fflag, struct thread *td) 1110eda14cbcSMatt Macy { 1111eda14cbcSMatt Macy zvol_state_t *zv; 1112eda14cbcSMatt Macy zfs_locked_range_t *lr; 1113eda14cbcSMatt Macy off_t offset, length; 1114e92ffd9bSMartin Matuska int error; 1115eda14cbcSMatt Macy boolean_t sync; 1116eda14cbcSMatt Macy 1117eda14cbcSMatt Macy zv = dev->si_drv2; 1118eda14cbcSMatt Macy 1119eda14cbcSMatt Macy error = 0; 1120eda14cbcSMatt Macy KASSERT(zv->zv_open_count > 0, 1121eda14cbcSMatt Macy ("Device with zero access count in %s", __func__)); 1122eda14cbcSMatt Macy 1123eda14cbcSMatt Macy switch (cmd) { 1124eda14cbcSMatt Macy case DIOCGSECTORSIZE: 1125eda14cbcSMatt Macy *(uint32_t *)data = DEV_BSIZE; 1126eda14cbcSMatt Macy break; 1127eda14cbcSMatt Macy case DIOCGMEDIASIZE: 1128eda14cbcSMatt Macy *(off_t *)data = zv->zv_volsize; 1129eda14cbcSMatt Macy break; 1130eda14cbcSMatt Macy case DIOCGFLUSH: 1131eda14cbcSMatt Macy rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); 1132eda14cbcSMatt Macy if (zv->zv_zilog != NULL) 1133eda14cbcSMatt Macy zil_commit(zv->zv_zilog, ZVOL_OBJ); 1134eda14cbcSMatt Macy rw_exit(&zv->zv_suspend_lock); 1135eda14cbcSMatt Macy break; 1136eda14cbcSMatt Macy case DIOCGDELETE: 1137eda14cbcSMatt Macy if (!zvol_unmap_enabled) 1138eda14cbcSMatt Macy break; 1139eda14cbcSMatt Macy 1140eda14cbcSMatt Macy offset = ((off_t *)data)[0]; 1141eda14cbcSMatt Macy length = ((off_t *)data)[1]; 1142eda14cbcSMatt Macy if ((offset % DEV_BSIZE) != 0 || (length % DEV_BSIZE) != 0 || 1143eda14cbcSMatt Macy offset < 0 || offset >= zv->zv_volsize || 1144eda14cbcSMatt Macy length <= 0) { 1145eda14cbcSMatt Macy printf("%s: offset=%jd length=%jd\n", __func__, offset, 1146eda14cbcSMatt Macy length); 11477877fdebSMatt Macy error = SET_ERROR(EINVAL); 1148eda14cbcSMatt Macy break; 1149eda14cbcSMatt Macy } 1150eda14cbcSMatt Macy rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); 1151eda14cbcSMatt Macy zvol_ensure_zilog(zv); 1152eda14cbcSMatt Macy lr = zfs_rangelock_enter(&zv->zv_rangelock, offset, length, 1153eda14cbcSMatt Macy RL_WRITER); 1154eda14cbcSMatt Macy dmu_tx_t *tx = dmu_tx_create(zv->zv_objset); 1155eda14cbcSMatt Macy error = dmu_tx_assign(tx, TXG_WAIT); 1156eda14cbcSMatt Macy if (error != 0) { 1157eda14cbcSMatt Macy sync = FALSE; 1158eda14cbcSMatt Macy dmu_tx_abort(tx); 1159eda14cbcSMatt Macy } else { 1160eda14cbcSMatt Macy sync = (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS); 1161f8b1db88SMartin Matuska zvol_log_truncate(zv, tx, offset, length); 1162eda14cbcSMatt Macy dmu_tx_commit(tx); 1163eda14cbcSMatt Macy error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, 1164eda14cbcSMatt Macy offset, length); 1165eda14cbcSMatt Macy } 1166eda14cbcSMatt Macy zfs_rangelock_exit(lr); 1167eda14cbcSMatt Macy if (sync) 1168eda14cbcSMatt Macy zil_commit(zv->zv_zilog, ZVOL_OBJ); 1169eda14cbcSMatt Macy rw_exit(&zv->zv_suspend_lock); 1170eda14cbcSMatt Macy break; 1171eda14cbcSMatt Macy case DIOCGSTRIPESIZE: 1172eda14cbcSMatt Macy *(off_t *)data = zv->zv_volblocksize; 1173eda14cbcSMatt Macy break; 1174eda14cbcSMatt Macy case DIOCGSTRIPEOFFSET: 1175eda14cbcSMatt Macy *(off_t *)data = 0; 1176eda14cbcSMatt Macy break; 1177eda14cbcSMatt Macy case DIOCGATTR: { 1178eda14cbcSMatt Macy spa_t *spa = dmu_objset_spa(zv->zv_objset); 1179eda14cbcSMatt Macy struct diocgattr_arg *arg = (struct diocgattr_arg *)data; 1180eda14cbcSMatt Macy uint64_t refd, avail, usedobjs, availobjs; 1181eda14cbcSMatt Macy 1182eda14cbcSMatt Macy if (strcmp(arg->name, "GEOM::candelete") == 0) 1183eda14cbcSMatt Macy arg->value.i = 1; 1184eda14cbcSMatt Macy else if (strcmp(arg->name, "blocksavail") == 0) { 1185eda14cbcSMatt Macy dmu_objset_space(zv->zv_objset, &refd, &avail, 1186eda14cbcSMatt Macy &usedobjs, &availobjs); 1187eda14cbcSMatt Macy arg->value.off = avail / DEV_BSIZE; 1188eda14cbcSMatt Macy } else if (strcmp(arg->name, "blocksused") == 0) { 1189eda14cbcSMatt Macy dmu_objset_space(zv->zv_objset, &refd, &avail, 1190eda14cbcSMatt Macy &usedobjs, &availobjs); 1191eda14cbcSMatt Macy arg->value.off = refd / DEV_BSIZE; 1192eda14cbcSMatt Macy } else if (strcmp(arg->name, "poolblocksavail") == 0) { 1193eda14cbcSMatt Macy avail = metaslab_class_get_space(spa_normal_class(spa)); 1194eda14cbcSMatt Macy avail -= metaslab_class_get_alloc( 1195eda14cbcSMatt Macy spa_normal_class(spa)); 1196eda14cbcSMatt Macy arg->value.off = avail / DEV_BSIZE; 1197eda14cbcSMatt Macy } else if (strcmp(arg->name, "poolblocksused") == 0) { 1198eda14cbcSMatt Macy refd = metaslab_class_get_alloc(spa_normal_class(spa)); 1199eda14cbcSMatt Macy arg->value.off = refd / DEV_BSIZE; 1200eda14cbcSMatt Macy } else 12017877fdebSMatt Macy error = SET_ERROR(ENOIOCTL); 1202eda14cbcSMatt Macy break; 1203eda14cbcSMatt Macy } 1204eda14cbcSMatt Macy case FIOSEEKHOLE: 1205eda14cbcSMatt Macy case FIOSEEKDATA: { 1206eda14cbcSMatt Macy off_t *off = (off_t *)data; 1207eda14cbcSMatt Macy uint64_t noff; 1208eda14cbcSMatt Macy boolean_t hole; 1209eda14cbcSMatt Macy 1210eda14cbcSMatt Macy hole = (cmd == FIOSEEKHOLE); 1211eda14cbcSMatt Macy noff = *off; 12122a58b312SMartin Matuska lr = zfs_rangelock_enter(&zv->zv_rangelock, 0, UINT64_MAX, 12132a58b312SMartin Matuska RL_READER); 1214eda14cbcSMatt Macy error = dmu_offset_next(zv->zv_objset, ZVOL_OBJ, hole, &noff); 12152a58b312SMartin Matuska zfs_rangelock_exit(lr); 1216eda14cbcSMatt Macy *off = noff; 1217eda14cbcSMatt Macy break; 1218eda14cbcSMatt Macy } 1219eda14cbcSMatt Macy default: 12207877fdebSMatt Macy error = SET_ERROR(ENOIOCTL); 1221eda14cbcSMatt Macy } 1222eda14cbcSMatt Macy 1223eda14cbcSMatt Macy return (error); 1224eda14cbcSMatt Macy } 1225eda14cbcSMatt Macy 1226eda14cbcSMatt Macy /* 1227eda14cbcSMatt Macy * Misc. helpers 1228eda14cbcSMatt Macy */ 1229eda14cbcSMatt Macy 1230eda14cbcSMatt Macy static void 1231eda14cbcSMatt Macy zvol_ensure_zilog(zvol_state_t *zv) 1232eda14cbcSMatt Macy { 1233eda14cbcSMatt Macy ASSERT(ZVOL_RW_READ_HELD(&zv->zv_suspend_lock)); 1234eda14cbcSMatt Macy 1235eda14cbcSMatt Macy /* 1236eda14cbcSMatt Macy * Open a ZIL if this is the first time we have written to this 1237eda14cbcSMatt Macy * zvol. We protect zv->zv_zilog with zv_suspend_lock rather 1238eda14cbcSMatt Macy * than zv_state_lock so that we don't need to acquire an 1239eda14cbcSMatt Macy * additional lock in this path. 1240eda14cbcSMatt Macy */ 1241eda14cbcSMatt Macy if (zv->zv_zilog == NULL) { 1242eda14cbcSMatt Macy if (!rw_tryupgrade(&zv->zv_suspend_lock)) { 1243eda14cbcSMatt Macy rw_exit(&zv->zv_suspend_lock); 1244eda14cbcSMatt Macy rw_enter(&zv->zv_suspend_lock, RW_WRITER); 1245eda14cbcSMatt Macy } 1246eda14cbcSMatt Macy if (zv->zv_zilog == NULL) { 1247eda14cbcSMatt Macy zv->zv_zilog = zil_open(zv->zv_objset, 1248271171e0SMartin Matuska zvol_get_data, &zv->zv_kstat.dk_zil_sums); 1249eda14cbcSMatt Macy zv->zv_flags |= ZVOL_WRITTEN_TO; 1250c03c5b1cSMartin Matuska /* replay / destroy done in zvol_os_create_minor() */ 125116038816SMartin Matuska VERIFY0(zv->zv_zilog->zl_header->zh_flags & 125216038816SMartin Matuska ZIL_REPLAY_NEEDED); 1253eda14cbcSMatt Macy } 1254eda14cbcSMatt Macy rw_downgrade(&zv->zv_suspend_lock); 1255eda14cbcSMatt Macy } 1256eda14cbcSMatt Macy } 1257eda14cbcSMatt Macy 1258c03c5b1cSMartin Matuska boolean_t 1259c03c5b1cSMartin Matuska zvol_os_is_zvol(const char *device) 1260eda14cbcSMatt Macy { 1261eda14cbcSMatt Macy return (device && strncmp(device, ZVOL_DIR, strlen(ZVOL_DIR)) == 0); 1262eda14cbcSMatt Macy } 1263eda14cbcSMatt Macy 1264c03c5b1cSMartin Matuska void 1265c03c5b1cSMartin Matuska zvol_os_rename_minor(zvol_state_t *zv, const char *newname) 1266eda14cbcSMatt Macy { 1267eda14cbcSMatt Macy ASSERT(RW_LOCK_HELD(&zvol_state_lock)); 1268eda14cbcSMatt Macy ASSERT(MUTEX_HELD(&zv->zv_state_lock)); 1269eda14cbcSMatt Macy 1270e92ffd9bSMartin Matuska /* Move to a new hashtable entry. */ 1271b985c9caSMartin Matuska zv->zv_hash = zvol_name_hash(newname); 1272eda14cbcSMatt Macy hlist_del(&zv->zv_hlink); 1273eda14cbcSMatt Macy hlist_add_head(&zv->zv_hlink, ZVOL_HT_HEAD(zv->zv_hash)); 1274eda14cbcSMatt Macy 12757877fdebSMatt Macy if (zv->zv_volmode == ZFS_VOLMODE_GEOM) { 1276eda14cbcSMatt Macy struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom; 1277eda14cbcSMatt Macy struct g_provider *pp = zsg->zsg_provider; 1278eda14cbcSMatt Macy struct g_geom *gp; 1279eda14cbcSMatt Macy 1280eda14cbcSMatt Macy g_topology_lock(); 1281eda14cbcSMatt Macy gp = pp->geom; 12827877fdebSMatt Macy ASSERT3P(gp, !=, NULL); 1283eda14cbcSMatt Macy 1284eda14cbcSMatt Macy zsg->zsg_provider = NULL; 1285eda14cbcSMatt Macy g_wither_provider(pp, ENXIO); 1286eda14cbcSMatt Macy 1287eda14cbcSMatt Macy pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, newname); 1288eda14cbcSMatt Macy pp->flags |= G_PF_DIRECT_RECEIVE | G_PF_DIRECT_SEND; 1289eda14cbcSMatt Macy pp->sectorsize = DEV_BSIZE; 1290eda14cbcSMatt Macy pp->mediasize = zv->zv_volsize; 1291eda14cbcSMatt Macy pp->private = zv; 1292eda14cbcSMatt Macy zsg->zsg_provider = pp; 1293eda14cbcSMatt Macy g_error_provider(pp, 0); 1294eda14cbcSMatt Macy g_topology_unlock(); 12957877fdebSMatt Macy } else if (zv->zv_volmode == ZFS_VOLMODE_DEV) { 1296eda14cbcSMatt Macy struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev; 1297eda14cbcSMatt Macy struct cdev *dev; 1298eda14cbcSMatt Macy struct make_dev_args args; 1299eda14cbcSMatt Macy 1300eda14cbcSMatt Macy dev = zsd->zsd_cdev; 1301eda14cbcSMatt Macy if (dev != NULL) { 1302eda14cbcSMatt Macy destroy_dev(dev); 1303eda14cbcSMatt Macy dev = zsd->zsd_cdev = NULL; 1304eda14cbcSMatt Macy if (zv->zv_open_count > 0) { 1305eda14cbcSMatt Macy zv->zv_flags &= ~ZVOL_EXCL; 1306eda14cbcSMatt Macy zv->zv_open_count = 0; 1307eda14cbcSMatt Macy /* XXX need suspend lock but lock order */ 1308eda14cbcSMatt Macy zvol_last_close(zv); 1309eda14cbcSMatt Macy } 1310eda14cbcSMatt Macy } 1311eda14cbcSMatt Macy 1312eda14cbcSMatt Macy make_dev_args_init(&args); 1313eda14cbcSMatt Macy args.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK; 1314eda14cbcSMatt Macy args.mda_devsw = &zvol_cdevsw; 1315eda14cbcSMatt Macy args.mda_cr = NULL; 1316eda14cbcSMatt Macy args.mda_uid = UID_ROOT; 1317eda14cbcSMatt Macy args.mda_gid = GID_OPERATOR; 1318eda14cbcSMatt Macy args.mda_mode = 0640; 1319eda14cbcSMatt Macy args.mda_si_drv2 = zv; 1320eda14cbcSMatt Macy if (make_dev_s(&args, &dev, "%s/%s", ZVOL_DRIVER, newname) 1321eda14cbcSMatt Macy == 0) { 1322cd853791SKonstantin Belousov dev->si_iosize_max = maxphys; 1323eda14cbcSMatt Macy zsd->zsd_cdev = dev; 1324eda14cbcSMatt Macy } 1325eda14cbcSMatt Macy } 1326eda14cbcSMatt Macy strlcpy(zv->zv_name, newname, sizeof (zv->zv_name)); 132714c2e0a0SMartin Matuska dataset_kstats_rename(&zv->zv_kstat, newname); 1328eda14cbcSMatt Macy } 1329eda14cbcSMatt Macy 1330eda14cbcSMatt Macy /* 1331eda14cbcSMatt Macy * Remove minor node for the specified volume. 1332eda14cbcSMatt Macy */ 1333c03c5b1cSMartin Matuska void 1334c03c5b1cSMartin Matuska zvol_os_free(zvol_state_t *zv) 1335eda14cbcSMatt Macy { 1336eda14cbcSMatt Macy ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock)); 1337eda14cbcSMatt Macy ASSERT(!MUTEX_HELD(&zv->zv_state_lock)); 13387877fdebSMatt Macy ASSERT0(zv->zv_open_count); 1339eda14cbcSMatt Macy 1340eda14cbcSMatt Macy ZFS_LOG(1, "ZVOL %s destroyed.", zv->zv_name); 1341eda14cbcSMatt Macy 1342eda14cbcSMatt Macy rw_destroy(&zv->zv_suspend_lock); 1343eda14cbcSMatt Macy zfs_rangelock_fini(&zv->zv_rangelock); 1344eda14cbcSMatt Macy 13457877fdebSMatt Macy if (zv->zv_volmode == ZFS_VOLMODE_GEOM) { 1346eda14cbcSMatt Macy struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom; 13477877fdebSMatt Macy struct g_provider *pp __maybe_unused = zsg->zsg_provider; 13487877fdebSMatt Macy 13497877fdebSMatt Macy ASSERT3P(pp->private, ==, NULL); 1350eda14cbcSMatt Macy 1351eda14cbcSMatt Macy g_topology_lock(); 1352eda14cbcSMatt Macy zvol_geom_destroy(zv); 1353eda14cbcSMatt Macy g_topology_unlock(); 1354eda14cbcSMatt Macy mtx_destroy(&zsg->zsg_queue_mtx); 13557877fdebSMatt Macy } else if (zv->zv_volmode == ZFS_VOLMODE_DEV) { 1356eda14cbcSMatt Macy struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev; 1357eda14cbcSMatt Macy struct cdev *dev = zsd->zsd_cdev; 1358eda14cbcSMatt Macy 13593f9d360cSMartin Matuska if (dev != NULL) { 13607877fdebSMatt Macy ASSERT3P(dev->si_drv2, ==, NULL); 1361eda14cbcSMatt Macy destroy_dev(dev); 1362c7046f76SMartin Matuska knlist_clear(&zsd->zsd_selinfo.si_note, 0); 1363c7046f76SMartin Matuska knlist_destroy(&zsd->zsd_selinfo.si_note); 1364eda14cbcSMatt Macy } 13653f9d360cSMartin Matuska } 1366eda14cbcSMatt Macy 1367eda14cbcSMatt Macy mutex_destroy(&zv->zv_state_lock); 1368ce4dcb97SMartin Matuska cv_destroy(&zv->zv_removing_cv); 1369eda14cbcSMatt Macy dataset_kstats_destroy(&zv->zv_kstat); 1370eda14cbcSMatt Macy kmem_free(zv->zv_zso, sizeof (struct zvol_state_os)); 1371eda14cbcSMatt Macy kmem_free(zv, sizeof (zvol_state_t)); 1372eda14cbcSMatt Macy zvol_minors--; 1373eda14cbcSMatt Macy } 1374eda14cbcSMatt Macy 1375eda14cbcSMatt Macy /* 1376eda14cbcSMatt Macy * Create a minor node (plus a whole lot more) for the specified volume. 1377eda14cbcSMatt Macy */ 1378c03c5b1cSMartin Matuska int 1379c03c5b1cSMartin Matuska zvol_os_create_minor(const char *name) 1380eda14cbcSMatt Macy { 1381eda14cbcSMatt Macy zvol_state_t *zv; 1382eda14cbcSMatt Macy objset_t *os; 1383eda14cbcSMatt Macy dmu_object_info_t *doi; 1384eda14cbcSMatt Macy uint64_t volsize; 1385eda14cbcSMatt Macy uint64_t volmode, hash; 1386eda14cbcSMatt Macy int error; 1387dbd5678dSMartin Matuska bool replayed_zil = B_FALSE; 1388eda14cbcSMatt Macy 1389eda14cbcSMatt Macy ZFS_LOG(1, "Creating ZVOL %s...", name); 1390eda14cbcSMatt Macy hash = zvol_name_hash(name); 1391eda14cbcSMatt Macy if ((zv = zvol_find_by_name_hash(name, hash, RW_NONE)) != NULL) { 1392eda14cbcSMatt Macy ASSERT(MUTEX_HELD(&zv->zv_state_lock)); 1393eda14cbcSMatt Macy mutex_exit(&zv->zv_state_lock); 1394eda14cbcSMatt Macy return (SET_ERROR(EEXIST)); 1395eda14cbcSMatt Macy } 1396eda14cbcSMatt Macy 1397eda14cbcSMatt Macy DROP_GIANT(); 13987877fdebSMatt Macy 1399eda14cbcSMatt Macy doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP); 1400eda14cbcSMatt Macy 1401e92ffd9bSMartin Matuska /* Lie and say we're read-only. */ 14027877fdebSMatt Macy error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, B_TRUE, FTAG, &os); 1403eda14cbcSMatt Macy if (error) 1404eda14cbcSMatt Macy goto out_doi; 1405eda14cbcSMatt Macy 1406eda14cbcSMatt Macy error = dmu_object_info(os, ZVOL_OBJ, doi); 1407eda14cbcSMatt Macy if (error) 1408eda14cbcSMatt Macy goto out_dmu_objset_disown; 1409eda14cbcSMatt Macy 1410eda14cbcSMatt Macy error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); 1411eda14cbcSMatt Macy if (error) 1412eda14cbcSMatt Macy goto out_dmu_objset_disown; 1413eda14cbcSMatt Macy 1414eda14cbcSMatt Macy error = dsl_prop_get_integer(name, 1415eda14cbcSMatt Macy zfs_prop_to_name(ZFS_PROP_VOLMODE), &volmode, NULL); 14167877fdebSMatt Macy if (error || volmode == ZFS_VOLMODE_DEFAULT) 1417eda14cbcSMatt Macy volmode = zvol_volmode; 14187877fdebSMatt Macy error = 0; 14197877fdebSMatt Macy 1420eda14cbcSMatt Macy /* 1421eda14cbcSMatt Macy * zvol_alloc equivalent ... 1422eda14cbcSMatt Macy */ 1423eda14cbcSMatt Macy zv = kmem_zalloc(sizeof (*zv), KM_SLEEP); 1424eda14cbcSMatt Macy zv->zv_hash = hash; 1425eda14cbcSMatt Macy mutex_init(&zv->zv_state_lock, NULL, MUTEX_DEFAULT, NULL); 1426ce4dcb97SMartin Matuska cv_init(&zv->zv_removing_cv, NULL, CV_DEFAULT, NULL); 1427eda14cbcSMatt Macy zv->zv_zso = kmem_zalloc(sizeof (struct zvol_state_os), KM_SLEEP); 14287877fdebSMatt Macy zv->zv_volmode = volmode; 14297877fdebSMatt Macy if (zv->zv_volmode == ZFS_VOLMODE_GEOM) { 1430eda14cbcSMatt Macy struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom; 1431eda14cbcSMatt Macy struct g_provider *pp; 1432eda14cbcSMatt Macy struct g_geom *gp; 1433eda14cbcSMatt Macy 1434eda14cbcSMatt Macy zsg->zsg_state = ZVOL_GEOM_UNINIT; 1435eda14cbcSMatt Macy mtx_init(&zsg->zsg_queue_mtx, "zvol", NULL, MTX_DEF); 1436eda14cbcSMatt Macy 1437eda14cbcSMatt Macy g_topology_lock(); 1438eda14cbcSMatt Macy gp = g_new_geomf(&zfs_zvol_class, "zfs::zvol::%s", name); 1439eda14cbcSMatt Macy gp->start = zvol_geom_bio_start; 1440eda14cbcSMatt Macy gp->access = zvol_geom_access; 1441eda14cbcSMatt Macy pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, name); 1442eda14cbcSMatt Macy pp->flags |= G_PF_DIRECT_RECEIVE | G_PF_DIRECT_SEND; 1443eda14cbcSMatt Macy pp->sectorsize = DEV_BSIZE; 1444eda14cbcSMatt Macy pp->mediasize = 0; 1445eda14cbcSMatt Macy pp->private = zv; 1446eda14cbcSMatt Macy 1447eda14cbcSMatt Macy zsg->zsg_provider = pp; 1448eda14cbcSMatt Macy bioq_init(&zsg->zsg_queue); 14497877fdebSMatt Macy } else if (zv->zv_volmode == ZFS_VOLMODE_DEV) { 1450eda14cbcSMatt Macy struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev; 1451eda14cbcSMatt Macy struct cdev *dev; 1452eda14cbcSMatt Macy struct make_dev_args args; 1453eda14cbcSMatt Macy 1454eda14cbcSMatt Macy make_dev_args_init(&args); 1455eda14cbcSMatt Macy args.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK; 1456eda14cbcSMatt Macy args.mda_devsw = &zvol_cdevsw; 1457eda14cbcSMatt Macy args.mda_cr = NULL; 1458eda14cbcSMatt Macy args.mda_uid = UID_ROOT; 1459eda14cbcSMatt Macy args.mda_gid = GID_OPERATOR; 1460eda14cbcSMatt Macy args.mda_mode = 0640; 1461eda14cbcSMatt Macy args.mda_si_drv2 = zv; 14623f9d360cSMartin Matuska if (make_dev_s(&args, &dev, "%s/%s", ZVOL_DRIVER, name) 14633f9d360cSMartin Matuska == 0) { 1464cd853791SKonstantin Belousov dev->si_iosize_max = maxphys; 1465eda14cbcSMatt Macy zsd->zsd_cdev = dev; 1466c7046f76SMartin Matuska knlist_init_sx(&zsd->zsd_selinfo.si_note, 1467c7046f76SMartin Matuska &zv->zv_state_lock); 1468eda14cbcSMatt Macy } 14693f9d360cSMartin Matuska } 1470eda14cbcSMatt Macy (void) strlcpy(zv->zv_name, name, MAXPATHLEN); 1471eda14cbcSMatt Macy rw_init(&zv->zv_suspend_lock, NULL, RW_DEFAULT, NULL); 1472eda14cbcSMatt Macy zfs_rangelock_init(&zv->zv_rangelock, NULL, NULL); 1473eda14cbcSMatt Macy 1474eda14cbcSMatt Macy if (dmu_objset_is_snapshot(os) || !spa_writeable(dmu_objset_spa(os))) 1475eda14cbcSMatt Macy zv->zv_flags |= ZVOL_RDONLY; 1476eda14cbcSMatt Macy 1477eda14cbcSMatt Macy zv->zv_volblocksize = doi->doi_data_block_size; 1478eda14cbcSMatt Macy zv->zv_volsize = volsize; 1479eda14cbcSMatt Macy zv->zv_objset = os; 1480eda14cbcSMatt Macy 1481271171e0SMartin Matuska ASSERT3P(zv->zv_kstat.dk_kstats, ==, NULL); 1482271171e0SMartin Matuska error = dataset_kstats_create(&zv->zv_kstat, zv->zv_objset); 1483271171e0SMartin Matuska if (error) 1484271171e0SMartin Matuska goto out_dmu_objset_disown; 14859db44a8eSMartin Matuska ASSERT3P(zv->zv_zilog, ==, NULL); 1486271171e0SMartin Matuska zv->zv_zilog = zil_open(os, zvol_get_data, &zv->zv_kstat.dk_zil_sums); 1487eda14cbcSMatt Macy if (spa_writeable(dmu_objset_spa(os))) { 1488eda14cbcSMatt Macy if (zil_replay_disable) 1489dbd5678dSMartin Matuska replayed_zil = zil_destroy(zv->zv_zilog, B_FALSE); 1490eda14cbcSMatt Macy else 1491dbd5678dSMartin Matuska replayed_zil = zil_replay(os, zv, zvol_replay_vector); 1492eda14cbcSMatt Macy } 1493dbd5678dSMartin Matuska if (replayed_zil) 14949db44a8eSMartin Matuska zil_close(zv->zv_zilog); 14959db44a8eSMartin Matuska zv->zv_zilog = NULL; 1496eda14cbcSMatt Macy 14977877fdebSMatt Macy /* TODO: prefetch for geom tasting */ 1498eda14cbcSMatt Macy 1499eda14cbcSMatt Macy zv->zv_objset = NULL; 1500eda14cbcSMatt Macy out_dmu_objset_disown: 1501eda14cbcSMatt Macy dmu_objset_disown(os, B_TRUE, FTAG); 1502eda14cbcSMatt Macy 15037877fdebSMatt Macy if (error == 0 && volmode == ZFS_VOLMODE_GEOM) { 1504eda14cbcSMatt Macy zvol_geom_run(zv); 1505eda14cbcSMatt Macy g_topology_unlock(); 1506eda14cbcSMatt Macy } 1507eda14cbcSMatt Macy out_doi: 1508eda14cbcSMatt Macy kmem_free(doi, sizeof (dmu_object_info_t)); 1509eda14cbcSMatt Macy if (error == 0) { 1510eda14cbcSMatt Macy rw_enter(&zvol_state_lock, RW_WRITER); 1511eda14cbcSMatt Macy zvol_insert(zv); 1512eda14cbcSMatt Macy zvol_minors++; 1513eda14cbcSMatt Macy rw_exit(&zvol_state_lock); 1514eda14cbcSMatt Macy ZFS_LOG(1, "ZVOL %s created.", name); 15157877fdebSMatt Macy } 1516eda14cbcSMatt Macy PICKUP_GIANT(); 1517eda14cbcSMatt Macy return (error); 1518eda14cbcSMatt Macy } 1519eda14cbcSMatt Macy 1520c03c5b1cSMartin Matuska void 1521c03c5b1cSMartin Matuska zvol_os_clear_private(zvol_state_t *zv) 1522eda14cbcSMatt Macy { 1523eda14cbcSMatt Macy ASSERT(RW_LOCK_HELD(&zvol_state_lock)); 15247877fdebSMatt Macy if (zv->zv_volmode == ZFS_VOLMODE_GEOM) { 1525eda14cbcSMatt Macy struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom; 1526eda14cbcSMatt Macy struct g_provider *pp = zsg->zsg_provider; 1527eda14cbcSMatt Macy 15287877fdebSMatt Macy if (pp->private == NULL) /* already cleared */ 1529eda14cbcSMatt Macy return; 1530eda14cbcSMatt Macy 1531eda14cbcSMatt Macy mtx_lock(&zsg->zsg_queue_mtx); 1532eda14cbcSMatt Macy zsg->zsg_state = ZVOL_GEOM_STOPPED; 1533eda14cbcSMatt Macy pp->private = NULL; 1534eda14cbcSMatt Macy wakeup_one(&zsg->zsg_queue); 1535eda14cbcSMatt Macy while (zsg->zsg_state != ZVOL_GEOM_RUNNING) 15367877fdebSMatt Macy msleep(&zsg->zsg_state, &zsg->zsg_queue_mtx, 1537eda14cbcSMatt Macy 0, "zvol:w", 0); 1538eda14cbcSMatt Macy mtx_unlock(&zsg->zsg_queue_mtx); 1539eda14cbcSMatt Macy ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock)); 15407877fdebSMatt Macy } else if (zv->zv_volmode == ZFS_VOLMODE_DEV) { 15417877fdebSMatt Macy struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev; 15427877fdebSMatt Macy struct cdev *dev = zsd->zsd_cdev; 15437877fdebSMatt Macy 15443f9d360cSMartin Matuska if (dev != NULL) 15457877fdebSMatt Macy dev->si_drv2 = NULL; 1546eda14cbcSMatt Macy } 1547eda14cbcSMatt Macy } 1548eda14cbcSMatt Macy 1549c03c5b1cSMartin Matuska int 1550c03c5b1cSMartin Matuska zvol_os_update_volsize(zvol_state_t *zv, uint64_t volsize) 1551eda14cbcSMatt Macy { 1552eda14cbcSMatt Macy zv->zv_volsize = volsize; 15537877fdebSMatt Macy if (zv->zv_volmode == ZFS_VOLMODE_GEOM) { 1554eda14cbcSMatt Macy struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom; 1555eda14cbcSMatt Macy struct g_provider *pp = zsg->zsg_provider; 1556eda14cbcSMatt Macy 1557eda14cbcSMatt Macy g_topology_lock(); 1558eda14cbcSMatt Macy 15597877fdebSMatt Macy if (pp->private == NULL) { 15607877fdebSMatt Macy g_topology_unlock(); 15617877fdebSMatt Macy return (SET_ERROR(ENXIO)); 15627877fdebSMatt Macy } 15637877fdebSMatt Macy 1564eda14cbcSMatt Macy /* 1565eda14cbcSMatt Macy * Do not invoke resize event when initial size was zero. 1566eda14cbcSMatt Macy * ZVOL initializes the size on first open, this is not 1567eda14cbcSMatt Macy * real resizing. 1568eda14cbcSMatt Macy */ 1569eda14cbcSMatt Macy if (pp->mediasize == 0) 1570eda14cbcSMatt Macy pp->mediasize = zv->zv_volsize; 1571eda14cbcSMatt Macy else 1572eda14cbcSMatt Macy g_resize_provider(pp, zv->zv_volsize); 1573eda14cbcSMatt Macy 1574eda14cbcSMatt Macy g_topology_unlock(); 1575c7046f76SMartin Matuska } else if (zv->zv_volmode == ZFS_VOLMODE_DEV) { 1576c7046f76SMartin Matuska struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev; 1577c7046f76SMartin Matuska 1578c7046f76SMartin Matuska KNOTE_UNLOCKED(&zsd->zsd_selinfo.si_note, NOTE_ATTRIB); 1579eda14cbcSMatt Macy } 1580eda14cbcSMatt Macy return (0); 1581eda14cbcSMatt Macy } 1582eda14cbcSMatt Macy 1583c03c5b1cSMartin Matuska void 1584c03c5b1cSMartin Matuska zvol_os_set_disk_ro(zvol_state_t *zv, int flags) 1585eda14cbcSMatt Macy { 1586eda14cbcSMatt Macy // XXX? set_disk_ro(zv->zv_zso->zvo_disk, flags); 1587eda14cbcSMatt Macy } 1588eda14cbcSMatt Macy 1589c03c5b1cSMartin Matuska void 1590c03c5b1cSMartin Matuska zvol_os_set_capacity(zvol_state_t *zv, uint64_t capacity) 1591eda14cbcSMatt Macy { 1592eda14cbcSMatt Macy // XXX? set_capacity(zv->zv_zso->zvo_disk, capacity); 1593eda14cbcSMatt Macy } 1594eda14cbcSMatt Macy 1595eda14cbcSMatt Macy /* 1596eda14cbcSMatt Macy * Public interfaces 1597eda14cbcSMatt Macy */ 1598eda14cbcSMatt Macy 1599eda14cbcSMatt Macy int 1600eda14cbcSMatt Macy zvol_busy(void) 1601eda14cbcSMatt Macy { 1602eda14cbcSMatt Macy return (zvol_minors != 0); 1603eda14cbcSMatt Macy } 1604eda14cbcSMatt Macy 1605eda14cbcSMatt Macy int 1606eda14cbcSMatt Macy zvol_init(void) 1607eda14cbcSMatt Macy { 1608eda14cbcSMatt Macy zvol_init_impl(); 1609eda14cbcSMatt Macy return (0); 1610eda14cbcSMatt Macy } 1611eda14cbcSMatt Macy 1612eda14cbcSMatt Macy void 1613eda14cbcSMatt Macy zvol_fini(void) 1614eda14cbcSMatt Macy { 1615eda14cbcSMatt Macy zvol_fini_impl(); 1616eda14cbcSMatt Macy } 1617