1*0Sstevel@tonic-gate /* 2*0Sstevel@tonic-gate * CDDL HEADER START 3*0Sstevel@tonic-gate * 4*0Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*0Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*0Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*0Sstevel@tonic-gate * with the License. 8*0Sstevel@tonic-gate * 9*0Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*0Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*0Sstevel@tonic-gate * See the License for the specific language governing permissions 12*0Sstevel@tonic-gate * and limitations under the License. 13*0Sstevel@tonic-gate * 14*0Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*0Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*0Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*0Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*0Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*0Sstevel@tonic-gate * 20*0Sstevel@tonic-gate * CDDL HEADER END 21*0Sstevel@tonic-gate */ 22*0Sstevel@tonic-gate /* 23*0Sstevel@tonic-gate * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24*0Sstevel@tonic-gate * Use is subject to license terms. 25*0Sstevel@tonic-gate */ 26*0Sstevel@tonic-gate 27*0Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 28*0Sstevel@tonic-gate 29*0Sstevel@tonic-gate #include <sys/debug.h> 30*0Sstevel@tonic-gate #include <sys/types.h> 31*0Sstevel@tonic-gate #include <sys/file.h> 32*0Sstevel@tonic-gate #include <sys/errno.h> 33*0Sstevel@tonic-gate #include <sys/uio.h> 34*0Sstevel@tonic-gate #include <sys/open.h> 35*0Sstevel@tonic-gate #include <sys/cred.h> 36*0Sstevel@tonic-gate #include <sys/kmem.h> 37*0Sstevel@tonic-gate #include <sys/conf.h> 38*0Sstevel@tonic-gate #include <sys/cmn_err.h> 39*0Sstevel@tonic-gate #include <sys/modctl.h> 40*0Sstevel@tonic-gate #include <sys/disp.h> 41*0Sstevel@tonic-gate #include <sys/atomic.h> 42*0Sstevel@tonic-gate #include <sys/filio.h> 43*0Sstevel@tonic-gate #include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */ 44*0Sstevel@tonic-gate #include <sys/kstat.h> 45*0Sstevel@tonic-gate 46*0Sstevel@tonic-gate #include <sys/ddi.h> 47*0Sstevel@tonic-gate #include <sys/devops.h> 48*0Sstevel@tonic-gate #include <sys/sunddi.h> 49*0Sstevel@tonic-gate #include <sys/priv_names.h> 50*0Sstevel@tonic-gate 51*0Sstevel@tonic-gate #include <sys/fssnap.h> 52*0Sstevel@tonic-gate #include <sys/fssnap_if.h> 53*0Sstevel@tonic-gate 54*0Sstevel@tonic-gate /* 55*0Sstevel@tonic-gate * This module implements the file system snapshot code, which provides a 56*0Sstevel@tonic-gate * point-in-time image of a file system for the purposes of online backup. 57*0Sstevel@tonic-gate * There are essentially two parts to this project: the driver half and the 58*0Sstevel@tonic-gate * file system half. The driver half is a pseudo device driver called 59*0Sstevel@tonic-gate * "fssnap" that represents the snapshot. Each snapshot is assigned a 60*0Sstevel@tonic-gate * number that corresponds to the minor number of the device, and a control 61*0Sstevel@tonic-gate * device with a high minor number is used to initiate snapshot creation and 62*0Sstevel@tonic-gate * deletion. For all practical purposes the driver half acts like a 63*0Sstevel@tonic-gate * read-only disk device whose contents are exactly the same as the master 64*0Sstevel@tonic-gate * file system at the time the snapshot was created. 65*0Sstevel@tonic-gate * 66*0Sstevel@tonic-gate * The file system half provides interfaces necessary for performing the 67*0Sstevel@tonic-gate * file system dependent operations required to create and delete snapshots 68*0Sstevel@tonic-gate * and a special driver strategy routine that must always be used by the file 69*0Sstevel@tonic-gate * system for snapshots to work correctly. 70*0Sstevel@tonic-gate * 71*0Sstevel@tonic-gate * When a snapshot is to be created, the user utility will send an ioctl to 72*0Sstevel@tonic-gate * the control device of the driver half specifying the file system to be 73*0Sstevel@tonic-gate * snapshotted, the file descriptor of a backing-store file which is used to 74*0Sstevel@tonic-gate * hold old data before it is overwritten, and other snapshot parameters. 75*0Sstevel@tonic-gate * This ioctl is passed on to the file system specified in the original 76*0Sstevel@tonic-gate * ioctl request. The file system is expected to be able to flush 77*0Sstevel@tonic-gate * everything out to make the file system consistent and lock it to ensure 78*0Sstevel@tonic-gate * no changes occur while the snapshot is being created. It then calls 79*0Sstevel@tonic-gate * fssnap_create() to create state for a new snapshot, from which an opaque 80*0Sstevel@tonic-gate * handle is returned with the snapshot locked. Next, the file system must 81*0Sstevel@tonic-gate * populate the "candidate bitmap", which tells the snapshot code which 82*0Sstevel@tonic-gate * "chunks" should be considered for copy-on-write (a chunk is the unit of 83*0Sstevel@tonic-gate * granularity used for copy-on-write, which is independent of the device 84*0Sstevel@tonic-gate * and file system block sizes). This is typically done by scanning the 85*0Sstevel@tonic-gate * file system allocation bitmaps to determine which chunks contain 86*0Sstevel@tonic-gate * allocated blocks in the file system at the time the snapshot was created. 87*0Sstevel@tonic-gate * If a chunk has no allocated blocks, it does not need to be copied before 88*0Sstevel@tonic-gate * being written to. Once the candidate bitmap is populated with 89*0Sstevel@tonic-gate * fssnap_set_candidate(), the file system calls fssnap_create_done() to 90*0Sstevel@tonic-gate * complete the snapshot creation and unlock the snapshot. The file system 91*0Sstevel@tonic-gate * may now be unlocked and modifications to it resumed. 92*0Sstevel@tonic-gate * 93*0Sstevel@tonic-gate * Once a snapshot is created, the file system must perform all writes 94*0Sstevel@tonic-gate * through a special strategy routine, fssnap_strategy(). This strategy 95*0Sstevel@tonic-gate * routine determines whether the chunks contained by the write must be 96*0Sstevel@tonic-gate * copied before being overwritten by consulting the candidate bitmap 97*0Sstevel@tonic-gate * described above, and the "hastrans bitmap" which tells it whether the chunk 98*0Sstevel@tonic-gate * has been copied already or not. If the chunk is a candidate but has not 99*0Sstevel@tonic-gate * been copied, it reads the old data in and adds it to a queue. The 100*0Sstevel@tonic-gate * old data can then be overwritten with the new data. An asynchronous 101*0Sstevel@tonic-gate * task queue is dispatched for each old chunk read in which writes the old 102*0Sstevel@tonic-gate * data to the backing file specified at snapshot creation time. The 103*0Sstevel@tonic-gate * backing file is a sparse file the same size as the file system that 104*0Sstevel@tonic-gate * contains the old data at the offset that data originally had in the 105*0Sstevel@tonic-gate * file system. If the queue containing in-memory chunks gets too large, 106*0Sstevel@tonic-gate * writes to the file system may be throttled by a semaphore until the 107*0Sstevel@tonic-gate * task queues have a chance to push some of the chunks to the backing file. 108*0Sstevel@tonic-gate * 109*0Sstevel@tonic-gate * With the candidate bitmap, the hastrans bitmap, the data on the master 110*0Sstevel@tonic-gate * file system, and the old data in memory and in the backing file, the 111*0Sstevel@tonic-gate * snapshot pseudo-driver can piece together the original file system 112*0Sstevel@tonic-gate * information to satisfy read requests. If the requested chunk is not a 113*0Sstevel@tonic-gate * candidate, it returns a zeroed buffer. If the chunk is a candidate but 114*0Sstevel@tonic-gate * has not been copied it reads it from the master file system. If it is a 115*0Sstevel@tonic-gate * candidate and has been copied, it either copies the data from the 116*0Sstevel@tonic-gate * in-memory queue or it reads it in from the backing file. The result is 117*0Sstevel@tonic-gate * a replication of the original file system that can be backed up, mounted, 118*0Sstevel@tonic-gate * or manipulated by other file system utilities that work on a read-only 119*0Sstevel@tonic-gate * device. 120*0Sstevel@tonic-gate * 121*0Sstevel@tonic-gate * This module is divided into three roughly logical sections: 122*0Sstevel@tonic-gate * 123*0Sstevel@tonic-gate * - The snapshot driver, which is a character/block driver 124*0Sstevel@tonic-gate * representing the snapshot itself. These routines are 125*0Sstevel@tonic-gate * prefixed with "snap_". 126*0Sstevel@tonic-gate * 127*0Sstevel@tonic-gate * - The library routines that are defined in fssnap_if.h that 128*0Sstevel@tonic-gate * are used by file systems that use this snapshot implementation. 129*0Sstevel@tonic-gate * These functions are prefixed with "fssnap_" and are called through 130*0Sstevel@tonic-gate * a function vector from the file system. 131*0Sstevel@tonic-gate * 132*0Sstevel@tonic-gate * - The helper routines used by the snapshot driver and the fssnap 133*0Sstevel@tonic-gate * library routines for managing the translation table and other 134*0Sstevel@tonic-gate * useful functions. These routines are all static and are 135*0Sstevel@tonic-gate * prefixed with either "fssnap_" or "transtbl_" if they 136*0Sstevel@tonic-gate * are specifically used for translation table activities. 137*0Sstevel@tonic-gate */ 138*0Sstevel@tonic-gate 139*0Sstevel@tonic-gate static dev_info_t *fssnap_dip = NULL; 140*0Sstevel@tonic-gate static struct snapshot_id *snapshot = NULL; 141*0Sstevel@tonic-gate static struct snapshot_id snap_ctl; 142*0Sstevel@tonic-gate static int num_snapshots = 0; 143*0Sstevel@tonic-gate static kmutex_t snapshot_mutex; 144*0Sstevel@tonic-gate static char snapname[] = SNAP_NAME; 145*0Sstevel@tonic-gate 146*0Sstevel@tonic-gate /* "tunable" parameters */ 147*0Sstevel@tonic-gate static int fssnap_taskq_nthreads = FSSNAP_TASKQ_THREADS; 148*0Sstevel@tonic-gate static uint_t fssnap_max_mem_chunks = FSSNAP_MAX_MEM_CHUNKS; 149*0Sstevel@tonic-gate static int fssnap_taskq_maxtasks = FSSNAP_TASKQ_MAXTASKS; 150*0Sstevel@tonic-gate 151*0Sstevel@tonic-gate /* static function prototypes */ 152*0Sstevel@tonic-gate 153*0Sstevel@tonic-gate /* snapshot driver */ 154*0Sstevel@tonic-gate static int snap_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 155*0Sstevel@tonic-gate static int snap_attach(dev_info_t *dip, ddi_attach_cmd_t cmd); 156*0Sstevel@tonic-gate static int snap_detach(dev_info_t *dip, ddi_detach_cmd_t cmd); 157*0Sstevel@tonic-gate static int snap_open(dev_t *devp, int flag, int otyp, cred_t *cred); 158*0Sstevel@tonic-gate static int snap_close(dev_t dev, int flag, int otyp, cred_t *cred); 159*0Sstevel@tonic-gate static int snap_strategy(struct buf *bp); 160*0Sstevel@tonic-gate static int snap_read(dev_t dev, struct uio *uiop, cred_t *credp); 161*0Sstevel@tonic-gate static int snap_print(dev_t dev, char *str); 162*0Sstevel@tonic-gate static int snap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, 163*0Sstevel@tonic-gate cred_t *credp, int *rvalp); 164*0Sstevel@tonic-gate static int snap_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, 165*0Sstevel@tonic-gate int flags, char *name, caddr_t valuep, int *lengthp); 166*0Sstevel@tonic-gate static int snap_getchunk(struct snapshot_id *sidp, chunknumber_t chunk, 167*0Sstevel@tonic-gate int offset, int len, char *buffer); 168*0Sstevel@tonic-gate 169*0Sstevel@tonic-gate 170*0Sstevel@tonic-gate /* fssnap interface implementations (see fssnap_if.h) */ 171*0Sstevel@tonic-gate static void fssnap_strategy_impl(void *, struct buf *); 172*0Sstevel@tonic-gate static void *fssnap_create_impl(chunknumber_t, uint_t, u_offset_t, 173*0Sstevel@tonic-gate struct vnode *, int, struct vnode **, char *, u_offset_t); 174*0Sstevel@tonic-gate static void fssnap_set_candidate_impl(void *, chunknumber_t); 175*0Sstevel@tonic-gate static int fssnap_is_candidate_impl(void *, u_offset_t); 176*0Sstevel@tonic-gate static int fssnap_create_done_impl(void *); 177*0Sstevel@tonic-gate static int fssnap_delete_impl(void *); 178*0Sstevel@tonic-gate 179*0Sstevel@tonic-gate /* fssnap interface support routines */ 180*0Sstevel@tonic-gate static int fssnap_translate(struct snapshot_id **, struct buf *); 181*0Sstevel@tonic-gate static void fssnap_write_taskq(void *); 182*0Sstevel@tonic-gate static void fssnap_create_kstats(snapshot_id_t *, int, const char *, 183*0Sstevel@tonic-gate const char *); 184*0Sstevel@tonic-gate static int fssnap_update_kstat_num(kstat_t *, int); 185*0Sstevel@tonic-gate static void fssnap_delete_kstats(struct cow_info *); 186*0Sstevel@tonic-gate 187*0Sstevel@tonic-gate /* translation table prototypes */ 188*0Sstevel@tonic-gate static cow_map_node_t *transtbl_add(cow_map_t *, chunknumber_t, caddr_t); 189*0Sstevel@tonic-gate static cow_map_node_t *transtbl_get(cow_map_t *, chunknumber_t); 190*0Sstevel@tonic-gate static void transtbl_delete(cow_map_t *, cow_map_node_t *); 191*0Sstevel@tonic-gate static void transtbl_free(cow_map_t *); 192*0Sstevel@tonic-gate 193*0Sstevel@tonic-gate static kstat_t *fssnap_highwater_kstat; 194*0Sstevel@tonic-gate 195*0Sstevel@tonic-gate /* ************************************************************************ */ 196*0Sstevel@tonic-gate 197*0Sstevel@tonic-gate /* Device and Module Structures */ 198*0Sstevel@tonic-gate 199*0Sstevel@tonic-gate static struct cb_ops snap_cb_ops = { 200*0Sstevel@tonic-gate snap_open, 201*0Sstevel@tonic-gate snap_close, 202*0Sstevel@tonic-gate snap_strategy, 203*0Sstevel@tonic-gate snap_print, 204*0Sstevel@tonic-gate nodev, /* no snap_dump */ 205*0Sstevel@tonic-gate snap_read, 206*0Sstevel@tonic-gate nodev, /* no snap_write */ 207*0Sstevel@tonic-gate snap_ioctl, 208*0Sstevel@tonic-gate nodev, /* no snap_devmap */ 209*0Sstevel@tonic-gate nodev, /* no snap_mmap */ 210*0Sstevel@tonic-gate nodev, /* no snap_segmap */ 211*0Sstevel@tonic-gate nochpoll, 212*0Sstevel@tonic-gate snap_prop_op, 213*0Sstevel@tonic-gate NULL, /* streamtab */ 214*0Sstevel@tonic-gate D_64BIT | D_NEW | D_MP, /* driver compatibility */ 215*0Sstevel@tonic-gate CB_REV, 216*0Sstevel@tonic-gate nodev, /* async I/O read entry point */ 217*0Sstevel@tonic-gate nodev /* async I/O write entry point */ 218*0Sstevel@tonic-gate }; 219*0Sstevel@tonic-gate 220*0Sstevel@tonic-gate static struct dev_ops snap_ops = { 221*0Sstevel@tonic-gate DEVO_REV, 222*0Sstevel@tonic-gate 0, /* ref count */ 223*0Sstevel@tonic-gate snap_getinfo, 224*0Sstevel@tonic-gate nulldev, /* snap_identify obsolete */ 225*0Sstevel@tonic-gate nulldev, /* no snap_probe */ 226*0Sstevel@tonic-gate snap_attach, 227*0Sstevel@tonic-gate snap_detach, 228*0Sstevel@tonic-gate nodev, /* no snap_reset */ 229*0Sstevel@tonic-gate &snap_cb_ops, 230*0Sstevel@tonic-gate (struct bus_ops *)NULL, 231*0Sstevel@tonic-gate nulldev /* no snap_power() */ 232*0Sstevel@tonic-gate }; 233*0Sstevel@tonic-gate 234*0Sstevel@tonic-gate extern struct mod_ops mod_driverops; 235*0Sstevel@tonic-gate 236*0Sstevel@tonic-gate static struct modldrv md = { 237*0Sstevel@tonic-gate &mod_driverops, /* Type of module. This is a driver */ 238*0Sstevel@tonic-gate "snapshot driver %I%", /* Name of the module */ 239*0Sstevel@tonic-gate &snap_ops, 240*0Sstevel@tonic-gate }; 241*0Sstevel@tonic-gate 242*0Sstevel@tonic-gate static struct modlinkage ml = { 243*0Sstevel@tonic-gate MODREV_1, 244*0Sstevel@tonic-gate &md, 245*0Sstevel@tonic-gate NULL 246*0Sstevel@tonic-gate }; 247*0Sstevel@tonic-gate 248*0Sstevel@tonic-gate static void *statep; 249*0Sstevel@tonic-gate 250*0Sstevel@tonic-gate int 251*0Sstevel@tonic-gate _init(void) 252*0Sstevel@tonic-gate { 253*0Sstevel@tonic-gate int error; 254*0Sstevel@tonic-gate kstat_t *ksp; 255*0Sstevel@tonic-gate kstat_named_t *ksdata; 256*0Sstevel@tonic-gate 257*0Sstevel@tonic-gate error = ddi_soft_state_init(&statep, sizeof (struct snapshot_id *), 1); 258*0Sstevel@tonic-gate if (error) { 259*0Sstevel@tonic-gate cmn_err(CE_WARN, "_init: failed to init ddi_soft_state."); 260*0Sstevel@tonic-gate return (error); 261*0Sstevel@tonic-gate } 262*0Sstevel@tonic-gate 263*0Sstevel@tonic-gate error = mod_install(&ml); 264*0Sstevel@tonic-gate 265*0Sstevel@tonic-gate if (error) { 266*0Sstevel@tonic-gate cmn_err(CE_WARN, "_init: failed to mod_install."); 267*0Sstevel@tonic-gate ddi_soft_state_fini(&statep); 268*0Sstevel@tonic-gate return (error); 269*0Sstevel@tonic-gate } 270*0Sstevel@tonic-gate 271*0Sstevel@tonic-gate /* 272*0Sstevel@tonic-gate * Fill in the snapshot operations vector for file systems 273*0Sstevel@tonic-gate * (defined in fssnap_if.c) 274*0Sstevel@tonic-gate */ 275*0Sstevel@tonic-gate 276*0Sstevel@tonic-gate snapops.fssnap_create = fssnap_create_impl; 277*0Sstevel@tonic-gate snapops.fssnap_set_candidate = fssnap_set_candidate_impl; 278*0Sstevel@tonic-gate snapops.fssnap_is_candidate = fssnap_is_candidate_impl; 279*0Sstevel@tonic-gate snapops.fssnap_create_done = fssnap_create_done_impl; 280*0Sstevel@tonic-gate snapops.fssnap_delete = fssnap_delete_impl; 281*0Sstevel@tonic-gate snapops.fssnap_strategy = fssnap_strategy_impl; 282*0Sstevel@tonic-gate 283*0Sstevel@tonic-gate mutex_init(&snapshot_mutex, NULL, MUTEX_DEFAULT, NULL); 284*0Sstevel@tonic-gate 285*0Sstevel@tonic-gate /* 286*0Sstevel@tonic-gate * Initialize the fssnap highwater kstat 287*0Sstevel@tonic-gate */ 288*0Sstevel@tonic-gate ksp = kstat_create(snapname, 0, FSSNAP_KSTAT_HIGHWATER, "misc", 289*0Sstevel@tonic-gate KSTAT_TYPE_NAMED, 1, 0); 290*0Sstevel@tonic-gate if (ksp != NULL) { 291*0Sstevel@tonic-gate ksdata = (kstat_named_t *)ksp->ks_data; 292*0Sstevel@tonic-gate kstat_named_init(ksdata, FSSNAP_KSTAT_HIGHWATER, 293*0Sstevel@tonic-gate KSTAT_DATA_UINT32); 294*0Sstevel@tonic-gate ksdata->value.ui32 = 0; 295*0Sstevel@tonic-gate kstat_install(ksp); 296*0Sstevel@tonic-gate } else { 297*0Sstevel@tonic-gate cmn_err(CE_WARN, "_init: failed to create highwater kstat."); 298*0Sstevel@tonic-gate } 299*0Sstevel@tonic-gate fssnap_highwater_kstat = ksp; 300*0Sstevel@tonic-gate 301*0Sstevel@tonic-gate return (0); 302*0Sstevel@tonic-gate } 303*0Sstevel@tonic-gate 304*0Sstevel@tonic-gate int 305*0Sstevel@tonic-gate _info(struct modinfo *modinfop) 306*0Sstevel@tonic-gate { 307*0Sstevel@tonic-gate return (mod_info(&ml, modinfop)); 308*0Sstevel@tonic-gate } 309*0Sstevel@tonic-gate 310*0Sstevel@tonic-gate int 311*0Sstevel@tonic-gate _fini(void) 312*0Sstevel@tonic-gate { 313*0Sstevel@tonic-gate int error; 314*0Sstevel@tonic-gate 315*0Sstevel@tonic-gate error = mod_remove(&ml); 316*0Sstevel@tonic-gate if (error) 317*0Sstevel@tonic-gate return (error); 318*0Sstevel@tonic-gate ddi_soft_state_fini(&statep); 319*0Sstevel@tonic-gate 320*0Sstevel@tonic-gate /* 321*0Sstevel@tonic-gate * delete the fssnap highwater kstat 322*0Sstevel@tonic-gate */ 323*0Sstevel@tonic-gate kstat_delete(fssnap_highwater_kstat); 324*0Sstevel@tonic-gate 325*0Sstevel@tonic-gate mutex_destroy(&snapshot_mutex); 326*0Sstevel@tonic-gate 327*0Sstevel@tonic-gate /* Clear out the file system operations vector */ 328*0Sstevel@tonic-gate snapops.fssnap_create = NULL; 329*0Sstevel@tonic-gate snapops.fssnap_set_candidate = NULL; 330*0Sstevel@tonic-gate snapops.fssnap_create_done = NULL; 331*0Sstevel@tonic-gate snapops.fssnap_delete = NULL; 332*0Sstevel@tonic-gate snapops.fssnap_strategy = NULL; 333*0Sstevel@tonic-gate 334*0Sstevel@tonic-gate return (0); 335*0Sstevel@tonic-gate } 336*0Sstevel@tonic-gate 337*0Sstevel@tonic-gate /* ************************************************************************ */ 338*0Sstevel@tonic-gate 339*0Sstevel@tonic-gate /* 340*0Sstevel@tonic-gate * Snapshot Driver Routines 341*0Sstevel@tonic-gate * 342*0Sstevel@tonic-gate * This section implements the snapshot character and block drivers. The 343*0Sstevel@tonic-gate * device will appear to be a consistent read-only file system to 344*0Sstevel@tonic-gate * applications that wish to back it up or mount it. The snapshot driver 345*0Sstevel@tonic-gate * communicates with the file system through the translation table, which 346*0Sstevel@tonic-gate * tells the snapshot driver where to find the data necessary to piece 347*0Sstevel@tonic-gate * together the frozen file system. The data may either be on the master 348*0Sstevel@tonic-gate * device (no translation exists), in memory (a translation exists but has 349*0Sstevel@tonic-gate * not been flushed to the backing store), or in the backing store file. 350*0Sstevel@tonic-gate * The read request may require the snapshot driver to retreive data from 351*0Sstevel@tonic-gate * several different places and piece it together to look like a single 352*0Sstevel@tonic-gate * contiguous read. 353*0Sstevel@tonic-gate * 354*0Sstevel@tonic-gate * The device minor number corresponds to the snapshot number in the list of 355*0Sstevel@tonic-gate * snapshot identifiers. The soft state for each minor number is simply a 356*0Sstevel@tonic-gate * pointer to the snapshot id, which holds all of the snapshot state. One 357*0Sstevel@tonic-gate * minor number is designated as the control device. All snapshot create 358*0Sstevel@tonic-gate * and delete requests go through the control device to ensure this module 359*0Sstevel@tonic-gate * is properly loaded and attached before the file system starts calling 360*0Sstevel@tonic-gate * routines defined here. 361*0Sstevel@tonic-gate */ 362*0Sstevel@tonic-gate 363*0Sstevel@tonic-gate 364*0Sstevel@tonic-gate /* 365*0Sstevel@tonic-gate * snap_getinfo() - snapshot driver getinfo(9E) routine 366*0Sstevel@tonic-gate * 367*0Sstevel@tonic-gate */ 368*0Sstevel@tonic-gate /*ARGSUSED*/ 369*0Sstevel@tonic-gate static int 370*0Sstevel@tonic-gate snap_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 371*0Sstevel@tonic-gate { 372*0Sstevel@tonic-gate switch (infocmd) { 373*0Sstevel@tonic-gate case DDI_INFO_DEVT2DEVINFO: 374*0Sstevel@tonic-gate *result = fssnap_dip; 375*0Sstevel@tonic-gate return (DDI_SUCCESS); 376*0Sstevel@tonic-gate case DDI_INFO_DEVT2INSTANCE: 377*0Sstevel@tonic-gate *result = 0; /* we only have one instance */ 378*0Sstevel@tonic-gate return (DDI_SUCCESS); 379*0Sstevel@tonic-gate } 380*0Sstevel@tonic-gate return (DDI_FAILURE); 381*0Sstevel@tonic-gate } 382*0Sstevel@tonic-gate 383*0Sstevel@tonic-gate /* 384*0Sstevel@tonic-gate * snap_attach() - snapshot driver attach(9E) routine 385*0Sstevel@tonic-gate * 386*0Sstevel@tonic-gate * sets up snapshot control device and control state. The control state 387*0Sstevel@tonic-gate * is a pointer to an "anonymous" snapshot_id for tracking opens and closes 388*0Sstevel@tonic-gate */ 389*0Sstevel@tonic-gate static int 390*0Sstevel@tonic-gate snap_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 391*0Sstevel@tonic-gate { 392*0Sstevel@tonic-gate int error; 393*0Sstevel@tonic-gate 394*0Sstevel@tonic-gate switch (cmd) { 395*0Sstevel@tonic-gate case DDI_ATTACH: 396*0Sstevel@tonic-gate /* create the control device */ 397*0Sstevel@tonic-gate error = ddi_create_priv_minor_node(dip, SNAP_CTL_NODE, S_IFCHR, 398*0Sstevel@tonic-gate SNAP_CTL_MINOR, DDI_PSEUDO, PRIVONLY_DEV, 399*0Sstevel@tonic-gate PRIV_SYS_CONFIG, PRIV_SYS_CONFIG, 0666); 400*0Sstevel@tonic-gate if (error == DDI_FAILURE) { 401*0Sstevel@tonic-gate return (DDI_FAILURE); 402*0Sstevel@tonic-gate } 403*0Sstevel@tonic-gate 404*0Sstevel@tonic-gate rw_init(&snap_ctl.sid_rwlock, NULL, RW_DEFAULT, NULL); 405*0Sstevel@tonic-gate rw_enter(&snap_ctl.sid_rwlock, RW_WRITER); 406*0Sstevel@tonic-gate fssnap_dip = dip; 407*0Sstevel@tonic-gate snap_ctl.sid_snapnumber = SNAP_CTL_MINOR; 408*0Sstevel@tonic-gate /* the control sid is not linked into the snapshot list */ 409*0Sstevel@tonic-gate snap_ctl.sid_next = NULL; 410*0Sstevel@tonic-gate snap_ctl.sid_cowinfo = NULL; 411*0Sstevel@tonic-gate snap_ctl.sid_flags = 0; 412*0Sstevel@tonic-gate rw_exit(&snap_ctl.sid_rwlock); 413*0Sstevel@tonic-gate ddi_report_dev(dip); 414*0Sstevel@tonic-gate 415*0Sstevel@tonic-gate return (DDI_SUCCESS); 416*0Sstevel@tonic-gate case DDI_PM_RESUME: 417*0Sstevel@tonic-gate return (DDI_SUCCESS); 418*0Sstevel@tonic-gate 419*0Sstevel@tonic-gate case DDI_RESUME: 420*0Sstevel@tonic-gate return (DDI_SUCCESS); 421*0Sstevel@tonic-gate 422*0Sstevel@tonic-gate default: 423*0Sstevel@tonic-gate return (DDI_FAILURE); 424*0Sstevel@tonic-gate } 425*0Sstevel@tonic-gate } 426*0Sstevel@tonic-gate 427*0Sstevel@tonic-gate /* 428*0Sstevel@tonic-gate * snap_detach() - snapshot driver detach(9E) routine 429*0Sstevel@tonic-gate * 430*0Sstevel@tonic-gate * destroys snapshot control device and control state. If any snapshots 431*0Sstevel@tonic-gate * are active (ie. num_snapshots != 0), the device will refuse to detach. 432*0Sstevel@tonic-gate */ 433*0Sstevel@tonic-gate static int 434*0Sstevel@tonic-gate snap_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 435*0Sstevel@tonic-gate { 436*0Sstevel@tonic-gate struct snapshot_id *sidp, *sidnextp; 437*0Sstevel@tonic-gate 438*0Sstevel@tonic-gate switch (cmd) { 439*0Sstevel@tonic-gate case DDI_DETACH: 440*0Sstevel@tonic-gate /* do not detach if the device is active */ 441*0Sstevel@tonic-gate mutex_enter(&snapshot_mutex); 442*0Sstevel@tonic-gate if ((num_snapshots != 0) || 443*0Sstevel@tonic-gate ((snap_ctl.sid_flags & SID_CHAR_BUSY) != 0)) { 444*0Sstevel@tonic-gate mutex_exit(&snapshot_mutex); 445*0Sstevel@tonic-gate return (DDI_FAILURE); 446*0Sstevel@tonic-gate } 447*0Sstevel@tonic-gate 448*0Sstevel@tonic-gate /* free up the snapshot list */ 449*0Sstevel@tonic-gate for (sidp = snapshot; sidp != NULL; sidp = sidnextp) { 450*0Sstevel@tonic-gate ASSERT(SID_AVAILABLE(sidp) && 451*0Sstevel@tonic-gate !RW_LOCK_HELD(&sidp->sid_rwlock)); 452*0Sstevel@tonic-gate sidnextp = sidp->sid_next; 453*0Sstevel@tonic-gate rw_destroy(&sidp->sid_rwlock); 454*0Sstevel@tonic-gate kmem_free(sidp, sizeof (struct snapshot_id)); 455*0Sstevel@tonic-gate } 456*0Sstevel@tonic-gate snapshot = NULL; 457*0Sstevel@tonic-gate 458*0Sstevel@tonic-gate /* delete the control device */ 459*0Sstevel@tonic-gate ddi_remove_minor_node(dip, SNAP_CTL_NODE); 460*0Sstevel@tonic-gate fssnap_dip = NULL; 461*0Sstevel@tonic-gate 462*0Sstevel@tonic-gate ASSERT((snap_ctl.sid_flags & SID_CHAR_BUSY) == 0); 463*0Sstevel@tonic-gate rw_destroy(&snap_ctl.sid_rwlock); 464*0Sstevel@tonic-gate mutex_exit(&snapshot_mutex); 465*0Sstevel@tonic-gate 466*0Sstevel@tonic-gate return (DDI_SUCCESS); 467*0Sstevel@tonic-gate 468*0Sstevel@tonic-gate default: 469*0Sstevel@tonic-gate return (DDI_FAILURE); 470*0Sstevel@tonic-gate } 471*0Sstevel@tonic-gate } 472*0Sstevel@tonic-gate 473*0Sstevel@tonic-gate /* 474*0Sstevel@tonic-gate * snap_open() - snapshot driver open(9E) routine 475*0Sstevel@tonic-gate * 476*0Sstevel@tonic-gate * marks the snapshot id as busy so it will not be recycled when deleted 477*0Sstevel@tonic-gate * until the snapshot is closed. 478*0Sstevel@tonic-gate */ 479*0Sstevel@tonic-gate /* ARGSUSED */ 480*0Sstevel@tonic-gate static int 481*0Sstevel@tonic-gate snap_open(dev_t *devp, int flag, int otyp, cred_t *cred) 482*0Sstevel@tonic-gate { 483*0Sstevel@tonic-gate minor_t minor; 484*0Sstevel@tonic-gate struct snapshot_id **sidpp, *sidp; 485*0Sstevel@tonic-gate 486*0Sstevel@tonic-gate /* snapshots are read-only */ 487*0Sstevel@tonic-gate if (flag & FWRITE) 488*0Sstevel@tonic-gate return (EROFS); 489*0Sstevel@tonic-gate 490*0Sstevel@tonic-gate minor = getminor(*devp); 491*0Sstevel@tonic-gate 492*0Sstevel@tonic-gate if (minor == SNAP_CTL_MINOR) { 493*0Sstevel@tonic-gate /* control device must be opened exclusively */ 494*0Sstevel@tonic-gate if (((flag & FEXCL) != FEXCL) || (otyp != OTYP_CHR)) 495*0Sstevel@tonic-gate return (EINVAL); 496*0Sstevel@tonic-gate 497*0Sstevel@tonic-gate rw_enter(&snap_ctl.sid_rwlock, RW_WRITER); 498*0Sstevel@tonic-gate if ((snap_ctl.sid_flags & SID_CHAR_BUSY) != 0) { 499*0Sstevel@tonic-gate rw_exit(&snap_ctl.sid_rwlock); 500*0Sstevel@tonic-gate return (EBUSY); 501*0Sstevel@tonic-gate } 502*0Sstevel@tonic-gate 503*0Sstevel@tonic-gate snap_ctl.sid_flags |= SID_CHAR_BUSY; 504*0Sstevel@tonic-gate rw_exit(&snap_ctl.sid_rwlock); 505*0Sstevel@tonic-gate 506*0Sstevel@tonic-gate return (0); 507*0Sstevel@tonic-gate } 508*0Sstevel@tonic-gate 509*0Sstevel@tonic-gate sidpp = ddi_get_soft_state(statep, minor); 510*0Sstevel@tonic-gate if (sidpp == NULL || *sidpp == NULL) 511*0Sstevel@tonic-gate return (ENXIO); 512*0Sstevel@tonic-gate sidp = *sidpp; 513*0Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_WRITER); 514*0Sstevel@tonic-gate 515*0Sstevel@tonic-gate if ((flag & FEXCL) && SID_BUSY(sidp)) { 516*0Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 517*0Sstevel@tonic-gate return (EAGAIN); 518*0Sstevel@tonic-gate } 519*0Sstevel@tonic-gate 520*0Sstevel@tonic-gate ASSERT(sidpp != NULL && sidp != NULL); 521*0Sstevel@tonic-gate /* check to see if this snapshot has been killed on us */ 522*0Sstevel@tonic-gate if (SID_INACTIVE(sidp)) { 523*0Sstevel@tonic-gate cmn_err(CE_WARN, "snap_open: snapshot %d does not exist.", 524*0Sstevel@tonic-gate minor); 525*0Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 526*0Sstevel@tonic-gate return (ENXIO); 527*0Sstevel@tonic-gate } 528*0Sstevel@tonic-gate 529*0Sstevel@tonic-gate switch (otyp) { 530*0Sstevel@tonic-gate case OTYP_CHR: 531*0Sstevel@tonic-gate sidp->sid_flags |= SID_CHAR_BUSY; 532*0Sstevel@tonic-gate break; 533*0Sstevel@tonic-gate case OTYP_BLK: 534*0Sstevel@tonic-gate sidp->sid_flags |= SID_BLOCK_BUSY; 535*0Sstevel@tonic-gate break; 536*0Sstevel@tonic-gate default: 537*0Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 538*0Sstevel@tonic-gate return (EINVAL); 539*0Sstevel@tonic-gate } 540*0Sstevel@tonic-gate 541*0Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 542*0Sstevel@tonic-gate 543*0Sstevel@tonic-gate /* 544*0Sstevel@tonic-gate * at this point if a valid snapshot was found then it has 545*0Sstevel@tonic-gate * been marked busy and we can use it. 546*0Sstevel@tonic-gate */ 547*0Sstevel@tonic-gate return (0); 548*0Sstevel@tonic-gate } 549*0Sstevel@tonic-gate 550*0Sstevel@tonic-gate /* 551*0Sstevel@tonic-gate * snap_close() - snapshot driver close(9E) routine 552*0Sstevel@tonic-gate * 553*0Sstevel@tonic-gate * unsets the busy bits in the snapshot id. If the snapshot has been 554*0Sstevel@tonic-gate * deleted while the snapshot device was open, the close call will clean 555*0Sstevel@tonic-gate * up the remaining state information. 556*0Sstevel@tonic-gate */ 557*0Sstevel@tonic-gate /* ARGSUSED */ 558*0Sstevel@tonic-gate static int 559*0Sstevel@tonic-gate snap_close(dev_t dev, int flag, int otyp, cred_t *cred) 560*0Sstevel@tonic-gate { 561*0Sstevel@tonic-gate struct snapshot_id **sidpp, *sidp; 562*0Sstevel@tonic-gate minor_t minor; 563*0Sstevel@tonic-gate char name[20]; 564*0Sstevel@tonic-gate 565*0Sstevel@tonic-gate minor = getminor(dev); 566*0Sstevel@tonic-gate 567*0Sstevel@tonic-gate /* if this is the control device, close it and return */ 568*0Sstevel@tonic-gate if (minor == SNAP_CTL_MINOR) { 569*0Sstevel@tonic-gate rw_enter(&snap_ctl.sid_rwlock, RW_WRITER); 570*0Sstevel@tonic-gate snap_ctl.sid_flags &= ~(SID_CHAR_BUSY); 571*0Sstevel@tonic-gate rw_exit(&snap_ctl.sid_rwlock); 572*0Sstevel@tonic-gate return (0); 573*0Sstevel@tonic-gate } 574*0Sstevel@tonic-gate 575*0Sstevel@tonic-gate sidpp = ddi_get_soft_state(statep, minor); 576*0Sstevel@tonic-gate if (sidpp == NULL || *sidpp == NULL) { 577*0Sstevel@tonic-gate cmn_err(CE_WARN, "snap_close: could not find state for " 578*0Sstevel@tonic-gate "snapshot %d.", minor); 579*0Sstevel@tonic-gate return (ENXIO); 580*0Sstevel@tonic-gate } 581*0Sstevel@tonic-gate sidp = *sidpp; 582*0Sstevel@tonic-gate mutex_enter(&snapshot_mutex); 583*0Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_WRITER); 584*0Sstevel@tonic-gate 585*0Sstevel@tonic-gate /* Mark the snapshot as not being busy anymore */ 586*0Sstevel@tonic-gate switch (otyp) { 587*0Sstevel@tonic-gate case OTYP_CHR: 588*0Sstevel@tonic-gate sidp->sid_flags &= ~(SID_CHAR_BUSY); 589*0Sstevel@tonic-gate break; 590*0Sstevel@tonic-gate case OTYP_BLK: 591*0Sstevel@tonic-gate sidp->sid_flags &= ~(SID_BLOCK_BUSY); 592*0Sstevel@tonic-gate break; 593*0Sstevel@tonic-gate default: 594*0Sstevel@tonic-gate mutex_exit(&snapshot_mutex); 595*0Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 596*0Sstevel@tonic-gate return (EINVAL); 597*0Sstevel@tonic-gate } 598*0Sstevel@tonic-gate 599*0Sstevel@tonic-gate if (SID_AVAILABLE(sidp)) { 600*0Sstevel@tonic-gate /* 601*0Sstevel@tonic-gate * if this is the last close on a snapshot that has been 602*0Sstevel@tonic-gate * deleted, then free up the soft state. The snapdelete 603*0Sstevel@tonic-gate * ioctl does not free this when the device is in use so 604*0Sstevel@tonic-gate * we do it here after the last reference goes away. 605*0Sstevel@tonic-gate */ 606*0Sstevel@tonic-gate 607*0Sstevel@tonic-gate /* remove the device nodes */ 608*0Sstevel@tonic-gate ASSERT(fssnap_dip != NULL); 609*0Sstevel@tonic-gate (void) snprintf(name, sizeof (name), "%d", 610*0Sstevel@tonic-gate sidp->sid_snapnumber); 611*0Sstevel@tonic-gate ddi_remove_minor_node(fssnap_dip, name); 612*0Sstevel@tonic-gate (void) snprintf(name, sizeof (name), "%d,raw", 613*0Sstevel@tonic-gate sidp->sid_snapnumber); 614*0Sstevel@tonic-gate ddi_remove_minor_node(fssnap_dip, name); 615*0Sstevel@tonic-gate 616*0Sstevel@tonic-gate /* delete the state structure */ 617*0Sstevel@tonic-gate ddi_soft_state_free(statep, sidp->sid_snapnumber); 618*0Sstevel@tonic-gate num_snapshots--; 619*0Sstevel@tonic-gate } 620*0Sstevel@tonic-gate 621*0Sstevel@tonic-gate mutex_exit(&snapshot_mutex); 622*0Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 623*0Sstevel@tonic-gate 624*0Sstevel@tonic-gate return (0); 625*0Sstevel@tonic-gate } 626*0Sstevel@tonic-gate 627*0Sstevel@tonic-gate /* 628*0Sstevel@tonic-gate * snap_read() - snapshot driver read(9E) routine 629*0Sstevel@tonic-gate * 630*0Sstevel@tonic-gate * reads data from the snapshot by calling snap_strategy() through physio() 631*0Sstevel@tonic-gate */ 632*0Sstevel@tonic-gate /* ARGSUSED */ 633*0Sstevel@tonic-gate static int 634*0Sstevel@tonic-gate snap_read(dev_t dev, struct uio *uiop, cred_t *credp) 635*0Sstevel@tonic-gate { 636*0Sstevel@tonic-gate minor_t minor; 637*0Sstevel@tonic-gate struct snapshot_id **sidpp; 638*0Sstevel@tonic-gate 639*0Sstevel@tonic-gate minor = getminor(dev); 640*0Sstevel@tonic-gate sidpp = ddi_get_soft_state(statep, minor); 641*0Sstevel@tonic-gate if (sidpp == NULL || *sidpp == NULL) { 642*0Sstevel@tonic-gate cmn_err(CE_WARN, 643*0Sstevel@tonic-gate "snap_read: could not find state for snapshot %d.", minor); 644*0Sstevel@tonic-gate return (ENXIO); 645*0Sstevel@tonic-gate } 646*0Sstevel@tonic-gate return (physio(snap_strategy, NULL, dev, B_READ, minphys, uiop)); 647*0Sstevel@tonic-gate } 648*0Sstevel@tonic-gate 649*0Sstevel@tonic-gate /* 650*0Sstevel@tonic-gate * snap_strategy() - snapshot driver strategy(9E) routine 651*0Sstevel@tonic-gate * 652*0Sstevel@tonic-gate * cycles through each chunk in the requested buffer and calls 653*0Sstevel@tonic-gate * snap_getchunk() on each chunk to retrieve it from the appropriate 654*0Sstevel@tonic-gate * place. Once all of the parts are put together the requested buffer 655*0Sstevel@tonic-gate * is returned. The snapshot driver is read-only, so a write is invalid. 656*0Sstevel@tonic-gate */ 657*0Sstevel@tonic-gate static int 658*0Sstevel@tonic-gate snap_strategy(struct buf *bp) 659*0Sstevel@tonic-gate { 660*0Sstevel@tonic-gate struct snapshot_id **sidpp, *sidp; 661*0Sstevel@tonic-gate minor_t minor; 662*0Sstevel@tonic-gate chunknumber_t chunk; 663*0Sstevel@tonic-gate int off, len; 664*0Sstevel@tonic-gate u_longlong_t reqptr; 665*0Sstevel@tonic-gate int error = 0; 666*0Sstevel@tonic-gate size_t chunksz; 667*0Sstevel@tonic-gate caddr_t buf; 668*0Sstevel@tonic-gate 669*0Sstevel@tonic-gate /* snapshot device is read-only */ 670*0Sstevel@tonic-gate if (bp->b_flags & B_WRITE) { 671*0Sstevel@tonic-gate bioerror(bp, EROFS); 672*0Sstevel@tonic-gate bp->b_resid = bp->b_bcount; 673*0Sstevel@tonic-gate biodone(bp); 674*0Sstevel@tonic-gate return (0); 675*0Sstevel@tonic-gate } 676*0Sstevel@tonic-gate 677*0Sstevel@tonic-gate minor = getminor(bp->b_edev); 678*0Sstevel@tonic-gate sidpp = ddi_get_soft_state(statep, minor); 679*0Sstevel@tonic-gate if (sidpp == NULL || *sidpp == NULL) { 680*0Sstevel@tonic-gate cmn_err(CE_WARN, 681*0Sstevel@tonic-gate "snap_strategy: could not find state for snapshot %d.", 682*0Sstevel@tonic-gate minor); 683*0Sstevel@tonic-gate bioerror(bp, ENXIO); 684*0Sstevel@tonic-gate bp->b_resid = bp->b_bcount; 685*0Sstevel@tonic-gate biodone(bp); 686*0Sstevel@tonic-gate return (0); 687*0Sstevel@tonic-gate } 688*0Sstevel@tonic-gate sidp = *sidpp; 689*0Sstevel@tonic-gate ASSERT(sidp); 690*0Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_READER); 691*0Sstevel@tonic-gate 692*0Sstevel@tonic-gate if (SID_INACTIVE(sidp)) { 693*0Sstevel@tonic-gate bioerror(bp, ENXIO); 694*0Sstevel@tonic-gate bp->b_resid = bp->b_bcount; 695*0Sstevel@tonic-gate biodone(bp); 696*0Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 697*0Sstevel@tonic-gate return (0); 698*0Sstevel@tonic-gate } 699*0Sstevel@tonic-gate 700*0Sstevel@tonic-gate if (bp->b_flags & (B_PAGEIO|B_PHYS)) 701*0Sstevel@tonic-gate bp_mapin(bp); 702*0Sstevel@tonic-gate 703*0Sstevel@tonic-gate bp->b_resid = bp->b_bcount; 704*0Sstevel@tonic-gate ASSERT(bp->b_un.b_addr); 705*0Sstevel@tonic-gate buf = bp->b_un.b_addr; 706*0Sstevel@tonic-gate 707*0Sstevel@tonic-gate chunksz = sidp->sid_cowinfo->cow_map.cmap_chunksz; 708*0Sstevel@tonic-gate 709*0Sstevel@tonic-gate /* reqptr is the current DEV_BSIZE offset into the device */ 710*0Sstevel@tonic-gate /* chunk is the chunk containing reqptr */ 711*0Sstevel@tonic-gate /* len is the length of the request (in the current chunk) in bytes */ 712*0Sstevel@tonic-gate /* off is the byte offset into the current chunk */ 713*0Sstevel@tonic-gate reqptr = bp->b_lblkno; 714*0Sstevel@tonic-gate while (bp->b_resid > 0) { 715*0Sstevel@tonic-gate chunk = dbtocowchunk(&sidp->sid_cowinfo->cow_map, reqptr); 716*0Sstevel@tonic-gate off = (reqptr % (chunksz >> DEV_BSHIFT)) << DEV_BSHIFT; 717*0Sstevel@tonic-gate len = min(chunksz - off, bp->b_resid); 718*0Sstevel@tonic-gate ASSERT((off + len) <= chunksz); 719*0Sstevel@tonic-gate 720*0Sstevel@tonic-gate if ((error = snap_getchunk(sidp, chunk, off, len, buf)) != 0) { 721*0Sstevel@tonic-gate /* 722*0Sstevel@tonic-gate * EINVAL means the user tried to go out of range. 723*0Sstevel@tonic-gate * Anything else means it's likely that we're 724*0Sstevel@tonic-gate * confused. 725*0Sstevel@tonic-gate */ 726*0Sstevel@tonic-gate if (error != EINVAL) { 727*0Sstevel@tonic-gate cmn_err(CE_WARN, "snap_strategy: error " 728*0Sstevel@tonic-gate "calling snap_getchunk, chunk = %llu, " 729*0Sstevel@tonic-gate "offset = %d, len = %d, resid = %lu, " 730*0Sstevel@tonic-gate "error = %d.", 731*0Sstevel@tonic-gate chunk, off, len, bp->b_resid, error); 732*0Sstevel@tonic-gate } 733*0Sstevel@tonic-gate bioerror(bp, error); 734*0Sstevel@tonic-gate biodone(bp); 735*0Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 736*0Sstevel@tonic-gate return (0); 737*0Sstevel@tonic-gate } 738*0Sstevel@tonic-gate bp->b_resid -= len; 739*0Sstevel@tonic-gate reqptr += (len >> DEV_BSHIFT); 740*0Sstevel@tonic-gate buf += len; 741*0Sstevel@tonic-gate } 742*0Sstevel@tonic-gate 743*0Sstevel@tonic-gate ASSERT(bp->b_resid == 0); 744*0Sstevel@tonic-gate biodone(bp); 745*0Sstevel@tonic-gate 746*0Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 747*0Sstevel@tonic-gate return (0); 748*0Sstevel@tonic-gate } 749*0Sstevel@tonic-gate 750*0Sstevel@tonic-gate /* 751*0Sstevel@tonic-gate * snap_getchunk() - helper function for snap_strategy() 752*0Sstevel@tonic-gate * 753*0Sstevel@tonic-gate * gets the requested data from the appropriate place and fills in the 754*0Sstevel@tonic-gate * buffer. chunk is the chunk number of the request, offset is the 755*0Sstevel@tonic-gate * offset into that chunk and must be less than the chunk size. len is 756*0Sstevel@tonic-gate * the length of the request starting at offset, and must not exceed a 757*0Sstevel@tonic-gate * chunk boundary. buffer is the address to copy the data to. len 758*0Sstevel@tonic-gate * bytes are copied into the buffer starting at the location specified. 759*0Sstevel@tonic-gate * 760*0Sstevel@tonic-gate * A chunk is located according to the following algorithm: 761*0Sstevel@tonic-gate * - If the chunk does not have a translation or is not a candidate 762*0Sstevel@tonic-gate * for translation, it is read straight from the master device. 763*0Sstevel@tonic-gate * - If the chunk does have a translation, then it is either on 764*0Sstevel@tonic-gate * disk or in memory: 765*0Sstevel@tonic-gate * o If it is in memory the requested data is simply copied out 766*0Sstevel@tonic-gate * of the in-memory buffer. 767*0Sstevel@tonic-gate * o If it is in the backing store, it is read from there. 768*0Sstevel@tonic-gate * 769*0Sstevel@tonic-gate * This function does the real work of the snapshot driver. 770*0Sstevel@tonic-gate */ 771*0Sstevel@tonic-gate static int 772*0Sstevel@tonic-gate snap_getchunk(struct snapshot_id *sidp, chunknumber_t chunk, int offset, 773*0Sstevel@tonic-gate int len, char *buffer) 774*0Sstevel@tonic-gate { 775*0Sstevel@tonic-gate cow_map_t *cmap = &sidp->sid_cowinfo->cow_map; 776*0Sstevel@tonic-gate cow_map_node_t *cmn; 777*0Sstevel@tonic-gate struct buf *snapbuf; 778*0Sstevel@tonic-gate int error = 0; 779*0Sstevel@tonic-gate char *newbuffer; 780*0Sstevel@tonic-gate int newlen = 0; 781*0Sstevel@tonic-gate int partial = 0; 782*0Sstevel@tonic-gate 783*0Sstevel@tonic-gate ASSERT(RW_READ_HELD(&sidp->sid_rwlock)); 784*0Sstevel@tonic-gate ASSERT(offset + len <= cmap->cmap_chunksz); 785*0Sstevel@tonic-gate 786*0Sstevel@tonic-gate /* 787*0Sstevel@tonic-gate * Check if the chunk number is out of range and if so bail out 788*0Sstevel@tonic-gate */ 789*0Sstevel@tonic-gate if (chunk >= (cmap->cmap_bmsize * NBBY)) { 790*0Sstevel@tonic-gate return (EINVAL); 791*0Sstevel@tonic-gate } 792*0Sstevel@tonic-gate 793*0Sstevel@tonic-gate /* 794*0Sstevel@tonic-gate * If the chunk is not a candidate for translation, then the chunk 795*0Sstevel@tonic-gate * was not allocated when the snapshot was taken. Since it does 796*0Sstevel@tonic-gate * not contain data associated with this snapshot, just return a 797*0Sstevel@tonic-gate * zero buffer instead. 798*0Sstevel@tonic-gate */ 799*0Sstevel@tonic-gate if (isclr(cmap->cmap_candidate, chunk)) { 800*0Sstevel@tonic-gate bzero(buffer, len); 801*0Sstevel@tonic-gate return (0); 802*0Sstevel@tonic-gate } 803*0Sstevel@tonic-gate 804*0Sstevel@tonic-gate /* 805*0Sstevel@tonic-gate * if the chunk is a candidate for translation but a 806*0Sstevel@tonic-gate * translation does not exist, then read through to the 807*0Sstevel@tonic-gate * original file system. The rwlock is held until the read 808*0Sstevel@tonic-gate * completes if it hasn't been translated to make sure the 809*0Sstevel@tonic-gate * file system does not translate the block before we 810*0Sstevel@tonic-gate * access it. If it has already been translated we don't 811*0Sstevel@tonic-gate * need the lock, because the translation will never go away. 812*0Sstevel@tonic-gate */ 813*0Sstevel@tonic-gate rw_enter(&cmap->cmap_rwlock, RW_READER); 814*0Sstevel@tonic-gate if (isclr(cmap->cmap_hastrans, chunk)) { 815*0Sstevel@tonic-gate snapbuf = getrbuf(KM_SLEEP); 816*0Sstevel@tonic-gate /* 817*0Sstevel@tonic-gate * Reading into the buffer saves having to do a copy, 818*0Sstevel@tonic-gate * but gets tricky if the request size is not a 819*0Sstevel@tonic-gate * multiple of DEV_BSIZE. However, we are filling the 820*0Sstevel@tonic-gate * buffer left to right, so future reads will write 821*0Sstevel@tonic-gate * over any extra data we might have read. 822*0Sstevel@tonic-gate */ 823*0Sstevel@tonic-gate 824*0Sstevel@tonic-gate partial = len % DEV_BSIZE; 825*0Sstevel@tonic-gate 826*0Sstevel@tonic-gate snapbuf->b_bcount = len; 827*0Sstevel@tonic-gate snapbuf->b_lblkno = lbtodb(chunk * cmap->cmap_chunksz + offset); 828*0Sstevel@tonic-gate snapbuf->b_un.b_addr = buffer; 829*0Sstevel@tonic-gate 830*0Sstevel@tonic-gate snapbuf->b_iodone = NULL; 831*0Sstevel@tonic-gate snapbuf->b_proc = NULL; /* i.e. the kernel */ 832*0Sstevel@tonic-gate snapbuf->b_flags = B_READ | B_BUSY; 833*0Sstevel@tonic-gate snapbuf->b_edev = sidp->sid_fvp->v_vfsp->vfs_dev; 834*0Sstevel@tonic-gate 835*0Sstevel@tonic-gate if (partial) { 836*0Sstevel@tonic-gate /* 837*0Sstevel@tonic-gate * Partial block read in progress. 838*0Sstevel@tonic-gate * This is bad as modules further down the line 839*0Sstevel@tonic-gate * assume buf's are exact multiples of DEV_BSIZE 840*0Sstevel@tonic-gate * and we end up with fewer, or zero, bytes read. 841*0Sstevel@tonic-gate * To get round this we need to round up to the 842*0Sstevel@tonic-gate * nearest full block read and then return only 843*0Sstevel@tonic-gate * len bytes. 844*0Sstevel@tonic-gate */ 845*0Sstevel@tonic-gate newlen = (len - partial) + DEV_BSIZE; 846*0Sstevel@tonic-gate newbuffer = kmem_alloc(newlen, KM_SLEEP); 847*0Sstevel@tonic-gate 848*0Sstevel@tonic-gate snapbuf->b_bcount = newlen; 849*0Sstevel@tonic-gate snapbuf->b_un.b_addr = newbuffer; 850*0Sstevel@tonic-gate } 851*0Sstevel@tonic-gate 852*0Sstevel@tonic-gate (void) bdev_strategy(snapbuf); 853*0Sstevel@tonic-gate (void) biowait(snapbuf); 854*0Sstevel@tonic-gate 855*0Sstevel@tonic-gate error = geterror(snapbuf); 856*0Sstevel@tonic-gate 857*0Sstevel@tonic-gate if (partial) { 858*0Sstevel@tonic-gate /* 859*0Sstevel@tonic-gate * Partial block read. Now we need to bcopy the 860*0Sstevel@tonic-gate * correct number of bytes back into the 861*0Sstevel@tonic-gate * supplied buffer, and tidy up our temp 862*0Sstevel@tonic-gate * buffer. 863*0Sstevel@tonic-gate */ 864*0Sstevel@tonic-gate bcopy(newbuffer, buffer, len); 865*0Sstevel@tonic-gate kmem_free(newbuffer, newlen); 866*0Sstevel@tonic-gate } 867*0Sstevel@tonic-gate 868*0Sstevel@tonic-gate freerbuf(snapbuf); 869*0Sstevel@tonic-gate rw_exit(&cmap->cmap_rwlock); 870*0Sstevel@tonic-gate 871*0Sstevel@tonic-gate return (error); 872*0Sstevel@tonic-gate } 873*0Sstevel@tonic-gate 874*0Sstevel@tonic-gate /* 875*0Sstevel@tonic-gate * finally, if the chunk is a candidate for translation and it 876*0Sstevel@tonic-gate * has been translated, then we clone the chunk of the buffer 877*0Sstevel@tonic-gate * that was copied aside by the file system. 878*0Sstevel@tonic-gate * The cmap_rwlock does not need to be held after we know the 879*0Sstevel@tonic-gate * data has already been copied. Once a chunk has been copied 880*0Sstevel@tonic-gate * to the backing file, it is stable read only data. 881*0Sstevel@tonic-gate */ 882*0Sstevel@tonic-gate cmn = transtbl_get(cmap, chunk); 883*0Sstevel@tonic-gate 884*0Sstevel@tonic-gate /* check whether the data is in memory or in the backing file */ 885*0Sstevel@tonic-gate if (cmn != NULL) { 886*0Sstevel@tonic-gate ASSERT(cmn->cmn_buf); 887*0Sstevel@tonic-gate /* already in memory */ 888*0Sstevel@tonic-gate bcopy(cmn->cmn_buf + offset, buffer, len); 889*0Sstevel@tonic-gate rw_exit(&cmap->cmap_rwlock); 890*0Sstevel@tonic-gate } else { 891*0Sstevel@tonic-gate ssize_t resid = len; 892*0Sstevel@tonic-gate int bf_index; 893*0Sstevel@tonic-gate /* 894*0Sstevel@tonic-gate * can cause deadlock with writer if we don't drop the 895*0Sstevel@tonic-gate * cmap_rwlock before trying to get the backing store file 896*0Sstevel@tonic-gate * vnode rwlock. 897*0Sstevel@tonic-gate */ 898*0Sstevel@tonic-gate rw_exit(&cmap->cmap_rwlock); 899*0Sstevel@tonic-gate 900*0Sstevel@tonic-gate bf_index = chunk / cmap->cmap_chunksperbf; 901*0Sstevel@tonic-gate 902*0Sstevel@tonic-gate /* read buffer from backing file */ 903*0Sstevel@tonic-gate error = vn_rdwr(UIO_READ, 904*0Sstevel@tonic-gate (sidp->sid_cowinfo->cow_backfile_array)[bf_index], 905*0Sstevel@tonic-gate buffer, len, ((chunk % cmap->cmap_chunksperbf) * 906*0Sstevel@tonic-gate cmap->cmap_chunksz) + offset, UIO_SYSSPACE, 0, 907*0Sstevel@tonic-gate RLIM64_INFINITY, kcred, &resid); 908*0Sstevel@tonic-gate } 909*0Sstevel@tonic-gate 910*0Sstevel@tonic-gate return (error); 911*0Sstevel@tonic-gate } 912*0Sstevel@tonic-gate 913*0Sstevel@tonic-gate /* 914*0Sstevel@tonic-gate * snap_print() - snapshot driver print(9E) routine 915*0Sstevel@tonic-gate * 916*0Sstevel@tonic-gate * prints the device identification string. 917*0Sstevel@tonic-gate */ 918*0Sstevel@tonic-gate static int 919*0Sstevel@tonic-gate snap_print(dev_t dev, char *str) 920*0Sstevel@tonic-gate { 921*0Sstevel@tonic-gate struct snapshot_id **sidpp; 922*0Sstevel@tonic-gate minor_t minor; 923*0Sstevel@tonic-gate 924*0Sstevel@tonic-gate minor = getminor(dev); 925*0Sstevel@tonic-gate sidpp = ddi_get_soft_state(statep, minor); 926*0Sstevel@tonic-gate if (sidpp == NULL || *sidpp == NULL) { 927*0Sstevel@tonic-gate cmn_err(CE_WARN, 928*0Sstevel@tonic-gate "snap_print: could not find state for snapshot %d.", minor); 929*0Sstevel@tonic-gate return (ENXIO); 930*0Sstevel@tonic-gate } 931*0Sstevel@tonic-gate 932*0Sstevel@tonic-gate cmn_err(CE_NOTE, "snap_print: snapshot %d: %s", minor, str); 933*0Sstevel@tonic-gate 934*0Sstevel@tonic-gate return (0); 935*0Sstevel@tonic-gate } 936*0Sstevel@tonic-gate 937*0Sstevel@tonic-gate /* 938*0Sstevel@tonic-gate * snap_prop_op() - snapshot driver prop_op(9E) routine 939*0Sstevel@tonic-gate * 940*0Sstevel@tonic-gate * get 32-bit and 64-bit values for size (character driver) and nblocks 941*0Sstevel@tonic-gate * (block driver). 942*0Sstevel@tonic-gate */ 943*0Sstevel@tonic-gate static int 944*0Sstevel@tonic-gate snap_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, 945*0Sstevel@tonic-gate int flags, char *name, caddr_t valuep, int *lengthp) 946*0Sstevel@tonic-gate { 947*0Sstevel@tonic-gate struct snapshot_id **sidpp; 948*0Sstevel@tonic-gate int length, km_flags; 949*0Sstevel@tonic-gate int nblocks, size; 950*0Sstevel@tonic-gate uint64_t Size, Nblocks; 951*0Sstevel@tonic-gate caddr_t buffer; 952*0Sstevel@tonic-gate int minor; 953*0Sstevel@tonic-gate dev_t mdev; 954*0Sstevel@tonic-gate 955*0Sstevel@tonic-gate minor = getminor(dev); 956*0Sstevel@tonic-gate length = *lengthp; /* Get callers length */ 957*0Sstevel@tonic-gate 958*0Sstevel@tonic-gate /* if this is the control device just check for .conf properties */ 959*0Sstevel@tonic-gate if (minor == SNAP_CTL_MINOR) 960*0Sstevel@tonic-gate return (ddi_prop_op(dev, dip, prop_op, flags, name, 961*0Sstevel@tonic-gate valuep, lengthp)); 962*0Sstevel@tonic-gate /* check to see if there is a master device plumbed */ 963*0Sstevel@tonic-gate sidpp = ddi_get_soft_state(statep, minor); 964*0Sstevel@tonic-gate if (sidpp == NULL || *sidpp == NULL) { 965*0Sstevel@tonic-gate cmn_err(CE_WARN, 966*0Sstevel@tonic-gate "snap_prop_op: could not find state for " 967*0Sstevel@tonic-gate "snapshot %d.", minor); 968*0Sstevel@tonic-gate return (DDI_PROP_NOT_FOUND); 969*0Sstevel@tonic-gate } 970*0Sstevel@tonic-gate 971*0Sstevel@tonic-gate if (((*sidpp)->sid_fvp == NULL) || ((*sidpp)->sid_fvp->v_vfsp == NULL)) 972*0Sstevel@tonic-gate return (ddi_prop_op(dev, dip, prop_op, flags, name, 973*0Sstevel@tonic-gate valuep, lengthp)); 974*0Sstevel@tonic-gate mdev = (*sidpp)->sid_fvp->v_vfsp->vfs_dev; 975*0Sstevel@tonic-gate 976*0Sstevel@tonic-gate /* get size information from the master device. */ 977*0Sstevel@tonic-gate 978*0Sstevel@tonic-gate if (strcmp(name, "nblocks") == 0) { 979*0Sstevel@tonic-gate nblocks = bdev_size(mdev); 980*0Sstevel@tonic-gate *lengthp = sizeof (nblocks); /* Set callers length */ 981*0Sstevel@tonic-gate } else if (strcmp(name, "Nblocks") == 0) { 982*0Sstevel@tonic-gate Nblocks = bdev_Size(mdev); 983*0Sstevel@tonic-gate *lengthp = sizeof (Nblocks); /* Set callers length */ 984*0Sstevel@tonic-gate } else if (strcmp(name, "size") == 0) { 985*0Sstevel@tonic-gate size = cdev_size(mdev); 986*0Sstevel@tonic-gate *lengthp = sizeof (size); /* Set callers length */ 987*0Sstevel@tonic-gate } else if (strcmp(name, "Size") == 0) { 988*0Sstevel@tonic-gate Size = cdev_Size(mdev); 989*0Sstevel@tonic-gate *lengthp = sizeof (Size); /* Set callers length */ 990*0Sstevel@tonic-gate } else { /* not for us */ 991*0Sstevel@tonic-gate return (ddi_prop_op(dev, dip, prop_op, flags, name, 992*0Sstevel@tonic-gate valuep, lengthp)); 993*0Sstevel@tonic-gate } 994*0Sstevel@tonic-gate 995*0Sstevel@tonic-gate /* 996*0Sstevel@tonic-gate * If length only request, just return the length. 997*0Sstevel@tonic-gate */ 998*0Sstevel@tonic-gate if (prop_op == PROP_LEN) { 999*0Sstevel@tonic-gate return (DDI_PROP_SUCCESS); 1000*0Sstevel@tonic-gate } 1001*0Sstevel@tonic-gate 1002*0Sstevel@tonic-gate /* 1003*0Sstevel@tonic-gate * Allocate buffer, if required. Either way, set `buffer' variable. 1004*0Sstevel@tonic-gate */ 1005*0Sstevel@tonic-gate switch (prop_op) { 1006*0Sstevel@tonic-gate case PROP_LEN_AND_VAL_ALLOC: 1007*0Sstevel@tonic-gate 1008*0Sstevel@tonic-gate km_flags = KM_NOSLEEP; 1009*0Sstevel@tonic-gate 1010*0Sstevel@tonic-gate if (flags & DDI_PROP_CANSLEEP) 1011*0Sstevel@tonic-gate km_flags = KM_SLEEP; 1012*0Sstevel@tonic-gate 1013*0Sstevel@tonic-gate buffer = kmem_alloc(*lengthp, km_flags); 1014*0Sstevel@tonic-gate if (buffer == NULL) { 1015*0Sstevel@tonic-gate cmn_err(CE_WARN, "snap_get_prop: no mem for " 1016*0Sstevel@tonic-gate "property %s.", name); 1017*0Sstevel@tonic-gate return (DDI_PROP_NO_MEMORY); 1018*0Sstevel@tonic-gate } 1019*0Sstevel@tonic-gate *(caddr_t *)valuep = buffer; /* Set callers buf ptr */ 1020*0Sstevel@tonic-gate break; 1021*0Sstevel@tonic-gate 1022*0Sstevel@tonic-gate case PROP_LEN_AND_VAL_BUF: 1023*0Sstevel@tonic-gate 1024*0Sstevel@tonic-gate if (*lengthp > length) 1025*0Sstevel@tonic-gate return (DDI_PROP_BUF_TOO_SMALL); 1026*0Sstevel@tonic-gate 1027*0Sstevel@tonic-gate buffer = valuep; /* get callers buf ptr */ 1028*0Sstevel@tonic-gate break; 1029*0Sstevel@tonic-gate } 1030*0Sstevel@tonic-gate 1031*0Sstevel@tonic-gate if (strcmp(name, "nblocks") == 0) { 1032*0Sstevel@tonic-gate *((uint_t *)buffer) = nblocks; 1033*0Sstevel@tonic-gate } else if (strcmp(name, "Nblocks") == 0) { 1034*0Sstevel@tonic-gate *((uint64_t *)buffer) = Nblocks; 1035*0Sstevel@tonic-gate } else if (strcmp(name, "size") == 0) { 1036*0Sstevel@tonic-gate *((uint_t *)buffer) = size; 1037*0Sstevel@tonic-gate } else if (strcmp(name, "Size") == 0) { 1038*0Sstevel@tonic-gate *((uint64_t *)buffer) = Size; 1039*0Sstevel@tonic-gate } 1040*0Sstevel@tonic-gate 1041*0Sstevel@tonic-gate return (DDI_PROP_SUCCESS); 1042*0Sstevel@tonic-gate } 1043*0Sstevel@tonic-gate 1044*0Sstevel@tonic-gate /* 1045*0Sstevel@tonic-gate * snap_ioctl() - snapshot driver ioctl(9E) routine 1046*0Sstevel@tonic-gate * 1047*0Sstevel@tonic-gate * only applies to the control device. The control device accepts two 1048*0Sstevel@tonic-gate * ioctl requests: create a snapshot or delete a snapshot. In either 1049*0Sstevel@tonic-gate * case, the vnode for the requested file system is extracted, and the 1050*0Sstevel@tonic-gate * request is passed on to the file system via the same ioctl. The file 1051*0Sstevel@tonic-gate * system is responsible for doing the things necessary for creating or 1052*0Sstevel@tonic-gate * destroying a snapshot, including any file system specific operations 1053*0Sstevel@tonic-gate * that must be performed as well as setting up and deleting the snapshot 1054*0Sstevel@tonic-gate * state through the fssnap interfaces. 1055*0Sstevel@tonic-gate */ 1056*0Sstevel@tonic-gate static int 1057*0Sstevel@tonic-gate snap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 1058*0Sstevel@tonic-gate int *rvalp) 1059*0Sstevel@tonic-gate { 1060*0Sstevel@tonic-gate minor_t minor; 1061*0Sstevel@tonic-gate int error = 0; 1062*0Sstevel@tonic-gate 1063*0Sstevel@tonic-gate minor = getminor(dev); 1064*0Sstevel@tonic-gate 1065*0Sstevel@tonic-gate if (minor != SNAP_CTL_MINOR) { 1066*0Sstevel@tonic-gate return (EINVAL); 1067*0Sstevel@tonic-gate } 1068*0Sstevel@tonic-gate 1069*0Sstevel@tonic-gate switch (cmd) { 1070*0Sstevel@tonic-gate case _FIOSNAPSHOTCREATE: 1071*0Sstevel@tonic-gate { 1072*0Sstevel@tonic-gate struct fiosnapcreate fc; 1073*0Sstevel@tonic-gate struct file *fp; 1074*0Sstevel@tonic-gate struct vnode *vp; 1075*0Sstevel@tonic-gate 1076*0Sstevel@tonic-gate if (ddi_copyin((void *)arg, &fc, sizeof (fc), mode)) 1077*0Sstevel@tonic-gate return (EFAULT); 1078*0Sstevel@tonic-gate 1079*0Sstevel@tonic-gate /* get vnode for file system mount point */ 1080*0Sstevel@tonic-gate if ((fp = getf(fc.rootfiledesc)) == NULL) 1081*0Sstevel@tonic-gate return (EBADF); 1082*0Sstevel@tonic-gate 1083*0Sstevel@tonic-gate ASSERT(fp->f_vnode); 1084*0Sstevel@tonic-gate vp = fp->f_vnode; 1085*0Sstevel@tonic-gate VN_HOLD(vp); 1086*0Sstevel@tonic-gate releasef(fc.rootfiledesc); 1087*0Sstevel@tonic-gate 1088*0Sstevel@tonic-gate /* pass ioctl request to file system */ 1089*0Sstevel@tonic-gate error = VOP_IOCTL(vp, cmd, arg, 0, credp, rvalp); 1090*0Sstevel@tonic-gate VN_RELE(vp); 1091*0Sstevel@tonic-gate break; 1092*0Sstevel@tonic-gate } 1093*0Sstevel@tonic-gate case _FIOSNAPSHOTCREATE_MULTI: 1094*0Sstevel@tonic-gate { 1095*0Sstevel@tonic-gate struct fiosnapcreate_multi fc; 1096*0Sstevel@tonic-gate struct file *fp; 1097*0Sstevel@tonic-gate struct vnode *vp; 1098*0Sstevel@tonic-gate 1099*0Sstevel@tonic-gate if (ddi_copyin((void *)arg, &fc, sizeof (fc), mode)) 1100*0Sstevel@tonic-gate return (EFAULT); 1101*0Sstevel@tonic-gate 1102*0Sstevel@tonic-gate /* get vnode for file system mount point */ 1103*0Sstevel@tonic-gate if ((fp = getf(fc.rootfiledesc)) == NULL) 1104*0Sstevel@tonic-gate return (EBADF); 1105*0Sstevel@tonic-gate 1106*0Sstevel@tonic-gate ASSERT(fp->f_vnode); 1107*0Sstevel@tonic-gate vp = fp->f_vnode; 1108*0Sstevel@tonic-gate VN_HOLD(vp); 1109*0Sstevel@tonic-gate releasef(fc.rootfiledesc); 1110*0Sstevel@tonic-gate 1111*0Sstevel@tonic-gate /* pass ioctl request to file system */ 1112*0Sstevel@tonic-gate error = VOP_IOCTL(vp, cmd, arg, 0, credp, rvalp); 1113*0Sstevel@tonic-gate VN_RELE(vp); 1114*0Sstevel@tonic-gate break; 1115*0Sstevel@tonic-gate } 1116*0Sstevel@tonic-gate case _FIOSNAPSHOTDELETE: 1117*0Sstevel@tonic-gate { 1118*0Sstevel@tonic-gate major_t major; 1119*0Sstevel@tonic-gate struct fiosnapdelete fc; 1120*0Sstevel@tonic-gate snapshot_id_t *sidp = NULL; 1121*0Sstevel@tonic-gate snapshot_id_t *sidnextp = NULL; 1122*0Sstevel@tonic-gate struct file *fp = NULL; 1123*0Sstevel@tonic-gate struct vnode *vp = NULL; 1124*0Sstevel@tonic-gate struct vfs *vfsp = NULL; 1125*0Sstevel@tonic-gate vfsops_t *vfsops = EIO_vfsops; 1126*0Sstevel@tonic-gate 1127*0Sstevel@tonic-gate if (ddi_copyin((void *)arg, &fc, sizeof (fc), mode)) 1128*0Sstevel@tonic-gate return (EFAULT); 1129*0Sstevel@tonic-gate 1130*0Sstevel@tonic-gate /* get vnode for file system mount point */ 1131*0Sstevel@tonic-gate if ((fp = getf(fc.rootfiledesc)) == NULL) 1132*0Sstevel@tonic-gate return (EBADF); 1133*0Sstevel@tonic-gate 1134*0Sstevel@tonic-gate ASSERT(fp->f_vnode); 1135*0Sstevel@tonic-gate vp = fp->f_vnode; 1136*0Sstevel@tonic-gate VN_HOLD(vp); 1137*0Sstevel@tonic-gate releasef(fc.rootfiledesc); 1138*0Sstevel@tonic-gate /* 1139*0Sstevel@tonic-gate * Test for two formats of delete and set correct minor/vp: 1140*0Sstevel@tonic-gate * pseudo device: 1141*0Sstevel@tonic-gate * fssnap -d [/dev/fssnap/x] 1142*0Sstevel@tonic-gate * or 1143*0Sstevel@tonic-gate * mount point: 1144*0Sstevel@tonic-gate * fssnap -d [/mntpt] 1145*0Sstevel@tonic-gate * Note that minor is verified to be equal to SNAP_CTL_MINOR 1146*0Sstevel@tonic-gate * at this point which is an invalid minor number. 1147*0Sstevel@tonic-gate */ 1148*0Sstevel@tonic-gate ASSERT(fssnap_dip != NULL); 1149*0Sstevel@tonic-gate major = ddi_driver_major(fssnap_dip); 1150*0Sstevel@tonic-gate mutex_enter(&snapshot_mutex); 1151*0Sstevel@tonic-gate for (sidp = snapshot; sidp != NULL; sidp = sidnextp) { 1152*0Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_READER); 1153*0Sstevel@tonic-gate sidnextp = sidp->sid_next; 1154*0Sstevel@tonic-gate /* pseudo device: */ 1155*0Sstevel@tonic-gate if (major == getmajor(vp->v_rdev)) { 1156*0Sstevel@tonic-gate minor = getminor(vp->v_rdev); 1157*0Sstevel@tonic-gate if (sidp->sid_snapnumber == (uint_t)minor && 1158*0Sstevel@tonic-gate sidp->sid_fvp) { 1159*0Sstevel@tonic-gate VN_RELE(vp); 1160*0Sstevel@tonic-gate vp = sidp->sid_fvp; 1161*0Sstevel@tonic-gate VN_HOLD(vp); 1162*0Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 1163*0Sstevel@tonic-gate break; 1164*0Sstevel@tonic-gate } 1165*0Sstevel@tonic-gate /* Mount point: */ 1166*0Sstevel@tonic-gate } else { 1167*0Sstevel@tonic-gate if (sidp->sid_fvp == vp) { 1168*0Sstevel@tonic-gate minor = sidp->sid_snapnumber; 1169*0Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 1170*0Sstevel@tonic-gate break; 1171*0Sstevel@tonic-gate } 1172*0Sstevel@tonic-gate } 1173*0Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 1174*0Sstevel@tonic-gate } 1175*0Sstevel@tonic-gate mutex_exit(&snapshot_mutex); 1176*0Sstevel@tonic-gate /* Verify minor got set correctly above */ 1177*0Sstevel@tonic-gate if (minor == SNAP_CTL_MINOR) { 1178*0Sstevel@tonic-gate VN_RELE(vp); 1179*0Sstevel@tonic-gate return (EINVAL); 1180*0Sstevel@tonic-gate } 1181*0Sstevel@tonic-gate dev = makedevice(major, minor); 1182*0Sstevel@tonic-gate /* 1183*0Sstevel@tonic-gate * Create dummy vfs entry 1184*0Sstevel@tonic-gate * to use as a locking semaphore across the IOCTL 1185*0Sstevel@tonic-gate * for mount in progress cases... 1186*0Sstevel@tonic-gate */ 1187*0Sstevel@tonic-gate vfsp = kmem_alloc(sizeof (vfs_t), KM_SLEEP); 1188*0Sstevel@tonic-gate VFS_INIT(vfsp, vfsops, NULL); 1189*0Sstevel@tonic-gate vfs_addmip(dev, vfsp); 1190*0Sstevel@tonic-gate if ((vfs_devmounting(dev, vfsp)) || 1191*0Sstevel@tonic-gate (vfs_devismounted(dev))) { 1192*0Sstevel@tonic-gate vfs_delmip(vfsp); 1193*0Sstevel@tonic-gate kmem_free(vfsp, sizeof (struct vfs)); 1194*0Sstevel@tonic-gate VN_RELE(vp); 1195*0Sstevel@tonic-gate return (EBUSY); 1196*0Sstevel@tonic-gate } 1197*0Sstevel@tonic-gate /* 1198*0Sstevel@tonic-gate * Nobody mounted but do not release mount in progress lock 1199*0Sstevel@tonic-gate * until IOCTL complete to prohibit a mount sneaking 1200*0Sstevel@tonic-gate * in 1201*0Sstevel@tonic-gate */ 1202*0Sstevel@tonic-gate error = VOP_IOCTL(vp, cmd, arg, 0, credp, rvalp); 1203*0Sstevel@tonic-gate vfs_delmip(vfsp); 1204*0Sstevel@tonic-gate kmem_free(vfsp, sizeof (struct vfs)); 1205*0Sstevel@tonic-gate VN_RELE(vp); 1206*0Sstevel@tonic-gate break; 1207*0Sstevel@tonic-gate } 1208*0Sstevel@tonic-gate default: 1209*0Sstevel@tonic-gate cmn_err(CE_WARN, "snap_ioctl: Invalid ioctl cmd %d, minor %d.", 1210*0Sstevel@tonic-gate cmd, minor); 1211*0Sstevel@tonic-gate return (EINVAL); 1212*0Sstevel@tonic-gate } 1213*0Sstevel@tonic-gate 1214*0Sstevel@tonic-gate return (error); 1215*0Sstevel@tonic-gate } 1216*0Sstevel@tonic-gate 1217*0Sstevel@tonic-gate 1218*0Sstevel@tonic-gate /* ************************************************************************ */ 1219*0Sstevel@tonic-gate 1220*0Sstevel@tonic-gate /* 1221*0Sstevel@tonic-gate * Translation Table Routines 1222*0Sstevel@tonic-gate * 1223*0Sstevel@tonic-gate * These support routines implement a simple doubly linked list 1224*0Sstevel@tonic-gate * to keep track of chunks that are currently in memory. The maximum 1225*0Sstevel@tonic-gate * size of the list is determined by the fssnap_max_mem_chunks variable. 1226*0Sstevel@tonic-gate * The cmap_rwlock is used to protect the linkage of the list. 1227*0Sstevel@tonic-gate */ 1228*0Sstevel@tonic-gate 1229*0Sstevel@tonic-gate /* 1230*0Sstevel@tonic-gate * transtbl_add() - add a node to the translation table 1231*0Sstevel@tonic-gate * 1232*0Sstevel@tonic-gate * allocates a new node and points it at the buffer passed in. The node 1233*0Sstevel@tonic-gate * is added to the beginning of the doubly linked list and the head of 1234*0Sstevel@tonic-gate * the list is moved. The cmap_rwlock must be held as a writer through 1235*0Sstevel@tonic-gate * this operation. 1236*0Sstevel@tonic-gate */ 1237*0Sstevel@tonic-gate static cow_map_node_t * 1238*0Sstevel@tonic-gate transtbl_add(cow_map_t *cmap, chunknumber_t chunk, caddr_t buf) 1239*0Sstevel@tonic-gate { 1240*0Sstevel@tonic-gate cow_map_node_t *cmnode; 1241*0Sstevel@tonic-gate 1242*0Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&cmap->cmap_rwlock)); 1243*0Sstevel@tonic-gate 1244*0Sstevel@tonic-gate cmnode = kmem_alloc(sizeof (cow_map_node_t), KM_SLEEP); 1245*0Sstevel@tonic-gate 1246*0Sstevel@tonic-gate /* 1247*0Sstevel@tonic-gate * insert new translations at the beginning so cmn_table is always 1248*0Sstevel@tonic-gate * the first node. 1249*0Sstevel@tonic-gate */ 1250*0Sstevel@tonic-gate cmnode->cmn_chunk = chunk; 1251*0Sstevel@tonic-gate cmnode->cmn_buf = buf; 1252*0Sstevel@tonic-gate cmnode->cmn_prev = NULL; 1253*0Sstevel@tonic-gate cmnode->cmn_next = cmap->cmap_table; 1254*0Sstevel@tonic-gate if (cmnode->cmn_next) 1255*0Sstevel@tonic-gate cmnode->cmn_next->cmn_prev = cmnode; 1256*0Sstevel@tonic-gate cmap->cmap_table = cmnode; 1257*0Sstevel@tonic-gate 1258*0Sstevel@tonic-gate return (cmnode); 1259*0Sstevel@tonic-gate } 1260*0Sstevel@tonic-gate 1261*0Sstevel@tonic-gate /* 1262*0Sstevel@tonic-gate * transtbl_get() - look up a node in the translation table 1263*0Sstevel@tonic-gate * 1264*0Sstevel@tonic-gate * called by the snapshot driver to find data that has been translated. 1265*0Sstevel@tonic-gate * The lookup is done by the chunk number, and the node is returned. 1266*0Sstevel@tonic-gate * If the node was not found, NULL is returned. 1267*0Sstevel@tonic-gate */ 1268*0Sstevel@tonic-gate static cow_map_node_t * 1269*0Sstevel@tonic-gate transtbl_get(cow_map_t *cmap, chunknumber_t chunk) 1270*0Sstevel@tonic-gate { 1271*0Sstevel@tonic-gate cow_map_node_t *cmn; 1272*0Sstevel@tonic-gate 1273*0Sstevel@tonic-gate ASSERT(RW_READ_HELD(&cmap->cmap_rwlock)); 1274*0Sstevel@tonic-gate ASSERT(cmap); 1275*0Sstevel@tonic-gate 1276*0Sstevel@tonic-gate /* search the translation table */ 1277*0Sstevel@tonic-gate for (cmn = cmap->cmap_table; cmn != NULL; cmn = cmn->cmn_next) { 1278*0Sstevel@tonic-gate if (cmn->cmn_chunk == chunk) 1279*0Sstevel@tonic-gate return (cmn); 1280*0Sstevel@tonic-gate } 1281*0Sstevel@tonic-gate 1282*0Sstevel@tonic-gate /* not found */ 1283*0Sstevel@tonic-gate return (NULL); 1284*0Sstevel@tonic-gate } 1285*0Sstevel@tonic-gate 1286*0Sstevel@tonic-gate /* 1287*0Sstevel@tonic-gate * transtbl_delete() - delete a node from the translation table 1288*0Sstevel@tonic-gate * 1289*0Sstevel@tonic-gate * called when a node's data has been written out to disk. The 1290*0Sstevel@tonic-gate * cmap_rwlock must be held as a writer for this operation. If the node 1291*0Sstevel@tonic-gate * being deleted is the head of the list, then the head is moved to the 1292*0Sstevel@tonic-gate * next node. Both the node's data and the node itself are freed. 1293*0Sstevel@tonic-gate */ 1294*0Sstevel@tonic-gate static void 1295*0Sstevel@tonic-gate transtbl_delete(cow_map_t *cmap, cow_map_node_t *cmn) 1296*0Sstevel@tonic-gate { 1297*0Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&cmap->cmap_rwlock)); 1298*0Sstevel@tonic-gate ASSERT(cmn); 1299*0Sstevel@tonic-gate ASSERT(cmap->cmap_table); 1300*0Sstevel@tonic-gate 1301*0Sstevel@tonic-gate /* if the head of the list is being deleted, then move the head up */ 1302*0Sstevel@tonic-gate if (cmap->cmap_table == cmn) { 1303*0Sstevel@tonic-gate ASSERT(cmn->cmn_prev == NULL); 1304*0Sstevel@tonic-gate cmap->cmap_table = cmn->cmn_next; 1305*0Sstevel@tonic-gate } 1306*0Sstevel@tonic-gate 1307*0Sstevel@tonic-gate 1308*0Sstevel@tonic-gate /* make previous node's next pointer skip over current node */ 1309*0Sstevel@tonic-gate if (cmn->cmn_prev != NULL) { 1310*0Sstevel@tonic-gate ASSERT(cmn->cmn_prev->cmn_next == cmn); 1311*0Sstevel@tonic-gate cmn->cmn_prev->cmn_next = cmn->cmn_next; 1312*0Sstevel@tonic-gate } 1313*0Sstevel@tonic-gate 1314*0Sstevel@tonic-gate /* make next node's previous pointer skip over current node */ 1315*0Sstevel@tonic-gate if (cmn->cmn_next != NULL) { 1316*0Sstevel@tonic-gate ASSERT(cmn->cmn_next->cmn_prev == cmn); 1317*0Sstevel@tonic-gate cmn->cmn_next->cmn_prev = cmn->cmn_prev; 1318*0Sstevel@tonic-gate } 1319*0Sstevel@tonic-gate 1320*0Sstevel@tonic-gate /* free the data and the node */ 1321*0Sstevel@tonic-gate ASSERT(cmn->cmn_buf); 1322*0Sstevel@tonic-gate kmem_free(cmn->cmn_buf, cmap->cmap_chunksz); 1323*0Sstevel@tonic-gate kmem_free(cmn, sizeof (cow_map_node_t)); 1324*0Sstevel@tonic-gate } 1325*0Sstevel@tonic-gate 1326*0Sstevel@tonic-gate /* 1327*0Sstevel@tonic-gate * transtbl_free() - free the entire translation table 1328*0Sstevel@tonic-gate * 1329*0Sstevel@tonic-gate * called when the snapshot is deleted. This frees all of the nodes in 1330*0Sstevel@tonic-gate * the translation table (but not the bitmaps). 1331*0Sstevel@tonic-gate */ 1332*0Sstevel@tonic-gate static void 1333*0Sstevel@tonic-gate transtbl_free(cow_map_t *cmap) 1334*0Sstevel@tonic-gate { 1335*0Sstevel@tonic-gate cow_map_node_t *curnode; 1336*0Sstevel@tonic-gate cow_map_node_t *tempnode; 1337*0Sstevel@tonic-gate 1338*0Sstevel@tonic-gate for (curnode = cmap->cmap_table; curnode != NULL; curnode = tempnode) { 1339*0Sstevel@tonic-gate tempnode = curnode->cmn_next; 1340*0Sstevel@tonic-gate 1341*0Sstevel@tonic-gate kmem_free(curnode->cmn_buf, cmap->cmap_chunksz); 1342*0Sstevel@tonic-gate kmem_free(curnode, sizeof (cow_map_node_t)); 1343*0Sstevel@tonic-gate } 1344*0Sstevel@tonic-gate } 1345*0Sstevel@tonic-gate 1346*0Sstevel@tonic-gate 1347*0Sstevel@tonic-gate /* ************************************************************************ */ 1348*0Sstevel@tonic-gate 1349*0Sstevel@tonic-gate /* 1350*0Sstevel@tonic-gate * Interface Implementation Routines 1351*0Sstevel@tonic-gate * 1352*0Sstevel@tonic-gate * The following functions implement snapshot interface routines that are 1353*0Sstevel@tonic-gate * called by the file system to create, delete, and use a snapshot. The 1354*0Sstevel@tonic-gate * interfaces are defined in fssnap_if.c and are filled in by this driver 1355*0Sstevel@tonic-gate * when it is loaded. This technique allows the file system to depend on 1356*0Sstevel@tonic-gate * the interface module without having to load the full implementation and 1357*0Sstevel@tonic-gate * snapshot device drivers. 1358*0Sstevel@tonic-gate */ 1359*0Sstevel@tonic-gate 1360*0Sstevel@tonic-gate /* 1361*0Sstevel@tonic-gate * fssnap_strategy_impl() - strategy routine called by the file system 1362*0Sstevel@tonic-gate * 1363*0Sstevel@tonic-gate * called by the file system to handle copy-on-write when necessary. All 1364*0Sstevel@tonic-gate * reads and writes that the file system performs should go through this 1365*0Sstevel@tonic-gate * function. If the file system calls the underlying device's strategy 1366*0Sstevel@tonic-gate * routine without going through fssnap_strategy() (eg. by calling 1367*0Sstevel@tonic-gate * bdev_strategy()), the snapshot may not be consistent. 1368*0Sstevel@tonic-gate * 1369*0Sstevel@tonic-gate * This function starts by doing significant sanity checking to insure 1370*0Sstevel@tonic-gate * the snapshot was not deleted out from under it or deleted and then 1371*0Sstevel@tonic-gate * recreated. To do this, it checks the actual pointer passed into it 1372*0Sstevel@tonic-gate * (ie. the handle held by the file system). NOTE that the parameter is 1373*0Sstevel@tonic-gate * a POINTER TO A POINTER to the snapshot id. Once the snapshot id is 1374*0Sstevel@tonic-gate * locked, it knows things are ok and that this snapshot is really for 1375*0Sstevel@tonic-gate * this file system. 1376*0Sstevel@tonic-gate * 1377*0Sstevel@tonic-gate * If the request is a write, fssnap_translate() is called to determine 1378*0Sstevel@tonic-gate * whether a copy-on-write is required. If it is a read, the read is 1379*0Sstevel@tonic-gate * simply passed on to the underlying device. 1380*0Sstevel@tonic-gate */ 1381*0Sstevel@tonic-gate static void 1382*0Sstevel@tonic-gate fssnap_strategy_impl(void *snapshot_id, buf_t *bp) 1383*0Sstevel@tonic-gate { 1384*0Sstevel@tonic-gate struct snapshot_id **sidpp; 1385*0Sstevel@tonic-gate struct snapshot_id *sidp; 1386*0Sstevel@tonic-gate int error; 1387*0Sstevel@tonic-gate 1388*0Sstevel@tonic-gate /* read requests are always passed through */ 1389*0Sstevel@tonic-gate if (bp->b_flags & B_READ) { 1390*0Sstevel@tonic-gate (void) bdev_strategy(bp); 1391*0Sstevel@tonic-gate return; 1392*0Sstevel@tonic-gate } 1393*0Sstevel@tonic-gate 1394*0Sstevel@tonic-gate /* 1395*0Sstevel@tonic-gate * Because we were not able to take the snapshot read lock BEFORE 1396*0Sstevel@tonic-gate * checking for a snapshot back in the file system, things may have 1397*0Sstevel@tonic-gate * drastically changed out from under us. For instance, the snapshot 1398*0Sstevel@tonic-gate * may have been deleted, deleted and recreated, or worse yet, deleted 1399*0Sstevel@tonic-gate * for this file system but now the snapshot number is in use by another 1400*0Sstevel@tonic-gate * file system. 1401*0Sstevel@tonic-gate * 1402*0Sstevel@tonic-gate * Having a pointer to the file system's snapshot id pointer allows us 1403*0Sstevel@tonic-gate * to sanity check most of this, though it assumes the file system is 1404*0Sstevel@tonic-gate * keeping track of a pointer to the snapshot_id somewhere. 1405*0Sstevel@tonic-gate */ 1406*0Sstevel@tonic-gate sidpp = (struct snapshot_id **)snapshot_id; 1407*0Sstevel@tonic-gate sidp = *sidpp; 1408*0Sstevel@tonic-gate 1409*0Sstevel@tonic-gate /* 1410*0Sstevel@tonic-gate * if this file system's snapshot was disabled, just pass the 1411*0Sstevel@tonic-gate * request through. 1412*0Sstevel@tonic-gate */ 1413*0Sstevel@tonic-gate if (sidp == NULL) { 1414*0Sstevel@tonic-gate (void) bdev_strategy(bp); 1415*0Sstevel@tonic-gate return; 1416*0Sstevel@tonic-gate } 1417*0Sstevel@tonic-gate 1418*0Sstevel@tonic-gate /* 1419*0Sstevel@tonic-gate * Once we have the reader lock the snapshot will not magically go 1420*0Sstevel@tonic-gate * away. But things may have changed on us before this so double check. 1421*0Sstevel@tonic-gate */ 1422*0Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_READER); 1423*0Sstevel@tonic-gate 1424*0Sstevel@tonic-gate /* 1425*0Sstevel@tonic-gate * if an error was founds somewhere the DELETE flag will be 1426*0Sstevel@tonic-gate * set to indicate the snapshot should be deleted and no new 1427*0Sstevel@tonic-gate * translations should occur. 1428*0Sstevel@tonic-gate */ 1429*0Sstevel@tonic-gate if (sidp->sid_flags & SID_DELETE) { 1430*0Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 1431*0Sstevel@tonic-gate (void) fssnap_delete_impl(sidpp); 1432*0Sstevel@tonic-gate (void) bdev_strategy(bp); 1433*0Sstevel@tonic-gate return; 1434*0Sstevel@tonic-gate } 1435*0Sstevel@tonic-gate 1436*0Sstevel@tonic-gate /* 1437*0Sstevel@tonic-gate * If the file system is no longer pointing to the snapshot we were 1438*0Sstevel@tonic-gate * called with, then it should not attempt to translate this buffer as 1439*0Sstevel@tonic-gate * it may be going to a snapshot for a different file system. 1440*0Sstevel@tonic-gate * Even if the file system snapshot pointer is still the same, the 1441*0Sstevel@tonic-gate * snapshot may have been disabled before we got the reader lock. 1442*0Sstevel@tonic-gate */ 1443*0Sstevel@tonic-gate if (sidp != *sidpp || SID_INACTIVE(sidp)) { 1444*0Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 1445*0Sstevel@tonic-gate (void) bdev_strategy(bp); 1446*0Sstevel@tonic-gate return; 1447*0Sstevel@tonic-gate } 1448*0Sstevel@tonic-gate 1449*0Sstevel@tonic-gate /* 1450*0Sstevel@tonic-gate * At this point we're sure the snapshot will not go away while the 1451*0Sstevel@tonic-gate * reader lock is held, and we are reasonably certain that we are 1452*0Sstevel@tonic-gate * writing to the correct snapshot. 1453*0Sstevel@tonic-gate */ 1454*0Sstevel@tonic-gate if ((error = fssnap_translate(sidpp, bp)) != 0) { 1455*0Sstevel@tonic-gate /* 1456*0Sstevel@tonic-gate * fssnap_translate can release the reader lock if it 1457*0Sstevel@tonic-gate * has to wait for a semaphore. In this case it is possible 1458*0Sstevel@tonic-gate * for the snapshot to be deleted in this time frame. If this 1459*0Sstevel@tonic-gate * happens just sent the buf thru to the filesystems device. 1460*0Sstevel@tonic-gate */ 1461*0Sstevel@tonic-gate if (sidp != *sidpp || SID_INACTIVE(sidp)) { 1462*0Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 1463*0Sstevel@tonic-gate (void) bdev_strategy(bp); 1464*0Sstevel@tonic-gate return; 1465*0Sstevel@tonic-gate } 1466*0Sstevel@tonic-gate bioerror(bp, error); 1467*0Sstevel@tonic-gate biodone(bp); 1468*0Sstevel@tonic-gate } 1469*0Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 1470*0Sstevel@tonic-gate } 1471*0Sstevel@tonic-gate 1472*0Sstevel@tonic-gate /* 1473*0Sstevel@tonic-gate * fssnap_translate() - helper function for fssnap_strategy() 1474*0Sstevel@tonic-gate * 1475*0Sstevel@tonic-gate * performs the actual copy-on-write for write requests, if required. 1476*0Sstevel@tonic-gate * This function does the real work of the file system side of things. 1477*0Sstevel@tonic-gate * 1478*0Sstevel@tonic-gate * It first checks the candidate bitmap to quickly determine whether any 1479*0Sstevel@tonic-gate * action is necessary. If the candidate bitmap indicates the chunk was 1480*0Sstevel@tonic-gate * allocated when the snapshot was created, then it checks to see whether 1481*0Sstevel@tonic-gate * a translation already exists. If a translation already exists then no 1482*0Sstevel@tonic-gate * action is required. If the chunk is a candidate for copy-on-write, 1483*0Sstevel@tonic-gate * and a translation does not already exist, then the chunk is read in 1484*0Sstevel@tonic-gate * and a node is added to the translation table. 1485*0Sstevel@tonic-gate * 1486*0Sstevel@tonic-gate * Once all of the chunks in the request range have been copied (if they 1487*0Sstevel@tonic-gate * needed to be), then the original request can be satisfied and the old 1488*0Sstevel@tonic-gate * data can be overwritten. 1489*0Sstevel@tonic-gate */ 1490*0Sstevel@tonic-gate static int 1491*0Sstevel@tonic-gate fssnap_translate(struct snapshot_id **sidpp, struct buf *wbp) 1492*0Sstevel@tonic-gate { 1493*0Sstevel@tonic-gate snapshot_id_t *sidp = *sidpp; 1494*0Sstevel@tonic-gate struct buf *oldbp; /* buffer to store old data in */ 1495*0Sstevel@tonic-gate struct cow_info *cowp = sidp->sid_cowinfo; 1496*0Sstevel@tonic-gate cow_map_t *cmap = &cowp->cow_map; 1497*0Sstevel@tonic-gate cow_map_node_t *cmn; 1498*0Sstevel@tonic-gate chunknumber_t cowchunk, startchunk, endchunk; 1499*0Sstevel@tonic-gate int error; 1500*0Sstevel@tonic-gate int throttle_write = 0; 1501*0Sstevel@tonic-gate 1502*0Sstevel@tonic-gate /* make sure the snapshot is active */ 1503*0Sstevel@tonic-gate ASSERT(RW_READ_HELD(&sidp->sid_rwlock)); 1504*0Sstevel@tonic-gate 1505*0Sstevel@tonic-gate startchunk = dbtocowchunk(cmap, wbp->b_lblkno); 1506*0Sstevel@tonic-gate endchunk = dbtocowchunk(cmap, wbp->b_lblkno + 1507*0Sstevel@tonic-gate ((wbp->b_bcount-1) >> DEV_BSHIFT)); 1508*0Sstevel@tonic-gate 1509*0Sstevel@tonic-gate /* 1510*0Sstevel@tonic-gate * Do not throttle the writes of the fssnap taskq thread and 1511*0Sstevel@tonic-gate * the log roll (trans_roll) thread. Furthermore the writes to 1512*0Sstevel@tonic-gate * the on-disk log are also not subject to throttling. 1513*0Sstevel@tonic-gate * The fssnap_write_taskq thread's write can block on the throttling 1514*0Sstevel@tonic-gate * semaphore which leads to self-deadlock as this same thread 1515*0Sstevel@tonic-gate * releases the throttling semaphore after completing the IO. 1516*0Sstevel@tonic-gate * If the trans_roll thread's write is throttled then we can deadlock 1517*0Sstevel@tonic-gate * because the fssnap_taskq_thread which releases the throttling 1518*0Sstevel@tonic-gate * semaphore can block waiting for log space which can only be 1519*0Sstevel@tonic-gate * released by the trans_roll thread. 1520*0Sstevel@tonic-gate */ 1521*0Sstevel@tonic-gate 1522*0Sstevel@tonic-gate throttle_write = !(taskq_member(cowp->cow_taskq, curthread) || 1523*0Sstevel@tonic-gate tsd_get(bypass_snapshot_throttle_key)); 1524*0Sstevel@tonic-gate 1525*0Sstevel@tonic-gate /* 1526*0Sstevel@tonic-gate * Iterate through all chunks covered by this write and perform the 1527*0Sstevel@tonic-gate * copy-aside if necessary. Once all chunks have been safely 1528*0Sstevel@tonic-gate * stowed away, the new data may be written in a single sweep. 1529*0Sstevel@tonic-gate * 1530*0Sstevel@tonic-gate * For each chunk in the range, the following sequence is performed: 1531*0Sstevel@tonic-gate * - Is the chunk a candidate for translation? 1532*0Sstevel@tonic-gate * o If not, then no translation is necessary, continue 1533*0Sstevel@tonic-gate * - If it is a candidate, then does it already have a translation? 1534*0Sstevel@tonic-gate * o If so, then no translation is necessary, continue 1535*0Sstevel@tonic-gate * - If it is a candidate, but does not yet have a translation, 1536*0Sstevel@tonic-gate * then read the old data and schedule an asynchronous taskq 1537*0Sstevel@tonic-gate * to write the old data to the backing file. 1538*0Sstevel@tonic-gate * 1539*0Sstevel@tonic-gate * Once this has been performed over the entire range of chunks, then 1540*0Sstevel@tonic-gate * it is safe to overwrite the data that is there. 1541*0Sstevel@tonic-gate * 1542*0Sstevel@tonic-gate * Note that no lock is required to check the candidate bitmap because 1543*0Sstevel@tonic-gate * it never changes once the snapshot is created. The reader lock is 1544*0Sstevel@tonic-gate * taken to check the hastrans bitmap since it may change. If it 1545*0Sstevel@tonic-gate * turns out a copy is required, then the lock is upgraded to a 1546*0Sstevel@tonic-gate * writer, and the bitmap is re-checked as it may have changed while 1547*0Sstevel@tonic-gate * the lock was released. Finally, the write lock is held while 1548*0Sstevel@tonic-gate * reading the old data to make sure it is not translated out from 1549*0Sstevel@tonic-gate * under us. 1550*0Sstevel@tonic-gate * 1551*0Sstevel@tonic-gate * This locking mechanism should be sufficient to handle multiple 1552*0Sstevel@tonic-gate * threads writing to overlapping chunks simultaneously. 1553*0Sstevel@tonic-gate */ 1554*0Sstevel@tonic-gate for (cowchunk = startchunk; cowchunk <= endchunk; cowchunk++) { 1555*0Sstevel@tonic-gate /* 1556*0Sstevel@tonic-gate * If the cowchunk is outside of the range of our 1557*0Sstevel@tonic-gate * candidate maps, then simply break out of the 1558*0Sstevel@tonic-gate * loop and pass the I/O through to bdev_strategy. 1559*0Sstevel@tonic-gate * This would occur if the file system has grown 1560*0Sstevel@tonic-gate * larger since the snapshot was taken. 1561*0Sstevel@tonic-gate */ 1562*0Sstevel@tonic-gate if (cowchunk >= (cmap->cmap_bmsize * NBBY)) 1563*0Sstevel@tonic-gate break; 1564*0Sstevel@tonic-gate 1565*0Sstevel@tonic-gate /* 1566*0Sstevel@tonic-gate * If no disk blocks were allocated in this chunk when the 1567*0Sstevel@tonic-gate * snapshot was created then no copy-on-write will be 1568*0Sstevel@tonic-gate * required. Since this bitmap is read-only no locks are 1569*0Sstevel@tonic-gate * necessary. 1570*0Sstevel@tonic-gate */ 1571*0Sstevel@tonic-gate if (isclr(cmap->cmap_candidate, cowchunk)) { 1572*0Sstevel@tonic-gate continue; 1573*0Sstevel@tonic-gate } 1574*0Sstevel@tonic-gate 1575*0Sstevel@tonic-gate /* 1576*0Sstevel@tonic-gate * If a translation already exists, the data can be written 1577*0Sstevel@tonic-gate * through since the old data has already been saved off. 1578*0Sstevel@tonic-gate */ 1579*0Sstevel@tonic-gate if (isset(cmap->cmap_hastrans, cowchunk)) { 1580*0Sstevel@tonic-gate continue; 1581*0Sstevel@tonic-gate } 1582*0Sstevel@tonic-gate 1583*0Sstevel@tonic-gate 1584*0Sstevel@tonic-gate /* 1585*0Sstevel@tonic-gate * Throttle translations if there are too many outstanding 1586*0Sstevel@tonic-gate * chunks in memory. The semaphore is sema_v'd by the taskq. 1587*0Sstevel@tonic-gate * 1588*0Sstevel@tonic-gate * You can't keep the sid_rwlock if you would go to sleep. 1589*0Sstevel@tonic-gate * This will result in deadlock when someone tries to delete 1590*0Sstevel@tonic-gate * the snapshot (wants the sid_rwlock as a writer, but can't 1591*0Sstevel@tonic-gate * get it). 1592*0Sstevel@tonic-gate */ 1593*0Sstevel@tonic-gate if (throttle_write) { 1594*0Sstevel@tonic-gate if (sema_tryp(&cmap->cmap_throttle_sem) == 0) { 1595*0Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 1596*0Sstevel@tonic-gate atomic_add_32(&cmap->cmap_waiters, 1); 1597*0Sstevel@tonic-gate sema_p(&cmap->cmap_throttle_sem); 1598*0Sstevel@tonic-gate atomic_add_32(&cmap->cmap_waiters, -1); 1599*0Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_READER); 1600*0Sstevel@tonic-gate 1601*0Sstevel@tonic-gate /* 1602*0Sstevel@tonic-gate * Now since we released the sid_rwlock the state may 1603*0Sstevel@tonic-gate * have transitioned underneath us. so check that again. 1604*0Sstevel@tonic-gate */ 1605*0Sstevel@tonic-gate if (sidp != *sidpp || SID_INACTIVE(sidp)) { 1606*0Sstevel@tonic-gate sema_v(&cmap->cmap_throttle_sem); 1607*0Sstevel@tonic-gate return (ENXIO); 1608*0Sstevel@tonic-gate } 1609*0Sstevel@tonic-gate } 1610*0Sstevel@tonic-gate } 1611*0Sstevel@tonic-gate 1612*0Sstevel@tonic-gate /* 1613*0Sstevel@tonic-gate * Acquire the lock as a writer and check to see if a 1614*0Sstevel@tonic-gate * translation has been added in the meantime. 1615*0Sstevel@tonic-gate */ 1616*0Sstevel@tonic-gate rw_enter(&cmap->cmap_rwlock, RW_WRITER); 1617*0Sstevel@tonic-gate if (isset(cmap->cmap_hastrans, cowchunk)) { 1618*0Sstevel@tonic-gate if (throttle_write) 1619*0Sstevel@tonic-gate sema_v(&cmap->cmap_throttle_sem); 1620*0Sstevel@tonic-gate rw_exit(&cmap->cmap_rwlock); 1621*0Sstevel@tonic-gate continue; /* go to the next chunk */ 1622*0Sstevel@tonic-gate } 1623*0Sstevel@tonic-gate 1624*0Sstevel@tonic-gate /* 1625*0Sstevel@tonic-gate * read a full chunk of data from the requested offset rounded 1626*0Sstevel@tonic-gate * down to the nearest chunk size. 1627*0Sstevel@tonic-gate */ 1628*0Sstevel@tonic-gate oldbp = getrbuf(KM_SLEEP); 1629*0Sstevel@tonic-gate oldbp->b_lblkno = cowchunktodb(cmap, cowchunk); 1630*0Sstevel@tonic-gate oldbp->b_edev = wbp->b_edev; 1631*0Sstevel@tonic-gate oldbp->b_bcount = cmap->cmap_chunksz; 1632*0Sstevel@tonic-gate oldbp->b_bufsize = cmap->cmap_chunksz; 1633*0Sstevel@tonic-gate oldbp->b_iodone = NULL; 1634*0Sstevel@tonic-gate oldbp->b_proc = NULL; 1635*0Sstevel@tonic-gate oldbp->b_flags = B_READ; 1636*0Sstevel@tonic-gate oldbp->b_un.b_addr = kmem_alloc(cmap->cmap_chunksz, KM_SLEEP); 1637*0Sstevel@tonic-gate 1638*0Sstevel@tonic-gate (void) bdev_strategy(oldbp); 1639*0Sstevel@tonic-gate (void) biowait(oldbp); 1640*0Sstevel@tonic-gate 1641*0Sstevel@tonic-gate /* 1642*0Sstevel@tonic-gate * It's ok to bail in the middle of translating the range 1643*0Sstevel@tonic-gate * because the extra copy-asides will not hurt anything 1644*0Sstevel@tonic-gate * (except by using extra space in the backing store). 1645*0Sstevel@tonic-gate */ 1646*0Sstevel@tonic-gate if ((error = geterror(oldbp)) != 0) { 1647*0Sstevel@tonic-gate cmn_err(CE_WARN, "fssnap_translate: error reading " 1648*0Sstevel@tonic-gate "old data for snapshot %d, chunk %llu, disk block " 1649*0Sstevel@tonic-gate "%lld, size %lu, error %d.", sidp->sid_snapnumber, 1650*0Sstevel@tonic-gate cowchunk, oldbp->b_lblkno, oldbp->b_bcount, error); 1651*0Sstevel@tonic-gate kmem_free(oldbp->b_un.b_addr, cmap->cmap_chunksz); 1652*0Sstevel@tonic-gate freerbuf(oldbp); 1653*0Sstevel@tonic-gate rw_exit(&cmap->cmap_rwlock); 1654*0Sstevel@tonic-gate if (throttle_write) 1655*0Sstevel@tonic-gate sema_v(&cmap->cmap_throttle_sem); 1656*0Sstevel@tonic-gate return (error); 1657*0Sstevel@tonic-gate } 1658*0Sstevel@tonic-gate 1659*0Sstevel@tonic-gate /* 1660*0Sstevel@tonic-gate * add the node to the translation table and save a reference 1661*0Sstevel@tonic-gate * to pass to the taskq for writing out to the backing file 1662*0Sstevel@tonic-gate */ 1663*0Sstevel@tonic-gate cmn = transtbl_add(cmap, cowchunk, oldbp->b_un.b_addr); 1664*0Sstevel@tonic-gate freerbuf(oldbp); 1665*0Sstevel@tonic-gate 1666*0Sstevel@tonic-gate /* 1667*0Sstevel@tonic-gate * Add a reference to the snapshot id so the lower level 1668*0Sstevel@tonic-gate * processing (ie. the taskq) can get back to the state 1669*0Sstevel@tonic-gate * information. 1670*0Sstevel@tonic-gate */ 1671*0Sstevel@tonic-gate cmn->cmn_sid = sidp; 1672*0Sstevel@tonic-gate cmn->release_sem = throttle_write; 1673*0Sstevel@tonic-gate setbit(cmap->cmap_hastrans, cowchunk); 1674*0Sstevel@tonic-gate 1675*0Sstevel@tonic-gate rw_exit(&cmap->cmap_rwlock); 1676*0Sstevel@tonic-gate 1677*0Sstevel@tonic-gate /* 1678*0Sstevel@tonic-gate * schedule the asynchronous write to the backing file 1679*0Sstevel@tonic-gate */ 1680*0Sstevel@tonic-gate if (cowp->cow_backfile_array != NULL) 1681*0Sstevel@tonic-gate (void) taskq_dispatch(cowp->cow_taskq, 1682*0Sstevel@tonic-gate fssnap_write_taskq, cmn, TQ_SLEEP); 1683*0Sstevel@tonic-gate } 1684*0Sstevel@tonic-gate 1685*0Sstevel@tonic-gate /* 1686*0Sstevel@tonic-gate * Write new data in place of the old data. At this point all of the 1687*0Sstevel@tonic-gate * chunks touched by this write have been copied aside and so the new 1688*0Sstevel@tonic-gate * data can be written out all at once. 1689*0Sstevel@tonic-gate */ 1690*0Sstevel@tonic-gate (void) bdev_strategy(wbp); 1691*0Sstevel@tonic-gate 1692*0Sstevel@tonic-gate return (0); 1693*0Sstevel@tonic-gate } 1694*0Sstevel@tonic-gate 1695*0Sstevel@tonic-gate /* 1696*0Sstevel@tonic-gate * fssnap_write_taskq() - write in-memory translations to the backing file 1697*0Sstevel@tonic-gate * 1698*0Sstevel@tonic-gate * writes in-memory translations to the backing file asynchronously. A 1699*0Sstevel@tonic-gate * task is dispatched each time a new translation is created. The task 1700*0Sstevel@tonic-gate * writes the data to the backing file and removes it from the memory 1701*0Sstevel@tonic-gate * list. The throttling semaphore is released only if the particular 1702*0Sstevel@tonic-gate * translation was throttled in fssnap_translate. 1703*0Sstevel@tonic-gate */ 1704*0Sstevel@tonic-gate static void 1705*0Sstevel@tonic-gate fssnap_write_taskq(void *arg) 1706*0Sstevel@tonic-gate { 1707*0Sstevel@tonic-gate cow_map_node_t *cmn = (cow_map_node_t *)arg; 1708*0Sstevel@tonic-gate snapshot_id_t *sidp = cmn->cmn_sid; 1709*0Sstevel@tonic-gate cow_info_t *cowp = sidp->sid_cowinfo; 1710*0Sstevel@tonic-gate cow_map_t *cmap = &cowp->cow_map; 1711*0Sstevel@tonic-gate int error; 1712*0Sstevel@tonic-gate int bf_index; 1713*0Sstevel@tonic-gate int release_sem = cmn->release_sem; 1714*0Sstevel@tonic-gate 1715*0Sstevel@tonic-gate /* 1716*0Sstevel@tonic-gate * The sid_rwlock does not need to be held here because the taskqs 1717*0Sstevel@tonic-gate * are destroyed explicitly by fssnap_delete (with the sid_rwlock 1718*0Sstevel@tonic-gate * held as a writer). taskq_destroy() will flush all of the tasks 1719*0Sstevel@tonic-gate * out before fssnap_delete frees up all of the structures. 1720*0Sstevel@tonic-gate */ 1721*0Sstevel@tonic-gate 1722*0Sstevel@tonic-gate /* if the snapshot was disabled from under us, drop the request. */ 1723*0Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_READER); 1724*0Sstevel@tonic-gate if (SID_INACTIVE(sidp)) { 1725*0Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 1726*0Sstevel@tonic-gate if (release_sem) 1727*0Sstevel@tonic-gate sema_v(&cmap->cmap_throttle_sem); 1728*0Sstevel@tonic-gate return; 1729*0Sstevel@tonic-gate } 1730*0Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 1731*0Sstevel@tonic-gate 1732*0Sstevel@tonic-gate atomic_add_64((uint64_t *)&cmap->cmap_nchunks, 1); 1733*0Sstevel@tonic-gate 1734*0Sstevel@tonic-gate if ((cmap->cmap_maxsize != 0) && 1735*0Sstevel@tonic-gate ((cmap->cmap_nchunks * cmap->cmap_chunksz) > cmap->cmap_maxsize)) { 1736*0Sstevel@tonic-gate cmn_err(CE_WARN, "fssnap_write_taskq: snapshot %d (%s) has " 1737*0Sstevel@tonic-gate "reached the maximum backing file size specified (%llu " 1738*0Sstevel@tonic-gate "bytes) and will be deleted.", sidp->sid_snapnumber, 1739*0Sstevel@tonic-gate (char *)cowp->cow_kstat_mntpt->ks_data, 1740*0Sstevel@tonic-gate cmap->cmap_maxsize); 1741*0Sstevel@tonic-gate if (release_sem) 1742*0Sstevel@tonic-gate sema_v(&cmap->cmap_throttle_sem); 1743*0Sstevel@tonic-gate atomic_or_uint(&sidp->sid_flags, SID_DELETE); 1744*0Sstevel@tonic-gate return; 1745*0Sstevel@tonic-gate } 1746*0Sstevel@tonic-gate 1747*0Sstevel@tonic-gate /* perform the write */ 1748*0Sstevel@tonic-gate bf_index = cmn->cmn_chunk / cmap->cmap_chunksperbf; 1749*0Sstevel@tonic-gate 1750*0Sstevel@tonic-gate if (error = vn_rdwr(UIO_WRITE, (cowp->cow_backfile_array)[bf_index], 1751*0Sstevel@tonic-gate cmn->cmn_buf, cmap->cmap_chunksz, 1752*0Sstevel@tonic-gate (cmn->cmn_chunk % cmap->cmap_chunksperbf) * cmap->cmap_chunksz, 1753*0Sstevel@tonic-gate UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, (ssize_t *)NULL)) { 1754*0Sstevel@tonic-gate cmn_err(CE_WARN, "fssnap_write_taskq: error writing to " 1755*0Sstevel@tonic-gate "backing file. DELETING SNAPSHOT %d, backing file path " 1756*0Sstevel@tonic-gate "%s, offset %llu bytes, error %d.", sidp->sid_snapnumber, 1757*0Sstevel@tonic-gate (char *)cowp->cow_kstat_bfname->ks_data, 1758*0Sstevel@tonic-gate cmn->cmn_chunk * cmap->cmap_chunksz, error); 1759*0Sstevel@tonic-gate if (release_sem) 1760*0Sstevel@tonic-gate sema_v(&cmap->cmap_throttle_sem); 1761*0Sstevel@tonic-gate atomic_or_uint(&sidp->sid_flags, SID_DELETE); 1762*0Sstevel@tonic-gate return; 1763*0Sstevel@tonic-gate } 1764*0Sstevel@tonic-gate 1765*0Sstevel@tonic-gate /* 1766*0Sstevel@tonic-gate * now remove the node and buffer from memory 1767*0Sstevel@tonic-gate */ 1768*0Sstevel@tonic-gate rw_enter(&cmap->cmap_rwlock, RW_WRITER); 1769*0Sstevel@tonic-gate transtbl_delete(cmap, cmn); 1770*0Sstevel@tonic-gate rw_exit(&cmap->cmap_rwlock); 1771*0Sstevel@tonic-gate 1772*0Sstevel@tonic-gate /* Allow more translations */ 1773*0Sstevel@tonic-gate if (release_sem) 1774*0Sstevel@tonic-gate sema_v(&cmap->cmap_throttle_sem); 1775*0Sstevel@tonic-gate 1776*0Sstevel@tonic-gate } 1777*0Sstevel@tonic-gate 1778*0Sstevel@tonic-gate /* 1779*0Sstevel@tonic-gate * fssnap_create_impl() - called from the file system to create a new snapshot 1780*0Sstevel@tonic-gate * 1781*0Sstevel@tonic-gate * allocates and initializes the structures needed for a new snapshot. 1782*0Sstevel@tonic-gate * This is called by the file system when it receives an ioctl request to 1783*0Sstevel@tonic-gate * create a new snapshot. An unused snapshot identifier is either found 1784*0Sstevel@tonic-gate * or created, and eventually returned as the opaque handle the file 1785*0Sstevel@tonic-gate * system will use to identify this snapshot. The snapshot number 1786*0Sstevel@tonic-gate * associated with the snapshot identifier is the same as the minor 1787*0Sstevel@tonic-gate * number for the snapshot device that is used to access that snapshot. 1788*0Sstevel@tonic-gate * 1789*0Sstevel@tonic-gate * The snapshot can not be used until the candidate bitmap is populated 1790*0Sstevel@tonic-gate * by the file system (see fssnap_set_candidate_impl()), and the file 1791*0Sstevel@tonic-gate * system finishes the setup process by calling fssnap_create_done(). 1792*0Sstevel@tonic-gate * Nearly all of the snapshot locks are held for the duration of the 1793*0Sstevel@tonic-gate * create, and are not released until fssnap_create_done is called(). 1794*0Sstevel@tonic-gate */ 1795*0Sstevel@tonic-gate static void * 1796*0Sstevel@tonic-gate fssnap_create_impl(chunknumber_t nchunks, uint_t chunksz, u_offset_t maxsize, 1797*0Sstevel@tonic-gate struct vnode *fsvp, int backfilecount, struct vnode **bfvpp, char *backpath, 1798*0Sstevel@tonic-gate u_offset_t max_backfile_size) 1799*0Sstevel@tonic-gate { 1800*0Sstevel@tonic-gate refstr_t *mountpoint; 1801*0Sstevel@tonic-gate char taskqname[50]; 1802*0Sstevel@tonic-gate struct cow_info *cowp; 1803*0Sstevel@tonic-gate struct cow_map *cmap; 1804*0Sstevel@tonic-gate struct snapshot_id *sidp; 1805*0Sstevel@tonic-gate int lastsnap; 1806*0Sstevel@tonic-gate 1807*0Sstevel@tonic-gate /* 1808*0Sstevel@tonic-gate * Sanity check the parameters we care about 1809*0Sstevel@tonic-gate * (we don't care about the informational parameters) 1810*0Sstevel@tonic-gate */ 1811*0Sstevel@tonic-gate if ((nchunks == 0) || 1812*0Sstevel@tonic-gate ((chunksz % DEV_BSIZE) != 0) || 1813*0Sstevel@tonic-gate (bfvpp == NULL)) { 1814*0Sstevel@tonic-gate return (NULL); 1815*0Sstevel@tonic-gate } 1816*0Sstevel@tonic-gate 1817*0Sstevel@tonic-gate /* 1818*0Sstevel@tonic-gate * Look for unused snapshot identifiers. Snapshot ids are never 1819*0Sstevel@tonic-gate * freed, but deleted snapshot ids will be recycled as needed. 1820*0Sstevel@tonic-gate */ 1821*0Sstevel@tonic-gate mutex_enter(&snapshot_mutex); 1822*0Sstevel@tonic-gate 1823*0Sstevel@tonic-gate findagain: 1824*0Sstevel@tonic-gate lastsnap = 0; 1825*0Sstevel@tonic-gate for (sidp = snapshot; sidp != NULL; sidp = sidp->sid_next) { 1826*0Sstevel@tonic-gate if (sidp->sid_snapnumber > lastsnap) 1827*0Sstevel@tonic-gate lastsnap = sidp->sid_snapnumber; 1828*0Sstevel@tonic-gate 1829*0Sstevel@tonic-gate /* 1830*0Sstevel@tonic-gate * The sid_rwlock is taken as a reader initially so that 1831*0Sstevel@tonic-gate * activity on each snapshot is not stalled while searching 1832*0Sstevel@tonic-gate * for a free snapshot id. 1833*0Sstevel@tonic-gate */ 1834*0Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_READER); 1835*0Sstevel@tonic-gate 1836*0Sstevel@tonic-gate /* 1837*0Sstevel@tonic-gate * If the snapshot has been deleted and nobody is using the 1838*0Sstevel@tonic-gate * snapshot device than we can reuse this snapshot_id. If 1839*0Sstevel@tonic-gate * the snapshot is marked to be deleted (SID_DELETE), then 1840*0Sstevel@tonic-gate * it hasn't been deleted yet so don't reuse it. 1841*0Sstevel@tonic-gate */ 1842*0Sstevel@tonic-gate if (SID_AVAILABLE(sidp)) 1843*0Sstevel@tonic-gate break; /* This spot is unused, so take it */ 1844*0Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 1845*0Sstevel@tonic-gate } 1846*0Sstevel@tonic-gate 1847*0Sstevel@tonic-gate /* 1848*0Sstevel@tonic-gate * add a new snapshot identifier if there are no deleted 1849*0Sstevel@tonic-gate * entries. Since it doesn't matter what order the entries 1850*0Sstevel@tonic-gate * are in we can just add it to the beginning of the list. 1851*0Sstevel@tonic-gate */ 1852*0Sstevel@tonic-gate if (sidp) { 1853*0Sstevel@tonic-gate if (rw_tryupgrade(&sidp->sid_rwlock) == 0) { 1854*0Sstevel@tonic-gate /* someone else grabbed it as a writer, try again */ 1855*0Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 1856*0Sstevel@tonic-gate goto findagain; 1857*0Sstevel@tonic-gate } 1858*0Sstevel@tonic-gate } else { 1859*0Sstevel@tonic-gate /* Create a new node if we didn't find an unused one */ 1860*0Sstevel@tonic-gate sidp = kmem_alloc(sizeof (struct snapshot_id), KM_SLEEP); 1861*0Sstevel@tonic-gate rw_init(&sidp->sid_rwlock, NULL, RW_DEFAULT, NULL); 1862*0Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_WRITER); 1863*0Sstevel@tonic-gate sidp->sid_snapnumber = (snapshot == NULL) ? 0 : lastsnap + 1; 1864*0Sstevel@tonic-gate sidp->sid_cowinfo = NULL; 1865*0Sstevel@tonic-gate sidp->sid_flags = 0; 1866*0Sstevel@tonic-gate sidp->sid_next = snapshot; 1867*0Sstevel@tonic-gate snapshot = sidp; 1868*0Sstevel@tonic-gate } 1869*0Sstevel@tonic-gate 1870*0Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&sidp->sid_rwlock)); 1871*0Sstevel@tonic-gate ASSERT(sidp->sid_cowinfo == NULL); 1872*0Sstevel@tonic-gate ASSERT(sidp->sid_snapnumber <= (lastsnap + 1)); 1873*0Sstevel@tonic-gate 1874*0Sstevel@tonic-gate sidp->sid_flags |= SID_CREATING; 1875*0Sstevel@tonic-gate /* The root vnode is held until snap_delete_impl() is called */ 1876*0Sstevel@tonic-gate VN_HOLD(fsvp); 1877*0Sstevel@tonic-gate sidp->sid_fvp = fsvp; 1878*0Sstevel@tonic-gate num_snapshots++; 1879*0Sstevel@tonic-gate 1880*0Sstevel@tonic-gate /* allocate and initialize structures */ 1881*0Sstevel@tonic-gate 1882*0Sstevel@tonic-gate cowp = kmem_zalloc(sizeof (struct cow_info), KM_SLEEP); 1883*0Sstevel@tonic-gate 1884*0Sstevel@tonic-gate cowp->cow_backfile_array = bfvpp; 1885*0Sstevel@tonic-gate cowp->cow_backcount = backfilecount; 1886*0Sstevel@tonic-gate cowp->cow_backfile_sz = max_backfile_size; 1887*0Sstevel@tonic-gate 1888*0Sstevel@tonic-gate /* 1889*0Sstevel@tonic-gate * Initialize task queues for this snapshot. Only a small number 1890*0Sstevel@tonic-gate * of threads are required because they will be serialized on the 1891*0Sstevel@tonic-gate * backing file's reader/writer lock anyway. 1892*0Sstevel@tonic-gate */ 1893*0Sstevel@tonic-gate (void) snprintf(taskqname, sizeof (taskqname), "%s_taskq_%d", snapname, 1894*0Sstevel@tonic-gate sidp->sid_snapnumber); 1895*0Sstevel@tonic-gate cowp->cow_taskq = taskq_create(taskqname, fssnap_taskq_nthreads, 1896*0Sstevel@tonic-gate minclsyspri, 1, fssnap_taskq_maxtasks, 0); 1897*0Sstevel@tonic-gate 1898*0Sstevel@tonic-gate /* don't allow tasks to start until after everything is ready */ 1899*0Sstevel@tonic-gate taskq_suspend(cowp->cow_taskq); 1900*0Sstevel@tonic-gate 1901*0Sstevel@tonic-gate /* initialize translation table */ 1902*0Sstevel@tonic-gate cmap = &cowp->cow_map; 1903*0Sstevel@tonic-gate rw_init(&cmap->cmap_rwlock, NULL, RW_DEFAULT, NULL); 1904*0Sstevel@tonic-gate rw_enter(&cmap->cmap_rwlock, RW_WRITER); 1905*0Sstevel@tonic-gate 1906*0Sstevel@tonic-gate sema_init(&cmap->cmap_throttle_sem, fssnap_max_mem_chunks, NULL, 1907*0Sstevel@tonic-gate SEMA_DEFAULT, NULL); 1908*0Sstevel@tonic-gate 1909*0Sstevel@tonic-gate cmap->cmap_chunksz = chunksz; 1910*0Sstevel@tonic-gate cmap->cmap_maxsize = maxsize; 1911*0Sstevel@tonic-gate cmap->cmap_chunksperbf = max_backfile_size / chunksz; 1912*0Sstevel@tonic-gate 1913*0Sstevel@tonic-gate /* 1914*0Sstevel@tonic-gate * allocate one bit per chunk for the bitmaps, round up 1915*0Sstevel@tonic-gate */ 1916*0Sstevel@tonic-gate cmap->cmap_bmsize = (nchunks + (NBBY - 1)) / NBBY; 1917*0Sstevel@tonic-gate cmap->cmap_hastrans = kmem_zalloc(cmap->cmap_bmsize, KM_SLEEP); 1918*0Sstevel@tonic-gate cmap->cmap_candidate = kmem_zalloc(cmap->cmap_bmsize, KM_SLEEP); 1919*0Sstevel@tonic-gate 1920*0Sstevel@tonic-gate sidp->sid_cowinfo = cowp; 1921*0Sstevel@tonic-gate 1922*0Sstevel@tonic-gate /* initialize kstats for this snapshot */ 1923*0Sstevel@tonic-gate mountpoint = vfs_getmntpoint(fsvp->v_vfsp); 1924*0Sstevel@tonic-gate fssnap_create_kstats(sidp, sidp->sid_snapnumber, 1925*0Sstevel@tonic-gate refstr_value(mountpoint), backpath); 1926*0Sstevel@tonic-gate refstr_rele(mountpoint); 1927*0Sstevel@tonic-gate 1928*0Sstevel@tonic-gate mutex_exit(&snapshot_mutex); 1929*0Sstevel@tonic-gate 1930*0Sstevel@tonic-gate /* 1931*0Sstevel@tonic-gate * return with snapshot id rwlock held as a writer until 1932*0Sstevel@tonic-gate * fssnap_create_done is called 1933*0Sstevel@tonic-gate */ 1934*0Sstevel@tonic-gate return (sidp); 1935*0Sstevel@tonic-gate } 1936*0Sstevel@tonic-gate 1937*0Sstevel@tonic-gate /* 1938*0Sstevel@tonic-gate * fssnap_set_candidate_impl() - mark a chunk as a candidate for copy-on-write 1939*0Sstevel@tonic-gate * 1940*0Sstevel@tonic-gate * sets a bit in the candidate bitmap that indicates that a chunk is a 1941*0Sstevel@tonic-gate * candidate for copy-on-write. Typically, chunks that are allocated on 1942*0Sstevel@tonic-gate * the file system at the time the snapshot is taken are candidates, 1943*0Sstevel@tonic-gate * while chunks that have no allocated data do not need to be copied. 1944*0Sstevel@tonic-gate * Chunks containing metadata must be marked as candidates as well. 1945*0Sstevel@tonic-gate */ 1946*0Sstevel@tonic-gate static void 1947*0Sstevel@tonic-gate fssnap_set_candidate_impl(void *snapshot_id, chunknumber_t chunknumber) 1948*0Sstevel@tonic-gate { 1949*0Sstevel@tonic-gate struct snapshot_id *sid = snapshot_id; 1950*0Sstevel@tonic-gate struct cow_info *cowp = sid->sid_cowinfo; 1951*0Sstevel@tonic-gate struct cow_map *cmap = &cowp->cow_map; 1952*0Sstevel@tonic-gate 1953*0Sstevel@tonic-gate /* simple bitmap operation for now */ 1954*0Sstevel@tonic-gate ASSERT(chunknumber < (cmap->cmap_bmsize * NBBY)); 1955*0Sstevel@tonic-gate setbit(cmap->cmap_candidate, chunknumber); 1956*0Sstevel@tonic-gate } 1957*0Sstevel@tonic-gate 1958*0Sstevel@tonic-gate /* 1959*0Sstevel@tonic-gate * fssnap_is_candidate_impl() - check whether a chunk is a candidate 1960*0Sstevel@tonic-gate * 1961*0Sstevel@tonic-gate * returns 0 if the chunk is not a candidate and 1 if the chunk is a 1962*0Sstevel@tonic-gate * candidate. This can be used by the file system to change behavior for 1963*0Sstevel@tonic-gate * chunks that might induce a copy-on-write. The offset is specified in 1964*0Sstevel@tonic-gate * bytes since the chunk size may not be known by the file system. 1965*0Sstevel@tonic-gate */ 1966*0Sstevel@tonic-gate static int 1967*0Sstevel@tonic-gate fssnap_is_candidate_impl(void *snapshot_id, u_offset_t off) 1968*0Sstevel@tonic-gate { 1969*0Sstevel@tonic-gate struct snapshot_id *sid = snapshot_id; 1970*0Sstevel@tonic-gate struct cow_info *cowp = sid->sid_cowinfo; 1971*0Sstevel@tonic-gate struct cow_map *cmap = &cowp->cow_map; 1972*0Sstevel@tonic-gate ulong_t chunknumber = off / cmap->cmap_chunksz; 1973*0Sstevel@tonic-gate 1974*0Sstevel@tonic-gate /* simple bitmap operation for now */ 1975*0Sstevel@tonic-gate ASSERT(chunknumber < (cmap->cmap_bmsize * NBBY)); 1976*0Sstevel@tonic-gate return (isset(cmap->cmap_candidate, chunknumber)); 1977*0Sstevel@tonic-gate } 1978*0Sstevel@tonic-gate 1979*0Sstevel@tonic-gate /* 1980*0Sstevel@tonic-gate * fssnap_create_done_impl() - complete the snapshot setup process 1981*0Sstevel@tonic-gate * 1982*0Sstevel@tonic-gate * called when the file system is done populating the candidate bitmap 1983*0Sstevel@tonic-gate * and it is ready to start using the snapshot. This routine releases 1984*0Sstevel@tonic-gate * the snapshot locks, allows taskq tasks to start processing, and 1985*0Sstevel@tonic-gate * creates the device minor nodes associated with the snapshot. 1986*0Sstevel@tonic-gate */ 1987*0Sstevel@tonic-gate static int 1988*0Sstevel@tonic-gate fssnap_create_done_impl(void *snapshot_id) 1989*0Sstevel@tonic-gate { 1990*0Sstevel@tonic-gate struct snapshot_id **sidpp, *sidp = snapshot_id; 1991*0Sstevel@tonic-gate struct cow_info *cowp; 1992*0Sstevel@tonic-gate struct cow_map *cmap; 1993*0Sstevel@tonic-gate int snapnumber = -1; 1994*0Sstevel@tonic-gate char name[20]; 1995*0Sstevel@tonic-gate 1996*0Sstevel@tonic-gate /* sid rwlock and cmap rwlock should be taken from fssnap_create */ 1997*0Sstevel@tonic-gate ASSERT(sidp); 1998*0Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&sidp->sid_rwlock)); 1999*0Sstevel@tonic-gate ASSERT(sidp->sid_cowinfo); 2000*0Sstevel@tonic-gate 2001*0Sstevel@tonic-gate cowp = sidp->sid_cowinfo; 2002*0Sstevel@tonic-gate cmap = &cowp->cow_map; 2003*0Sstevel@tonic-gate 2004*0Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&cmap->cmap_rwlock)); 2005*0Sstevel@tonic-gate 2006*0Sstevel@tonic-gate sidp->sid_flags &= ~(SID_CREATING | SID_DISABLED); 2007*0Sstevel@tonic-gate snapnumber = sidp->sid_snapnumber; 2008*0Sstevel@tonic-gate 2009*0Sstevel@tonic-gate /* allocate state structure and find new snapshot id */ 2010*0Sstevel@tonic-gate if (ddi_soft_state_zalloc(statep, snapnumber) != DDI_SUCCESS) { 2011*0Sstevel@tonic-gate cmn_err(CE_WARN, 2012*0Sstevel@tonic-gate "snap_ioctl: create: could not allocate " 2013*0Sstevel@tonic-gate "state for snapshot %d.", snapnumber); 2014*0Sstevel@tonic-gate snapnumber = -1; 2015*0Sstevel@tonic-gate goto out; 2016*0Sstevel@tonic-gate } 2017*0Sstevel@tonic-gate 2018*0Sstevel@tonic-gate sidpp = ddi_get_soft_state(statep, snapnumber); 2019*0Sstevel@tonic-gate *sidpp = sidp; 2020*0Sstevel@tonic-gate 2021*0Sstevel@tonic-gate /* create minor node based on snapshot number */ 2022*0Sstevel@tonic-gate ASSERT(fssnap_dip != NULL); 2023*0Sstevel@tonic-gate (void) snprintf(name, sizeof (name), "%d", snapnumber); 2024*0Sstevel@tonic-gate if (ddi_create_minor_node(fssnap_dip, name, S_IFBLK, 2025*0Sstevel@tonic-gate snapnumber, DDI_PSEUDO, 0) != DDI_SUCCESS) { 2026*0Sstevel@tonic-gate cmn_err(CE_WARN, "snap_ioctl: could not create " 2027*0Sstevel@tonic-gate "block minor node for snapshot %d.", snapnumber); 2028*0Sstevel@tonic-gate snapnumber = -1; 2029*0Sstevel@tonic-gate goto out; 2030*0Sstevel@tonic-gate } 2031*0Sstevel@tonic-gate 2032*0Sstevel@tonic-gate (void) snprintf(name, sizeof (name), "%d,raw", snapnumber); 2033*0Sstevel@tonic-gate if (ddi_create_minor_node(fssnap_dip, name, S_IFCHR, 2034*0Sstevel@tonic-gate snapnumber, DDI_PSEUDO, 0) != DDI_SUCCESS) { 2035*0Sstevel@tonic-gate cmn_err(CE_WARN, "snap_ioctl: could not create " 2036*0Sstevel@tonic-gate "character minor node for snapshot %d.", snapnumber); 2037*0Sstevel@tonic-gate snapnumber = -1; 2038*0Sstevel@tonic-gate } 2039*0Sstevel@tonic-gate 2040*0Sstevel@tonic-gate out: 2041*0Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 2042*0Sstevel@tonic-gate rw_exit(&cmap->cmap_rwlock); 2043*0Sstevel@tonic-gate 2044*0Sstevel@tonic-gate /* let the taskq threads start processing */ 2045*0Sstevel@tonic-gate taskq_resume(cowp->cow_taskq); 2046*0Sstevel@tonic-gate 2047*0Sstevel@tonic-gate return (snapnumber); 2048*0Sstevel@tonic-gate } 2049*0Sstevel@tonic-gate 2050*0Sstevel@tonic-gate /* 2051*0Sstevel@tonic-gate * fssnap_delete_impl() - delete a snapshot 2052*0Sstevel@tonic-gate * 2053*0Sstevel@tonic-gate * used when a snapshot is no longer needed. This is called by the file 2054*0Sstevel@tonic-gate * system when it receives an ioctl request to delete a snapshot. It is 2055*0Sstevel@tonic-gate * also called internally when error conditions such as disk full, errors 2056*0Sstevel@tonic-gate * writing to the backing file, or backing file maxsize exceeded occur. 2057*0Sstevel@tonic-gate * If the snapshot device is busy when the delete request is received, 2058*0Sstevel@tonic-gate * all state will be deleted except for the soft state and device files 2059*0Sstevel@tonic-gate * associated with the snapshot; they will be deleted when the snapshot 2060*0Sstevel@tonic-gate * device is closed. 2061*0Sstevel@tonic-gate * 2062*0Sstevel@tonic-gate * NOTE this function takes a POINTER TO A POINTER to the snapshot id, 2063*0Sstevel@tonic-gate * and expects to be able to set the handle held by the file system to 2064*0Sstevel@tonic-gate * NULL. This depends on the file system checking that variable for NULL 2065*0Sstevel@tonic-gate * before calling fssnap_strategy(). 2066*0Sstevel@tonic-gate */ 2067*0Sstevel@tonic-gate static int 2068*0Sstevel@tonic-gate fssnap_delete_impl(void *snapshot_id) 2069*0Sstevel@tonic-gate { 2070*0Sstevel@tonic-gate struct snapshot_id **sidpp = (struct snapshot_id **)snapshot_id; 2071*0Sstevel@tonic-gate struct snapshot_id *sidp; 2072*0Sstevel@tonic-gate struct snapshot_id **statesidpp; 2073*0Sstevel@tonic-gate struct cow_info *cowp; 2074*0Sstevel@tonic-gate struct cow_map *cmap; 2075*0Sstevel@tonic-gate char name[20]; 2076*0Sstevel@tonic-gate int snapnumber = -1; 2077*0Sstevel@tonic-gate vnode_t **vpp; 2078*0Sstevel@tonic-gate 2079*0Sstevel@tonic-gate /* 2080*0Sstevel@tonic-gate * sidp is guaranteed to be valid if sidpp is valid because 2081*0Sstevel@tonic-gate * the snapshot list is append-only. 2082*0Sstevel@tonic-gate */ 2083*0Sstevel@tonic-gate if (sidpp == NULL) { 2084*0Sstevel@tonic-gate return (-1); 2085*0Sstevel@tonic-gate } 2086*0Sstevel@tonic-gate 2087*0Sstevel@tonic-gate sidp = *sidpp; 2088*0Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_WRITER); 2089*0Sstevel@tonic-gate 2090*0Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&sidp->sid_rwlock)); 2091*0Sstevel@tonic-gate 2092*0Sstevel@tonic-gate /* 2093*0Sstevel@tonic-gate * double check that the snapshot is still valid for THIS file system 2094*0Sstevel@tonic-gate */ 2095*0Sstevel@tonic-gate if (*sidpp == NULL) { 2096*0Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 2097*0Sstevel@tonic-gate return (-1); 2098*0Sstevel@tonic-gate } 2099*0Sstevel@tonic-gate 2100*0Sstevel@tonic-gate /* 2101*0Sstevel@tonic-gate * Now we know the snapshot is still valid and will not go away 2102*0Sstevel@tonic-gate * because we have the write lock. Once the state is transitioned 2103*0Sstevel@tonic-gate * to "disabling", the sid_rwlock can be released. Any pending I/O 2104*0Sstevel@tonic-gate * waiting for the lock as a reader will check for this state and 2105*0Sstevel@tonic-gate * abort without touching data that may be getting freed. 2106*0Sstevel@tonic-gate */ 2107*0Sstevel@tonic-gate sidp->sid_flags |= SID_DISABLING; 2108*0Sstevel@tonic-gate if (sidp->sid_flags & SID_DELETE) { 2109*0Sstevel@tonic-gate cmn_err(CE_WARN, "Snapshot %d automatically deleted.", 2110*0Sstevel@tonic-gate sidp->sid_snapnumber); 2111*0Sstevel@tonic-gate sidp->sid_flags &= ~(SID_DELETE); 2112*0Sstevel@tonic-gate } 2113*0Sstevel@tonic-gate 2114*0Sstevel@tonic-gate 2115*0Sstevel@tonic-gate /* 2116*0Sstevel@tonic-gate * This is pointing into file system specific data! The assumption is 2117*0Sstevel@tonic-gate * that fssnap_strategy() gets called from the file system based on 2118*0Sstevel@tonic-gate * whether this reference to the snapshot_id is NULL or not. So 2119*0Sstevel@tonic-gate * setting this to NULL should disable snapshots for the file system. 2120*0Sstevel@tonic-gate */ 2121*0Sstevel@tonic-gate *sidpp = NULL; 2122*0Sstevel@tonic-gate 2123*0Sstevel@tonic-gate /* remove cowinfo */ 2124*0Sstevel@tonic-gate cowp = sidp->sid_cowinfo; 2125*0Sstevel@tonic-gate if (cowp == NULL) { 2126*0Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 2127*0Sstevel@tonic-gate return (-1); 2128*0Sstevel@tonic-gate } 2129*0Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 2130*0Sstevel@tonic-gate 2131*0Sstevel@tonic-gate /* destroy task queues first so they don't reference freed data. */ 2132*0Sstevel@tonic-gate if (cowp->cow_taskq) { 2133*0Sstevel@tonic-gate taskq_destroy(cowp->cow_taskq); 2134*0Sstevel@tonic-gate cowp->cow_taskq = NULL; 2135*0Sstevel@tonic-gate } 2136*0Sstevel@tonic-gate 2137*0Sstevel@tonic-gate if (cowp->cow_backfile_array != NULL) { 2138*0Sstevel@tonic-gate for (vpp = cowp->cow_backfile_array; *vpp; vpp++) 2139*0Sstevel@tonic-gate VN_RELE(*vpp); 2140*0Sstevel@tonic-gate kmem_free(cowp->cow_backfile_array, 2141*0Sstevel@tonic-gate (cowp->cow_backcount + 1) * sizeof (vnode_t *)); 2142*0Sstevel@tonic-gate cowp->cow_backfile_array = NULL; 2143*0Sstevel@tonic-gate } 2144*0Sstevel@tonic-gate 2145*0Sstevel@tonic-gate sidp->sid_cowinfo = NULL; 2146*0Sstevel@tonic-gate 2147*0Sstevel@tonic-gate /* remove cmap */ 2148*0Sstevel@tonic-gate cmap = &cowp->cow_map; 2149*0Sstevel@tonic-gate ASSERT(cmap); 2150*0Sstevel@tonic-gate 2151*0Sstevel@tonic-gate if (cmap->cmap_candidate) 2152*0Sstevel@tonic-gate kmem_free(cmap->cmap_candidate, cmap->cmap_bmsize); 2153*0Sstevel@tonic-gate 2154*0Sstevel@tonic-gate if (cmap->cmap_hastrans) 2155*0Sstevel@tonic-gate kmem_free(cmap->cmap_hastrans, cmap->cmap_bmsize); 2156*0Sstevel@tonic-gate 2157*0Sstevel@tonic-gate if (cmap->cmap_table) 2158*0Sstevel@tonic-gate transtbl_free(&cowp->cow_map); 2159*0Sstevel@tonic-gate 2160*0Sstevel@tonic-gate rw_destroy(&cmap->cmap_rwlock); 2161*0Sstevel@tonic-gate 2162*0Sstevel@tonic-gate while (cmap->cmap_waiters) { 2163*0Sstevel@tonic-gate sema_p(&cmap->cmap_throttle_sem); 2164*0Sstevel@tonic-gate sema_v(&cmap->cmap_throttle_sem); 2165*0Sstevel@tonic-gate } 2166*0Sstevel@tonic-gate sema_destroy(&cmap->cmap_throttle_sem); 2167*0Sstevel@tonic-gate 2168*0Sstevel@tonic-gate /* remove kstats */ 2169*0Sstevel@tonic-gate fssnap_delete_kstats(cowp); 2170*0Sstevel@tonic-gate 2171*0Sstevel@tonic-gate kmem_free(cowp, sizeof (struct cow_info)); 2172*0Sstevel@tonic-gate 2173*0Sstevel@tonic-gate statesidpp = ddi_get_soft_state(statep, sidp->sid_snapnumber); 2174*0Sstevel@tonic-gate if (statesidpp == NULL || *statesidpp == NULL) { 2175*0Sstevel@tonic-gate cmn_err(CE_WARN, 2176*0Sstevel@tonic-gate "fssnap_delete_impl: could not find state for snapshot %d.", 2177*0Sstevel@tonic-gate sidp->sid_snapnumber); 2178*0Sstevel@tonic-gate } 2179*0Sstevel@tonic-gate ASSERT(*statesidpp == sidp); 2180*0Sstevel@tonic-gate 2181*0Sstevel@tonic-gate /* 2182*0Sstevel@tonic-gate * Leave the node in the list marked DISABLED so it can be reused 2183*0Sstevel@tonic-gate * and avoid many race conditions. Return the snapshot number 2184*0Sstevel@tonic-gate * that was deleted. 2185*0Sstevel@tonic-gate */ 2186*0Sstevel@tonic-gate mutex_enter(&snapshot_mutex); 2187*0Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_WRITER); 2188*0Sstevel@tonic-gate sidp->sid_flags &= ~(SID_DISABLING); 2189*0Sstevel@tonic-gate sidp->sid_flags |= SID_DISABLED; 2190*0Sstevel@tonic-gate VN_RELE(sidp->sid_fvp); 2191*0Sstevel@tonic-gate sidp->sid_fvp = NULL; 2192*0Sstevel@tonic-gate snapnumber = sidp->sid_snapnumber; 2193*0Sstevel@tonic-gate 2194*0Sstevel@tonic-gate /* 2195*0Sstevel@tonic-gate * If the snapshot is not busy, free the device info now. Otherwise 2196*0Sstevel@tonic-gate * the device nodes are freed in snap_close() when the device is 2197*0Sstevel@tonic-gate * closed. The sid will not be reused until the device is not busy. 2198*0Sstevel@tonic-gate */ 2199*0Sstevel@tonic-gate if (SID_AVAILABLE(sidp)) { 2200*0Sstevel@tonic-gate /* remove the device nodes */ 2201*0Sstevel@tonic-gate ASSERT(fssnap_dip != NULL); 2202*0Sstevel@tonic-gate (void) snprintf(name, sizeof (name), "%d", 2203*0Sstevel@tonic-gate sidp->sid_snapnumber); 2204*0Sstevel@tonic-gate ddi_remove_minor_node(fssnap_dip, name); 2205*0Sstevel@tonic-gate (void) snprintf(name, sizeof (name), "%d,raw", 2206*0Sstevel@tonic-gate sidp->sid_snapnumber); 2207*0Sstevel@tonic-gate ddi_remove_minor_node(fssnap_dip, name); 2208*0Sstevel@tonic-gate 2209*0Sstevel@tonic-gate /* delete the state structure */ 2210*0Sstevel@tonic-gate ddi_soft_state_free(statep, sidp->sid_snapnumber); 2211*0Sstevel@tonic-gate num_snapshots--; 2212*0Sstevel@tonic-gate } 2213*0Sstevel@tonic-gate 2214*0Sstevel@tonic-gate mutex_exit(&snapshot_mutex); 2215*0Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 2216*0Sstevel@tonic-gate 2217*0Sstevel@tonic-gate return (snapnumber); 2218*0Sstevel@tonic-gate } 2219*0Sstevel@tonic-gate 2220*0Sstevel@tonic-gate /* 2221*0Sstevel@tonic-gate * fssnap_create_kstats() - allocate and initialize snapshot kstats 2222*0Sstevel@tonic-gate * 2223*0Sstevel@tonic-gate */ 2224*0Sstevel@tonic-gate static void 2225*0Sstevel@tonic-gate fssnap_create_kstats(snapshot_id_t *sidp, int snapnum, 2226*0Sstevel@tonic-gate const char *mountpoint, const char *backfilename) 2227*0Sstevel@tonic-gate { 2228*0Sstevel@tonic-gate kstat_t *num, *mntpoint, *bfname; 2229*0Sstevel@tonic-gate kstat_named_t *hw; 2230*0Sstevel@tonic-gate struct cow_info *cowp = sidp->sid_cowinfo; 2231*0Sstevel@tonic-gate struct cow_kstat_num *stats; 2232*0Sstevel@tonic-gate 2233*0Sstevel@tonic-gate /* update the high water mark */ 2234*0Sstevel@tonic-gate if (fssnap_highwater_kstat == NULL) { 2235*0Sstevel@tonic-gate cmn_err(CE_WARN, "fssnap_create_kstats: failed to lookup " 2236*0Sstevel@tonic-gate "high water mark kstat."); 2237*0Sstevel@tonic-gate return; 2238*0Sstevel@tonic-gate } 2239*0Sstevel@tonic-gate 2240*0Sstevel@tonic-gate hw = (kstat_named_t *)fssnap_highwater_kstat->ks_data; 2241*0Sstevel@tonic-gate if (hw->value.ui32 < snapnum) 2242*0Sstevel@tonic-gate hw->value.ui32 = snapnum; 2243*0Sstevel@tonic-gate 2244*0Sstevel@tonic-gate /* initialize the mount point kstat */ 2245*0Sstevel@tonic-gate kstat_delete_byname(snapname, snapnum, FSSNAP_KSTAT_MNTPT); 2246*0Sstevel@tonic-gate 2247*0Sstevel@tonic-gate if (mountpoint != NULL) { 2248*0Sstevel@tonic-gate mntpoint = kstat_create(snapname, snapnum, FSSNAP_KSTAT_MNTPT, 2249*0Sstevel@tonic-gate "misc", KSTAT_TYPE_RAW, strlen(mountpoint) + 1, 0); 2250*0Sstevel@tonic-gate if (mntpoint == NULL) { 2251*0Sstevel@tonic-gate cowp->cow_kstat_mntpt = NULL; 2252*0Sstevel@tonic-gate cmn_err(CE_WARN, "fssnap_create_kstats: failed to " 2253*0Sstevel@tonic-gate "create mount point kstat"); 2254*0Sstevel@tonic-gate } else { 2255*0Sstevel@tonic-gate (void) strncpy(mntpoint->ks_data, mountpoint, 2256*0Sstevel@tonic-gate strlen(mountpoint)); 2257*0Sstevel@tonic-gate cowp->cow_kstat_mntpt = mntpoint; 2258*0Sstevel@tonic-gate kstat_install(mntpoint); 2259*0Sstevel@tonic-gate } 2260*0Sstevel@tonic-gate } else { 2261*0Sstevel@tonic-gate cowp->cow_kstat_mntpt = NULL; 2262*0Sstevel@tonic-gate cmn_err(CE_WARN, "fssnap_create_kstats: mount point not " 2263*0Sstevel@tonic-gate "specified."); 2264*0Sstevel@tonic-gate } 2265*0Sstevel@tonic-gate 2266*0Sstevel@tonic-gate /* initialize the backing file kstat */ 2267*0Sstevel@tonic-gate kstat_delete_byname(snapname, snapnum, FSSNAP_KSTAT_BFNAME); 2268*0Sstevel@tonic-gate 2269*0Sstevel@tonic-gate if (backfilename == NULL) { 2270*0Sstevel@tonic-gate cowp->cow_kstat_bfname = NULL; 2271*0Sstevel@tonic-gate } else { 2272*0Sstevel@tonic-gate bfname = kstat_create(snapname, snapnum, FSSNAP_KSTAT_BFNAME, 2273*0Sstevel@tonic-gate "misc", KSTAT_TYPE_RAW, strlen(backfilename) + 1, 0); 2274*0Sstevel@tonic-gate if (bfname != NULL) { 2275*0Sstevel@tonic-gate (void) strncpy(bfname->ks_data, backfilename, 2276*0Sstevel@tonic-gate strlen(backfilename)); 2277*0Sstevel@tonic-gate cowp->cow_kstat_bfname = bfname; 2278*0Sstevel@tonic-gate kstat_install(bfname); 2279*0Sstevel@tonic-gate } else { 2280*0Sstevel@tonic-gate cowp->cow_kstat_bfname = NULL; 2281*0Sstevel@tonic-gate cmn_err(CE_WARN, "fssnap_create_kstats: failed to " 2282*0Sstevel@tonic-gate "create backing file name kstat"); 2283*0Sstevel@tonic-gate } 2284*0Sstevel@tonic-gate } 2285*0Sstevel@tonic-gate 2286*0Sstevel@tonic-gate /* initialize numeric kstats */ 2287*0Sstevel@tonic-gate kstat_delete_byname(snapname, snapnum, FSSNAP_KSTAT_NUM); 2288*0Sstevel@tonic-gate 2289*0Sstevel@tonic-gate num = kstat_create(snapname, snapnum, FSSNAP_KSTAT_NUM, 2290*0Sstevel@tonic-gate "misc", KSTAT_TYPE_NAMED, 2291*0Sstevel@tonic-gate sizeof (struct cow_kstat_num) / sizeof (kstat_named_t), 2292*0Sstevel@tonic-gate 0); 2293*0Sstevel@tonic-gate if (num == NULL) { 2294*0Sstevel@tonic-gate cmn_err(CE_WARN, "fssnap_create_kstats: failed to create " 2295*0Sstevel@tonic-gate "numeric kstats"); 2296*0Sstevel@tonic-gate cowp->cow_kstat_num = NULL; 2297*0Sstevel@tonic-gate return; 2298*0Sstevel@tonic-gate } 2299*0Sstevel@tonic-gate 2300*0Sstevel@tonic-gate cowp->cow_kstat_num = num; 2301*0Sstevel@tonic-gate stats = num->ks_data; 2302*0Sstevel@tonic-gate num->ks_update = fssnap_update_kstat_num; 2303*0Sstevel@tonic-gate num->ks_private = sidp; 2304*0Sstevel@tonic-gate 2305*0Sstevel@tonic-gate kstat_named_init(&stats->ckn_state, FSSNAP_KSTAT_NUM_STATE, 2306*0Sstevel@tonic-gate KSTAT_DATA_INT32); 2307*0Sstevel@tonic-gate kstat_named_init(&stats->ckn_bfsize, FSSNAP_KSTAT_NUM_BFSIZE, 2308*0Sstevel@tonic-gate KSTAT_DATA_UINT64); 2309*0Sstevel@tonic-gate kstat_named_init(&stats->ckn_maxsize, FSSNAP_KSTAT_NUM_MAXSIZE, 2310*0Sstevel@tonic-gate KSTAT_DATA_UINT64); 2311*0Sstevel@tonic-gate kstat_named_init(&stats->ckn_createtime, FSSNAP_KSTAT_NUM_CREATETIME, 2312*0Sstevel@tonic-gate KSTAT_DATA_LONG); 2313*0Sstevel@tonic-gate kstat_named_init(&stats->ckn_chunksize, FSSNAP_KSTAT_NUM_CHUNKSIZE, 2314*0Sstevel@tonic-gate KSTAT_DATA_UINT32); 2315*0Sstevel@tonic-gate 2316*0Sstevel@tonic-gate /* initialize the static kstats */ 2317*0Sstevel@tonic-gate stats->ckn_chunksize.value.ui32 = cowp->cow_map.cmap_chunksz; 2318*0Sstevel@tonic-gate stats->ckn_maxsize.value.ui64 = cowp->cow_map.cmap_maxsize; 2319*0Sstevel@tonic-gate stats->ckn_createtime.value.l = gethrestime_sec(); 2320*0Sstevel@tonic-gate 2321*0Sstevel@tonic-gate kstat_install(num); 2322*0Sstevel@tonic-gate } 2323*0Sstevel@tonic-gate 2324*0Sstevel@tonic-gate /* 2325*0Sstevel@tonic-gate * fssnap_update_kstat_num() - update a numerical snapshot kstat value 2326*0Sstevel@tonic-gate * 2327*0Sstevel@tonic-gate */ 2328*0Sstevel@tonic-gate int 2329*0Sstevel@tonic-gate fssnap_update_kstat_num(kstat_t *ksp, int rw) 2330*0Sstevel@tonic-gate { 2331*0Sstevel@tonic-gate snapshot_id_t *sidp = (snapshot_id_t *)ksp->ks_private; 2332*0Sstevel@tonic-gate struct cow_info *cowp = sidp->sid_cowinfo; 2333*0Sstevel@tonic-gate struct cow_kstat_num *stats = ksp->ks_data; 2334*0Sstevel@tonic-gate 2335*0Sstevel@tonic-gate if (rw == KSTAT_WRITE) 2336*0Sstevel@tonic-gate return (EACCES); 2337*0Sstevel@tonic-gate 2338*0Sstevel@tonic-gate /* state */ 2339*0Sstevel@tonic-gate if (sidp->sid_flags & SID_CREATING) 2340*0Sstevel@tonic-gate stats->ckn_state.value.i32 = COWSTATE_CREATING; 2341*0Sstevel@tonic-gate else if (SID_INACTIVE(sidp)) 2342*0Sstevel@tonic-gate stats->ckn_state.value.i32 = COWSTATE_DISABLED; 2343*0Sstevel@tonic-gate else if (SID_BUSY(sidp)) 2344*0Sstevel@tonic-gate stats->ckn_state.value.i32 = COWSTATE_ACTIVE; 2345*0Sstevel@tonic-gate else 2346*0Sstevel@tonic-gate stats->ckn_state.value.i32 = COWSTATE_IDLE; 2347*0Sstevel@tonic-gate 2348*0Sstevel@tonic-gate /* bfsize */ 2349*0Sstevel@tonic-gate stats->ckn_bfsize.value.ui64 = cowp->cow_map.cmap_nchunks * 2350*0Sstevel@tonic-gate cowp->cow_map.cmap_chunksz; 2351*0Sstevel@tonic-gate 2352*0Sstevel@tonic-gate return (0); 2353*0Sstevel@tonic-gate } 2354*0Sstevel@tonic-gate 2355*0Sstevel@tonic-gate /* 2356*0Sstevel@tonic-gate * fssnap_delete_kstats() - deallocate snapshot kstats 2357*0Sstevel@tonic-gate * 2358*0Sstevel@tonic-gate */ 2359*0Sstevel@tonic-gate void 2360*0Sstevel@tonic-gate fssnap_delete_kstats(struct cow_info *cowp) 2361*0Sstevel@tonic-gate { 2362*0Sstevel@tonic-gate if (cowp->cow_kstat_num != NULL) { 2363*0Sstevel@tonic-gate kstat_delete(cowp->cow_kstat_num); 2364*0Sstevel@tonic-gate cowp->cow_kstat_num = NULL; 2365*0Sstevel@tonic-gate } 2366*0Sstevel@tonic-gate if (cowp->cow_kstat_mntpt != NULL) { 2367*0Sstevel@tonic-gate kstat_delete(cowp->cow_kstat_mntpt); 2368*0Sstevel@tonic-gate cowp->cow_kstat_mntpt = NULL; 2369*0Sstevel@tonic-gate } 2370*0Sstevel@tonic-gate if (cowp->cow_kstat_bfname != NULL) { 2371*0Sstevel@tonic-gate kstat_delete(cowp->cow_kstat_bfname); 2372*0Sstevel@tonic-gate cowp->cow_kstat_bfname = NULL; 2373*0Sstevel@tonic-gate } 2374*0Sstevel@tonic-gate } 2375