1 /* $NetBSD: fss.c,v 1.73 2011/02/24 09:38:57 hannken Exp $ */ 2 3 /*- 4 * Copyright (c) 2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Juergen Hannken-Illjes. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * File system snapshot disk driver. 34 * 35 * Block/character interface to the snapshot of a mounted file system. 36 */ 37 38 #include <sys/cdefs.h> 39 __KERNEL_RCSID(0, "$NetBSD: fss.c,v 1.73 2011/02/24 09:38:57 hannken Exp $"); 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/namei.h> 44 #include <sys/proc.h> 45 #include <sys/errno.h> 46 #include <sys/malloc.h> 47 #include <sys/buf.h> 48 #include <sys/ioctl.h> 49 #include <sys/disklabel.h> 50 #include <sys/device.h> 51 #include <sys/disk.h> 52 #include <sys/stat.h> 53 #include <sys/mount.h> 54 #include <sys/vnode.h> 55 #include <sys/file.h> 56 #include <sys/uio.h> 57 #include <sys/conf.h> 58 #include <sys/kthread.h> 59 #include <sys/fstrans.h> 60 #include <sys/simplelock.h> 61 #include <sys/vfs_syscalls.h> /* For do_sys_unlink(). */ 62 63 #include <miscfs/specfs/specdev.h> 64 65 #include <dev/fssvar.h> 66 67 #include <uvm/uvm.h> 68 69 void fssattach(int); 70 71 dev_type_open(fss_open); 72 dev_type_close(fss_close); 73 dev_type_read(fss_read); 74 dev_type_write(fss_write); 75 dev_type_ioctl(fss_ioctl); 76 dev_type_strategy(fss_strategy); 77 dev_type_dump(fss_dump); 78 dev_type_size(fss_size); 79 80 static void fss_unmount_hook(struct mount *); 81 static int fss_copy_on_write(void *, struct buf *, bool); 82 static inline void fss_error(struct fss_softc *, const char *); 83 static int fss_create_files(struct fss_softc *, struct fss_set *, 84 off_t *, struct lwp *); 85 static int fss_create_snapshot(struct fss_softc *, struct fss_set *, 86 struct lwp *); 87 static int fss_delete_snapshot(struct fss_softc *, struct lwp *); 88 static int fss_softc_alloc(struct fss_softc *); 89 static void fss_softc_free(struct fss_softc *); 90 static int fss_read_cluster(struct fss_softc *, u_int32_t); 91 static void fss_bs_thread(void *); 92 static int fss_bs_io(struct fss_softc *, fss_io_type, 93 u_int32_t, off_t, int, void *); 94 static u_int32_t *fss_bs_indir(struct fss_softc *, u_int32_t); 95 96 static kmutex_t fss_device_lock; /* Protect all units. */ 97 static int fss_num_attached = 0; /* Number of attached devices. */ 98 static struct vfs_hooks fss_vfs_hooks = { 99 .vh_unmount = fss_unmount_hook 100 }; 101 102 const struct bdevsw fss_bdevsw = { 103 fss_open, fss_close, fss_strategy, fss_ioctl, 104 fss_dump, fss_size, D_DISK | D_MPSAFE 105 }; 106 107 const struct cdevsw fss_cdevsw = { 108 fss_open, fss_close, fss_read, fss_write, fss_ioctl, 109 nostop, notty, nopoll, nommap, nokqfilter, D_DISK | D_MPSAFE 110 }; 111 112 static int fss_match(device_t, cfdata_t, void *); 113 static void fss_attach(device_t, device_t, void *); 114 static int fss_detach(device_t, int); 115 116 CFATTACH_DECL_NEW(fss, sizeof(struct fss_softc), 117 fss_match, fss_attach, fss_detach, NULL); 118 extern struct cfdriver fss_cd; 119 120 void 121 fssattach(int num) 122 { 123 124 mutex_init(&fss_device_lock, MUTEX_DEFAULT, IPL_NONE); 125 if (config_cfattach_attach(fss_cd.cd_name, &fss_ca)) 126 aprint_error("%s: unable to register\n", fss_cd.cd_name); 127 } 128 129 static int 130 fss_match(device_t self, cfdata_t cfdata, void *aux) 131 { 132 return 1; 133 } 134 135 static void 136 fss_attach(device_t parent, device_t self, void *aux) 137 { 138 struct fss_softc *sc = device_private(self); 139 140 sc->sc_dev = self; 141 sc->sc_bdev = NODEV; 142 mutex_init(&sc->sc_slock, MUTEX_DEFAULT, IPL_NONE); 143 mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE); 144 cv_init(&sc->sc_work_cv, "fssbs"); 145 cv_init(&sc->sc_cache_cv, "cowwait"); 146 bufq_alloc(&sc->sc_bufq, "fcfs", 0); 147 sc->sc_dkdev = malloc(sizeof(*sc->sc_dkdev), M_DEVBUF, M_WAITOK); 148 sc->sc_dkdev->dk_info = NULL; 149 disk_init(sc->sc_dkdev, device_xname(self), NULL); 150 if (!pmf_device_register(self, NULL, NULL)) 151 aprint_error_dev(self, "couldn't establish power handler\n"); 152 153 if (fss_num_attached++ == 0) 154 vfs_hooks_attach(&fss_vfs_hooks); 155 } 156 157 static int 158 fss_detach(device_t self, int flags) 159 { 160 struct fss_softc *sc = device_private(self); 161 162 if (sc->sc_flags & FSS_ACTIVE) 163 return EBUSY; 164 165 if (--fss_num_attached == 0) 166 vfs_hooks_detach(&fss_vfs_hooks); 167 168 pmf_device_deregister(self); 169 mutex_destroy(&sc->sc_slock); 170 mutex_destroy(&sc->sc_lock); 171 cv_destroy(&sc->sc_work_cv); 172 cv_destroy(&sc->sc_cache_cv); 173 bufq_drain(sc->sc_bufq); 174 bufq_free(sc->sc_bufq); 175 disk_destroy(sc->sc_dkdev); 176 free(sc->sc_dkdev, M_DEVBUF); 177 178 return 0; 179 } 180 181 int 182 fss_open(dev_t dev, int flags, int mode, struct lwp *l) 183 { 184 int mflag; 185 cfdata_t cf; 186 struct fss_softc *sc; 187 188 mflag = (mode == S_IFCHR ? FSS_CDEV_OPEN : FSS_BDEV_OPEN); 189 190 mutex_enter(&fss_device_lock); 191 192 sc = device_lookup_private(&fss_cd, minor(dev)); 193 if (sc == NULL) { 194 cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK); 195 cf->cf_name = fss_cd.cd_name; 196 cf->cf_atname = fss_cd.cd_name; 197 cf->cf_unit = minor(dev); 198 cf->cf_fstate = FSTATE_STAR; 199 sc = device_private(config_attach_pseudo(cf)); 200 if (sc == NULL) { 201 mutex_exit(&fss_device_lock); 202 return ENOMEM; 203 } 204 } 205 206 mutex_enter(&sc->sc_slock); 207 208 sc->sc_flags |= mflag; 209 210 mutex_exit(&sc->sc_slock); 211 mutex_exit(&fss_device_lock); 212 213 return 0; 214 } 215 216 int 217 fss_close(dev_t dev, int flags, int mode, struct lwp *l) 218 { 219 int mflag, error; 220 cfdata_t cf; 221 struct fss_softc *sc = device_lookup_private(&fss_cd, minor(dev)); 222 223 mflag = (mode == S_IFCHR ? FSS_CDEV_OPEN : FSS_BDEV_OPEN); 224 error = 0; 225 226 restart: 227 mutex_enter(&sc->sc_slock); 228 if ((sc->sc_flags & (FSS_CDEV_OPEN|FSS_BDEV_OPEN)) != mflag) { 229 sc->sc_flags &= ~mflag; 230 mutex_exit(&sc->sc_slock); 231 return 0; 232 } 233 if ((sc->sc_flags & FSS_ACTIVE) != 0 && 234 (sc->sc_uflags & FSS_UNCONFIG_ON_CLOSE) != 0) { 235 sc->sc_uflags &= ~FSS_UNCONFIG_ON_CLOSE; 236 mutex_exit(&sc->sc_slock); 237 error = fss_ioctl(dev, FSSIOCCLR, NULL, FWRITE, l); 238 goto restart; 239 } 240 if ((sc->sc_flags & FSS_ACTIVE) != 0) { 241 mutex_exit(&sc->sc_slock); 242 return error; 243 } 244 if (! mutex_tryenter(&fss_device_lock)) { 245 mutex_exit(&sc->sc_slock); 246 goto restart; 247 } 248 249 KASSERT((sc->sc_flags & FSS_ACTIVE) == 0); 250 KASSERT((sc->sc_flags & (FSS_CDEV_OPEN|FSS_BDEV_OPEN)) == mflag); 251 mutex_exit(&sc->sc_slock); 252 cf = device_cfdata(sc->sc_dev); 253 error = config_detach(sc->sc_dev, DETACH_QUIET); 254 if (! error) 255 free(cf, M_DEVBUF); 256 mutex_exit(&fss_device_lock); 257 258 return error; 259 } 260 261 void 262 fss_strategy(struct buf *bp) 263 { 264 const bool write = ((bp->b_flags & B_READ) != B_READ); 265 struct fss_softc *sc = device_lookup_private(&fss_cd, minor(bp->b_dev)); 266 267 mutex_enter(&sc->sc_slock); 268 269 if (write || !FSS_ISVALID(sc)) { 270 271 mutex_exit(&sc->sc_slock); 272 273 bp->b_error = (write ? EROFS : ENXIO); 274 bp->b_resid = bp->b_bcount; 275 biodone(bp); 276 return; 277 } 278 279 bp->b_rawblkno = bp->b_blkno; 280 bufq_put(sc->sc_bufq, bp); 281 cv_signal(&sc->sc_work_cv); 282 283 mutex_exit(&sc->sc_slock); 284 } 285 286 int 287 fss_read(dev_t dev, struct uio *uio, int flags) 288 { 289 return physio(fss_strategy, NULL, dev, B_READ, minphys, uio); 290 } 291 292 int 293 fss_write(dev_t dev, struct uio *uio, int flags) 294 { 295 return physio(fss_strategy, NULL, dev, B_WRITE, minphys, uio); 296 } 297 298 int 299 fss_ioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 300 { 301 int error; 302 struct fss_softc *sc = device_lookup_private(&fss_cd, minor(dev)); 303 struct fss_set *fss = (struct fss_set *)data; 304 struct fss_get *fsg = (struct fss_get *)data; 305 306 switch (cmd) { 307 case FSSIOCSET50: 308 fss->fss_flags = 0; 309 /* Fall through */ 310 case FSSIOCSET: 311 mutex_enter(&sc->sc_lock); 312 if ((flag & FWRITE) == 0) 313 error = EPERM; 314 else if ((sc->sc_flags & FSS_ACTIVE) != 0) 315 error = EBUSY; 316 else 317 error = fss_create_snapshot(sc, fss, l); 318 if (error == 0) 319 sc->sc_uflags = fss->fss_flags; 320 mutex_exit(&sc->sc_lock); 321 break; 322 323 case FSSIOCCLR: 324 mutex_enter(&sc->sc_lock); 325 if ((flag & FWRITE) == 0) 326 error = EPERM; 327 else if ((sc->sc_flags & FSS_ACTIVE) == 0) 328 error = ENXIO; 329 else 330 error = fss_delete_snapshot(sc, l); 331 mutex_exit(&sc->sc_lock); 332 break; 333 334 case FSSIOCGET: 335 mutex_enter(&sc->sc_lock); 336 switch (sc->sc_flags & (FSS_PERSISTENT | FSS_ACTIVE)) { 337 case FSS_ACTIVE: 338 memcpy(fsg->fsg_mount, sc->sc_mntname, MNAMELEN); 339 fsg->fsg_csize = FSS_CLSIZE(sc); 340 fsg->fsg_time = sc->sc_time; 341 fsg->fsg_mount_size = sc->sc_clcount; 342 fsg->fsg_bs_size = sc->sc_clnext; 343 error = 0; 344 break; 345 case FSS_PERSISTENT | FSS_ACTIVE: 346 memcpy(fsg->fsg_mount, sc->sc_mntname, MNAMELEN); 347 fsg->fsg_csize = 0; 348 fsg->fsg_time = sc->sc_time; 349 fsg->fsg_mount_size = 0; 350 fsg->fsg_bs_size = 0; 351 error = 0; 352 break; 353 default: 354 error = ENXIO; 355 break; 356 } 357 mutex_exit(&sc->sc_lock); 358 break; 359 360 case FSSIOFSET: 361 mutex_enter(&sc->sc_slock); 362 sc->sc_uflags = *(int *)data; 363 mutex_exit(&sc->sc_slock); 364 error = 0; 365 break; 366 367 case FSSIOFGET: 368 mutex_enter(&sc->sc_slock); 369 *(int *)data = sc->sc_uflags; 370 mutex_exit(&sc->sc_slock); 371 error = 0; 372 break; 373 374 default: 375 error = EINVAL; 376 break; 377 } 378 379 return error; 380 } 381 382 int 383 fss_size(dev_t dev) 384 { 385 return -1; 386 } 387 388 int 389 fss_dump(dev_t dev, daddr_t blkno, void *va, 390 size_t size) 391 { 392 return EROFS; 393 } 394 395 /* 396 * An error occurred reading or writing the snapshot or backing store. 397 * If it is the first error log to console. 398 * The caller holds the mutex. 399 */ 400 static inline void 401 fss_error(struct fss_softc *sc, const char *msg) 402 { 403 404 if ((sc->sc_flags & (FSS_ACTIVE|FSS_ERROR)) == FSS_ACTIVE) 405 aprint_error_dev(sc->sc_dev, "snapshot invalid: %s\n", msg); 406 if ((sc->sc_flags & FSS_ACTIVE) == FSS_ACTIVE) 407 sc->sc_flags |= FSS_ERROR; 408 } 409 410 /* 411 * Allocate the variable sized parts of the softc and 412 * fork the kernel thread. 413 * 414 * The fields sc_clcount, sc_clshift, sc_cache_size and sc_indir_size 415 * must be initialized. 416 */ 417 static int 418 fss_softc_alloc(struct fss_softc *sc) 419 { 420 int i, error; 421 422 if ((sc->sc_flags & FSS_PERSISTENT) == 0) { 423 sc->sc_copied = 424 kmem_zalloc(howmany(sc->sc_clcount, NBBY), KM_SLEEP); 425 if (sc->sc_copied == NULL) 426 return(ENOMEM); 427 428 sc->sc_cache = kmem_alloc(sc->sc_cache_size * 429 sizeof(struct fss_cache), KM_SLEEP); 430 if (sc->sc_cache == NULL) 431 return(ENOMEM); 432 433 for (i = 0; i < sc->sc_cache_size; i++) { 434 sc->sc_cache[i].fc_type = FSS_CACHE_FREE; 435 sc->sc_cache[i].fc_data = 436 kmem_alloc(FSS_CLSIZE(sc), KM_SLEEP); 437 if (sc->sc_cache[i].fc_data == NULL) 438 return(ENOMEM); 439 cv_init(&sc->sc_cache[i].fc_state_cv, "cowwait1"); 440 } 441 442 sc->sc_indir_valid = 443 kmem_zalloc(howmany(sc->sc_indir_size, NBBY), KM_SLEEP); 444 if (sc->sc_indir_valid == NULL) 445 return(ENOMEM); 446 447 sc->sc_indir_data = kmem_zalloc(FSS_CLSIZE(sc), KM_SLEEP); 448 if (sc->sc_indir_data == NULL) 449 return(ENOMEM); 450 } else { 451 sc->sc_copied = NULL; 452 sc->sc_cache = NULL; 453 sc->sc_indir_valid = NULL; 454 sc->sc_indir_data = NULL; 455 } 456 457 sc->sc_flags |= FSS_BS_THREAD; 458 if ((error = kthread_create(PRI_BIO, 0, NULL, fss_bs_thread, sc, 459 &sc->sc_bs_lwp, device_xname(sc->sc_dev))) != 0) { 460 sc->sc_flags &= ~FSS_BS_THREAD; 461 return error; 462 } 463 464 disk_attach(sc->sc_dkdev); 465 466 return 0; 467 } 468 469 /* 470 * Free the variable sized parts of the softc. 471 */ 472 static void 473 fss_softc_free(struct fss_softc *sc) 474 { 475 int i; 476 477 if ((sc->sc_flags & FSS_BS_THREAD) != 0) { 478 mutex_enter(&sc->sc_slock); 479 sc->sc_flags &= ~FSS_BS_THREAD; 480 cv_signal(&sc->sc_work_cv); 481 while (sc->sc_bs_lwp != NULL) 482 kpause("fssdetach", false, 1, &sc->sc_slock); 483 mutex_exit(&sc->sc_slock); 484 485 disk_detach(sc->sc_dkdev); 486 } 487 488 if (sc->sc_copied != NULL) 489 kmem_free(sc->sc_copied, howmany(sc->sc_clcount, NBBY)); 490 sc->sc_copied = NULL; 491 492 if (sc->sc_cache != NULL) { 493 for (i = 0; i < sc->sc_cache_size; i++) 494 if (sc->sc_cache[i].fc_data != NULL) { 495 cv_destroy(&sc->sc_cache[i].fc_state_cv); 496 kmem_free(sc->sc_cache[i].fc_data, 497 FSS_CLSIZE(sc)); 498 } 499 kmem_free(sc->sc_cache, 500 sc->sc_cache_size*sizeof(struct fss_cache)); 501 } 502 sc->sc_cache = NULL; 503 504 if (sc->sc_indir_valid != NULL) 505 kmem_free(sc->sc_indir_valid, howmany(sc->sc_indir_size, NBBY)); 506 sc->sc_indir_valid = NULL; 507 508 if (sc->sc_indir_data != NULL) 509 kmem_free(sc->sc_indir_data, FSS_CLSIZE(sc)); 510 sc->sc_indir_data = NULL; 511 } 512 513 /* 514 * Set all active snapshots on this file system into ERROR state. 515 */ 516 static void 517 fss_unmount_hook(struct mount *mp) 518 { 519 int i; 520 struct fss_softc *sc; 521 522 mutex_enter(&fss_device_lock); 523 for (i = 0; i < fss_cd.cd_ndevs; i++) { 524 if ((sc = device_lookup_private(&fss_cd, i)) == NULL) 525 continue; 526 mutex_enter(&sc->sc_slock); 527 if ((sc->sc_flags & FSS_ACTIVE) != 0 && 528 sc->sc_mount == mp) 529 fss_error(sc, "forced unmount"); 530 mutex_exit(&sc->sc_slock); 531 } 532 mutex_exit(&fss_device_lock); 533 } 534 535 /* 536 * A buffer is written to the snapshotted block device. Copy to 537 * backing store if needed. 538 */ 539 static int 540 fss_copy_on_write(void *v, struct buf *bp, bool data_valid) 541 { 542 int error; 543 u_int32_t cl, ch, c; 544 struct fss_softc *sc = v; 545 546 mutex_enter(&sc->sc_slock); 547 if (!FSS_ISVALID(sc)) { 548 mutex_exit(&sc->sc_slock); 549 return 0; 550 } 551 552 cl = FSS_BTOCL(sc, dbtob(bp->b_blkno)); 553 ch = FSS_BTOCL(sc, dbtob(bp->b_blkno)+bp->b_bcount-1); 554 error = 0; 555 if (curlwp == uvm.pagedaemon_lwp) { 556 for (c = cl; c <= ch; c++) 557 if (isclr(sc->sc_copied, c)) { 558 error = ENOMEM; 559 break; 560 } 561 } 562 mutex_exit(&sc->sc_slock); 563 564 if (error == 0) 565 for (c = cl; c <= ch; c++) { 566 error = fss_read_cluster(sc, c); 567 if (error) 568 break; 569 } 570 571 return error; 572 } 573 574 /* 575 * Lookup and open needed files. 576 * 577 * For file system internal snapshot initializes sc_mntname, sc_mount, 578 * sc_bs_vp and sc_time. 579 * 580 * Otherwise returns dev and size of the underlying block device. 581 * Initializes sc_mntname, sc_mount, sc_bdev, sc_bs_vp and sc_mount 582 */ 583 static int 584 fss_create_files(struct fss_softc *sc, struct fss_set *fss, 585 off_t *bsize, struct lwp *l) 586 { 587 int error, bits, fsbsize; 588 struct timespec ts; 589 struct partinfo dpart; 590 /* nd -> nd2 to reduce mistakes while updating only some namei calls */ 591 struct pathbuf *pb2; 592 struct nameidata nd2; 593 struct vnode *vp; 594 595 /* 596 * Get the mounted file system. 597 */ 598 599 error = namei_simple_user(fss->fss_mount, 600 NSM_FOLLOW_NOEMULROOT, &vp); 601 if (error != 0) 602 return error; 603 604 if ((vp->v_vflag & VV_ROOT) != VV_ROOT) { 605 vrele(vp); 606 return EINVAL; 607 } 608 609 sc->sc_mount = vp->v_mount; 610 memcpy(sc->sc_mntname, sc->sc_mount->mnt_stat.f_mntonname, MNAMELEN); 611 612 vrele(vp); 613 614 /* 615 * Check for file system internal snapshot. 616 */ 617 618 error = namei_simple_user(fss->fss_bstore, 619 NSM_FOLLOW_NOEMULROOT, &vp); 620 if (error != 0) 621 return error; 622 623 if (vp->v_type == VREG && vp->v_mount == sc->sc_mount) { 624 sc->sc_flags |= FSS_PERSISTENT; 625 sc->sc_bs_vp = vp; 626 627 fsbsize = sc->sc_bs_vp->v_mount->mnt_stat.f_iosize; 628 bits = sizeof(sc->sc_bs_bshift)*NBBY; 629 for (sc->sc_bs_bshift = 1; sc->sc_bs_bshift < bits; 630 sc->sc_bs_bshift++) 631 if (FSS_FSBSIZE(sc) == fsbsize) 632 break; 633 if (sc->sc_bs_bshift >= bits) 634 return EINVAL; 635 636 sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1; 637 sc->sc_clshift = 0; 638 639 if ((fss->fss_flags & FSS_UNLINK_ON_CREATE) != 0) { 640 error = do_sys_unlink(fss->fss_bstore, UIO_USERSPACE); 641 if (error) 642 return error; 643 } 644 error = vn_lock(vp, LK_EXCLUSIVE); 645 if (error != 0) 646 return error; 647 error = VFS_SNAPSHOT(sc->sc_mount, sc->sc_bs_vp, &ts); 648 TIMESPEC_TO_TIMEVAL(&sc->sc_time, &ts); 649 650 VOP_UNLOCK(sc->sc_bs_vp); 651 652 return error; 653 } 654 vrele(vp); 655 656 /* 657 * Get the block device it is mounted on. 658 */ 659 660 error = namei_simple_kernel(sc->sc_mount->mnt_stat.f_mntfromname, 661 NSM_FOLLOW_NOEMULROOT, &vp); 662 if (error != 0) 663 return error; 664 665 if (vp->v_type != VBLK) { 666 vrele(vp); 667 return EINVAL; 668 } 669 670 sc->sc_bdev = vp->v_rdev; 671 vrele(vp); 672 673 /* 674 * Get the block device size. 675 */ 676 677 error = bdev_ioctl(sc->sc_bdev, DIOCGPART, &dpart, FREAD, l); 678 if (error) 679 return error; 680 681 *bsize = (off_t)dpart.disklab->d_secsize*dpart.part->p_size; 682 683 /* 684 * Get the backing store 685 */ 686 687 error = pathbuf_copyin(fss->fss_bstore, &pb2); 688 if (error) { 689 return error; 690 } 691 NDINIT(&nd2, LOOKUP, FOLLOW, pb2); 692 if ((error = vn_open(&nd2, FREAD|FWRITE, 0)) != 0) { 693 pathbuf_destroy(pb2); 694 return error; 695 } 696 VOP_UNLOCK(nd2.ni_vp); 697 698 sc->sc_bs_vp = nd2.ni_vp; 699 700 if (nd2.ni_vp->v_type != VREG && nd2.ni_vp->v_type != VCHR) { 701 pathbuf_destroy(pb2); 702 return EINVAL; 703 } 704 pathbuf_destroy(pb2); 705 706 if ((fss->fss_flags & FSS_UNLINK_ON_CREATE) != 0) { 707 error = do_sys_unlink(fss->fss_bstore, UIO_USERSPACE); 708 if (error) 709 return error; 710 } 711 if (sc->sc_bs_vp->v_type == VREG) { 712 fsbsize = sc->sc_bs_vp->v_mount->mnt_stat.f_iosize; 713 if (fsbsize & (fsbsize-1)) /* No power of two */ 714 return EINVAL; 715 for (sc->sc_bs_bshift = 1; sc->sc_bs_bshift < 32; 716 sc->sc_bs_bshift++) 717 if (FSS_FSBSIZE(sc) == fsbsize) 718 break; 719 if (sc->sc_bs_bshift >= 32) 720 return EINVAL; 721 sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1; 722 } else { 723 sc->sc_bs_bshift = DEV_BSHIFT; 724 sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1; 725 } 726 727 return 0; 728 } 729 730 /* 731 * Create a snapshot. 732 */ 733 static int 734 fss_create_snapshot(struct fss_softc *sc, struct fss_set *fss, struct lwp *l) 735 { 736 int len, error; 737 u_int32_t csize; 738 off_t bsize; 739 740 bsize = 0; /* XXX gcc */ 741 742 /* 743 * Open needed files. 744 */ 745 if ((error = fss_create_files(sc, fss, &bsize, l)) != 0) 746 goto bad; 747 748 if (sc->sc_flags & FSS_PERSISTENT) { 749 fss_softc_alloc(sc); 750 sc->sc_flags |= FSS_ACTIVE; 751 return 0; 752 } 753 754 /* 755 * Set cluster size. Must be a power of two and 756 * a multiple of backing store block size. 757 */ 758 if (fss->fss_csize <= 0) 759 csize = MAXPHYS; 760 else 761 csize = fss->fss_csize; 762 if (bsize/csize > FSS_CLUSTER_MAX) 763 csize = bsize/FSS_CLUSTER_MAX+1; 764 765 for (sc->sc_clshift = sc->sc_bs_bshift; sc->sc_clshift < 32; 766 sc->sc_clshift++) 767 if (FSS_CLSIZE(sc) >= csize) 768 break; 769 if (sc->sc_clshift >= 32) { 770 error = EINVAL; 771 goto bad; 772 } 773 sc->sc_clmask = FSS_CLSIZE(sc)-1; 774 775 /* 776 * Set number of cache slots. 777 */ 778 if (FSS_CLSIZE(sc) <= 8192) 779 sc->sc_cache_size = 32; 780 else if (FSS_CLSIZE(sc) <= 65536) 781 sc->sc_cache_size = 8; 782 else 783 sc->sc_cache_size = 4; 784 785 /* 786 * Set number of clusters and size of last cluster. 787 */ 788 sc->sc_clcount = FSS_BTOCL(sc, bsize-1)+1; 789 sc->sc_clresid = FSS_CLOFF(sc, bsize-1)+1; 790 791 /* 792 * Set size of indirect table. 793 */ 794 len = sc->sc_clcount*sizeof(u_int32_t); 795 sc->sc_indir_size = FSS_BTOCL(sc, len)+1; 796 sc->sc_clnext = sc->sc_indir_size; 797 sc->sc_indir_cur = 0; 798 799 if ((error = fss_softc_alloc(sc)) != 0) 800 goto bad; 801 802 /* 803 * Activate the snapshot. 804 */ 805 806 if ((error = vfs_suspend(sc->sc_mount, 0)) != 0) 807 goto bad; 808 809 microtime(&sc->sc_time); 810 811 error = fscow_establish(sc->sc_mount, fss_copy_on_write, sc); 812 if (error == 0) 813 sc->sc_flags |= FSS_ACTIVE; 814 815 vfs_resume(sc->sc_mount); 816 817 if (error != 0) 818 goto bad; 819 820 aprint_debug_dev(sc->sc_dev, "%s snapshot active\n", sc->sc_mntname); 821 aprint_debug_dev(sc->sc_dev, 822 "%u clusters of %u, %u cache slots, %u indir clusters\n", 823 sc->sc_clcount, FSS_CLSIZE(sc), 824 sc->sc_cache_size, sc->sc_indir_size); 825 826 return 0; 827 828 bad: 829 fss_softc_free(sc); 830 if (sc->sc_bs_vp != NULL) { 831 if (sc->sc_flags & FSS_PERSISTENT) 832 vrele(sc->sc_bs_vp); 833 else 834 vn_close(sc->sc_bs_vp, FREAD|FWRITE, l->l_cred); 835 } 836 sc->sc_bs_vp = NULL; 837 838 return error; 839 } 840 841 /* 842 * Delete a snapshot. 843 */ 844 static int 845 fss_delete_snapshot(struct fss_softc *sc, struct lwp *l) 846 { 847 848 if ((sc->sc_flags & FSS_PERSISTENT) == 0) 849 fscow_disestablish(sc->sc_mount, fss_copy_on_write, sc); 850 851 mutex_enter(&sc->sc_slock); 852 sc->sc_flags &= ~(FSS_ACTIVE|FSS_ERROR); 853 sc->sc_mount = NULL; 854 sc->sc_bdev = NODEV; 855 mutex_exit(&sc->sc_slock); 856 857 fss_softc_free(sc); 858 if (sc->sc_flags & FSS_PERSISTENT) 859 vrele(sc->sc_bs_vp); 860 else 861 vn_close(sc->sc_bs_vp, FREAD|FWRITE, l->l_cred); 862 sc->sc_bs_vp = NULL; 863 sc->sc_flags &= ~FSS_PERSISTENT; 864 865 return 0; 866 } 867 868 /* 869 * Read a cluster from the snapshotted block device to the cache. 870 */ 871 static int 872 fss_read_cluster(struct fss_softc *sc, u_int32_t cl) 873 { 874 int error, todo, offset, len; 875 daddr_t dblk; 876 struct buf *bp, *mbp; 877 struct fss_cache *scp, *scl; 878 879 /* 880 * Get a free cache slot. 881 */ 882 scl = sc->sc_cache+sc->sc_cache_size; 883 884 mutex_enter(&sc->sc_slock); 885 886 restart: 887 if (isset(sc->sc_copied, cl) || !FSS_ISVALID(sc)) { 888 mutex_exit(&sc->sc_slock); 889 return 0; 890 } 891 892 for (scp = sc->sc_cache; scp < scl; scp++) 893 if (scp->fc_cluster == cl) { 894 if (scp->fc_type == FSS_CACHE_VALID) { 895 mutex_exit(&sc->sc_slock); 896 return 0; 897 } else if (scp->fc_type == FSS_CACHE_BUSY) { 898 cv_wait(&scp->fc_state_cv, &sc->sc_slock); 899 goto restart; 900 } 901 } 902 903 for (scp = sc->sc_cache; scp < scl; scp++) 904 if (scp->fc_type == FSS_CACHE_FREE) { 905 scp->fc_type = FSS_CACHE_BUSY; 906 scp->fc_cluster = cl; 907 break; 908 } 909 if (scp >= scl) { 910 cv_wait(&sc->sc_cache_cv, &sc->sc_slock); 911 goto restart; 912 } 913 914 mutex_exit(&sc->sc_slock); 915 916 /* 917 * Start the read. 918 */ 919 dblk = btodb(FSS_CLTOB(sc, cl)); 920 if (cl == sc->sc_clcount-1) { 921 todo = sc->sc_clresid; 922 memset((char *)scp->fc_data + todo, 0, FSS_CLSIZE(sc) - todo); 923 } else 924 todo = FSS_CLSIZE(sc); 925 offset = 0; 926 mbp = getiobuf(NULL, true); 927 mbp->b_bufsize = todo; 928 mbp->b_data = scp->fc_data; 929 mbp->b_resid = mbp->b_bcount = todo; 930 mbp->b_flags = B_READ; 931 mbp->b_cflags = BC_BUSY; 932 mbp->b_dev = sc->sc_bdev; 933 while (todo > 0) { 934 len = todo; 935 if (len > MAXPHYS) 936 len = MAXPHYS; 937 if (btodb(FSS_CLTOB(sc, cl)) == dblk && len == todo) 938 bp = mbp; 939 else { 940 bp = getiobuf(NULL, true); 941 nestiobuf_setup(mbp, bp, offset, len); 942 } 943 bp->b_lblkno = 0; 944 bp->b_blkno = dblk; 945 bdev_strategy(bp); 946 dblk += btodb(len); 947 offset += len; 948 todo -= len; 949 } 950 error = biowait(mbp); 951 putiobuf(mbp); 952 953 mutex_enter(&sc->sc_slock); 954 scp->fc_type = (error ? FSS_CACHE_FREE : FSS_CACHE_VALID); 955 cv_broadcast(&scp->fc_state_cv); 956 if (error == 0) { 957 setbit(sc->sc_copied, scp->fc_cluster); 958 cv_signal(&sc->sc_work_cv); 959 } 960 mutex_exit(&sc->sc_slock); 961 962 return error; 963 } 964 965 /* 966 * Read/write clusters from/to backing store. 967 * For persistent snapshots must be called with cl == 0. off is the 968 * offset into the snapshot. 969 */ 970 static int 971 fss_bs_io(struct fss_softc *sc, fss_io_type rw, 972 u_int32_t cl, off_t off, int len, void *data) 973 { 974 int error; 975 976 off += FSS_CLTOB(sc, cl); 977 978 vn_lock(sc->sc_bs_vp, LK_EXCLUSIVE|LK_RETRY); 979 980 error = vn_rdwr((rw == FSS_READ ? UIO_READ : UIO_WRITE), sc->sc_bs_vp, 981 data, len, off, UIO_SYSSPACE, IO_UNIT|IO_NODELOCKED, 982 sc->sc_bs_lwp->l_cred, NULL, NULL); 983 if (error == 0) { 984 mutex_enter(&sc->sc_bs_vp->v_interlock); 985 error = VOP_PUTPAGES(sc->sc_bs_vp, trunc_page(off), 986 round_page(off+len), PGO_CLEANIT|PGO_SYNCIO|PGO_FREE); 987 } 988 989 VOP_UNLOCK(sc->sc_bs_vp); 990 991 return error; 992 } 993 994 /* 995 * Get a pointer to the indirect slot for this cluster. 996 */ 997 static u_int32_t * 998 fss_bs_indir(struct fss_softc *sc, u_int32_t cl) 999 { 1000 u_int32_t icl; 1001 int ioff; 1002 1003 icl = cl/(FSS_CLSIZE(sc)/sizeof(u_int32_t)); 1004 ioff = cl%(FSS_CLSIZE(sc)/sizeof(u_int32_t)); 1005 1006 if (sc->sc_indir_cur == icl) 1007 return &sc->sc_indir_data[ioff]; 1008 1009 if (sc->sc_indir_dirty) { 1010 if (fss_bs_io(sc, FSS_WRITE, sc->sc_indir_cur, 0, 1011 FSS_CLSIZE(sc), (void *)sc->sc_indir_data) != 0) 1012 return NULL; 1013 setbit(sc->sc_indir_valid, sc->sc_indir_cur); 1014 } 1015 1016 sc->sc_indir_dirty = 0; 1017 sc->sc_indir_cur = icl; 1018 1019 if (isset(sc->sc_indir_valid, sc->sc_indir_cur)) { 1020 if (fss_bs_io(sc, FSS_READ, sc->sc_indir_cur, 0, 1021 FSS_CLSIZE(sc), (void *)sc->sc_indir_data) != 0) 1022 return NULL; 1023 } else 1024 memset(sc->sc_indir_data, 0, FSS_CLSIZE(sc)); 1025 1026 return &sc->sc_indir_data[ioff]; 1027 } 1028 1029 /* 1030 * The kernel thread (one for every active snapshot). 1031 * 1032 * After wakeup it cleans the cache and runs the I/O requests. 1033 */ 1034 static void 1035 fss_bs_thread(void *arg) 1036 { 1037 bool thread_idle, is_valid; 1038 int error, i, todo, len, crotor, is_read; 1039 long off; 1040 char *addr; 1041 u_int32_t c, cl, ch, *indirp; 1042 struct buf *bp, *nbp; 1043 struct fss_softc *sc; 1044 struct fss_cache *scp, *scl; 1045 1046 sc = arg; 1047 scl = sc->sc_cache+sc->sc_cache_size; 1048 crotor = 0; 1049 thread_idle = false; 1050 1051 mutex_enter(&sc->sc_slock); 1052 1053 for (;;) { 1054 if (thread_idle) 1055 cv_wait(&sc->sc_work_cv, &sc->sc_slock); 1056 thread_idle = true; 1057 if ((sc->sc_flags & FSS_BS_THREAD) == 0) { 1058 sc->sc_bs_lwp = NULL; 1059 mutex_exit(&sc->sc_slock); 1060 kthread_exit(0); 1061 } 1062 1063 /* 1064 * Process I/O requests (persistent) 1065 */ 1066 1067 if (sc->sc_flags & FSS_PERSISTENT) { 1068 if ((bp = bufq_get(sc->sc_bufq)) == NULL) 1069 continue; 1070 is_valid = FSS_ISVALID(sc); 1071 is_read = (bp->b_flags & B_READ); 1072 thread_idle = false; 1073 mutex_exit(&sc->sc_slock); 1074 1075 if (is_valid) { 1076 disk_busy(sc->sc_dkdev); 1077 error = fss_bs_io(sc, FSS_READ, 0, 1078 dbtob(bp->b_blkno), bp->b_bcount, 1079 bp->b_data); 1080 disk_unbusy(sc->sc_dkdev, 1081 (error ? 0 : bp->b_bcount), is_read); 1082 } else 1083 error = ENXIO; 1084 1085 bp->b_error = error; 1086 bp->b_resid = (error ? bp->b_bcount : 0); 1087 biodone(bp); 1088 1089 mutex_enter(&sc->sc_slock); 1090 continue; 1091 } 1092 1093 /* 1094 * Clean the cache 1095 */ 1096 for (i = 0; i < sc->sc_cache_size; i++) { 1097 crotor = (crotor + 1) % sc->sc_cache_size; 1098 scp = sc->sc_cache + crotor; 1099 if (scp->fc_type != FSS_CACHE_VALID) 1100 continue; 1101 mutex_exit(&sc->sc_slock); 1102 1103 thread_idle = false; 1104 indirp = fss_bs_indir(sc, scp->fc_cluster); 1105 if (indirp != NULL) { 1106 error = fss_bs_io(sc, FSS_WRITE, sc->sc_clnext, 1107 0, FSS_CLSIZE(sc), scp->fc_data); 1108 } else 1109 error = EIO; 1110 1111 mutex_enter(&sc->sc_slock); 1112 if (error == 0) { 1113 *indirp = sc->sc_clnext++; 1114 sc->sc_indir_dirty = 1; 1115 } else 1116 fss_error(sc, "write error on backing store"); 1117 1118 scp->fc_type = FSS_CACHE_FREE; 1119 cv_signal(&sc->sc_cache_cv); 1120 break; 1121 } 1122 1123 /* 1124 * Process I/O requests 1125 */ 1126 if ((bp = bufq_get(sc->sc_bufq)) == NULL) 1127 continue; 1128 is_valid = FSS_ISVALID(sc); 1129 is_read = (bp->b_flags & B_READ); 1130 thread_idle = false; 1131 1132 if (!is_valid) { 1133 mutex_exit(&sc->sc_slock); 1134 1135 bp->b_error = ENXIO; 1136 bp->b_resid = bp->b_bcount; 1137 biodone(bp); 1138 1139 mutex_enter(&sc->sc_slock); 1140 continue; 1141 } 1142 1143 disk_busy(sc->sc_dkdev); 1144 1145 /* 1146 * First read from the snapshotted block device unless 1147 * this request is completely covered by backing store. 1148 */ 1149 1150 cl = FSS_BTOCL(sc, dbtob(bp->b_blkno)); 1151 off = FSS_CLOFF(sc, dbtob(bp->b_blkno)); 1152 ch = FSS_BTOCL(sc, dbtob(bp->b_blkno)+bp->b_bcount-1); 1153 error = 0; 1154 bp->b_resid = 0; 1155 bp->b_error = 0; 1156 for (c = cl; c <= ch; c++) { 1157 if (isset(sc->sc_copied, c)) 1158 continue; 1159 mutex_exit(&sc->sc_slock); 1160 1161 /* Not on backing store, read from device. */ 1162 nbp = getiobuf(NULL, true); 1163 nbp->b_flags = B_READ; 1164 nbp->b_resid = nbp->b_bcount = bp->b_bcount; 1165 nbp->b_bufsize = bp->b_bcount; 1166 nbp->b_data = bp->b_data; 1167 nbp->b_blkno = bp->b_blkno; 1168 nbp->b_lblkno = 0; 1169 nbp->b_dev = sc->sc_bdev; 1170 SET(nbp->b_cflags, BC_BUSY); /* mark buffer busy */ 1171 1172 bdev_strategy(nbp); 1173 1174 error = biowait(nbp); 1175 if (error != 0) { 1176 bp->b_resid = bp->b_bcount; 1177 bp->b_error = nbp->b_error; 1178 disk_unbusy(sc->sc_dkdev, 0, is_read); 1179 biodone(bp); 1180 } 1181 putiobuf(nbp); 1182 1183 mutex_enter(&sc->sc_slock); 1184 break; 1185 } 1186 if (error) 1187 continue; 1188 1189 /* 1190 * Replace those parts that have been saved to backing store. 1191 */ 1192 1193 addr = bp->b_data; 1194 todo = bp->b_bcount; 1195 for (c = cl; c <= ch; c++, off = 0, todo -= len, addr += len) { 1196 len = FSS_CLSIZE(sc)-off; 1197 if (len > todo) 1198 len = todo; 1199 if (isclr(sc->sc_copied, c)) 1200 continue; 1201 mutex_exit(&sc->sc_slock); 1202 1203 indirp = fss_bs_indir(sc, c); 1204 if (indirp == NULL || *indirp == 0) { 1205 /* 1206 * Not on backing store. Either in cache 1207 * or hole in the snapshotted block device. 1208 */ 1209 1210 mutex_enter(&sc->sc_slock); 1211 for (scp = sc->sc_cache; scp < scl; scp++) 1212 if (scp->fc_type == FSS_CACHE_VALID && 1213 scp->fc_cluster == c) 1214 break; 1215 if (scp < scl) 1216 memcpy(addr, (char *)scp->fc_data+off, 1217 len); 1218 else 1219 memset(addr, 0, len); 1220 continue; 1221 } 1222 1223 /* 1224 * Read from backing store. 1225 */ 1226 error = 1227 fss_bs_io(sc, FSS_READ, *indirp, off, len, addr); 1228 1229 mutex_enter(&sc->sc_slock); 1230 if (error) { 1231 bp->b_resid = bp->b_bcount; 1232 bp->b_error = error; 1233 break; 1234 } 1235 } 1236 mutex_exit(&sc->sc_slock); 1237 1238 disk_unbusy(sc->sc_dkdev, (error ? 0 : bp->b_bcount), is_read); 1239 biodone(bp); 1240 1241 mutex_enter(&sc->sc_slock); 1242 } 1243 } 1244 1245 #ifdef _MODULE 1246 1247 #include <sys/module.h> 1248 1249 MODULE(MODULE_CLASS_DRIVER, fss, NULL); 1250 CFDRIVER_DECL(fss, DV_DISK, NULL); 1251 1252 static int 1253 fss_modcmd(modcmd_t cmd, void *arg) 1254 { 1255 int bmajor = -1, cmajor = -1, error = 0; 1256 1257 switch (cmd) { 1258 case MODULE_CMD_INIT: 1259 mutex_init(&fss_device_lock, MUTEX_DEFAULT, IPL_NONE); 1260 error = config_cfdriver_attach(&fss_cd); 1261 if (error) { 1262 mutex_destroy(&fss_device_lock); 1263 break; 1264 } 1265 error = config_cfattach_attach(fss_cd.cd_name, &fss_ca); 1266 if (error) { 1267 config_cfdriver_detach(&fss_cd); 1268 mutex_destroy(&fss_device_lock); 1269 break; 1270 } 1271 error = devsw_attach(fss_cd.cd_name, 1272 &fss_bdevsw, &bmajor, &fss_cdevsw, &cmajor); 1273 if (error == EEXIST) 1274 error = 0; 1275 if (error) { 1276 config_cfattach_detach(fss_cd.cd_name, &fss_ca); 1277 config_cfdriver_detach(&fss_cd); 1278 mutex_destroy(&fss_device_lock); 1279 break; 1280 } 1281 break; 1282 1283 case MODULE_CMD_FINI: 1284 error = config_cfattach_detach(fss_cd.cd_name, &fss_ca); 1285 if (error) 1286 break; 1287 config_cfdriver_detach(&fss_cd); 1288 devsw_detach(&fss_bdevsw, &fss_cdevsw); 1289 mutex_destroy(&fss_device_lock); 1290 break; 1291 1292 default: 1293 error = ENOTTY; 1294 break; 1295 } 1296 1297 return error; 1298 } 1299 1300 #endif /* _MODULE */ 1301