1 /* $NetBSD: fss.c,v 1.18 2005/12/11 12:20:53 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Juergen Hannken-Illjes. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 /* 40 * File system snapshot disk driver. 41 * 42 * Block/character interface to the snapshot of a mounted file system. 43 */ 44 45 #include <sys/cdefs.h> 46 __KERNEL_RCSID(0, "$NetBSD: fss.c,v 1.18 2005/12/11 12:20:53 christos Exp $"); 47 48 #include "fss.h" 49 50 #include <sys/param.h> 51 #include <sys/systm.h> 52 #include <sys/namei.h> 53 #include <sys/proc.h> 54 #include <sys/errno.h> 55 #include <sys/buf.h> 56 #include <sys/malloc.h> 57 #include <sys/ioctl.h> 58 #include <sys/disklabel.h> 59 #include <sys/device.h> 60 #include <sys/disk.h> 61 #include <sys/stat.h> 62 #include <sys/mount.h> 63 #include <sys/vnode.h> 64 #include <sys/file.h> 65 #include <sys/uio.h> 66 #include <sys/conf.h> 67 #include <sys/kthread.h> 68 69 #include <miscfs/specfs/specdev.h> 70 71 #include <dev/fssvar.h> 72 73 #include <machine/stdarg.h> 74 75 #ifdef DEBUG 76 #define FSS_STATISTICS 77 #endif 78 79 #ifdef FSS_STATISTICS 80 struct fss_stat { 81 u_int64_t cow_calls; 82 u_int64_t cow_copied; 83 u_int64_t cow_cache_full; 84 u_int64_t indir_read; 85 u_int64_t indir_write; 86 }; 87 88 static struct fss_stat fss_stat[NFSS]; 89 90 #define FSS_STAT_INC(sc, field) \ 91 do { \ 92 fss_stat[sc->sc_unit].field++; \ 93 } while (0) 94 #define FSS_STAT_SET(sc, field, value) \ 95 do { \ 96 fss_stat[sc->sc_unit].field = value; \ 97 } while (0) 98 #define FSS_STAT_ADD(sc, field, value) \ 99 do { \ 100 fss_stat[sc->sc_unit].field += value; \ 101 } while (0) 102 #define FSS_STAT_VAL(sc, field) fss_stat[sc->sc_unit].field 103 #define FSS_STAT_CLEAR(sc) \ 104 do { \ 105 memset(&fss_stat[sc->sc_unit], 0, \ 106 sizeof(struct fss_stat)); \ 107 } while (0) 108 #else /* FSS_STATISTICS */ 109 #define FSS_STAT_INC(sc, field) 110 #define FSS_STAT_SET(sc, field, value) 111 #define FSS_STAT_ADD(sc, field, value) 112 #define FSS_STAT_CLEAR(sc) 113 #endif /* FSS_STATISTICS */ 114 115 static struct fss_softc fss_softc[NFSS]; 116 117 void fssattach(int); 118 119 dev_type_open(fss_open); 120 dev_type_close(fss_close); 121 dev_type_read(fss_read); 122 dev_type_write(fss_write); 123 dev_type_ioctl(fss_ioctl); 124 dev_type_strategy(fss_strategy); 125 dev_type_dump(fss_dump); 126 dev_type_size(fss_size); 127 128 static int fss_copy_on_write(void *, struct buf *); 129 static inline void fss_error(struct fss_softc *, const char *, ...); 130 static int fss_create_files(struct fss_softc *, struct fss_set *, 131 off_t *, struct lwp *); 132 static int fss_create_snapshot(struct fss_softc *, struct fss_set *, 133 struct lwp *); 134 static int fss_delete_snapshot(struct fss_softc *, struct lwp *); 135 static int fss_softc_alloc(struct fss_softc *); 136 static void fss_softc_free(struct fss_softc *); 137 static void fss_cluster_iodone(struct buf *); 138 static void fss_read_cluster(struct fss_softc *, u_int32_t); 139 static void fss_bs_thread(void *); 140 static int fss_bs_io(struct fss_softc *, fss_io_type, 141 u_int32_t, off_t, int, caddr_t); 142 static u_int32_t *fss_bs_indir(struct fss_softc *, u_int32_t); 143 144 const struct bdevsw fss_bdevsw = { 145 fss_open, fss_close, fss_strategy, fss_ioctl, 146 fss_dump, fss_size, D_DISK 147 }; 148 149 const struct cdevsw fss_cdevsw = { 150 fss_open, fss_close, fss_read, fss_write, fss_ioctl, 151 nostop, notty, nopoll, nommap, nokqfilter, D_DISK 152 }; 153 154 void 155 fssattach(int num) 156 { 157 int i; 158 struct fss_softc *sc; 159 160 for (i = 0; i < NFSS; i++) { 161 sc = &fss_softc[i]; 162 sc->sc_unit = i; 163 sc->sc_bdev = NODEV; 164 simple_lock_init(&sc->sc_slock); 165 bufq_alloc(&sc->sc_bufq, "fcfs", 0); 166 } 167 } 168 169 int 170 fss_open(dev_t dev, int flags, int mode, struct lwp *l) 171 { 172 int s, mflag; 173 struct fss_softc *sc; 174 175 mflag = (mode == S_IFCHR ? FSS_CDEV_OPEN : FSS_BDEV_OPEN); 176 177 if ((sc = FSS_DEV_TO_SOFTC(dev)) == NULL) 178 return ENODEV; 179 180 FSS_LOCK(sc, s); 181 182 sc->sc_flags |= mflag; 183 184 FSS_UNLOCK(sc, s); 185 186 return 0; 187 } 188 189 int 190 fss_close(dev_t dev, int flags, int mode, struct lwp *l) 191 { 192 int s, mflag, error; 193 struct fss_softc *sc; 194 195 mflag = (mode == S_IFCHR ? FSS_CDEV_OPEN : FSS_BDEV_OPEN); 196 197 if ((sc = FSS_DEV_TO_SOFTC(dev)) == NULL) 198 return ENODEV; 199 200 FSS_LOCK(sc, s); 201 202 if ((sc->sc_flags & (FSS_CDEV_OPEN|FSS_BDEV_OPEN)) == mflag) { 203 if ((sc->sc_uflags & FSS_UNCONFIG_ON_CLOSE) != 0 && 204 (sc->sc_flags & FSS_ACTIVE) != 0) { 205 FSS_UNLOCK(sc, s); 206 error = fss_ioctl(dev, FSSIOCCLR, NULL, FWRITE, l); 207 if (error) 208 return error; 209 FSS_LOCK(sc, s); 210 } 211 sc->sc_uflags &= ~FSS_UNCONFIG_ON_CLOSE; 212 } 213 214 sc->sc_flags &= ~mflag; 215 216 FSS_UNLOCK(sc, s); 217 218 return 0; 219 } 220 221 void 222 fss_strategy(struct buf *bp) 223 { 224 int s; 225 struct fss_softc *sc; 226 227 sc = FSS_DEV_TO_SOFTC(bp->b_dev); 228 229 FSS_LOCK(sc, s); 230 231 if ((bp->b_flags & B_READ) != B_READ || 232 sc == NULL || !FSS_ISVALID(sc)) { 233 234 FSS_UNLOCK(sc, s); 235 236 bp->b_error = (sc == NULL ? ENODEV : EROFS); 237 bp->b_flags |= B_ERROR; 238 bp->b_resid = bp->b_bcount; 239 biodone(bp); 240 return; 241 } 242 243 bp->b_rawblkno = bp->b_blkno; 244 BUFQ_PUT(sc->sc_bufq, bp); 245 wakeup(&sc->sc_bs_proc); 246 247 FSS_UNLOCK(sc, s); 248 } 249 250 int 251 fss_read(dev_t dev, struct uio *uio, int flags) 252 { 253 return physio(fss_strategy, NULL, dev, B_READ, minphys, uio); 254 } 255 256 int 257 fss_write(dev_t dev, struct uio *uio, int flags) 258 { 259 return physio(fss_strategy, NULL, dev, B_WRITE, minphys, uio); 260 } 261 262 int 263 fss_ioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct lwp *l) 264 { 265 int s, error; 266 struct fss_softc *sc; 267 struct fss_set *fss = (struct fss_set *)data; 268 struct fss_get *fsg = (struct fss_get *)data; 269 270 if ((sc = FSS_DEV_TO_SOFTC(dev)) == NULL) 271 return ENODEV; 272 273 FSS_LOCK(sc, s); 274 while ((sc->sc_flags & FSS_EXCL) == FSS_EXCL) { 275 error = ltsleep(sc, PRIBIO|PCATCH, "fsslock", 0, &sc->sc_slock); 276 if (error) { 277 FSS_UNLOCK(sc, s); 278 return error; 279 } 280 } 281 sc->sc_flags |= FSS_EXCL; 282 FSS_UNLOCK(sc, s); 283 284 switch (cmd) { 285 case FSSIOCSET: 286 if ((flag & FWRITE) == 0) 287 error = EPERM; 288 else if ((sc->sc_flags & FSS_ACTIVE) != 0) 289 error = EBUSY; 290 else 291 error = fss_create_snapshot(sc, fss, l); 292 break; 293 294 case FSSIOCCLR: 295 if ((flag & FWRITE) == 0) 296 error = EPERM; 297 else if ((sc->sc_flags & FSS_ACTIVE) == 0) 298 error = ENXIO; 299 else 300 error = fss_delete_snapshot(sc, l); 301 break; 302 303 case FSSIOCGET: 304 switch (sc->sc_flags & (FSS_PERSISTENT | FSS_ACTIVE)) { 305 case FSS_ACTIVE: 306 memcpy(fsg->fsg_mount, sc->sc_mntname, MNAMELEN); 307 fsg->fsg_csize = FSS_CLSIZE(sc); 308 fsg->fsg_time = sc->sc_time; 309 fsg->fsg_mount_size = sc->sc_clcount; 310 fsg->fsg_bs_size = sc->sc_clnext; 311 error = 0; 312 break; 313 case FSS_PERSISTENT | FSS_ACTIVE: 314 memcpy(fsg->fsg_mount, sc->sc_mntname, MNAMELEN); 315 fsg->fsg_csize = 0; 316 fsg->fsg_time = sc->sc_time; 317 fsg->fsg_mount_size = 0; 318 fsg->fsg_bs_size = 0; 319 error = 0; 320 break; 321 default: 322 error = ENXIO; 323 break; 324 } 325 break; 326 327 case FSSIOFSET: 328 sc->sc_uflags = *(int *)data; 329 error = 0; 330 break; 331 332 case FSSIOFGET: 333 *(int *)data = sc->sc_uflags; 334 error = 0; 335 break; 336 337 default: 338 error = EINVAL; 339 break; 340 } 341 342 FSS_LOCK(sc, s); 343 sc->sc_flags &= ~FSS_EXCL; 344 FSS_UNLOCK(sc, s); 345 wakeup(sc); 346 347 return error; 348 } 349 350 int 351 fss_size(dev_t dev) 352 { 353 return -1; 354 } 355 356 int 357 fss_dump(dev_t dev, daddr_t blkno, caddr_t va, size_t size) 358 { 359 return EROFS; 360 } 361 362 /* 363 * An error occurred reading or writing the snapshot or backing store. 364 * If it is the first error log to console. 365 * The caller holds the simplelock. 366 */ 367 static inline void 368 fss_error(struct fss_softc *sc, const char *fmt, ...) 369 { 370 va_list ap; 371 372 if ((sc->sc_flags & (FSS_ACTIVE|FSS_ERROR)) == FSS_ACTIVE) { 373 va_start(ap, fmt); 374 printf("fss%d: snapshot invalid: ", sc->sc_unit); 375 vprintf(fmt, ap); 376 printf("\n"); 377 va_end(ap); 378 } 379 if ((sc->sc_flags & FSS_ACTIVE) == FSS_ACTIVE) 380 sc->sc_flags |= FSS_ERROR; 381 } 382 383 /* 384 * Allocate the variable sized parts of the softc and 385 * fork the kernel thread. 386 * 387 * The fields sc_clcount, sc_clshift, sc_cache_size and sc_indir_size 388 * must be initialized. 389 */ 390 static int 391 fss_softc_alloc(struct fss_softc *sc) 392 { 393 int i, len, error; 394 395 len = (sc->sc_clcount+NBBY-1)/NBBY; 396 sc->sc_copied = malloc(len, M_TEMP, M_ZERO|M_WAITOK|M_CANFAIL); 397 if (sc->sc_copied == NULL) 398 return(ENOMEM); 399 400 len = sc->sc_cache_size*sizeof(struct fss_cache); 401 sc->sc_cache = malloc(len, M_TEMP, M_ZERO|M_WAITOK|M_CANFAIL); 402 if (sc->sc_cache == NULL) 403 return(ENOMEM); 404 405 len = FSS_CLSIZE(sc); 406 for (i = 0; i < sc->sc_cache_size; i++) { 407 sc->sc_cache[i].fc_type = FSS_CACHE_FREE; 408 sc->sc_cache[i].fc_softc = sc; 409 sc->sc_cache[i].fc_xfercount = 0; 410 sc->sc_cache[i].fc_data = malloc(len, M_TEMP, 411 M_WAITOK|M_CANFAIL); 412 if (sc->sc_cache[i].fc_data == NULL) 413 return(ENOMEM); 414 } 415 416 len = (sc->sc_indir_size+NBBY-1)/NBBY; 417 sc->sc_indir_valid = malloc(len, M_TEMP, M_ZERO|M_WAITOK|M_CANFAIL); 418 if (sc->sc_indir_valid == NULL) 419 return(ENOMEM); 420 421 len = FSS_CLSIZE(sc); 422 sc->sc_indir_data = malloc(len, M_TEMP, M_ZERO|M_WAITOK|M_CANFAIL); 423 if (sc->sc_indir_data == NULL) 424 return(ENOMEM); 425 426 if ((error = kthread_create1(fss_bs_thread, sc, &sc->sc_bs_proc, 427 "fssbs%d", sc->sc_unit)) != 0) 428 return error; 429 430 sc->sc_flags |= FSS_BS_THREAD; 431 return 0; 432 } 433 434 /* 435 * Free the variable sized parts of the softc. 436 */ 437 static void 438 fss_softc_free(struct fss_softc *sc) 439 { 440 int s, i; 441 442 if ((sc->sc_flags & FSS_BS_THREAD) != 0) { 443 FSS_LOCK(sc, s); 444 sc->sc_flags &= ~FSS_BS_THREAD; 445 wakeup(&sc->sc_bs_proc); 446 while (sc->sc_bs_proc != NULL) 447 ltsleep(&sc->sc_bs_proc, PRIBIO, "fssthread", 0, 448 &sc->sc_slock); 449 FSS_UNLOCK(sc, s); 450 } 451 452 if (sc->sc_copied != NULL) 453 free(sc->sc_copied, M_TEMP); 454 sc->sc_copied = NULL; 455 456 if (sc->sc_cache != NULL) { 457 for (i = 0; i < sc->sc_cache_size; i++) 458 if (sc->sc_cache[i].fc_data != NULL) 459 free(sc->sc_cache[i].fc_data, M_TEMP); 460 free(sc->sc_cache, M_TEMP); 461 } 462 sc->sc_cache = NULL; 463 464 if (sc->sc_indir_valid != NULL) 465 free(sc->sc_indir_valid, M_TEMP); 466 sc->sc_indir_valid = NULL; 467 468 if (sc->sc_indir_data != NULL) 469 free(sc->sc_indir_data, M_TEMP); 470 sc->sc_indir_data = NULL; 471 } 472 473 /* 474 * Check if an unmount is ok. If forced, set this snapshot into ERROR state. 475 */ 476 int 477 fss_umount_hook(struct mount *mp, int forced) 478 { 479 int i, s; 480 481 for (i = 0; i < NFSS; i++) { 482 FSS_LOCK(&fss_softc[i], s); 483 if ((fss_softc[i].sc_flags & FSS_ACTIVE) != 0 && 484 fss_softc[i].sc_mount == mp) { 485 if (forced) 486 fss_error(&fss_softc[i], "forced unmount"); 487 else { 488 FSS_UNLOCK(&fss_softc[i], s); 489 return EBUSY; 490 } 491 } 492 FSS_UNLOCK(&fss_softc[i], s); 493 } 494 495 return 0; 496 } 497 498 /* 499 * A buffer is written to the snapshotted block device. Copy to 500 * backing store if needed. 501 */ 502 static int 503 fss_copy_on_write(void *v, struct buf *bp) 504 { 505 int s; 506 u_int32_t cl, ch, c; 507 struct fss_softc *sc = v; 508 509 FSS_LOCK(sc, s); 510 if (!FSS_ISVALID(sc)) { 511 FSS_UNLOCK(sc, s); 512 return 0; 513 } 514 515 FSS_UNLOCK(sc, s); 516 517 FSS_STAT_INC(sc, cow_calls); 518 519 cl = FSS_BTOCL(sc, dbtob(bp->b_blkno)); 520 ch = FSS_BTOCL(sc, dbtob(bp->b_blkno)+bp->b_bcount-1); 521 522 for (c = cl; c <= ch; c++) 523 fss_read_cluster(sc, c); 524 525 return 0; 526 } 527 528 /* 529 * Lookup and open needed files. 530 * 531 * For file system internal snapshot initializes sc_mntname, sc_mount, 532 * sc_bs_vp and sc_time. 533 * 534 * Otherwise returns dev and size of the underlying block device. 535 * Initializes sc_mntname, sc_mount_vp, sc_bdev, sc_bs_vp and sc_mount 536 */ 537 static int 538 fss_create_files(struct fss_softc *sc, struct fss_set *fss, 539 off_t *bsize, struct lwp *l) 540 { 541 int error, bits, fsbsize; 542 struct timespec ts; 543 struct partinfo dpart; 544 struct vattr va; 545 struct nameidata nd; 546 547 /* 548 * Get the mounted file system. 549 */ 550 551 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fss->fss_mount, l); 552 if ((error = namei(&nd)) != 0) 553 return error; 554 555 if ((nd.ni_vp->v_flag & VROOT) != VROOT) { 556 vrele(nd.ni_vp); 557 return EINVAL; 558 } 559 560 sc->sc_mount = nd.ni_vp->v_mount; 561 memcpy(sc->sc_mntname, sc->sc_mount->mnt_stat.f_mntonname, MNAMELEN); 562 563 vrele(nd.ni_vp); 564 565 /* 566 * Check for file system internal snapshot. 567 */ 568 569 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fss->fss_bstore, l); 570 if ((error = namei(&nd)) != 0) 571 return error; 572 573 if (nd.ni_vp->v_type == VREG && nd.ni_vp->v_mount == sc->sc_mount) { 574 vrele(nd.ni_vp); 575 sc->sc_flags |= FSS_PERSISTENT; 576 577 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fss->fss_bstore, l); 578 if ((error = vn_open(&nd, FREAD, 0)) != 0) 579 return error; 580 sc->sc_bs_vp = nd.ni_vp; 581 582 fsbsize = sc->sc_bs_vp->v_mount->mnt_stat.f_iosize; 583 bits = sizeof(sc->sc_bs_bshift)*NBBY; 584 for (sc->sc_bs_bshift = 1; sc->sc_bs_bshift < bits; 585 sc->sc_bs_bshift++) 586 if (FSS_FSBSIZE(sc) == fsbsize) 587 break; 588 if (sc->sc_bs_bshift >= bits) { 589 VOP_UNLOCK(sc->sc_bs_vp, 0); 590 return EINVAL; 591 } 592 593 sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1; 594 sc->sc_clshift = 0; 595 596 error = VFS_SNAPSHOT(sc->sc_mount, sc->sc_bs_vp, &ts); 597 TIMESPEC_TO_TIMEVAL(&sc->sc_time, &ts); 598 599 VOP_UNLOCK(sc->sc_bs_vp, 0); 600 601 return error; 602 } 603 vrele(nd.ni_vp); 604 605 /* 606 * Get the block device it is mounted on. 607 */ 608 609 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, 610 sc->sc_mount->mnt_stat.f_mntfromname, l); 611 if ((error = namei(&nd)) != 0) 612 return error; 613 614 if (nd.ni_vp->v_type != VBLK) { 615 vrele(nd.ni_vp); 616 return EINVAL; 617 } 618 619 error = VOP_IOCTL(nd.ni_vp, DIOCGPART, &dpart, FREAD, 620 l->l_proc->p_ucred, l); 621 if (error) { 622 vrele(nd.ni_vp); 623 return error; 624 } 625 626 sc->sc_mount_vp = nd.ni_vp; 627 sc->sc_bdev = nd.ni_vp->v_rdev; 628 *bsize = (off_t)dpart.disklab->d_secsize*dpart.part->p_size; 629 vrele(nd.ni_vp); 630 631 /* 632 * Get the backing store 633 */ 634 635 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fss->fss_bstore, l); 636 if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) 637 return error; 638 VOP_UNLOCK(nd.ni_vp, 0); 639 640 sc->sc_bs_vp = nd.ni_vp; 641 642 if (nd.ni_vp->v_type != VREG && nd.ni_vp->v_type != VCHR) 643 return EINVAL; 644 645 if (sc->sc_bs_vp->v_type == VREG) { 646 error = VOP_GETATTR(sc->sc_bs_vp, &va, l->l_proc->p_ucred, l); 647 if (error != 0) 648 return error; 649 sc->sc_bs_size = va.va_size; 650 fsbsize = sc->sc_bs_vp->v_mount->mnt_stat.f_iosize; 651 if (fsbsize & (fsbsize-1)) /* No power of two */ 652 return EINVAL; 653 for (sc->sc_bs_bshift = 1; sc->sc_bs_bshift < 32; 654 sc->sc_bs_bshift++) 655 if (FSS_FSBSIZE(sc) == fsbsize) 656 break; 657 if (sc->sc_bs_bshift >= 32) 658 return EINVAL; 659 sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1; 660 } else { 661 sc->sc_bs_bshift = DEV_BSHIFT; 662 sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1; 663 } 664 665 /* 666 * As all IO to from/to the backing store goes through 667 * VOP_STRATEGY() clean the buffer cache to prevent 668 * cache incoherencies. 669 */ 670 if ((error = vinvalbuf(sc->sc_bs_vp, V_SAVE, l->l_proc->p_ucred, l, 0, 0)) != 0) 671 return error; 672 673 return 0; 674 } 675 676 /* 677 * Create a snapshot. 678 */ 679 static int 680 fss_create_snapshot(struct fss_softc *sc, struct fss_set *fss, struct lwp *l) 681 { 682 int len, error; 683 u_int32_t csize; 684 off_t bsize; 685 686 /* 687 * Open needed files. 688 */ 689 if ((error = fss_create_files(sc, fss, &bsize, l)) != 0) 690 goto bad; 691 692 if (sc->sc_flags & FSS_PERSISTENT) { 693 fss_softc_alloc(sc); 694 sc->sc_flags |= FSS_ACTIVE; 695 return 0; 696 } 697 698 /* 699 * Set cluster size. Must be a power of two and 700 * a multiple of backing store block size. 701 */ 702 if (fss->fss_csize <= 0) 703 csize = MAXPHYS; 704 else 705 csize = fss->fss_csize; 706 if (bsize/csize > FSS_CLUSTER_MAX) 707 csize = bsize/FSS_CLUSTER_MAX+1; 708 709 for (sc->sc_clshift = sc->sc_bs_bshift; sc->sc_clshift < 32; 710 sc->sc_clshift++) 711 if (FSS_CLSIZE(sc) >= csize) 712 break; 713 if (sc->sc_clshift >= 32) { 714 error = EINVAL; 715 goto bad; 716 } 717 sc->sc_clmask = FSS_CLSIZE(sc)-1; 718 719 /* 720 * Set number of cache slots. 721 */ 722 if (FSS_CLSIZE(sc) <= 8192) 723 sc->sc_cache_size = 32; 724 else if (FSS_CLSIZE(sc) <= 65536) 725 sc->sc_cache_size = 8; 726 else 727 sc->sc_cache_size = 4; 728 729 /* 730 * Set number of clusters and size of last cluster. 731 */ 732 sc->sc_clcount = FSS_BTOCL(sc, bsize-1)+1; 733 sc->sc_clresid = FSS_CLOFF(sc, bsize-1)+1; 734 735 /* 736 * Set size of indirect table. 737 */ 738 len = sc->sc_clcount*sizeof(u_int32_t); 739 sc->sc_indir_size = FSS_BTOCL(sc, len)+1; 740 sc->sc_clnext = sc->sc_indir_size; 741 sc->sc_indir_cur = 0; 742 743 if ((error = fss_softc_alloc(sc)) != 0) 744 goto bad; 745 746 /* 747 * Activate the snapshot. 748 */ 749 750 if ((error = vfs_write_suspend(sc->sc_mount, PUSER|PCATCH, 0)) != 0) 751 goto bad; 752 753 microtime(&sc->sc_time); 754 755 if (error == 0) 756 error = vn_cow_establish(sc->sc_mount_vp, 757 fss_copy_on_write, sc); 758 if (error == 0) 759 sc->sc_flags |= FSS_ACTIVE; 760 761 vfs_write_resume(sc->sc_mount); 762 763 if (error != 0) 764 goto bad; 765 766 #ifdef DEBUG 767 printf("fss%d: %s snapshot active\n", sc->sc_unit, sc->sc_mntname); 768 printf("fss%d: %u clusters of %u, %u cache slots, %u indir clusters\n", 769 sc->sc_unit, sc->sc_clcount, FSS_CLSIZE(sc), 770 sc->sc_cache_size, sc->sc_indir_size); 771 #endif 772 773 return 0; 774 775 bad: 776 fss_softc_free(sc); 777 if (sc->sc_bs_vp != NULL) { 778 if (sc->sc_flags & FSS_PERSISTENT) 779 vn_close(sc->sc_bs_vp, FREAD, l->l_proc->p_ucred, l); 780 else 781 vn_close(sc->sc_bs_vp, FREAD|FWRITE, l->l_proc->p_ucred, l); 782 } 783 sc->sc_bs_vp = NULL; 784 785 return error; 786 } 787 788 /* 789 * Delete a snapshot. 790 */ 791 static int 792 fss_delete_snapshot(struct fss_softc *sc, struct lwp *l) 793 { 794 int s; 795 796 if ((sc->sc_flags & FSS_PERSISTENT) == 0) 797 vn_cow_disestablish(sc->sc_mount_vp, fss_copy_on_write, sc); 798 799 FSS_LOCK(sc, s); 800 sc->sc_flags &= ~(FSS_ACTIVE|FSS_ERROR); 801 sc->sc_mount = NULL; 802 sc->sc_bdev = NODEV; 803 FSS_UNLOCK(sc, s); 804 805 fss_softc_free(sc); 806 if (sc->sc_flags & FSS_PERSISTENT) 807 vn_close(sc->sc_bs_vp, FREAD, l->l_proc->p_ucred, l); 808 else 809 vn_close(sc->sc_bs_vp, FREAD|FWRITE, l->l_proc->p_ucred, l); 810 sc->sc_bs_vp = NULL; 811 sc->sc_flags &= ~FSS_PERSISTENT; 812 813 FSS_STAT_CLEAR(sc); 814 815 return 0; 816 } 817 818 /* 819 * A read from the snapshotted block device has completed. 820 */ 821 static void 822 fss_cluster_iodone(struct buf *bp) 823 { 824 int s; 825 struct fss_cache *scp = bp->b_private; 826 827 FSS_LOCK(scp->fc_softc, s); 828 829 if (bp->b_flags & B_EINTR) 830 fss_error(scp->fc_softc, "fs read interrupted"); 831 if (bp->b_flags & B_ERROR) 832 fss_error(scp->fc_softc, "fs read error %d", bp->b_error); 833 834 if (bp->b_vp != NULL) 835 brelvp(bp); 836 837 if (--scp->fc_xfercount == 0) 838 wakeup(&scp->fc_data); 839 840 FSS_UNLOCK(scp->fc_softc, s); 841 842 s = splbio(); 843 pool_put(&bufpool, bp); 844 splx(s); 845 } 846 847 /* 848 * Read a cluster from the snapshotted block device to the cache. 849 */ 850 static void 851 fss_read_cluster(struct fss_softc *sc, u_int32_t cl) 852 { 853 int s, todo, len; 854 caddr_t addr; 855 daddr_t dblk; 856 struct buf *bp; 857 struct fss_cache *scp, *scl; 858 859 /* 860 * Get a free cache slot. 861 */ 862 scl = sc->sc_cache+sc->sc_cache_size; 863 864 FSS_LOCK(sc, s); 865 866 restart: 867 if (isset(sc->sc_copied, cl) || !FSS_ISVALID(sc)) { 868 FSS_UNLOCK(sc, s); 869 return; 870 } 871 872 for (scp = sc->sc_cache; scp < scl; scp++) 873 if (scp->fc_type != FSS_CACHE_FREE && 874 scp->fc_cluster == cl) { 875 ltsleep(&scp->fc_type, PRIBIO, "cowwait2", 0, 876 &sc->sc_slock); 877 goto restart; 878 } 879 880 for (scp = sc->sc_cache; scp < scl; scp++) 881 if (scp->fc_type == FSS_CACHE_FREE) { 882 scp->fc_type = FSS_CACHE_BUSY; 883 scp->fc_cluster = cl; 884 break; 885 } 886 if (scp >= scl) { 887 FSS_STAT_INC(sc, cow_cache_full); 888 ltsleep(&sc->sc_cache, PRIBIO, "cowwait3", 0, &sc->sc_slock); 889 goto restart; 890 } 891 892 FSS_UNLOCK(sc, s); 893 894 /* 895 * Start the read. 896 */ 897 FSS_STAT_INC(sc, cow_copied); 898 899 dblk = btodb(FSS_CLTOB(sc, cl)); 900 addr = scp->fc_data; 901 if (cl == sc->sc_clcount-1) { 902 todo = sc->sc_clresid; 903 memset(addr+todo, 0, FSS_CLSIZE(sc)-todo); 904 } else 905 todo = FSS_CLSIZE(sc); 906 while (todo > 0) { 907 len = todo; 908 if (len > MAXPHYS) 909 len = MAXPHYS; 910 911 s = splbio(); 912 bp = pool_get(&bufpool, PR_WAITOK); 913 splx(s); 914 915 BUF_INIT(bp); 916 bp->b_flags = B_READ|B_CALL; 917 bp->b_bcount = len; 918 bp->b_bufsize = bp->b_bcount; 919 bp->b_error = 0; 920 bp->b_data = addr; 921 bp->b_blkno = bp->b_rawblkno = dblk; 922 bp->b_proc = NULL; 923 bp->b_dev = sc->sc_bdev; 924 bp->b_vp = NULLVP; 925 bp->b_private = scp; 926 bp->b_iodone = fss_cluster_iodone; 927 928 DEV_STRATEGY(bp); 929 930 FSS_LOCK(sc, s); 931 scp->fc_xfercount++; 932 FSS_UNLOCK(sc, s); 933 934 dblk += btodb(len); 935 addr += len; 936 todo -= len; 937 } 938 939 /* 940 * Wait for all read requests to complete. 941 */ 942 FSS_LOCK(sc, s); 943 while (scp->fc_xfercount > 0) 944 ltsleep(&scp->fc_data, PRIBIO, "cowwait", 0, &sc->sc_slock); 945 946 scp->fc_type = FSS_CACHE_VALID; 947 setbit(sc->sc_copied, scp->fc_cluster); 948 FSS_UNLOCK(sc, s); 949 950 wakeup(&sc->sc_bs_proc); 951 } 952 953 /* 954 * Read/write clusters from/to backing store. 955 * For persistent snapshots must be called with cl == 0. off is the 956 * offset into the snapshot. 957 */ 958 static int 959 fss_bs_io(struct fss_softc *sc, fss_io_type rw, 960 u_int32_t cl, off_t off, int len, caddr_t data) 961 { 962 int error; 963 964 off += FSS_CLTOB(sc, cl); 965 966 vn_lock(sc->sc_bs_vp, LK_EXCLUSIVE|LK_RETRY); 967 968 error = vn_rdwr((rw == FSS_READ ? UIO_READ : UIO_WRITE), sc->sc_bs_vp, 969 data, len, off, UIO_SYSSPACE, IO_UNIT|IO_NODELOCKED, 970 sc->sc_bs_proc->p_ucred, NULL, NULL); 971 if (error == 0) { 972 simple_lock(&sc->sc_bs_vp->v_interlock); 973 error = VOP_PUTPAGES(sc->sc_bs_vp, trunc_page(off), 974 round_page(off+len), PGO_CLEANIT|PGO_SYNCIO|PGO_FREE); 975 } 976 977 VOP_UNLOCK(sc->sc_bs_vp, 0); 978 979 return error; 980 } 981 982 /* 983 * Get a pointer to the indirect slot for this cluster. 984 */ 985 static u_int32_t * 986 fss_bs_indir(struct fss_softc *sc, u_int32_t cl) 987 { 988 u_int32_t icl; 989 int ioff; 990 991 icl = cl/(FSS_CLSIZE(sc)/sizeof(u_int32_t)); 992 ioff = cl%(FSS_CLSIZE(sc)/sizeof(u_int32_t)); 993 994 if (sc->sc_indir_cur == icl) 995 return &sc->sc_indir_data[ioff]; 996 997 if (sc->sc_indir_dirty) { 998 FSS_STAT_INC(sc, indir_write); 999 if (fss_bs_io(sc, FSS_WRITE, sc->sc_indir_cur, 0, 1000 FSS_CLSIZE(sc), (caddr_t)sc->sc_indir_data) != 0) 1001 return NULL; 1002 setbit(sc->sc_indir_valid, sc->sc_indir_cur); 1003 } 1004 1005 sc->sc_indir_dirty = 0; 1006 sc->sc_indir_cur = icl; 1007 1008 if (isset(sc->sc_indir_valid, sc->sc_indir_cur)) { 1009 FSS_STAT_INC(sc, indir_read); 1010 if (fss_bs_io(sc, FSS_READ, sc->sc_indir_cur, 0, 1011 FSS_CLSIZE(sc), (caddr_t)sc->sc_indir_data) != 0) 1012 return NULL; 1013 } else 1014 memset(sc->sc_indir_data, 0, FSS_CLSIZE(sc)); 1015 1016 return &sc->sc_indir_data[ioff]; 1017 } 1018 1019 /* 1020 * The kernel thread (one for every active snapshot). 1021 * 1022 * After wakeup it cleans the cache and runs the I/O requests. 1023 */ 1024 static void 1025 fss_bs_thread(void *arg) 1026 { 1027 int error, len, nfreed, nio, s; 1028 long off; 1029 caddr_t addr; 1030 u_int32_t c, cl, ch, *indirp; 1031 struct buf *bp, *nbp; 1032 struct fss_softc *sc; 1033 struct fss_cache *scp, *scl; 1034 1035 sc = arg; 1036 1037 scl = sc->sc_cache+sc->sc_cache_size; 1038 1039 s = splbio(); 1040 nbp = pool_get(&bufpool, PR_WAITOK); 1041 splx(s); 1042 1043 nfreed = nio = 1; /* Dont sleep the first time */ 1044 1045 FSS_LOCK(sc, s); 1046 1047 for (;;) { 1048 if (nfreed == 0 && nio == 0) 1049 ltsleep(&sc->sc_bs_proc, PVM-1, "fssbs", 0, 1050 &sc->sc_slock); 1051 1052 if ((sc->sc_flags & FSS_BS_THREAD) == 0) { 1053 sc->sc_bs_proc = NULL; 1054 wakeup(&sc->sc_bs_proc); 1055 1056 FSS_UNLOCK(sc, s); 1057 1058 s = splbio(); 1059 pool_put(&bufpool, nbp); 1060 splx(s); 1061 #ifdef FSS_STATISTICS 1062 if ((sc->sc_flags & FSS_PERSISTENT) == 0) { 1063 printf("fss%d: cow called %" PRId64 " times," 1064 " copied %" PRId64 " clusters," 1065 " cache full %" PRId64 " times\n", 1066 sc->sc_unit, 1067 FSS_STAT_VAL(sc, cow_calls), 1068 FSS_STAT_VAL(sc, cow_copied), 1069 FSS_STAT_VAL(sc, cow_cache_full)); 1070 printf("fss%d: %" PRId64 " indir reads," 1071 " %" PRId64 " indir writes\n", 1072 sc->sc_unit, 1073 FSS_STAT_VAL(sc, indir_read), 1074 FSS_STAT_VAL(sc, indir_write)); 1075 } 1076 #endif /* FSS_STATISTICS */ 1077 kthread_exit(0); 1078 } 1079 1080 /* 1081 * Process I/O requests (persistent) 1082 */ 1083 1084 if (sc->sc_flags & FSS_PERSISTENT) { 1085 nfreed = nio = 0; 1086 1087 if ((bp = BUFQ_GET(sc->sc_bufq)) == NULL) 1088 continue; 1089 1090 nio++; 1091 1092 if (FSS_ISVALID(sc)) { 1093 FSS_UNLOCK(sc, s); 1094 1095 error = fss_bs_io(sc, FSS_READ, 0, 1096 dbtob(bp->b_blkno), bp->b_bcount, 1097 bp->b_data); 1098 1099 FSS_LOCK(sc, s); 1100 } else 1101 error = ENXIO; 1102 1103 if (error) { 1104 bp->b_error = error; 1105 bp->b_flags |= B_ERROR; 1106 bp->b_resid = bp->b_bcount; 1107 } 1108 biodone(bp); 1109 1110 continue; 1111 } 1112 1113 /* 1114 * Clean the cache 1115 */ 1116 nfreed = 0; 1117 for (scp = sc->sc_cache; scp < scl; scp++) { 1118 if (scp->fc_type != FSS_CACHE_VALID) 1119 continue; 1120 1121 FSS_UNLOCK(sc, s); 1122 1123 indirp = fss_bs_indir(sc, scp->fc_cluster); 1124 if (indirp != NULL) { 1125 error = fss_bs_io(sc, FSS_WRITE, sc->sc_clnext, 1126 0, FSS_CLSIZE(sc), scp->fc_data); 1127 } else 1128 error = EIO; 1129 1130 FSS_LOCK(sc, s); 1131 1132 if (error == 0) { 1133 *indirp = sc->sc_clnext++; 1134 sc->sc_indir_dirty = 1; 1135 } else 1136 fss_error(sc, "write bs error %d", error); 1137 1138 scp->fc_type = FSS_CACHE_FREE; 1139 nfreed++; 1140 wakeup(&scp->fc_type); 1141 } 1142 1143 if (nfreed) 1144 wakeup(&sc->sc_cache); 1145 1146 /* 1147 * Process I/O requests 1148 */ 1149 nio = 0; 1150 1151 if ((bp = BUFQ_GET(sc->sc_bufq)) == NULL) 1152 continue; 1153 1154 nio++; 1155 1156 if (!FSS_ISVALID(sc)) { 1157 bp->b_error = ENXIO; 1158 bp->b_flags |= B_ERROR; 1159 bp->b_resid = bp->b_bcount; 1160 biodone(bp); 1161 continue; 1162 } 1163 1164 /* 1165 * First read from the snapshotted block device. 1166 * XXX Split to only read those parts that have not 1167 * been saved to backing store? 1168 */ 1169 1170 FSS_UNLOCK(sc, s); 1171 1172 BUF_INIT(nbp); 1173 nbp->b_flags = B_READ; 1174 nbp->b_bcount = bp->b_bcount; 1175 nbp->b_bufsize = bp->b_bcount; 1176 nbp->b_error = 0; 1177 nbp->b_data = bp->b_data; 1178 nbp->b_blkno = nbp->b_rawblkno = bp->b_blkno; 1179 nbp->b_proc = bp->b_proc; 1180 nbp->b_dev = sc->sc_bdev; 1181 nbp->b_vp = NULLVP; 1182 1183 DEV_STRATEGY(nbp); 1184 1185 if (biowait(nbp) != 0) { 1186 bp->b_resid = bp->b_bcount; 1187 bp->b_error = nbp->b_error; 1188 bp->b_flags |= B_ERROR; 1189 biodone(bp); 1190 continue; 1191 } 1192 1193 cl = FSS_BTOCL(sc, dbtob(bp->b_blkno)); 1194 off = FSS_CLOFF(sc, dbtob(bp->b_blkno)); 1195 ch = FSS_BTOCL(sc, dbtob(bp->b_blkno)+bp->b_bcount-1); 1196 bp->b_resid = bp->b_bcount; 1197 addr = bp->b_data; 1198 1199 FSS_LOCK(sc, s); 1200 1201 /* 1202 * Replace those parts that have been saved to backing store. 1203 */ 1204 1205 for (c = cl; c <= ch; 1206 c++, off = 0, bp->b_resid -= len, addr += len) { 1207 len = FSS_CLSIZE(sc)-off; 1208 if (len > bp->b_resid) 1209 len = bp->b_resid; 1210 1211 if (isclr(sc->sc_copied, c)) 1212 continue; 1213 1214 FSS_UNLOCK(sc, s); 1215 1216 indirp = fss_bs_indir(sc, c); 1217 1218 FSS_LOCK(sc, s); 1219 1220 if (indirp == NULL || *indirp == 0) { 1221 /* 1222 * Not on backing store. Either in cache 1223 * or hole in the snapshotted block device. 1224 */ 1225 for (scp = sc->sc_cache; scp < scl; scp++) 1226 if (scp->fc_type == FSS_CACHE_VALID && 1227 scp->fc_cluster == c) 1228 break; 1229 if (scp < scl) 1230 memcpy(addr, scp->fc_data+off, len); 1231 else 1232 memset(addr, 0, len); 1233 continue; 1234 } 1235 /* 1236 * Read from backing store. 1237 */ 1238 1239 FSS_UNLOCK(sc, s); 1240 1241 if ((error = fss_bs_io(sc, FSS_READ, *indirp, 1242 off, len, addr)) != 0) { 1243 bp->b_resid = bp->b_bcount; 1244 bp->b_error = error; 1245 bp->b_flags |= B_ERROR; 1246 break; 1247 } 1248 1249 FSS_LOCK(sc, s); 1250 1251 } 1252 1253 biodone(bp); 1254 } 1255 } 1256