1 /* $NetBSD: fss.c,v 1.22 2006/01/11 00:49:59 yamt Exp $ */ 2 3 /*- 4 * Copyright (c) 2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Juergen Hannken-Illjes. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 /* 40 * File system snapshot disk driver. 41 * 42 * Block/character interface to the snapshot of a mounted file system. 43 */ 44 45 #include <sys/cdefs.h> 46 __KERNEL_RCSID(0, "$NetBSD: fss.c,v 1.22 2006/01/11 00:49:59 yamt Exp $"); 47 48 #include "fss.h" 49 50 #include <sys/param.h> 51 #include <sys/systm.h> 52 #include <sys/namei.h> 53 #include <sys/proc.h> 54 #include <sys/errno.h> 55 #include <sys/buf.h> 56 #include <sys/malloc.h> 57 #include <sys/ioctl.h> 58 #include <sys/disklabel.h> 59 #include <sys/device.h> 60 #include <sys/disk.h> 61 #include <sys/stat.h> 62 #include <sys/mount.h> 63 #include <sys/vnode.h> 64 #include <sys/file.h> 65 #include <sys/uio.h> 66 #include <sys/conf.h> 67 #include <sys/kthread.h> 68 69 #include <miscfs/specfs/specdev.h> 70 71 #include <dev/fssvar.h> 72 73 #include <machine/stdarg.h> 74 75 #ifdef DEBUG 76 #define FSS_STATISTICS 77 #endif 78 79 #ifdef FSS_STATISTICS 80 struct fss_stat { 81 u_int64_t cow_calls; 82 u_int64_t cow_copied; 83 u_int64_t cow_cache_full; 84 u_int64_t indir_read; 85 u_int64_t indir_write; 86 }; 87 88 static struct fss_stat fss_stat[NFSS]; 89 90 #define FSS_STAT_INC(sc, field) \ 91 do { \ 92 fss_stat[sc->sc_unit].field++; \ 93 } while (0) 94 #define FSS_STAT_SET(sc, field, value) \ 95 do { \ 96 fss_stat[sc->sc_unit].field = value; \ 97 } while (0) 98 #define FSS_STAT_ADD(sc, field, value) \ 99 do { \ 100 fss_stat[sc->sc_unit].field += value; \ 101 } while (0) 102 #define FSS_STAT_VAL(sc, field) fss_stat[sc->sc_unit].field 103 #define FSS_STAT_CLEAR(sc) \ 104 do { \ 105 memset(&fss_stat[sc->sc_unit], 0, \ 106 sizeof(struct fss_stat)); \ 107 } while (0) 108 #else /* FSS_STATISTICS */ 109 #define FSS_STAT_INC(sc, field) 110 #define FSS_STAT_SET(sc, field, value) 111 #define FSS_STAT_ADD(sc, field, value) 112 #define FSS_STAT_CLEAR(sc) 113 #endif /* FSS_STATISTICS */ 114 115 static struct fss_softc fss_softc[NFSS]; 116 117 void fssattach(int); 118 119 dev_type_open(fss_open); 120 dev_type_close(fss_close); 121 dev_type_read(fss_read); 122 dev_type_write(fss_write); 123 dev_type_ioctl(fss_ioctl); 124 dev_type_strategy(fss_strategy); 125 dev_type_dump(fss_dump); 126 dev_type_size(fss_size); 127 128 static int fss_copy_on_write(void *, struct buf *); 129 static inline void fss_error(struct fss_softc *, const char *, ...); 130 static int fss_create_files(struct fss_softc *, struct fss_set *, 131 off_t *, struct lwp *); 132 static int fss_create_snapshot(struct fss_softc *, struct fss_set *, 133 struct lwp *); 134 static int fss_delete_snapshot(struct fss_softc *, struct lwp *); 135 static int fss_softc_alloc(struct fss_softc *); 136 static void fss_softc_free(struct fss_softc *); 137 static void fss_cluster_iodone(struct buf *); 138 static void fss_read_cluster(struct fss_softc *, u_int32_t); 139 static void fss_bs_thread(void *); 140 static int fss_bs_io(struct fss_softc *, fss_io_type, 141 u_int32_t, off_t, int, caddr_t); 142 static u_int32_t *fss_bs_indir(struct fss_softc *, u_int32_t); 143 144 const struct bdevsw fss_bdevsw = { 145 fss_open, fss_close, fss_strategy, fss_ioctl, 146 fss_dump, fss_size, D_DISK 147 }; 148 149 const struct cdevsw fss_cdevsw = { 150 fss_open, fss_close, fss_read, fss_write, fss_ioctl, 151 nostop, notty, nopoll, nommap, nokqfilter, D_DISK 152 }; 153 154 void 155 fssattach(int num) 156 { 157 int i; 158 struct fss_softc *sc; 159 160 for (i = 0; i < NFSS; i++) { 161 sc = &fss_softc[i]; 162 sc->sc_unit = i; 163 sc->sc_bdev = NODEV; 164 simple_lock_init(&sc->sc_slock); 165 bufq_alloc(&sc->sc_bufq, "fcfs", 0); 166 } 167 } 168 169 int 170 fss_open(dev_t dev, int flags, int mode, struct lwp *l) 171 { 172 int s, mflag; 173 struct fss_softc *sc; 174 175 mflag = (mode == S_IFCHR ? FSS_CDEV_OPEN : FSS_BDEV_OPEN); 176 177 if ((sc = FSS_DEV_TO_SOFTC(dev)) == NULL) 178 return ENODEV; 179 180 FSS_LOCK(sc, s); 181 182 sc->sc_flags |= mflag; 183 184 FSS_UNLOCK(sc, s); 185 186 return 0; 187 } 188 189 int 190 fss_close(dev_t dev, int flags, int mode, struct lwp *l) 191 { 192 int s, mflag, error; 193 struct fss_softc *sc; 194 195 mflag = (mode == S_IFCHR ? FSS_CDEV_OPEN : FSS_BDEV_OPEN); 196 197 if ((sc = FSS_DEV_TO_SOFTC(dev)) == NULL) 198 return ENODEV; 199 200 FSS_LOCK(sc, s); 201 202 if ((sc->sc_flags & (FSS_CDEV_OPEN|FSS_BDEV_OPEN)) == mflag) { 203 if ((sc->sc_uflags & FSS_UNCONFIG_ON_CLOSE) != 0 && 204 (sc->sc_flags & FSS_ACTIVE) != 0) { 205 FSS_UNLOCK(sc, s); 206 error = fss_ioctl(dev, FSSIOCCLR, NULL, FWRITE, l); 207 if (error) 208 return error; 209 FSS_LOCK(sc, s); 210 } 211 sc->sc_uflags &= ~FSS_UNCONFIG_ON_CLOSE; 212 } 213 214 sc->sc_flags &= ~mflag; 215 216 FSS_UNLOCK(sc, s); 217 218 return 0; 219 } 220 221 void 222 fss_strategy(struct buf *bp) 223 { 224 int s; 225 struct fss_softc *sc; 226 227 sc = FSS_DEV_TO_SOFTC(bp->b_dev); 228 229 FSS_LOCK(sc, s); 230 231 if ((bp->b_flags & B_READ) != B_READ || 232 sc == NULL || !FSS_ISVALID(sc)) { 233 234 FSS_UNLOCK(sc, s); 235 236 bp->b_error = (sc == NULL ? ENODEV : EROFS); 237 bp->b_flags |= B_ERROR; 238 bp->b_resid = bp->b_bcount; 239 biodone(bp); 240 return; 241 } 242 243 bp->b_rawblkno = bp->b_blkno; 244 BUFQ_PUT(sc->sc_bufq, bp); 245 wakeup(&sc->sc_bs_proc); 246 247 FSS_UNLOCK(sc, s); 248 } 249 250 int 251 fss_read(dev_t dev, struct uio *uio, int flags) 252 { 253 return physio(fss_strategy, NULL, dev, B_READ, minphys, uio); 254 } 255 256 int 257 fss_write(dev_t dev, struct uio *uio, int flags) 258 { 259 return physio(fss_strategy, NULL, dev, B_WRITE, minphys, uio); 260 } 261 262 int 263 fss_ioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct lwp *l) 264 { 265 int s, error; 266 struct fss_softc *sc; 267 struct fss_set *fss = (struct fss_set *)data; 268 struct fss_get *fsg = (struct fss_get *)data; 269 270 if ((sc = FSS_DEV_TO_SOFTC(dev)) == NULL) 271 return ENODEV; 272 273 FSS_LOCK(sc, s); 274 while ((sc->sc_flags & FSS_EXCL) == FSS_EXCL) { 275 error = ltsleep(sc, PRIBIO|PCATCH, "fsslock", 0, &sc->sc_slock); 276 if (error) { 277 FSS_UNLOCK(sc, s); 278 return error; 279 } 280 } 281 sc->sc_flags |= FSS_EXCL; 282 FSS_UNLOCK(sc, s); 283 284 switch (cmd) { 285 case FSSIOCSET: 286 if ((flag & FWRITE) == 0) 287 error = EPERM; 288 else if ((sc->sc_flags & FSS_ACTIVE) != 0) 289 error = EBUSY; 290 else 291 error = fss_create_snapshot(sc, fss, l); 292 break; 293 294 case FSSIOCCLR: 295 if ((flag & FWRITE) == 0) 296 error = EPERM; 297 else if ((sc->sc_flags & FSS_ACTIVE) == 0) 298 error = ENXIO; 299 else 300 error = fss_delete_snapshot(sc, l); 301 break; 302 303 case FSSIOCGET: 304 switch (sc->sc_flags & (FSS_PERSISTENT | FSS_ACTIVE)) { 305 case FSS_ACTIVE: 306 memcpy(fsg->fsg_mount, sc->sc_mntname, MNAMELEN); 307 fsg->fsg_csize = FSS_CLSIZE(sc); 308 fsg->fsg_time = sc->sc_time; 309 fsg->fsg_mount_size = sc->sc_clcount; 310 fsg->fsg_bs_size = sc->sc_clnext; 311 error = 0; 312 break; 313 case FSS_PERSISTENT | FSS_ACTIVE: 314 memcpy(fsg->fsg_mount, sc->sc_mntname, MNAMELEN); 315 fsg->fsg_csize = 0; 316 fsg->fsg_time = sc->sc_time; 317 fsg->fsg_mount_size = 0; 318 fsg->fsg_bs_size = 0; 319 error = 0; 320 break; 321 default: 322 error = ENXIO; 323 break; 324 } 325 break; 326 327 case FSSIOFSET: 328 sc->sc_uflags = *(int *)data; 329 error = 0; 330 break; 331 332 case FSSIOFGET: 333 *(int *)data = sc->sc_uflags; 334 error = 0; 335 break; 336 337 default: 338 error = EINVAL; 339 break; 340 } 341 342 FSS_LOCK(sc, s); 343 sc->sc_flags &= ~FSS_EXCL; 344 FSS_UNLOCK(sc, s); 345 wakeup(sc); 346 347 return error; 348 } 349 350 int 351 fss_size(dev_t dev) 352 { 353 return -1; 354 } 355 356 int 357 fss_dump(dev_t dev, daddr_t blkno, caddr_t va, size_t size) 358 { 359 return EROFS; 360 } 361 362 /* 363 * An error occurred reading or writing the snapshot or backing store. 364 * If it is the first error log to console. 365 * The caller holds the simplelock. 366 */ 367 static inline void 368 fss_error(struct fss_softc *sc, const char *fmt, ...) 369 { 370 va_list ap; 371 372 if ((sc->sc_flags & (FSS_ACTIVE|FSS_ERROR)) == FSS_ACTIVE) { 373 va_start(ap, fmt); 374 printf("fss%d: snapshot invalid: ", sc->sc_unit); 375 vprintf(fmt, ap); 376 printf("\n"); 377 va_end(ap); 378 } 379 if ((sc->sc_flags & FSS_ACTIVE) == FSS_ACTIVE) 380 sc->sc_flags |= FSS_ERROR; 381 } 382 383 /* 384 * Allocate the variable sized parts of the softc and 385 * fork the kernel thread. 386 * 387 * The fields sc_clcount, sc_clshift, sc_cache_size and sc_indir_size 388 * must be initialized. 389 */ 390 static int 391 fss_softc_alloc(struct fss_softc *sc) 392 { 393 int i, len, error; 394 395 len = (sc->sc_clcount+NBBY-1)/NBBY; 396 sc->sc_copied = malloc(len, M_TEMP, M_ZERO|M_WAITOK|M_CANFAIL); 397 if (sc->sc_copied == NULL) 398 return(ENOMEM); 399 400 len = sc->sc_cache_size*sizeof(struct fss_cache); 401 sc->sc_cache = malloc(len, M_TEMP, M_ZERO|M_WAITOK|M_CANFAIL); 402 if (sc->sc_cache == NULL) 403 return(ENOMEM); 404 405 len = FSS_CLSIZE(sc); 406 for (i = 0; i < sc->sc_cache_size; i++) { 407 sc->sc_cache[i].fc_type = FSS_CACHE_FREE; 408 sc->sc_cache[i].fc_softc = sc; 409 sc->sc_cache[i].fc_xfercount = 0; 410 sc->sc_cache[i].fc_data = malloc(len, M_TEMP, 411 M_WAITOK|M_CANFAIL); 412 if (sc->sc_cache[i].fc_data == NULL) 413 return(ENOMEM); 414 } 415 416 len = (sc->sc_indir_size+NBBY-1)/NBBY; 417 sc->sc_indir_valid = malloc(len, M_TEMP, M_ZERO|M_WAITOK|M_CANFAIL); 418 if (sc->sc_indir_valid == NULL) 419 return(ENOMEM); 420 421 len = FSS_CLSIZE(sc); 422 sc->sc_indir_data = malloc(len, M_TEMP, M_ZERO|M_WAITOK|M_CANFAIL); 423 if (sc->sc_indir_data == NULL) 424 return(ENOMEM); 425 426 if ((error = kthread_create1(fss_bs_thread, sc, &sc->sc_bs_proc, 427 "fssbs%d", sc->sc_unit)) != 0) 428 return error; 429 430 sc->sc_flags |= FSS_BS_THREAD; 431 return 0; 432 } 433 434 /* 435 * Free the variable sized parts of the softc. 436 */ 437 static void 438 fss_softc_free(struct fss_softc *sc) 439 { 440 int s, i; 441 442 if ((sc->sc_flags & FSS_BS_THREAD) != 0) { 443 FSS_LOCK(sc, s); 444 sc->sc_flags &= ~FSS_BS_THREAD; 445 wakeup(&sc->sc_bs_proc); 446 while (sc->sc_bs_proc != NULL) 447 ltsleep(&sc->sc_bs_proc, PRIBIO, "fssthread", 0, 448 &sc->sc_slock); 449 FSS_UNLOCK(sc, s); 450 } 451 452 if (sc->sc_copied != NULL) 453 free(sc->sc_copied, M_TEMP); 454 sc->sc_copied = NULL; 455 456 if (sc->sc_cache != NULL) { 457 for (i = 0; i < sc->sc_cache_size; i++) 458 if (sc->sc_cache[i].fc_data != NULL) 459 free(sc->sc_cache[i].fc_data, M_TEMP); 460 free(sc->sc_cache, M_TEMP); 461 } 462 sc->sc_cache = NULL; 463 464 if (sc->sc_indir_valid != NULL) 465 free(sc->sc_indir_valid, M_TEMP); 466 sc->sc_indir_valid = NULL; 467 468 if (sc->sc_indir_data != NULL) 469 free(sc->sc_indir_data, M_TEMP); 470 sc->sc_indir_data = NULL; 471 } 472 473 /* 474 * Check if an unmount is ok. If forced, set this snapshot into ERROR state. 475 */ 476 int 477 fss_umount_hook(struct mount *mp, int forced) 478 { 479 int i, s; 480 481 for (i = 0; i < NFSS; i++) { 482 FSS_LOCK(&fss_softc[i], s); 483 if ((fss_softc[i].sc_flags & FSS_ACTIVE) != 0 && 484 fss_softc[i].sc_mount == mp) { 485 if (forced) 486 fss_error(&fss_softc[i], "forced unmount"); 487 else { 488 FSS_UNLOCK(&fss_softc[i], s); 489 return EBUSY; 490 } 491 } 492 FSS_UNLOCK(&fss_softc[i], s); 493 } 494 495 return 0; 496 } 497 498 /* 499 * A buffer is written to the snapshotted block device. Copy to 500 * backing store if needed. 501 */ 502 static int 503 fss_copy_on_write(void *v, struct buf *bp) 504 { 505 int s; 506 u_int32_t cl, ch, c; 507 struct fss_softc *sc = v; 508 509 FSS_LOCK(sc, s); 510 if (!FSS_ISVALID(sc)) { 511 FSS_UNLOCK(sc, s); 512 return 0; 513 } 514 515 FSS_UNLOCK(sc, s); 516 517 FSS_STAT_INC(sc, cow_calls); 518 519 cl = FSS_BTOCL(sc, dbtob(bp->b_blkno)); 520 ch = FSS_BTOCL(sc, dbtob(bp->b_blkno)+bp->b_bcount-1); 521 522 for (c = cl; c <= ch; c++) 523 fss_read_cluster(sc, c); 524 525 return 0; 526 } 527 528 /* 529 * Lookup and open needed files. 530 * 531 * For file system internal snapshot initializes sc_mntname, sc_mount, 532 * sc_bs_vp and sc_time. 533 * 534 * Otherwise returns dev and size of the underlying block device. 535 * Initializes sc_mntname, sc_mount_vp, sc_bdev, sc_bs_vp and sc_mount 536 */ 537 static int 538 fss_create_files(struct fss_softc *sc, struct fss_set *fss, 539 off_t *bsize, struct lwp *l) 540 { 541 int error, bits, fsbsize; 542 struct timespec ts; 543 struct partinfo dpart; 544 struct vattr va; 545 struct nameidata nd; 546 547 /* 548 * Get the mounted file system. 549 */ 550 551 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fss->fss_mount, l); 552 if ((error = namei(&nd)) != 0) 553 return error; 554 555 if ((nd.ni_vp->v_flag & VROOT) != VROOT) { 556 vrele(nd.ni_vp); 557 return EINVAL; 558 } 559 560 sc->sc_mount = nd.ni_vp->v_mount; 561 memcpy(sc->sc_mntname, sc->sc_mount->mnt_stat.f_mntonname, MNAMELEN); 562 563 vrele(nd.ni_vp); 564 565 /* 566 * Check for file system internal snapshot. 567 */ 568 569 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fss->fss_bstore, l); 570 if ((error = namei(&nd)) != 0) 571 return error; 572 573 if (nd.ni_vp->v_type == VREG && nd.ni_vp->v_mount == sc->sc_mount) { 574 vrele(nd.ni_vp); 575 sc->sc_flags |= FSS_PERSISTENT; 576 577 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fss->fss_bstore, l); 578 if ((error = vn_open(&nd, FREAD, 0)) != 0) 579 return error; 580 sc->sc_bs_vp = nd.ni_vp; 581 582 fsbsize = sc->sc_bs_vp->v_mount->mnt_stat.f_iosize; 583 bits = sizeof(sc->sc_bs_bshift)*NBBY; 584 for (sc->sc_bs_bshift = 1; sc->sc_bs_bshift < bits; 585 sc->sc_bs_bshift++) 586 if (FSS_FSBSIZE(sc) == fsbsize) 587 break; 588 if (sc->sc_bs_bshift >= bits) { 589 VOP_UNLOCK(sc->sc_bs_vp, 0); 590 return EINVAL; 591 } 592 593 sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1; 594 sc->sc_clshift = 0; 595 596 error = VFS_SNAPSHOT(sc->sc_mount, sc->sc_bs_vp, &ts); 597 TIMESPEC_TO_TIMEVAL(&sc->sc_time, &ts); 598 599 VOP_UNLOCK(sc->sc_bs_vp, 0); 600 601 return error; 602 } 603 vrele(nd.ni_vp); 604 605 /* 606 * Get the block device it is mounted on. 607 */ 608 609 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, 610 sc->sc_mount->mnt_stat.f_mntfromname, l); 611 if ((error = namei(&nd)) != 0) 612 return error; 613 614 if (nd.ni_vp->v_type != VBLK) { 615 vrele(nd.ni_vp); 616 return EINVAL; 617 } 618 619 error = VOP_IOCTL(nd.ni_vp, DIOCGPART, &dpart, FREAD, 620 l->l_proc->p_ucred, l); 621 if (error) { 622 vrele(nd.ni_vp); 623 return error; 624 } 625 626 sc->sc_mount_vp = nd.ni_vp; 627 sc->sc_bdev = nd.ni_vp->v_rdev; 628 *bsize = (off_t)dpart.disklab->d_secsize*dpart.part->p_size; 629 vrele(nd.ni_vp); 630 631 /* 632 * Get the backing store 633 */ 634 635 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fss->fss_bstore, l); 636 if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) 637 return error; 638 VOP_UNLOCK(nd.ni_vp, 0); 639 640 sc->sc_bs_vp = nd.ni_vp; 641 642 if (nd.ni_vp->v_type != VREG && nd.ni_vp->v_type != VCHR) 643 return EINVAL; 644 645 if (sc->sc_bs_vp->v_type == VREG) { 646 error = VOP_GETATTR(sc->sc_bs_vp, &va, l->l_proc->p_ucred, l); 647 if (error != 0) 648 return error; 649 sc->sc_bs_size = va.va_size; 650 fsbsize = sc->sc_bs_vp->v_mount->mnt_stat.f_iosize; 651 if (fsbsize & (fsbsize-1)) /* No power of two */ 652 return EINVAL; 653 for (sc->sc_bs_bshift = 1; sc->sc_bs_bshift < 32; 654 sc->sc_bs_bshift++) 655 if (FSS_FSBSIZE(sc) == fsbsize) 656 break; 657 if (sc->sc_bs_bshift >= 32) 658 return EINVAL; 659 sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1; 660 } else { 661 sc->sc_bs_bshift = DEV_BSHIFT; 662 sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1; 663 } 664 665 /* 666 * As all IO to from/to the backing store goes through 667 * VOP_STRATEGY() clean the buffer cache to prevent 668 * cache incoherencies. 669 */ 670 if ((error = vinvalbuf(sc->sc_bs_vp, V_SAVE, l->l_proc->p_ucred, l, 0, 0)) != 0) 671 return error; 672 673 return 0; 674 } 675 676 /* 677 * Create a snapshot. 678 */ 679 static int 680 fss_create_snapshot(struct fss_softc *sc, struct fss_set *fss, struct lwp *l) 681 { 682 int len, error; 683 u_int32_t csize; 684 off_t bsize; 685 686 /* 687 * Open needed files. 688 */ 689 if ((error = fss_create_files(sc, fss, &bsize, l)) != 0) 690 goto bad; 691 692 if (sc->sc_flags & FSS_PERSISTENT) { 693 fss_softc_alloc(sc); 694 sc->sc_flags |= FSS_ACTIVE; 695 return 0; 696 } 697 698 /* 699 * Set cluster size. Must be a power of two and 700 * a multiple of backing store block size. 701 */ 702 if (fss->fss_csize <= 0) 703 csize = MAXPHYS; 704 else 705 csize = fss->fss_csize; 706 if (bsize/csize > FSS_CLUSTER_MAX) 707 csize = bsize/FSS_CLUSTER_MAX+1; 708 709 for (sc->sc_clshift = sc->sc_bs_bshift; sc->sc_clshift < 32; 710 sc->sc_clshift++) 711 if (FSS_CLSIZE(sc) >= csize) 712 break; 713 if (sc->sc_clshift >= 32) { 714 error = EINVAL; 715 goto bad; 716 } 717 sc->sc_clmask = FSS_CLSIZE(sc)-1; 718 719 /* 720 * Set number of cache slots. 721 */ 722 if (FSS_CLSIZE(sc) <= 8192) 723 sc->sc_cache_size = 32; 724 else if (FSS_CLSIZE(sc) <= 65536) 725 sc->sc_cache_size = 8; 726 else 727 sc->sc_cache_size = 4; 728 729 /* 730 * Set number of clusters and size of last cluster. 731 */ 732 sc->sc_clcount = FSS_BTOCL(sc, bsize-1)+1; 733 sc->sc_clresid = FSS_CLOFF(sc, bsize-1)+1; 734 735 /* 736 * Set size of indirect table. 737 */ 738 len = sc->sc_clcount*sizeof(u_int32_t); 739 sc->sc_indir_size = FSS_BTOCL(sc, len)+1; 740 sc->sc_clnext = sc->sc_indir_size; 741 sc->sc_indir_cur = 0; 742 743 if ((error = fss_softc_alloc(sc)) != 0) 744 goto bad; 745 746 /* 747 * Activate the snapshot. 748 */ 749 750 if ((error = vfs_write_suspend(sc->sc_mount, PUSER|PCATCH, 0)) != 0) 751 goto bad; 752 753 microtime(&sc->sc_time); 754 755 if (error == 0) 756 error = vn_cow_establish(sc->sc_mount_vp, 757 fss_copy_on_write, sc); 758 if (error == 0) 759 sc->sc_flags |= FSS_ACTIVE; 760 761 vfs_write_resume(sc->sc_mount); 762 763 if (error != 0) 764 goto bad; 765 766 #ifdef DEBUG 767 printf("fss%d: %s snapshot active\n", sc->sc_unit, sc->sc_mntname); 768 printf("fss%d: %u clusters of %u, %u cache slots, %u indir clusters\n", 769 sc->sc_unit, sc->sc_clcount, FSS_CLSIZE(sc), 770 sc->sc_cache_size, sc->sc_indir_size); 771 #endif 772 773 return 0; 774 775 bad: 776 fss_softc_free(sc); 777 if (sc->sc_bs_vp != NULL) { 778 if (sc->sc_flags & FSS_PERSISTENT) 779 vn_close(sc->sc_bs_vp, FREAD, l->l_proc->p_ucred, l); 780 else 781 vn_close(sc->sc_bs_vp, FREAD|FWRITE, l->l_proc->p_ucred, l); 782 } 783 sc->sc_bs_vp = NULL; 784 785 return error; 786 } 787 788 /* 789 * Delete a snapshot. 790 */ 791 static int 792 fss_delete_snapshot(struct fss_softc *sc, struct lwp *l) 793 { 794 int s; 795 796 if ((sc->sc_flags & FSS_PERSISTENT) == 0) 797 vn_cow_disestablish(sc->sc_mount_vp, fss_copy_on_write, sc); 798 799 FSS_LOCK(sc, s); 800 sc->sc_flags &= ~(FSS_ACTIVE|FSS_ERROR); 801 sc->sc_mount = NULL; 802 sc->sc_bdev = NODEV; 803 FSS_UNLOCK(sc, s); 804 805 fss_softc_free(sc); 806 if (sc->sc_flags & FSS_PERSISTENT) 807 vn_close(sc->sc_bs_vp, FREAD, l->l_proc->p_ucred, l); 808 else 809 vn_close(sc->sc_bs_vp, FREAD|FWRITE, l->l_proc->p_ucred, l); 810 sc->sc_bs_vp = NULL; 811 sc->sc_flags &= ~FSS_PERSISTENT; 812 813 FSS_STAT_CLEAR(sc); 814 815 return 0; 816 } 817 818 /* 819 * A read from the snapshotted block device has completed. 820 */ 821 static void 822 fss_cluster_iodone(struct buf *bp) 823 { 824 int s; 825 struct fss_cache *scp = bp->b_private; 826 827 KASSERT(bp->b_vp == NULL); 828 829 FSS_LOCK(scp->fc_softc, s); 830 831 if (bp->b_flags & B_ERROR) 832 fss_error(scp->fc_softc, "fs read error %d", bp->b_error); 833 834 if (--scp->fc_xfercount == 0) 835 wakeup(&scp->fc_data); 836 837 FSS_UNLOCK(scp->fc_softc, s); 838 839 putiobuf(bp); 840 } 841 842 /* 843 * Read a cluster from the snapshotted block device to the cache. 844 */ 845 static void 846 fss_read_cluster(struct fss_softc *sc, u_int32_t cl) 847 { 848 int s, todo, len; 849 caddr_t addr; 850 daddr_t dblk; 851 struct buf *bp; 852 struct fss_cache *scp, *scl; 853 854 /* 855 * Get a free cache slot. 856 */ 857 scl = sc->sc_cache+sc->sc_cache_size; 858 859 FSS_LOCK(sc, s); 860 861 restart: 862 if (isset(sc->sc_copied, cl) || !FSS_ISVALID(sc)) { 863 FSS_UNLOCK(sc, s); 864 return; 865 } 866 867 for (scp = sc->sc_cache; scp < scl; scp++) 868 if (scp->fc_type != FSS_CACHE_FREE && 869 scp->fc_cluster == cl) { 870 ltsleep(&scp->fc_type, PRIBIO, "cowwait2", 0, 871 &sc->sc_slock); 872 goto restart; 873 } 874 875 for (scp = sc->sc_cache; scp < scl; scp++) 876 if (scp->fc_type == FSS_CACHE_FREE) { 877 scp->fc_type = FSS_CACHE_BUSY; 878 scp->fc_cluster = cl; 879 break; 880 } 881 if (scp >= scl) { 882 FSS_STAT_INC(sc, cow_cache_full); 883 ltsleep(&sc->sc_cache, PRIBIO, "cowwait3", 0, &sc->sc_slock); 884 goto restart; 885 } 886 887 FSS_UNLOCK(sc, s); 888 889 /* 890 * Start the read. 891 */ 892 FSS_STAT_INC(sc, cow_copied); 893 894 dblk = btodb(FSS_CLTOB(sc, cl)); 895 addr = scp->fc_data; 896 if (cl == sc->sc_clcount-1) { 897 todo = sc->sc_clresid; 898 memset(addr+todo, 0, FSS_CLSIZE(sc)-todo); 899 } else 900 todo = FSS_CLSIZE(sc); 901 while (todo > 0) { 902 len = todo; 903 if (len > MAXPHYS) 904 len = MAXPHYS; 905 906 bp = getiobuf(); 907 bp->b_flags = B_READ|B_CALL; 908 bp->b_bcount = len; 909 bp->b_bufsize = bp->b_bcount; 910 bp->b_error = 0; 911 bp->b_data = addr; 912 bp->b_blkno = dblk; 913 bp->b_proc = NULL; 914 bp->b_dev = sc->sc_bdev; 915 bp->b_vp = NULLVP; 916 bp->b_private = scp; 917 bp->b_iodone = fss_cluster_iodone; 918 919 DEV_STRATEGY(bp); 920 921 FSS_LOCK(sc, s); 922 scp->fc_xfercount++; 923 FSS_UNLOCK(sc, s); 924 925 dblk += btodb(len); 926 addr += len; 927 todo -= len; 928 } 929 930 /* 931 * Wait for all read requests to complete. 932 */ 933 FSS_LOCK(sc, s); 934 while (scp->fc_xfercount > 0) 935 ltsleep(&scp->fc_data, PRIBIO, "cowwait", 0, &sc->sc_slock); 936 937 scp->fc_type = FSS_CACHE_VALID; 938 setbit(sc->sc_copied, scp->fc_cluster); 939 FSS_UNLOCK(sc, s); 940 941 wakeup(&sc->sc_bs_proc); 942 } 943 944 /* 945 * Read/write clusters from/to backing store. 946 * For persistent snapshots must be called with cl == 0. off is the 947 * offset into the snapshot. 948 */ 949 static int 950 fss_bs_io(struct fss_softc *sc, fss_io_type rw, 951 u_int32_t cl, off_t off, int len, caddr_t data) 952 { 953 int error; 954 955 off += FSS_CLTOB(sc, cl); 956 957 vn_lock(sc->sc_bs_vp, LK_EXCLUSIVE|LK_RETRY); 958 959 error = vn_rdwr((rw == FSS_READ ? UIO_READ : UIO_WRITE), sc->sc_bs_vp, 960 data, len, off, UIO_SYSSPACE, IO_UNIT|IO_NODELOCKED, 961 sc->sc_bs_proc->p_ucred, NULL, NULL); 962 if (error == 0) { 963 simple_lock(&sc->sc_bs_vp->v_interlock); 964 error = VOP_PUTPAGES(sc->sc_bs_vp, trunc_page(off), 965 round_page(off+len), PGO_CLEANIT|PGO_SYNCIO|PGO_FREE); 966 } 967 968 VOP_UNLOCK(sc->sc_bs_vp, 0); 969 970 return error; 971 } 972 973 /* 974 * Get a pointer to the indirect slot for this cluster. 975 */ 976 static u_int32_t * 977 fss_bs_indir(struct fss_softc *sc, u_int32_t cl) 978 { 979 u_int32_t icl; 980 int ioff; 981 982 icl = cl/(FSS_CLSIZE(sc)/sizeof(u_int32_t)); 983 ioff = cl%(FSS_CLSIZE(sc)/sizeof(u_int32_t)); 984 985 if (sc->sc_indir_cur == icl) 986 return &sc->sc_indir_data[ioff]; 987 988 if (sc->sc_indir_dirty) { 989 FSS_STAT_INC(sc, indir_write); 990 if (fss_bs_io(sc, FSS_WRITE, sc->sc_indir_cur, 0, 991 FSS_CLSIZE(sc), (caddr_t)sc->sc_indir_data) != 0) 992 return NULL; 993 setbit(sc->sc_indir_valid, sc->sc_indir_cur); 994 } 995 996 sc->sc_indir_dirty = 0; 997 sc->sc_indir_cur = icl; 998 999 if (isset(sc->sc_indir_valid, sc->sc_indir_cur)) { 1000 FSS_STAT_INC(sc, indir_read); 1001 if (fss_bs_io(sc, FSS_READ, sc->sc_indir_cur, 0, 1002 FSS_CLSIZE(sc), (caddr_t)sc->sc_indir_data) != 0) 1003 return NULL; 1004 } else 1005 memset(sc->sc_indir_data, 0, FSS_CLSIZE(sc)); 1006 1007 return &sc->sc_indir_data[ioff]; 1008 } 1009 1010 /* 1011 * The kernel thread (one for every active snapshot). 1012 * 1013 * After wakeup it cleans the cache and runs the I/O requests. 1014 */ 1015 static void 1016 fss_bs_thread(void *arg) 1017 { 1018 int error, len, nfreed, nio, s; 1019 long off; 1020 caddr_t addr; 1021 u_int32_t c, cl, ch, *indirp; 1022 struct buf *bp, *nbp; 1023 struct fss_softc *sc; 1024 struct fss_cache *scp, *scl; 1025 1026 sc = arg; 1027 1028 scl = sc->sc_cache+sc->sc_cache_size; 1029 1030 nbp = getiobuf(); 1031 1032 nfreed = nio = 1; /* Dont sleep the first time */ 1033 1034 FSS_LOCK(sc, s); 1035 1036 for (;;) { 1037 if (nfreed == 0 && nio == 0) 1038 ltsleep(&sc->sc_bs_proc, PVM-1, "fssbs", 0, 1039 &sc->sc_slock); 1040 1041 if ((sc->sc_flags & FSS_BS_THREAD) == 0) { 1042 sc->sc_bs_proc = NULL; 1043 wakeup(&sc->sc_bs_proc); 1044 1045 FSS_UNLOCK(sc, s); 1046 1047 putiobuf(nbp); 1048 #ifdef FSS_STATISTICS 1049 if ((sc->sc_flags & FSS_PERSISTENT) == 0) { 1050 printf("fss%d: cow called %" PRId64 " times," 1051 " copied %" PRId64 " clusters," 1052 " cache full %" PRId64 " times\n", 1053 sc->sc_unit, 1054 FSS_STAT_VAL(sc, cow_calls), 1055 FSS_STAT_VAL(sc, cow_copied), 1056 FSS_STAT_VAL(sc, cow_cache_full)); 1057 printf("fss%d: %" PRId64 " indir reads," 1058 " %" PRId64 " indir writes\n", 1059 sc->sc_unit, 1060 FSS_STAT_VAL(sc, indir_read), 1061 FSS_STAT_VAL(sc, indir_write)); 1062 } 1063 #endif /* FSS_STATISTICS */ 1064 kthread_exit(0); 1065 } 1066 1067 /* 1068 * Process I/O requests (persistent) 1069 */ 1070 1071 if (sc->sc_flags & FSS_PERSISTENT) { 1072 nfreed = nio = 0; 1073 1074 if ((bp = BUFQ_GET(sc->sc_bufq)) == NULL) 1075 continue; 1076 1077 nio++; 1078 1079 if (FSS_ISVALID(sc)) { 1080 FSS_UNLOCK(sc, s); 1081 1082 error = fss_bs_io(sc, FSS_READ, 0, 1083 dbtob(bp->b_blkno), bp->b_bcount, 1084 bp->b_data); 1085 1086 FSS_LOCK(sc, s); 1087 } else 1088 error = ENXIO; 1089 1090 if (error) { 1091 bp->b_error = error; 1092 bp->b_flags |= B_ERROR; 1093 bp->b_resid = bp->b_bcount; 1094 } 1095 biodone(bp); 1096 1097 continue; 1098 } 1099 1100 /* 1101 * Clean the cache 1102 */ 1103 nfreed = 0; 1104 for (scp = sc->sc_cache; scp < scl; scp++) { 1105 if (scp->fc_type != FSS_CACHE_VALID) 1106 continue; 1107 1108 FSS_UNLOCK(sc, s); 1109 1110 indirp = fss_bs_indir(sc, scp->fc_cluster); 1111 if (indirp != NULL) { 1112 error = fss_bs_io(sc, FSS_WRITE, sc->sc_clnext, 1113 0, FSS_CLSIZE(sc), scp->fc_data); 1114 } else 1115 error = EIO; 1116 1117 FSS_LOCK(sc, s); 1118 1119 if (error == 0) { 1120 *indirp = sc->sc_clnext++; 1121 sc->sc_indir_dirty = 1; 1122 } else 1123 fss_error(sc, "write bs error %d", error); 1124 1125 scp->fc_type = FSS_CACHE_FREE; 1126 nfreed++; 1127 wakeup(&scp->fc_type); 1128 } 1129 1130 if (nfreed) 1131 wakeup(&sc->sc_cache); 1132 1133 /* 1134 * Process I/O requests 1135 */ 1136 nio = 0; 1137 1138 if ((bp = BUFQ_GET(sc->sc_bufq)) == NULL) 1139 continue; 1140 1141 nio++; 1142 1143 if (!FSS_ISVALID(sc)) { 1144 bp->b_error = ENXIO; 1145 bp->b_flags |= B_ERROR; 1146 bp->b_resid = bp->b_bcount; 1147 biodone(bp); 1148 continue; 1149 } 1150 1151 /* 1152 * First read from the snapshotted block device. 1153 * XXX Split to only read those parts that have not 1154 * been saved to backing store? 1155 */ 1156 1157 FSS_UNLOCK(sc, s); 1158 1159 BUF_INIT(nbp); 1160 nbp->b_flags = B_READ; 1161 nbp->b_bcount = bp->b_bcount; 1162 nbp->b_bufsize = bp->b_bcount; 1163 nbp->b_error = 0; 1164 nbp->b_data = bp->b_data; 1165 nbp->b_blkno = bp->b_blkno; 1166 nbp->b_proc = bp->b_proc; 1167 nbp->b_dev = sc->sc_bdev; 1168 nbp->b_vp = NULLVP; 1169 1170 DEV_STRATEGY(nbp); 1171 1172 if (biowait(nbp) != 0) { 1173 bp->b_resid = bp->b_bcount; 1174 bp->b_error = nbp->b_error; 1175 bp->b_flags |= B_ERROR; 1176 biodone(bp); 1177 continue; 1178 } 1179 1180 cl = FSS_BTOCL(sc, dbtob(bp->b_blkno)); 1181 off = FSS_CLOFF(sc, dbtob(bp->b_blkno)); 1182 ch = FSS_BTOCL(sc, dbtob(bp->b_blkno)+bp->b_bcount-1); 1183 bp->b_resid = bp->b_bcount; 1184 addr = bp->b_data; 1185 1186 FSS_LOCK(sc, s); 1187 1188 /* 1189 * Replace those parts that have been saved to backing store. 1190 */ 1191 1192 for (c = cl; c <= ch; 1193 c++, off = 0, bp->b_resid -= len, addr += len) { 1194 len = FSS_CLSIZE(sc)-off; 1195 if (len > bp->b_resid) 1196 len = bp->b_resid; 1197 1198 if (isclr(sc->sc_copied, c)) 1199 continue; 1200 1201 FSS_UNLOCK(sc, s); 1202 1203 indirp = fss_bs_indir(sc, c); 1204 1205 FSS_LOCK(sc, s); 1206 1207 if (indirp == NULL || *indirp == 0) { 1208 /* 1209 * Not on backing store. Either in cache 1210 * or hole in the snapshotted block device. 1211 */ 1212 for (scp = sc->sc_cache; scp < scl; scp++) 1213 if (scp->fc_type == FSS_CACHE_VALID && 1214 scp->fc_cluster == c) 1215 break; 1216 if (scp < scl) 1217 memcpy(addr, scp->fc_data+off, len); 1218 else 1219 memset(addr, 0, len); 1220 continue; 1221 } 1222 /* 1223 * Read from backing store. 1224 */ 1225 1226 FSS_UNLOCK(sc, s); 1227 1228 if ((error = fss_bs_io(sc, FSS_READ, *indirp, 1229 off, len, addr)) != 0) { 1230 bp->b_resid = bp->b_bcount; 1231 bp->b_error = error; 1232 bp->b_flags |= B_ERROR; 1233 break; 1234 } 1235 1236 FSS_LOCK(sc, s); 1237 1238 } 1239 1240 biodone(bp); 1241 } 1242 } 1243