1 /* $NetBSD: fss.c,v 1.5 2004/01/25 18:06:48 hannken Exp $ */ 2 3 /*- 4 * Copyright (c) 2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Juergen Hannken-Illjes. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 /* 40 * File system snapshot disk driver. 41 * 42 * Block/character interface to the snapshot of a mounted file system. 43 */ 44 45 #include <sys/cdefs.h> 46 __KERNEL_RCSID(0, "$NetBSD: fss.c,v 1.5 2004/01/25 18:06:48 hannken Exp $"); 47 48 #include "fss.h" 49 #include "opt_ddb.h" 50 51 #include <sys/param.h> 52 #include <sys/systm.h> 53 #include <sys/namei.h> 54 #include <sys/proc.h> 55 #include <sys/errno.h> 56 #include <sys/buf.h> 57 #include <sys/malloc.h> 58 #include <sys/ioctl.h> 59 #include <sys/disklabel.h> 60 #include <sys/device.h> 61 #include <sys/disk.h> 62 #include <sys/stat.h> 63 #include <sys/mount.h> 64 #include <sys/vnode.h> 65 #include <sys/file.h> 66 #include <sys/uio.h> 67 #include <sys/conf.h> 68 #include <sys/kthread.h> 69 70 #include <miscfs/specfs/specdev.h> 71 72 #include <dev/fssvar.h> 73 74 #include <machine/stdarg.h> 75 76 #if defined(DEBUG) && defined(DDB) 77 #include <ddb/ddbvar.h> 78 #include <machine/db_machdep.h> 79 #include <ddb/db_command.h> 80 #include <ddb/db_interface.h> 81 #endif 82 83 #ifdef DEBUG 84 #define FSS_STATISTICS 85 #endif 86 87 #ifdef FSS_STATISTICS 88 struct fss_stat { 89 u_int64_t cow_calls; 90 u_int64_t cow_copied; 91 u_int64_t cow_cache_full; 92 u_int64_t indir_read; 93 u_int64_t indir_write; 94 }; 95 96 static struct fss_stat fss_stat[NFSS]; 97 98 #define FSS_STAT_INC(sc, field) \ 99 do { \ 100 fss_stat[sc->sc_unit].field++; \ 101 } while (0) 102 #define FSS_STAT_SET(sc, field, value) \ 103 do { \ 104 fss_stat[sc->sc_unit].field = value; \ 105 } while (0) 106 #define FSS_STAT_ADD(sc, field, value) \ 107 do { \ 108 fss_stat[sc->sc_unit].field += value; \ 109 } while (0) 110 #define FSS_STAT_VAL(sc, field) fss_stat[sc->sc_unit].field 111 #define FSS_STAT_CLEAR(sc) \ 112 do { \ 113 memset(&fss_stat[sc->sc_unit], 0, \ 114 sizeof(struct fss_stat)); \ 115 } while (0) 116 #else /* FSS_STATISTICS */ 117 #define FSS_STAT_INC(sc, field) 118 #define FSS_STAT_SET(sc, field, value) 119 #define FSS_STAT_ADD(sc, field, value) 120 #define FSS_STAT_CLEAR(sc) 121 #endif /* FSS_STATISTICS */ 122 123 typedef enum { 124 FSS_READ, 125 FSS_WRITE 126 } fss_io_type; 127 128 void fssattach(int); 129 130 dev_type_open(fss_open); 131 dev_type_close(fss_close); 132 dev_type_read(fss_read); 133 dev_type_write(fss_write); 134 dev_type_ioctl(fss_ioctl); 135 dev_type_strategy(fss_strategy); 136 dev_type_dump(fss_dump); 137 dev_type_size(fss_size); 138 139 static inline void fss_error(struct fss_softc *, const char *, ...); 140 static int fss_create_snapshot(struct fss_softc *, struct fss_set *, 141 struct proc *); 142 static int fss_delete_snapshot(struct fss_softc *, struct proc *); 143 static int fss_softc_alloc(struct fss_softc *); 144 static void fss_softc_free(struct fss_softc *); 145 static void fss_cluster_iodone(struct buf *); 146 static void fss_read_cluster(struct fss_softc *, u_int32_t); 147 static int fss_write_cluster(struct fss_cache *, u_int32_t); 148 static void fss_bs_thread(void *); 149 static int fss_bmap(struct fss_softc *, off_t, int, 150 struct vnode **, daddr_t *, int *); 151 static int fss_bs_io(struct fss_softc *, fss_io_type, 152 u_int32_t, long, int, caddr_t); 153 static u_int32_t *fss_bs_indir(struct fss_softc *, u_int32_t); 154 155 const struct bdevsw fss_bdevsw = { 156 fss_open, fss_close, fss_strategy, fss_ioctl, 157 fss_dump, fss_size, D_DISK 158 }; 159 160 const struct cdevsw fss_cdevsw = { 161 fss_open, fss_close, fss_read, fss_write, fss_ioctl, 162 nostop, notty, nopoll, nommap, nokqfilter, D_DISK 163 }; 164 165 void 166 fssattach(int num) 167 { 168 int i; 169 struct fss_softc *sc; 170 171 for (i = 0; i < NFSS; i++) { 172 sc = &fss_softc[i]; 173 sc->sc_unit = i; 174 sc->sc_bdev = NODEV; 175 simple_lock_init(&sc->sc_slock); 176 bufq_alloc(&sc->sc_bufq, BUFQ_FCFS|BUFQ_SORT_RAWBLOCK); 177 } 178 } 179 180 int 181 fss_open(dev_t dev, int flags, int mode, struct proc *p) 182 { 183 struct fss_softc *sc; 184 185 if ((sc = FSS_DEV_TO_SOFTC(dev)) == NULL) 186 return ENODEV; 187 188 return 0; 189 } 190 191 int 192 fss_close(dev_t dev, int flags, int mode, struct proc *p) 193 { 194 struct fss_softc *sc; 195 196 if ((sc = FSS_DEV_TO_SOFTC(dev)) == NULL) 197 return ENODEV; 198 199 return 0; 200 } 201 202 void 203 fss_strategy(struct buf *bp) 204 { 205 int s; 206 struct fss_softc *sc; 207 208 sc = FSS_DEV_TO_SOFTC(bp->b_dev); 209 210 FSS_LOCK(sc, s); 211 212 if ((bp->b_flags & B_READ) != B_READ || 213 sc == NULL || !FSS_ISVALID(sc)) { 214 215 FSS_UNLOCK(sc, s); 216 217 bp->b_error = (sc == NULL ? ENODEV : EROFS); 218 bp->b_flags |= B_ERROR; 219 bp->b_resid = bp->b_bcount; 220 biodone(bp); 221 return; 222 } 223 224 bp->b_rawblkno = bp->b_blkno; 225 BUFQ_PUT(&sc->sc_bufq, bp); 226 wakeup(&sc->sc_bs_proc); 227 228 FSS_UNLOCK(sc, s); 229 } 230 231 int 232 fss_read(dev_t dev, struct uio *uio, int flags) 233 { 234 return physio(fss_strategy, NULL, dev, B_READ, minphys, uio); 235 } 236 237 int 238 fss_write(dev_t dev, struct uio *uio, int flags) 239 { 240 return physio(fss_strategy, NULL, dev, B_WRITE, minphys, uio); 241 } 242 243 int 244 fss_ioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) 245 { 246 int s, error; 247 struct fss_softc *sc; 248 struct fss_set *fss = (struct fss_set *)data; 249 struct fss_get *fsg = (struct fss_get *)data; 250 251 if ((sc = FSS_DEV_TO_SOFTC(dev)) == NULL) 252 return ENODEV; 253 254 FSS_LOCK(sc, s); 255 while ((sc->sc_flags & FSS_EXCL) == FSS_EXCL) { 256 error = ltsleep(sc, PRIBIO|PCATCH, "fsslock", 0, &sc->sc_slock); 257 if (error) { 258 FSS_UNLOCK(sc, s); 259 return error; 260 } 261 } 262 sc->sc_flags |= FSS_EXCL; 263 FSS_UNLOCK(sc, s); 264 265 error = EINVAL; 266 267 switch (cmd) { 268 case FSSIOCSET: 269 if ((flag & FWRITE) == 0) 270 error = EPERM; 271 else if ((sc->sc_flags & FSS_ACTIVE) != 0) 272 error = EBUSY; 273 else 274 error = fss_create_snapshot(sc, fss, p); 275 break; 276 277 case FSSIOCCLR: 278 if ((flag & FWRITE) == 0) 279 error = EPERM; 280 else if ((sc->sc_flags & FSS_ACTIVE) == 0) 281 error = ENXIO; 282 else 283 error = fss_delete_snapshot(sc, p); 284 break; 285 286 case FSSIOCGET: 287 if ((sc->sc_flags & FSS_ACTIVE) == FSS_ACTIVE) { 288 memcpy(fsg->fsg_mount, sc->sc_mntname, MNAMELEN); 289 fsg->fsg_csize = FSS_CLSIZE(sc); 290 fsg->fsg_time = sc->sc_time; 291 fsg->fsg_mount_size = sc->sc_clcount; 292 fsg->fsg_bs_size = sc->sc_clnext; 293 error = 0; 294 } else 295 error = ENXIO; 296 break; 297 } 298 299 FSS_LOCK(sc, s); 300 sc->sc_flags &= ~FSS_EXCL; 301 FSS_UNLOCK(sc, s); 302 wakeup(sc); 303 304 return error; 305 } 306 307 int 308 fss_size(dev_t dev) 309 { 310 return -1; 311 } 312 313 int 314 fss_dump(dev_t dev, daddr_t blkno, caddr_t va, size_t size) 315 { 316 return EROFS; 317 } 318 319 /* 320 * An error occured reading or writing the snapshot or backing store. 321 * If it is the first error log to console. 322 * The caller holds the simplelock. 323 */ 324 static inline void 325 fss_error(struct fss_softc *sc, const char *fmt, ...) 326 { 327 va_list ap; 328 329 if ((sc->sc_flags & (FSS_ACTIVE|FSS_ERROR)) == FSS_ACTIVE) { 330 va_start(ap, fmt); 331 printf("fss%d: snapshot invalid: ", sc->sc_unit); 332 vprintf(fmt, ap); 333 printf("\n"); 334 va_end(ap); 335 } 336 if ((sc->sc_flags & FSS_ACTIVE) == FSS_ACTIVE) 337 sc->sc_flags |= FSS_ERROR; 338 } 339 340 /* 341 * Allocate the variable sized parts of the softc and 342 * fork the kernel thread. 343 * 344 * The fields sc_clcount, sc_clshift, sc_cache_size and sc_indir_size 345 * must be initialized. 346 */ 347 static int 348 fss_softc_alloc(struct fss_softc *sc) 349 { 350 int i, len, error; 351 352 len = (sc->sc_clcount+NBBY-1)/NBBY; 353 sc->sc_copied = malloc(len, M_TEMP, M_ZERO|M_WAITOK|M_CANFAIL); 354 if (sc->sc_copied == NULL) 355 return(ENOMEM); 356 357 len = sc->sc_cache_size*sizeof(struct fss_cache); 358 sc->sc_cache = malloc(len, M_TEMP, M_ZERO|M_WAITOK|M_CANFAIL); 359 if (sc->sc_cache == NULL) 360 return(ENOMEM); 361 362 len = FSS_CLSIZE(sc); 363 for (i = 0; i < sc->sc_cache_size; i++) { 364 sc->sc_cache[i].fc_type = FSS_CACHE_FREE; 365 sc->sc_cache[i].fc_softc = sc; 366 sc->sc_cache[i].fc_xfercount = 0; 367 sc->sc_cache[i].fc_data = malloc(len, M_TEMP, 368 M_WAITOK|M_CANFAIL); 369 if (sc->sc_cache[i].fc_data == NULL) 370 return(ENOMEM); 371 } 372 373 len = (sc->sc_indir_size+NBBY-1)/NBBY; 374 sc->sc_indir_valid = malloc(len, M_TEMP, M_ZERO|M_WAITOK|M_CANFAIL); 375 if (sc->sc_indir_valid == NULL) 376 return(ENOMEM); 377 378 len = FSS_CLSIZE(sc); 379 sc->sc_indir_data = malloc(len, M_TEMP, M_ZERO|M_WAITOK|M_CANFAIL); 380 if (sc->sc_indir_data == NULL) 381 return(ENOMEM); 382 383 if ((error = kthread_create1(fss_bs_thread, sc, &sc->sc_bs_proc, 384 "fssbs%d", sc->sc_unit)) != 0) 385 return error; 386 387 sc->sc_flags |= FSS_BS_THREAD; 388 return 0; 389 } 390 391 /* 392 * Free the variable sized parts of the softc. 393 */ 394 static void 395 fss_softc_free(struct fss_softc *sc) 396 { 397 int s, i; 398 399 if ((sc->sc_flags & FSS_BS_THREAD) != 0) { 400 FSS_LOCK(sc, s); 401 sc->sc_flags &= ~FSS_BS_THREAD; 402 wakeup(&sc->sc_bs_proc); 403 while (sc->sc_bs_proc != NULL) 404 ltsleep(&sc->sc_bs_proc, PRIBIO, "fssthread", 0, 405 &sc->sc_slock); 406 FSS_UNLOCK(sc, s); 407 } 408 409 if (sc->sc_copied != NULL) 410 free(sc->sc_copied, M_TEMP); 411 sc->sc_copied = NULL; 412 413 if (sc->sc_cache != NULL) { 414 for (i = 0; i < sc->sc_cache_size; i++) 415 if (sc->sc_cache[i].fc_data != NULL) 416 free(sc->sc_cache[i].fc_data, M_TEMP); 417 free(sc->sc_cache, M_TEMP); 418 } 419 sc->sc_cache = NULL; 420 421 if (sc->sc_indir_valid != NULL) 422 free(sc->sc_indir_valid, M_TEMP); 423 sc->sc_indir_valid = NULL; 424 425 if (sc->sc_indir_data != NULL) 426 free(sc->sc_indir_data, M_TEMP); 427 sc->sc_indir_data = NULL; 428 } 429 430 /* 431 * Check if an unmount is ok. If forced, set this snapshot into ERROR state. 432 */ 433 int 434 fss_umount_hook(struct mount *mp, int forced) 435 { 436 int i, s; 437 438 for (i = 0; i < NFSS; i++) { 439 FSS_LOCK(&fss_softc[i], s); 440 if ((fss_softc[i].sc_flags & FSS_ACTIVE) != 0 && 441 fss_softc[i].sc_mount == mp) { 442 if (forced) 443 fss_error(&fss_softc[i], "forced unmount"); 444 else { 445 FSS_UNLOCK(&fss_softc[i], s); 446 return EBUSY; 447 } 448 } 449 FSS_UNLOCK(&fss_softc[i], s); 450 } 451 452 return 0; 453 } 454 455 /* 456 * A buffer is written to the snapshotted block device. Copy to 457 * backing store if needed. 458 */ 459 void 460 fss_copy_on_write(struct fss_softc *sc, struct buf *bp) 461 { 462 int s; 463 u_int32_t cl, ch, c; 464 465 #ifdef DIAGNOSTIC 466 /* 467 * Buffer written on a suspended file system. This is always an error. 468 */ 469 if (sc->sc_mount && 470 (sc->sc_mount->mnt_iflag & IMNT_SUSPENDED) == IMNT_SUSPENDED) { 471 printf_nolog("fss%d: write while suspended, %lu@%" PRId64 "\n", 472 sc->sc_unit, bp->b_bcount, bp->b_blkno); 473 #if defined(DEBUG) && defined(DDB) 474 db_stack_trace_print((db_expr_t)__builtin_frame_address(0), 475 TRUE, 65535, "", printf_nolog); 476 #endif /* DEBUG && DDB */ 477 } 478 #endif /* DIAGNOSTIC */ 479 480 FSS_LOCK(sc, s); 481 if (!FSS_ISVALID(sc)) { 482 FSS_UNLOCK(sc, s); 483 return; 484 } 485 486 sc->sc_cowcount++; 487 488 FSS_UNLOCK(sc, s); 489 490 FSS_STAT_INC(sc, cow_calls); 491 492 cl = FSS_BTOCL(sc, dbtob(bp->b_blkno)); 493 ch = FSS_BTOCL(sc, dbtob(bp->b_blkno)+bp->b_bcount-1); 494 495 for (c = cl; c <= ch; c++) 496 fss_read_cluster(sc, c); 497 498 FSS_LOCK(sc, s); 499 500 if (--sc->sc_cowcount == 0 && !FSS_ISVALID(sc)) 501 wakeup(&sc->sc_cowcount); 502 503 FSS_UNLOCK(sc, s); 504 } 505 506 /* 507 * Lookup and open needed files. 508 * 509 * Returns dev and size of the underlying block device. 510 * Initializes the fields sc_mntname, sc_bs_vp and sc_mount 511 */ 512 static int 513 fss_create_files(struct fss_softc *sc, struct fss_set *fss, 514 dev_t *bdev, off_t *bsize, struct proc *p) 515 { 516 int error, fsbsize; 517 struct partinfo dpart; 518 struct vattr va; 519 struct nameidata nd; 520 521 /* 522 * Get the mounted file system. 523 */ 524 525 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fss->fss_mount, p); 526 if ((error = namei(&nd)) != 0) 527 return error; 528 529 vrele(nd.ni_vp); 530 531 if ((nd.ni_vp->v_flag & VROOT) != VROOT) 532 return EINVAL; 533 534 sc->sc_mount = nd.ni_vp->v_mount; 535 536 /* 537 * Get the block device it is mounted on. 538 */ 539 540 memcpy(sc->sc_mntname, sc->sc_mount->mnt_stat.f_mntonname, MNAMELEN); 541 542 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, 543 sc->sc_mount->mnt_stat.f_mntfromname, p); 544 if ((error = namei(&nd)) != 0) 545 return error; 546 547 if (nd.ni_vp->v_type != VBLK) { 548 vrele(nd.ni_vp); 549 return EINVAL; 550 } 551 552 error = VOP_IOCTL(nd.ni_vp, DIOCGPART, &dpart, FREAD, p->p_ucred, p); 553 if (error) { 554 vrele(nd.ni_vp); 555 return error; 556 } 557 558 *bdev = nd.ni_vp->v_rdev; 559 *bsize = (off_t)dpart.disklab->d_secsize*dpart.part->p_size; 560 vrele(nd.ni_vp); 561 562 /* 563 * Get the backing store 564 */ 565 566 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fss->fss_bstore, p); 567 if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) 568 return error; 569 VOP_UNLOCK(nd.ni_vp, 0); 570 571 sc->sc_bs_vp = nd.ni_vp; 572 573 if (nd.ni_vp->v_type != VREG && nd.ni_vp->v_type != VCHR) 574 return EINVAL; 575 576 if (sc->sc_bs_vp->v_type == VREG) { 577 error = VOP_GETATTR(sc->sc_bs_vp, &va, p->p_ucred, p); 578 if (error != 0) 579 return error; 580 sc->sc_bs_size = va.va_size; 581 fsbsize = sc->sc_bs_vp->v_mount->mnt_stat.f_iosize; 582 if (fsbsize & (fsbsize-1)) /* No power of two */ 583 return EINVAL; 584 for (sc->sc_bs_bshift = 1; sc->sc_bs_bshift < 32; 585 sc->sc_bs_bshift++) 586 if (FSS_FSBSIZE(sc) == fsbsize) 587 break; 588 if (sc->sc_bs_bshift >= 32) 589 return EINVAL; 590 sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1; 591 sc->sc_flags |= FSS_BS_ALLOC; 592 } else { 593 sc->sc_bs_bshift = DEV_BSHIFT; 594 sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1; 595 sc->sc_flags &= ~FSS_BS_ALLOC; 596 } 597 598 /* 599 * As all IO to from/to the backing store goes through 600 * VOP_STRATEGY() clean the buffer cache to prevent 601 * cache incoherencies. 602 */ 603 if ((error = vinvalbuf(sc->sc_bs_vp, V_SAVE, p->p_ucred, p, 0, 0)) != 0) 604 return error; 605 606 return 0; 607 } 608 609 /* 610 * Create a snapshot. 611 */ 612 static int 613 fss_create_snapshot(struct fss_softc *sc, struct fss_set *fss, struct proc *p) 614 { 615 int len, error; 616 u_int32_t csize; 617 dev_t bdev; 618 off_t bsize; 619 620 /* 621 * Open needed files. 622 */ 623 if ((error = fss_create_files(sc, fss, &bdev, &bsize, p)) != 0) 624 goto bad; 625 626 if (sc->sc_bs_vp->v_type == VREG && 627 sc->sc_bs_vp->v_mount == sc->sc_mount) { 628 /* XXX need persistent snapshot inside the file system: 629 * VFS_SNAPSHOT(sc->sc_mount, sc->sc_bs_vp); 630 * sc->sc_time = xtime(sc->sc_bs_vp); 631 * sc->sc_flags |= FSS_PERSISTENT; 632 * fss_softc_alloc(sc); 633 * sc->sc_flags |= FSS_ACTIVE; 634 */ 635 error = EDEADLK; 636 goto bad; 637 } 638 639 /* 640 * Set cluster size. Must be a power of two and 641 * a multiple of backing store block size. 642 */ 643 if (fss->fss_csize <= 0) 644 csize = MAXPHYS; 645 else 646 csize = fss->fss_csize; 647 if (bsize/csize > FSS_CLUSTER_MAX) 648 csize = bsize/FSS_CLUSTER_MAX+1; 649 650 for (sc->sc_clshift = sc->sc_bs_bshift; sc->sc_clshift < 32; 651 sc->sc_clshift++) 652 if (FSS_CLSIZE(sc) >= csize) 653 break; 654 if (sc->sc_clshift >= 32) { 655 error = EINVAL; 656 goto bad; 657 } 658 sc->sc_clmask = FSS_CLSIZE(sc)-1; 659 660 /* 661 * Set number of cache slots. 662 */ 663 if (FSS_CLSIZE(sc) <= 8192) 664 sc->sc_cache_size = 32; 665 else if (FSS_CLSIZE(sc) <= 65536) 666 sc->sc_cache_size = 8; 667 else 668 sc->sc_cache_size = 4; 669 670 /* 671 * Set number of clusters and size of last cluster. 672 */ 673 sc->sc_clcount = FSS_BTOCL(sc, bsize-1)+1; 674 sc->sc_clresid = FSS_CLOFF(sc, bsize-1)+1; 675 676 /* 677 * Set size of indirect table. 678 */ 679 len = sc->sc_clcount*sizeof(u_int32_t); 680 sc->sc_indir_size = FSS_BTOCL(sc, len)+1; 681 sc->sc_clnext = sc->sc_indir_size; 682 sc->sc_indir_cur = 0; 683 684 if ((error = fss_softc_alloc(sc)) != 0) 685 goto bad; 686 687 /* 688 * Activate the snapshot. 689 */ 690 691 if ((error = vfs_write_suspend(sc->sc_mount, PUSER|PCATCH, 0)) != 0) 692 goto bad; 693 694 microtime(&sc->sc_time); 695 696 if (error == 0) { 697 sc->sc_flags |= FSS_ACTIVE; 698 sc->sc_bdev = bdev; 699 } 700 701 vfs_write_resume(sc->sc_mount); 702 703 if (error != 0) 704 goto bad; 705 706 #ifdef DEBUG 707 printf("fss%d: %s snapshot active\n", sc->sc_unit, sc->sc_mntname); 708 printf("fss%d: %u clusters of %u, %u cache slots, %u indir clusters\n", 709 sc->sc_unit, sc->sc_clcount, FSS_CLSIZE(sc), 710 sc->sc_cache_size, sc->sc_indir_size); 711 #endif 712 713 return 0; 714 715 bad: 716 fss_softc_free(sc); 717 if (sc->sc_bs_vp != NULL) 718 vn_close(sc->sc_bs_vp, FREAD|FWRITE, p->p_ucred, p); 719 sc->sc_bs_vp = NULL; 720 721 return error; 722 } 723 724 /* 725 * Delete a snapshot. 726 */ 727 static int 728 fss_delete_snapshot(struct fss_softc *sc, struct proc *p) 729 { 730 int s; 731 732 FSS_LOCK(sc, s); 733 734 sc->sc_flags &= ~(FSS_ACTIVE|FSS_ERROR); 735 736 while (sc->sc_cowcount > 0) { 737 ltsleep(&sc->sc_cowcount, PRIBIO, "cowwait1", 0, &sc->sc_slock); 738 } 739 740 sc->sc_mount = NULL; 741 sc->sc_bdev = NODEV; 742 FSS_UNLOCK(sc, s); 743 744 fss_softc_free(sc); 745 vn_close(sc->sc_bs_vp, FREAD|FWRITE, p->p_ucred, p); 746 sc->sc_bs_vp = NULL; 747 748 FSS_STAT_CLEAR(sc); 749 750 return 0; 751 } 752 753 /* 754 * Get the block address and number of contiguous blocks. 755 * If the file contains a hole, try to allocate. 756 */ 757 static int 758 fss_bmap(struct fss_softc *sc, off_t start, int len, 759 struct vnode **vpp, daddr_t *bnp, int *runp) 760 { 761 int l, s, error; 762 struct buf *bp, **bpp; 763 764 if ((sc->sc_bs_vp->v_mount->mnt_flag & MNT_SOFTDEP) != 0) 765 bpp = &bp; 766 else 767 bpp = NULL; 768 769 vn_lock(sc->sc_bs_vp, LK_EXCLUSIVE|LK_RETRY); 770 771 error = VOP_BMAP(sc->sc_bs_vp, FSS_BTOFSB(sc, start), vpp, bnp, runp); 772 if ((error == 0 && *bnp != (daddr_t)-1) || 773 (sc->sc_flags & FSS_BS_ALLOC) == 0) 774 goto out; 775 776 if (start+len >= sc->sc_bs_size) { 777 error = ENOSPC; 778 goto out; 779 } 780 781 for (l = 0; l < len; l += FSS_FSBSIZE(sc)) { 782 error = VOP_BALLOC(sc->sc_bs_vp, start+l, FSS_FSBSIZE(sc), 783 sc->sc_bs_proc->p_ucred, 0, bpp); 784 if (error) 785 goto out; 786 787 if (bpp == NULL) 788 continue; 789 790 s = splbio(); 791 simple_lock(&bp->b_interlock); 792 793 if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_start) 794 (*bioops.io_start)(bp); 795 if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_complete) 796 (*bioops.io_complete)(bp); 797 798 bp->b_flags |= B_INVAL; 799 simple_unlock(&bp->b_interlock); 800 splx(s); 801 802 brelse(bp); 803 } 804 805 error = VOP_BMAP(sc->sc_bs_vp, FSS_BTOFSB(sc, start), vpp, bnp, runp); 806 807 out: 808 809 VOP_UNLOCK(sc->sc_bs_vp, 0); 810 if (error == 0 && *bnp == (daddr_t)-1) 811 error = ENOSPC; 812 813 return error; 814 } 815 816 /* 817 * A read from the snapshotted block device has completed. 818 */ 819 static void 820 fss_cluster_iodone(struct buf *bp) 821 { 822 int s; 823 struct fss_cache *scp = bp->b_private; 824 825 FSS_LOCK(scp->fc_softc, s); 826 827 if (bp->b_flags & B_EINTR) 828 fss_error(scp->fc_softc, "fs read interrupted"); 829 if (bp->b_flags & B_ERROR) 830 fss_error(scp->fc_softc, "fs read error %d", bp->b_error); 831 832 if (bp->b_vp != NULL) 833 brelvp(bp); 834 835 if (--scp->fc_xfercount == 0) 836 wakeup(&scp->fc_data); 837 838 FSS_UNLOCK(scp->fc_softc, s); 839 840 s = splbio(); 841 pool_put(&bufpool, bp); 842 splx(s); 843 } 844 845 /* 846 * Read a cluster from the snapshotted block device to the cache. 847 */ 848 static void 849 fss_read_cluster(struct fss_softc *sc, u_int32_t cl) 850 { 851 int s, todo, len; 852 caddr_t addr; 853 daddr_t dblk; 854 struct buf *bp; 855 struct fss_cache *scp, *scl; 856 857 /* 858 * Get a free cache slot. 859 */ 860 scl = sc->sc_cache+sc->sc_cache_size; 861 862 FSS_LOCK(sc, s); 863 864 restart: 865 if (isset(sc->sc_copied, cl) || !FSS_ISVALID(sc)) { 866 FSS_UNLOCK(sc, s); 867 return; 868 } 869 870 for (scp = sc->sc_cache; scp < scl; scp++) 871 if (scp->fc_type != FSS_CACHE_FREE && 872 scp->fc_cluster == cl) { 873 ltsleep(&scp->fc_type, PRIBIO, "cowwait2", 0, 874 &sc->sc_slock); 875 goto restart; 876 } 877 878 for (scp = sc->sc_cache; scp < scl; scp++) 879 if (scp->fc_type == FSS_CACHE_FREE) { 880 scp->fc_type = FSS_CACHE_BUSY; 881 scp->fc_cluster = cl; 882 break; 883 } 884 if (scp >= scl) { 885 FSS_STAT_INC(sc, cow_cache_full); 886 ltsleep(&sc->sc_cache, PRIBIO, "cowwait3", 0, &sc->sc_slock); 887 goto restart; 888 } 889 890 FSS_UNLOCK(sc, s); 891 892 /* 893 * Start the read. 894 */ 895 FSS_STAT_INC(sc, cow_copied); 896 897 dblk = btodb(FSS_CLTOB(sc, cl)); 898 addr = scp->fc_data; 899 if (cl == sc->sc_clcount-1) { 900 todo = sc->sc_clresid; 901 memset(addr+todo, 0, FSS_CLSIZE(sc)-todo); 902 } else 903 todo = FSS_CLSIZE(sc); 904 while (todo > 0) { 905 len = todo; 906 if (len > MAXPHYS) 907 len = MAXPHYS; 908 909 s = splbio(); 910 bp = pool_get(&bufpool, PR_WAITOK); 911 splx(s); 912 913 BUF_INIT(bp); 914 bp->b_flags = B_READ|B_CALL; 915 bp->b_bcount = len; 916 bp->b_bufsize = bp->b_bcount; 917 bp->b_error = 0; 918 bp->b_data = addr; 919 bp->b_blkno = bp->b_rawblkno = dblk; 920 bp->b_proc = NULL; 921 bp->b_dev = sc->sc_bdev; 922 bp->b_vp = NULLVP; 923 bp->b_private = scp; 924 bp->b_iodone = fss_cluster_iodone; 925 926 DEV_STRATEGY(bp); 927 928 FSS_LOCK(sc, s); 929 scp->fc_xfercount++; 930 FSS_UNLOCK(sc, s); 931 932 dblk += btodb(len); 933 addr += len; 934 todo -= len; 935 } 936 937 /* 938 * Wait for all read requests to complete. 939 */ 940 FSS_LOCK(sc, s); 941 while (scp->fc_xfercount > 0) 942 ltsleep(&scp->fc_data, PRIBIO, "cowwait", 0, &sc->sc_slock); 943 944 scp->fc_type = FSS_CACHE_VALID; 945 setbit(sc->sc_copied, scp->fc_cluster); 946 FSS_UNLOCK(sc, s); 947 948 wakeup(&sc->sc_bs_proc); 949 } 950 951 /* 952 * Write a cluster from the cache to the backing store. 953 */ 954 static int 955 fss_write_cluster(struct fss_cache *scp, u_int32_t cl) 956 { 957 int s, error, todo, len, nra; 958 daddr_t nbn; 959 caddr_t addr; 960 off_t pos; 961 struct buf *bp; 962 struct vnode *vp; 963 struct fss_softc *sc; 964 965 error = 0; 966 sc = scp->fc_softc; 967 968 pos = FSS_CLTOB(sc, cl); 969 addr = scp->fc_data; 970 todo = FSS_CLSIZE(sc); 971 972 while (todo > 0) { 973 error = fss_bmap(sc, pos, todo, &vp, &nbn, &nra); 974 if (error) 975 break; 976 977 len = FSS_FSBTOB(sc, nra+1)-FSS_FSBOFF(sc, pos); 978 if (len > todo) 979 len = todo; 980 981 s = splbio(); 982 bp = pool_get(&bufpool, PR_WAITOK); 983 splx(s); 984 985 BUF_INIT(bp); 986 bp->b_flags = B_CALL; 987 bp->b_bcount = len; 988 bp->b_bufsize = bp->b_bcount; 989 bp->b_error = 0; 990 bp->b_data = addr; 991 bp->b_blkno = bp->b_rawblkno = nbn+btodb(FSS_FSBOFF(sc, pos)); 992 bp->b_proc = NULL; 993 bp->b_vp = NULLVP; 994 bp->b_private = scp; 995 bp->b_iodone = fss_cluster_iodone; 996 bgetvp(vp, bp); 997 bp->b_vp->v_numoutput++; 998 999 BIO_SETPRIO(bp, BPRIO_TIMECRITICAL); 1000 VOP_STRATEGY(vp, bp); 1001 1002 FSS_LOCK(sc, s); 1003 scp->fc_xfercount++; 1004 FSS_UNLOCK(sc, s); 1005 1006 pos += len; 1007 addr += len; 1008 todo -= len; 1009 } 1010 1011 /* 1012 * Wait for all write requests to complete. 1013 */ 1014 FSS_LOCK(sc, s); 1015 while (scp->fc_xfercount > 0) 1016 ltsleep(&scp->fc_data, PRIBIO, "bswwait", 0, &sc->sc_slock); 1017 FSS_UNLOCK(sc, s); 1018 1019 return error; 1020 } 1021 1022 /* 1023 * Read/write clusters from/to backing store. 1024 */ 1025 static int 1026 fss_bs_io(struct fss_softc *sc, fss_io_type rw, 1027 u_int32_t cl, long off, int len, caddr_t data) 1028 { 1029 int s, error, todo, count, nra; 1030 off_t pos; 1031 daddr_t nbn; 1032 struct buf *bp; 1033 struct vnode *vp; 1034 1035 todo = len; 1036 pos = FSS_CLTOB(sc, cl)+off; 1037 error = 0; 1038 1039 while (todo > 0) { 1040 error = fss_bmap(sc, pos, todo, &vp, &nbn, &nra); 1041 if (error) 1042 break; 1043 1044 count = FSS_FSBTOB(sc, nra+1)-FSS_FSBOFF(sc, pos); 1045 if (count > todo) 1046 count = todo; 1047 1048 s = splbio(); 1049 bp = pool_get(&bufpool, PR_WAITOK); 1050 splx(s); 1051 1052 BUF_INIT(bp); 1053 bp->b_flags = (rw == FSS_READ ? B_READ : 0); 1054 bp->b_bcount = count; 1055 bp->b_bufsize = bp->b_bcount; 1056 bp->b_error = 0; 1057 bp->b_data = data; 1058 bp->b_blkno = bp->b_rawblkno = nbn+btodb(FSS_FSBOFF(sc, pos)); 1059 bp->b_proc = NULL; 1060 bp->b_vp = NULLVP; 1061 bgetvp(vp, bp); 1062 if ((bp->b_flags & B_READ) == 0) 1063 bp->b_vp->v_numoutput++; 1064 1065 if ((bp->b_flags & B_READ) == 0 || cl < sc->sc_indir_size) 1066 BIO_SETPRIO(bp, BPRIO_TIMECRITICAL); 1067 VOP_STRATEGY(vp, bp); 1068 1069 error = biowait(bp); 1070 1071 if (bp->b_vp != NULL) 1072 brelvp(bp); 1073 1074 s = splbio(); 1075 pool_put(&bufpool, bp); 1076 splx(s); 1077 1078 if (error) 1079 break; 1080 1081 todo -= count; 1082 data += count; 1083 pos += count; 1084 } 1085 1086 return error; 1087 } 1088 1089 /* 1090 * Get a pointer to the indirect slot for this cluster. 1091 */ 1092 static u_int32_t * 1093 fss_bs_indir(struct fss_softc *sc, u_int32_t cl) 1094 { 1095 u_int32_t icl; 1096 int ioff; 1097 1098 icl = cl/(FSS_CLSIZE(sc)/sizeof(u_int32_t)); 1099 ioff = cl%(FSS_CLSIZE(sc)/sizeof(u_int32_t)); 1100 1101 if (sc->sc_indir_cur == icl) 1102 return &sc->sc_indir_data[ioff]; 1103 1104 if (sc->sc_indir_dirty) { 1105 FSS_STAT_INC(sc, indir_write); 1106 if (fss_bs_io(sc, FSS_WRITE, sc->sc_indir_cur, 0, 1107 FSS_CLSIZE(sc), (caddr_t)sc->sc_indir_data) != 0) 1108 return NULL; 1109 setbit(sc->sc_indir_valid, sc->sc_indir_cur); 1110 } 1111 1112 sc->sc_indir_dirty = 0; 1113 sc->sc_indir_cur = icl; 1114 1115 if (isset(sc->sc_indir_valid, sc->sc_indir_cur)) { 1116 FSS_STAT_INC(sc, indir_read); 1117 if (fss_bs_io(sc, FSS_READ, sc->sc_indir_cur, 0, 1118 FSS_CLSIZE(sc), (caddr_t)sc->sc_indir_data) != 0) 1119 return NULL; 1120 } else 1121 memset(sc->sc_indir_data, 0, FSS_CLSIZE(sc)); 1122 1123 return &sc->sc_indir_data[ioff]; 1124 } 1125 1126 /* 1127 * The kernel thread (one for every active snapshot). 1128 * 1129 * After wakeup it cleans the cache and runs the I/O requests. 1130 */ 1131 static void 1132 fss_bs_thread(void *arg) 1133 { 1134 int error, len, nfreed, nio, s; 1135 long off; 1136 caddr_t addr; 1137 u_int32_t c, cl, ch, *indirp; 1138 struct buf *bp, *nbp; 1139 struct fss_softc *sc; 1140 struct fss_cache *scp, *scl; 1141 1142 sc = arg; 1143 1144 scl = sc->sc_cache+sc->sc_cache_size; 1145 1146 s = splbio(); 1147 nbp = pool_get(&bufpool, PR_WAITOK); 1148 splx(s); 1149 1150 nfreed = nio = 1; /* Dont sleep the first time */ 1151 1152 FSS_LOCK(sc, s); 1153 1154 for (;;) { 1155 if (nfreed == 0 && nio == 0) 1156 ltsleep(&sc->sc_bs_proc, PVM-1, "fssbs", 0, 1157 &sc->sc_slock); 1158 1159 if ((sc->sc_flags & FSS_BS_THREAD) == 0) { 1160 sc->sc_bs_proc = NULL; 1161 wakeup(&sc->sc_bs_proc); 1162 1163 FSS_UNLOCK(sc, s); 1164 1165 s = splbio(); 1166 pool_put(&bufpool, nbp); 1167 splx(s); 1168 #ifdef FSS_STATISTICS 1169 printf("fss%d: cow called %" PRId64 " times," 1170 " copied %" PRId64 " clusters," 1171 " cache full %" PRId64 " times\n", 1172 sc->sc_unit, 1173 FSS_STAT_VAL(sc, cow_calls), 1174 FSS_STAT_VAL(sc, cow_copied), 1175 FSS_STAT_VAL(sc, cow_cache_full)); 1176 printf("fss%d: %" PRId64 " indir reads," 1177 " %" PRId64 " indir writes\n", 1178 sc->sc_unit, 1179 FSS_STAT_VAL(sc, indir_read), 1180 FSS_STAT_VAL(sc, indir_write)); 1181 #endif /* FSS_STATISTICS */ 1182 kthread_exit(0); 1183 } 1184 1185 /* 1186 * Clean the cache 1187 */ 1188 nfreed = 0; 1189 for (scp = sc->sc_cache; scp < scl; scp++) { 1190 if (scp->fc_type != FSS_CACHE_VALID) 1191 continue; 1192 1193 FSS_UNLOCK(sc, s); 1194 1195 indirp = fss_bs_indir(sc, scp->fc_cluster); 1196 if (indirp != NULL) { 1197 error = fss_write_cluster(scp, sc->sc_clnext); 1198 } else 1199 error = EIO; 1200 1201 FSS_LOCK(sc, s); 1202 1203 if (error == 0) { 1204 *indirp = sc->sc_clnext++; 1205 sc->sc_indir_dirty = 1; 1206 } else 1207 fss_error(sc, "write bs error %d", error); 1208 1209 scp->fc_type = FSS_CACHE_FREE; 1210 nfreed++; 1211 wakeup(&scp->fc_type); 1212 } 1213 1214 if (nfreed) 1215 wakeup(&sc->sc_cache); 1216 1217 /* 1218 * Process I/O requests 1219 */ 1220 nio = 0; 1221 1222 if ((bp = BUFQ_GET(&sc->sc_bufq)) == NULL) 1223 continue; 1224 1225 nio++; 1226 1227 if (!FSS_ISVALID(sc)) { 1228 bp->b_error = ENXIO; 1229 bp->b_flags |= B_ERROR; 1230 bp->b_resid = bp->b_bcount; 1231 biodone(bp); 1232 continue; 1233 } 1234 1235 /* 1236 * First read from the snapshotted block device. 1237 * XXX Split to only read those parts that have not 1238 * been saved to backing store? 1239 */ 1240 1241 FSS_UNLOCK(sc, s); 1242 1243 BUF_INIT(nbp); 1244 nbp->b_flags = B_READ; 1245 nbp->b_bcount = bp->b_bcount; 1246 nbp->b_bufsize = bp->b_bcount; 1247 nbp->b_error = 0; 1248 nbp->b_data = bp->b_data; 1249 nbp->b_blkno = nbp->b_rawblkno = bp->b_blkno; 1250 nbp->b_proc = bp->b_proc; 1251 nbp->b_dev = sc->sc_bdev; 1252 nbp->b_vp = NULLVP; 1253 1254 DEV_STRATEGY(nbp); 1255 1256 if (biowait(nbp) != 0) { 1257 bp->b_resid = bp->b_bcount; 1258 bp->b_error = nbp->b_error; 1259 bp->b_flags |= B_ERROR; 1260 biodone(bp); 1261 continue; 1262 } 1263 1264 cl = FSS_BTOCL(sc, dbtob(bp->b_blkno)); 1265 off = FSS_CLOFF(sc, dbtob(bp->b_blkno)); 1266 ch = FSS_BTOCL(sc, dbtob(bp->b_blkno)+bp->b_bcount-1); 1267 bp->b_resid = bp->b_bcount; 1268 addr = bp->b_data; 1269 1270 FSS_LOCK(sc, s); 1271 1272 /* 1273 * Replace those parts that have been saved to backing store. 1274 */ 1275 1276 for (c = cl; c <= ch; 1277 c++, off = 0, bp->b_resid -= len, addr += len) { 1278 len = FSS_CLSIZE(sc)-off; 1279 if (len > bp->b_resid) 1280 len = bp->b_resid; 1281 1282 if (isclr(sc->sc_copied, c)) 1283 continue; 1284 1285 FSS_UNLOCK(sc, s); 1286 1287 indirp = fss_bs_indir(sc, c); 1288 1289 FSS_LOCK(sc, s); 1290 1291 if (indirp == NULL || *indirp == 0) { 1292 /* 1293 * Not on backing store. Either in cache 1294 * or hole in the snapshotted block device. 1295 */ 1296 for (scp = sc->sc_cache; scp < scl; scp++) 1297 if (scp->fc_type == FSS_CACHE_VALID && 1298 scp->fc_cluster == c) 1299 break; 1300 if (scp < scl) 1301 memcpy(addr, scp->fc_data+off, len); 1302 else 1303 memset(addr, 0, len); 1304 continue; 1305 } 1306 /* 1307 * Read from backing store. 1308 */ 1309 1310 FSS_UNLOCK(sc, s); 1311 1312 if ((error = fss_bs_io(sc, FSS_READ, *indirp, 1313 off, len, addr)) != 0) { 1314 bp->b_resid = bp->b_bcount; 1315 bp->b_error = error; 1316 bp->b_flags |= B_ERROR; 1317 break; 1318 } 1319 1320 FSS_LOCK(sc, s); 1321 1322 } 1323 1324 biodone(bp); 1325 } 1326 } 1327