1 /* $NetBSD: lfs_vfsops.c,v 1.71 2001/12/18 07:51:18 chs Exp $ */ 2 3 /*- 4 * Copyright (c) 1999, 2000 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Konrad E. Schroder <perseant@hhhh.org>. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 /*- 39 * Copyright (c) 1989, 1991, 1993, 1994 40 * The Regents of the University of California. All rights reserved. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. All advertising materials mentioning features or use of this software 51 * must display the following acknowledgement: 52 * This product includes software developed by the University of 53 * California, Berkeley and its contributors. 54 * 4. Neither the name of the University nor the names of its contributors 55 * may be used to endorse or promote products derived from this software 56 * without specific prior written permission. 57 * 58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 68 * SUCH DAMAGE. 69 * 70 * @(#)lfs_vfsops.c 8.20 (Berkeley) 6/10/95 71 */ 72 73 #include <sys/cdefs.h> 74 __KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.71 2001/12/18 07:51:18 chs Exp $"); 75 76 #if defined(_KERNEL_OPT) 77 #include "opt_quota.h" 78 #endif 79 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/namei.h> 83 #include <sys/proc.h> 84 #include <sys/kernel.h> 85 #include <sys/vnode.h> 86 #include <sys/mount.h> 87 #include <sys/buf.h> 88 #include <sys/device.h> 89 #include <sys/mbuf.h> 90 #include <sys/file.h> 91 #include <sys/disklabel.h> 92 #include <sys/ioctl.h> 93 #include <sys/errno.h> 94 #include <sys/malloc.h> 95 #include <sys/pool.h> 96 #include <sys/socket.h> 97 #include <uvm/uvm_extern.h> 98 #include <sys/sysctl.h> 99 100 #include <miscfs/specfs/specdev.h> 101 102 #include <ufs/ufs/quota.h> 103 #include <ufs/ufs/inode.h> 104 #include <ufs/ufs/ufsmount.h> 105 #include <ufs/ufs/ufs_extern.h> 106 107 #include <ufs/lfs/lfs.h> 108 #include <ufs/lfs/lfs_extern.h> 109 110 int lfs_mountfs(struct vnode *, struct mount *, struct proc *); 111 112 extern const struct vnodeopv_desc lfs_vnodeop_opv_desc; 113 extern const struct vnodeopv_desc lfs_specop_opv_desc; 114 extern const struct vnodeopv_desc lfs_fifoop_opv_desc; 115 116 const struct vnodeopv_desc * const lfs_vnodeopv_descs[] = { 117 &lfs_vnodeop_opv_desc, 118 &lfs_specop_opv_desc, 119 &lfs_fifoop_opv_desc, 120 NULL, 121 }; 122 123 struct vfsops lfs_vfsops = { 124 MOUNT_LFS, 125 lfs_mount, 126 ufs_start, 127 lfs_unmount, 128 ufs_root, 129 ufs_quotactl, 130 lfs_statfs, 131 lfs_sync, 132 lfs_vget, 133 lfs_fhtovp, 134 lfs_vptofh, 135 lfs_init, 136 lfs_reinit, 137 lfs_done, 138 lfs_sysctl, 139 lfs_mountroot, 140 ufs_check_export, 141 lfs_vnodeopv_descs, 142 }; 143 144 struct genfs_ops lfs_genfsops = { 145 NULL, 146 NULL, 147 genfs_compat_gop_write, 148 }; 149 150 struct pool lfs_inode_pool; 151 152 extern int locked_queue_count; 153 extern long locked_queue_bytes; 154 155 /* 156 * Initialize the filesystem, most work done by ufs_init. 157 */ 158 void 159 lfs_init() 160 { 161 ufs_init(); 162 163 /* 164 * XXX Same structure as FFS inodes? Should we share a common pool? 165 */ 166 pool_init(&lfs_inode_pool, sizeof(struct inode), 0, 0, 0, 167 "lfsinopl", 0, pool_page_alloc_nointr, pool_page_free_nointr, 168 M_LFSNODE); 169 } 170 171 void 172 lfs_reinit() 173 { 174 ufs_reinit(); 175 } 176 177 void 178 lfs_done() 179 { 180 ufs_done(); 181 pool_destroy(&lfs_inode_pool); 182 } 183 184 /* 185 * Called by main() when ufs is going to be mounted as root. 186 */ 187 int 188 lfs_mountroot() 189 { 190 extern struct vnode *rootvp; 191 struct mount *mp; 192 struct proc *p = curproc; /* XXX */ 193 int error; 194 195 if (root_device->dv_class != DV_DISK) 196 return (ENODEV); 197 198 if (rootdev == NODEV) 199 return (ENODEV); 200 /* 201 * Get vnodes for swapdev and rootdev. 202 */ 203 if ((error = bdevvp(rootdev, &rootvp))) { 204 printf("lfs_mountroot: can't setup bdevvp's"); 205 return (error); 206 } 207 if ((error = vfs_rootmountalloc(MOUNT_LFS, "root_device", &mp))) { 208 vrele(rootvp); 209 return (error); 210 } 211 if ((error = lfs_mountfs(rootvp, mp, p))) { 212 mp->mnt_op->vfs_refcount--; 213 vfs_unbusy(mp); 214 free(mp, M_MOUNT); 215 vrele(rootvp); 216 return (error); 217 } 218 simple_lock(&mountlist_slock); 219 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); 220 simple_unlock(&mountlist_slock); 221 (void)lfs_statfs(mp, &mp->mnt_stat, p); 222 vfs_unbusy(mp); 223 inittodr(VFSTOUFS(mp)->um_lfs->lfs_tstamp); 224 return (0); 225 } 226 227 /* 228 * VFS Operations. 229 * 230 * mount system call 231 */ 232 int 233 lfs_mount(struct mount *mp, const char *path, void *data, struct nameidata *ndp, struct proc *p) 234 { 235 struct vnode *devvp; 236 struct ufs_args args; 237 struct ufsmount *ump = NULL; 238 struct lfs *fs = NULL; /* LFS */ 239 size_t size; 240 int error; 241 mode_t accessmode; 242 243 error = copyin(data, (caddr_t)&args, sizeof (struct ufs_args)); 244 if (error) 245 return (error); 246 247 #if 0 248 /* Until LFS can do NFS right. XXX */ 249 if (args.export.ex_flags & MNT_EXPORTED) 250 return (EINVAL); 251 #endif 252 253 /* 254 * If updating, check whether changing from read-only to 255 * read/write; if there is no device name, that's all we do. 256 */ 257 if (mp->mnt_flag & MNT_UPDATE) { 258 ump = VFSTOUFS(mp); 259 fs = ump->um_lfs; 260 if (fs->lfs_ronly && (mp->mnt_flag & MNT_WANTRDWR)) { 261 /* 262 * If upgrade to read-write by non-root, then verify 263 * that user has necessary permissions on the device. 264 */ 265 if (p->p_ucred->cr_uid != 0) { 266 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 267 error = VOP_ACCESS(ump->um_devvp, VREAD|VWRITE, 268 p->p_ucred, p); 269 VOP_UNLOCK(ump->um_devvp, 0); 270 if (error) 271 return (error); 272 } 273 fs->lfs_ronly = 0; 274 } 275 if (args.fspec == 0) { 276 /* 277 * Process export requests. 278 */ 279 return (vfs_export(mp, &ump->um_export, &args.export)); 280 } 281 } 282 /* 283 * Not an update, or updating the name: look up the name 284 * and verify that it refers to a sensible block device. 285 */ 286 NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); 287 if ((error = namei(ndp)) != 0) 288 return (error); 289 devvp = ndp->ni_vp; 290 if (devvp->v_type != VBLK) { 291 vrele(devvp); 292 return (ENOTBLK); 293 } 294 if (major(devvp->v_rdev) >= nblkdev) { 295 vrele(devvp); 296 return (ENXIO); 297 } 298 /* 299 * If mount by non-root, then verify that user has necessary 300 * permissions on the device. 301 */ 302 if (p->p_ucred->cr_uid != 0) { 303 accessmode = VREAD; 304 if ((mp->mnt_flag & MNT_RDONLY) == 0) 305 accessmode |= VWRITE; 306 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 307 error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p); 308 if (error) { 309 vput(devvp); 310 return (error); 311 } 312 VOP_UNLOCK(devvp, 0); 313 } 314 if ((mp->mnt_flag & MNT_UPDATE) == 0) 315 error = lfs_mountfs(devvp, mp, p); /* LFS */ 316 else { 317 if (devvp != ump->um_devvp) 318 error = EINVAL; /* needs translation */ 319 else 320 vrele(devvp); 321 } 322 if (error) { 323 vrele(devvp); 324 return (error); 325 } 326 ump = VFSTOUFS(mp); 327 fs = ump->um_lfs; /* LFS */ 328 (void)copyinstr(path, fs->lfs_fsmnt, sizeof(fs->lfs_fsmnt) - 1, &size); 329 bzero(fs->lfs_fsmnt + size, sizeof(fs->lfs_fsmnt) - size); 330 bcopy(fs->lfs_fsmnt, mp->mnt_stat.f_mntonname, MNAMELEN); 331 (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 332 &size); 333 bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 334 return (0); 335 } 336 337 /* 338 * Roll-forward code. 339 */ 340 341 /* 342 * Load the appropriate indirect block, and change the appropriate pointer. 343 * Mark the block dirty. Do segment and avail accounting. 344 */ 345 static int 346 update_meta(struct lfs *fs, ino_t ino, int version, ufs_daddr_t lbn, 347 daddr_t ndaddr, size_t size, struct proc *p) 348 { 349 int error; 350 struct vnode *vp; 351 struct inode *ip; 352 daddr_t odaddr, ooff; 353 struct indir a[NIADDR], *ap; 354 struct buf *bp; 355 SEGUSE *sup; 356 int num; 357 358 if ((error = lfs_rf_valloc(fs, ino, version, p, &vp)) != 0) { 359 #ifdef DEBUG_LFS_RFW 360 printf("update_meta: ino %d: lfs_rf_valloc returned %d\n", ino, 361 error); 362 #endif 363 return error; 364 } 365 366 if ((error = VOP_BALLOC(vp, (lbn << fs->lfs_bshift), size, 367 NOCRED, 0, &bp)) != 0) { 368 vput(vp); 369 return (error); 370 } 371 /* No need to write, the block is already on disk */ 372 if (bp->b_flags & B_DELWRI) { 373 LFS_UNLOCK_BUF(bp); 374 fs->lfs_avail += btofsb(fs, bp->b_bcount); 375 } 376 bp->b_flags |= B_INVAL; 377 brelse(bp); 378 379 /* 380 * Extend the file, if it is not large enough already. 381 * XXX this is not exactly right, we don't know how much of the 382 * XXX last block is actually used. We hope that an inode will 383 * XXX appear later to give the correct size. 384 */ 385 ip = VTOI(vp); 386 if (ip->i_ffs_size <= (lbn << fs->lfs_bshift)) { 387 if (lbn < NDADDR) 388 ip->i_ffs_size = (lbn << fs->lfs_bshift) + 389 (size - fs->lfs_fsize) + 1; 390 else 391 ip->i_ffs_size = (lbn << fs->lfs_bshift) + 1; 392 } 393 394 error = ufs_bmaparray(vp, lbn, &odaddr, &a[0], &num, NULL); 395 if (error) { 396 #ifdef DEBUG_LFS_RFW 397 printf("update_meta: ufs_bmaparray returned %d\n", error); 398 #endif 399 vput(vp); 400 return error; 401 } 402 switch (num) { 403 case 0: 404 ooff = ip->i_ffs_db[lbn]; 405 if (ooff == UNWRITTEN) 406 ip->i_ffs_blocks += btofsb(fs, size); 407 ip->i_ffs_db[lbn] = ndaddr; 408 break; 409 case 1: 410 ooff = ip->i_ffs_ib[a[0].in_off]; 411 if (ooff == UNWRITTEN) 412 ip->i_ffs_blocks += btofsb(fs, size); 413 ip->i_ffs_ib[a[0].in_off] = ndaddr; 414 break; 415 default: 416 ap = &a[num - 1]; 417 if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp)) 418 panic("update_meta: bread bno %d", ap->in_lbn); 419 420 ooff = ((ufs_daddr_t *)bp->b_data)[ap->in_off]; 421 if (ooff == UNWRITTEN) 422 ip->i_ffs_blocks += btofsb(fs, size); 423 ((ufs_daddr_t *)bp->b_data)[ap->in_off] = ndaddr; 424 (void) VOP_BWRITE(bp); 425 } 426 LFS_SET_UINO(ip, IN_CHANGE | IN_MODIFIED | IN_UPDATE); 427 428 /* Update segment usage information. */ 429 if (odaddr > 0) { 430 LFS_SEGENTRY(sup, fs, dtosn(fs, dbtofsb(fs, odaddr)), bp); 431 #ifdef DIAGNOSTIC 432 if (sup->su_nbytes < size) { 433 panic("update_meta: negative bytes " 434 "(segment %d short by %ld)\n", 435 dtosn(fs, dbtofsb(fs, odaddr)), (long)size - sup->su_nbytes); 436 sup->su_nbytes = size; 437 } 438 #endif 439 sup->su_nbytes -= size; 440 VOP_BWRITE(bp); 441 } 442 LFS_SEGENTRY(sup, fs, dtosn(fs, ndaddr), bp); 443 sup->su_nbytes += size; 444 VOP_BWRITE(bp); 445 446 /* Fix this so it can be released */ 447 /* ip->i_lfs_effnblks = ip->i_ffs_blocks; */ 448 449 #ifdef DEBUG_LFS_RFW 450 /* Now look again to make sure it worked */ 451 ufs_bmaparray(vp, lbn, &odaddr, &a[0], &num, NULL ); 452 if (dbtofsb(fs, odaddr) != ndaddr) 453 printf("update_meta: failed setting ino %d lbn %d to %x\n", 454 ino, lbn, ndaddr); 455 #endif 456 vput(vp); 457 return 0; 458 } 459 460 static int 461 update_inoblk(struct lfs *fs, daddr_t offset, struct ucred *cred, 462 struct proc *p) 463 { 464 struct vnode *devvp, *vp; 465 struct inode *ip; 466 struct dinode *dip; 467 struct buf *dbp, *ibp; 468 int error; 469 daddr_t daddr; 470 IFILE *ifp; 471 SEGUSE *sup; 472 473 devvp = VTOI(fs->lfs_ivnode)->i_devvp; 474 475 /* 476 * Get the inode, update times and perms. 477 * DO NOT update disk blocks, we do that separately. 478 */ 479 error = bread(devvp, fsbtodb(fs, offset), fs->lfs_ibsize, cred, &dbp); 480 if (error) { 481 #ifdef DEBUG_LFS_RFW 482 printf("update_inoblk: bread returned %d\n", error); 483 #endif 484 return error; 485 } 486 dip = ((struct dinode *)(dbp->b_data)) + INOPB(fs); 487 while (--dip >= (struct dinode *)dbp->b_data) { 488 if (dip->di_inumber > LFS_IFILE_INUM) { 489 /* printf("ino %d version %d\n", dip->di_inumber, 490 dip->di_gen); */ 491 error = lfs_rf_valloc(fs, dip->di_inumber, dip->di_gen, 492 p, &vp); 493 if (error) { 494 #ifdef DEBUG_LFS_RFW 495 printf("update_inoblk: lfs_rf_valloc returned %d\n", error); 496 #endif 497 continue; 498 } 499 ip = VTOI(vp); 500 if (dip->di_size != ip->i_ffs_size) 501 VOP_TRUNCATE(vp, dip->di_size, 0, NOCRED, p); 502 /* Get mode, link count, size, and times */ 503 memcpy(&ip->i_din.ffs_din, dip, 504 offsetof(struct dinode, di_db[0])); 505 506 /* Then the rest, except di_blocks */ 507 ip->i_ffs_flags = dip->di_flags; 508 ip->i_ffs_gen = dip->di_gen; 509 ip->i_ffs_uid = dip->di_uid; 510 ip->i_ffs_gid = dip->di_gid; 511 512 ip->i_ffs_effnlink = dip->di_nlink; 513 514 LFS_SET_UINO(ip, IN_CHANGE | IN_MODIFIED | IN_UPDATE); 515 516 /* Re-initialize to get type right */ 517 ufs_vinit(vp->v_mount, lfs_specop_p, lfs_fifoop_p, 518 &vp); 519 vput(vp); 520 521 /* Record change in location */ 522 LFS_IENTRY(ifp, fs, dip->di_inumber, ibp); 523 daddr = ifp->if_daddr; 524 ifp->if_daddr = dbtofsb(fs, dbp->b_blkno); 525 error = VOP_BWRITE(ibp); /* Ifile */ 526 /* And do segment accounting */ 527 if (dtosn(fs, daddr) != dtosn(fs, dbtofsb(fs, dbp->b_blkno))) { 528 if (daddr > 0) { 529 LFS_SEGENTRY(sup, fs, dtosn(fs, daddr), 530 ibp); 531 sup->su_nbytes -= DINODE_SIZE; 532 VOP_BWRITE(ibp); 533 } 534 LFS_SEGENTRY(sup, fs, dtosn(fs, dbtofsb(fs, dbp->b_blkno)), 535 ibp); 536 sup->su_nbytes += DINODE_SIZE; 537 VOP_BWRITE(ibp); 538 } 539 } 540 } 541 dbp->b_flags |= B_AGE; 542 brelse(dbp); 543 544 return 0; 545 } 546 547 #define CHECK_CKSUM 0x0001 /* Check the checksum to make sure it's valid */ 548 #define CHECK_UPDATE 0x0002 /* Update Ifile for new data blocks / inodes */ 549 550 static daddr_t 551 check_segsum(struct lfs *fs, daddr_t offset, 552 struct ucred *cred, int flags, int *pseg_flags, struct proc *p) 553 { 554 struct vnode *devvp; 555 struct buf *bp, *dbp; 556 int error, nblocks, ninos, i, j; 557 SEGSUM *ssp; 558 u_long *dp, *datap; /* XXX u_int32_t */ 559 daddr_t *iaddr, oldoffset; 560 FINFO *fip; 561 SEGUSE *sup; 562 size_t size; 563 u_int64_t serial; 564 565 devvp = VTOI(fs->lfs_ivnode)->i_devvp; 566 /* 567 * If the segment has a superblock and we're at the top 568 * of the segment, skip the superblock. 569 */ 570 if (sntod(fs, dtosn(fs, offset)) == offset) { 571 LFS_SEGENTRY(sup, fs, dtosn(fs, offset), bp); 572 if (sup->su_flags & SEGUSE_SUPERBLOCK) 573 offset += btofsb(fs, LFS_SBPAD); 574 brelse(bp); 575 } 576 577 /* Read in the segment summary */ 578 error = bread(devvp, offset, fs->lfs_sumsize, cred, &bp); 579 if (error) 580 return -1; 581 582 /* Check summary checksum */ 583 ssp = (SEGSUM *)bp->b_data; 584 if (flags & CHECK_CKSUM) { 585 if (ssp->ss_sumsum != cksum(&ssp->ss_datasum, 586 fs->lfs_sumsize - 587 sizeof(ssp->ss_sumsum))) { 588 #ifdef DEBUG_LFS_RFW 589 printf("Sumsum error at 0x%x\n", offset); 590 #endif 591 offset = -1; 592 goto err1; 593 } 594 if (ssp->ss_nfinfo == 0 && ssp->ss_ninos == 0) { 595 #ifdef DEBUG_LFS_RFW 596 printf("Empty pseg at 0x%x\n", offset); 597 #endif 598 offset = -1; 599 goto err1; 600 } 601 if (ssp->ss_create < fs->lfs_tstamp) { 602 #ifdef DEBUG_LFS_RFW 603 printf("Old data at 0x%x\n", offset); 604 #endif 605 offset = -1; 606 goto err1; 607 } 608 } 609 if (fs->lfs_version > 1) { 610 serial = ssp->ss_serial; 611 if (serial != fs->lfs_serial + 1) { 612 #ifdef DEBUG_LFS_RFW 613 printf("Unexpected serial number at 0x%x\n", offset); 614 #endif 615 offset = -1; 616 goto err1; 617 } 618 if (ssp->ss_ident != fs->lfs_ident) { 619 #ifdef DEBUG_LFS_RFW 620 printf("Incorrect fsid (0x%x vs 0x%x) at 0x%x\n", 621 ssp->ss_ident, fs->lfs_ident, offset); 622 #endif 623 offset = -1; 624 goto err1; 625 } 626 } 627 if (pseg_flags) 628 *pseg_flags = ssp->ss_flags; 629 oldoffset = offset; 630 offset += btofsb(fs, fs->lfs_sumsize); 631 632 ninos = howmany(ssp->ss_ninos, INOPB(fs)); 633 iaddr = (daddr_t *)(bp->b_data + fs->lfs_sumsize - sizeof(daddr_t)); 634 if (flags & CHECK_CKSUM) { 635 /* Count blocks */ 636 nblocks = 0; 637 fip = (FINFO *)(bp->b_data + SEGSUM_SIZE(fs)); 638 for (i = 0; i < ssp->ss_nfinfo; ++i) { 639 nblocks += fip->fi_nblocks; 640 if (fip->fi_nblocks <= 0) 641 break; 642 fip = (FINFO *)(((char *)fip) + sizeof(FINFO) + 643 (fip->fi_nblocks - 1) * 644 sizeof(ufs_daddr_t)); 645 } 646 nblocks += ninos; 647 /* Create the sum array */ 648 datap = dp = (u_long *)malloc(nblocks * sizeof(u_long), 649 M_SEGMENT, M_WAITOK); 650 } 651 652 /* Handle individual blocks */ 653 fip = (FINFO *)(bp->b_data + SEGSUM_SIZE(fs)); 654 for (i = 0; i < ssp->ss_nfinfo || ninos; ++i) { 655 /* Inode block? */ 656 if (ninos && *iaddr == offset) { 657 if (flags & CHECK_CKSUM) { 658 /* Read in the head and add to the buffer */ 659 error = bread(devvp, fsbtodb(fs, offset), fs->lfs_bsize, 660 cred, &dbp); 661 if (error) { 662 offset = -1; 663 goto err2; 664 } 665 (*dp++) = ((u_long *)(dbp->b_data))[0]; 666 dbp->b_flags |= B_AGE; 667 brelse(dbp); 668 } 669 if (flags & CHECK_UPDATE) { 670 if ((error = update_inoblk(fs, offset, cred, p)) 671 != 0) { 672 offset = -1; 673 goto err2; 674 } 675 } 676 offset += btofsb(fs, fs->lfs_ibsize); 677 --iaddr; 678 --ninos; 679 --i; /* compensate */ 680 continue; 681 } 682 /* printf("check: blocks from ino %d version %d\n", 683 fip->fi_ino, fip->fi_version); */ 684 size = fs->lfs_bsize; 685 for (j = 0; j < fip->fi_nblocks; ++j) { 686 if (j == fip->fi_nblocks - 1) 687 size = fip->fi_lastlength; 688 if (flags & CHECK_CKSUM) { 689 error = bread(devvp, fsbtodb(fs, offset), size, cred, &dbp); 690 if (error) { 691 offset = -1; 692 goto err2; 693 } 694 (*dp++) = ((u_long *)(dbp->b_data))[0]; 695 dbp->b_flags |= B_AGE; 696 brelse(dbp); 697 } 698 /* Account for and update any direct blocks */ 699 if ((flags & CHECK_UPDATE) && 700 fip->fi_ino > LFS_IFILE_INUM && 701 fip->fi_blocks[j] >= 0) { 702 update_meta(fs, fip->fi_ino, fip->fi_version, 703 fip->fi_blocks[j], offset, size, p); 704 } 705 offset += btofsb(fs, size); 706 } 707 fip = (FINFO *)(((char *)fip) + sizeof(FINFO) 708 + (fip->fi_nblocks - 1) * sizeof(ufs_daddr_t)); 709 } 710 /* Checksum the array, compare */ 711 if ((flags & CHECK_CKSUM) && 712 ssp->ss_datasum != cksum(datap, nblocks * sizeof(u_long))) 713 { 714 #ifdef DEBUG_LFS_RFW 715 printf("Datasum error at 0x%x (wanted %x got %x)\n", offset, 716 ssp->ss_datasum, cksum(datap, nblocks * 717 sizeof(u_long))); 718 #endif 719 offset = -1; 720 goto err2; 721 } 722 723 /* If we're at the end of the segment, move to the next */ 724 if (dtosn(fs, offset + btofsb(fs, fs->lfs_sumsize + fs->lfs_bsize)) != 725 dtosn(fs, offset)) { 726 if (dtosn(fs, offset) == dtosn(fs, ssp->ss_next)) { 727 offset = -1; 728 goto err2; 729 } 730 offset = ssp->ss_next; 731 #ifdef DEBUG_LFS_RFW 732 printf("LFS roll forward: moving on to offset 0x%x " 733 " -> segment %d\n", offset, dtosn(fs,offset)); 734 #endif 735 } 736 737 if (flags & CHECK_UPDATE) { 738 fs->lfs_avail -= (offset - oldoffset); 739 /* Don't clog the buffer queue */ 740 if (locked_queue_count > LFS_MAX_BUFS || 741 locked_queue_bytes > LFS_MAX_BYTES) { 742 ++fs->lfs_writer; 743 lfs_flush(fs, SEGM_CKP); 744 if (--fs->lfs_writer == 0) 745 wakeup(&fs->lfs_dirops); 746 } 747 } 748 749 err2: 750 if (flags & CHECK_CKSUM) 751 free(datap, M_SEGMENT); 752 err1: 753 bp->b_flags |= B_AGE; 754 brelse(bp); 755 756 /* XXX should we update the serial number even for bad psegs? */ 757 if ((flags & CHECK_UPDATE) && offset > 0 && fs->lfs_version > 1) 758 fs->lfs_serial = serial; 759 return offset; 760 } 761 762 /* 763 * Common code for mount and mountroot 764 * LFS specific 765 */ 766 int 767 lfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p) 768 { 769 extern struct vnode *rootvp; 770 struct dlfs *tdfs, *dfs, *adfs; 771 struct lfs *fs; 772 struct ufsmount *ump; 773 struct vnode *vp; 774 struct buf *bp, *abp; 775 struct partinfo dpart; 776 dev_t dev; 777 int error, i, ronly, secsize, fsbsize; 778 struct ucred *cred; 779 CLEANERINFO *cip; 780 SEGUSE *sup; 781 int flags, dirty, do_rollforward; 782 daddr_t offset, oldoffset, lastgoodpseg, sb_addr; 783 int sn, curseg; 784 785 cred = p ? p->p_ucred : NOCRED; 786 /* 787 * Disallow multiple mounts of the same device. 788 * Disallow mounting of a device that is currently in use 789 * (except for root, which might share swap device for miniroot). 790 * Flush out any old buffers remaining from a previous use. 791 */ 792 if ((error = vfs_mountedon(devvp)) != 0) 793 return (error); 794 if (vcount(devvp) > 1 && devvp != rootvp) 795 return (EBUSY); 796 if ((error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0)) != 0) 797 return (error); 798 799 ronly = (mp->mnt_flag & MNT_RDONLY) != 0; 800 error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p); 801 if (error) 802 return (error); 803 if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, p) != 0) 804 secsize = DEV_BSIZE; 805 else 806 secsize = dpart.disklab->d_secsize; 807 808 /* Don't free random space on error. */ 809 bp = NULL; 810 abp = NULL; 811 ump = NULL; 812 813 sb_addr = LFS_LABELPAD / secsize; 814 while (1) { 815 /* Read in the superblock. */ 816 error = bread(devvp, sb_addr, LFS_SBPAD, cred, &bp); 817 if (error) 818 goto out; 819 dfs = (struct dlfs *)bp->b_data; 820 821 /* Check the basics. */ 822 if (dfs->dlfs_magic != LFS_MAGIC || dfs->dlfs_bsize >= MAXBSIZE || 823 dfs->dlfs_version > LFS_VERSION || 824 dfs->dlfs_bsize < sizeof(struct dlfs)) { 825 #ifdef DEBUG_LFS 826 printf("lfs_mountfs: primary superblock sanity failed\n"); 827 #endif 828 error = EINVAL; /* XXX needs translation */ 829 goto out; 830 } 831 if (dfs->dlfs_inodefmt > LFS_MAXINODEFMT) 832 printf("lfs_mountfs: warning: unknown inode format %d\n", 833 dfs->dlfs_inodefmt); 834 835 if (dfs->dlfs_version == 1) 836 fsbsize = secsize; 837 else { 838 fsbsize = 1 << (dfs->dlfs_bshift - dfs->dlfs_blktodb + 839 dfs->dlfs_fsbtodb); 840 /* 841 * Could be, if the frag size is large enough, that we 842 * don't have the "real" primary superblock. If that's 843 * the case, get the real one, and try again. 844 */ 845 if (sb_addr != dfs->dlfs_sboffs[0] << 846 dfs->dlfs_fsbtodb) { 847 /* #ifdef DEBUG_LFS */ 848 printf("lfs_mountfs: sb daddr 0x%x is not right, trying 0x%x\n", 849 sb_addr, dfs->dlfs_sboffs[0] << 850 dfs->dlfs_fsbtodb); 851 /* #endif */ 852 sb_addr = dfs->dlfs_sboffs[0] << 853 dfs->dlfs_fsbtodb; 854 brelse(bp); 855 continue; 856 } 857 } 858 break; 859 } 860 861 /* 862 * Check the second superblock to see which is newer; then mount 863 * using the older of the two. This is necessary to ensure that 864 * the filesystem is valid if it was not unmounted cleanly. 865 */ 866 867 if (dfs->dlfs_sboffs[1] && 868 dfs->dlfs_sboffs[1] - LFS_LABELPAD / fsbsize > LFS_SBPAD / fsbsize) 869 { 870 error = bread(devvp, dfs->dlfs_sboffs[1] * (fsbsize / secsize), 871 LFS_SBPAD, cred, &abp); 872 if (error) 873 goto out; 874 adfs = (struct dlfs *)abp->b_data; 875 876 if (dfs->dlfs_version == 1) { 877 /* 1s resolution comparison */ 878 if (adfs->dlfs_tstamp < dfs->dlfs_tstamp) 879 tdfs = adfs; 880 else 881 tdfs = dfs; 882 } else { 883 /* monotonic infinite-resolution comparison */ 884 if (adfs->dlfs_serial < dfs->dlfs_serial) 885 tdfs = adfs; 886 else 887 tdfs = dfs; 888 } 889 890 /* Check the basics. */ 891 if (tdfs->dlfs_magic != LFS_MAGIC || 892 tdfs->dlfs_bsize > MAXBSIZE || 893 tdfs->dlfs_version > LFS_VERSION || 894 tdfs->dlfs_bsize < sizeof(struct dlfs)) { 895 #ifdef DEBUG_LFS 896 printf("lfs_mountfs: alt superblock sanity failed\n"); 897 #endif 898 error = EINVAL; /* XXX needs translation */ 899 goto out; 900 } 901 } else { 902 #ifdef DEBUG_LFS 903 printf("lfs_mountfs: invalid alt superblock daddr=0x%x\n", 904 dfs->dlfs_sboffs[1]); 905 #endif 906 error = EINVAL; 907 goto out; 908 } 909 910 /* Allocate the mount structure, copy the superblock into it. */ 911 fs = malloc(sizeof(struct lfs), M_UFSMNT, M_WAITOK); 912 memcpy(&fs->lfs_dlfs, tdfs, sizeof(struct dlfs)); 913 914 /* Compatibility */ 915 if (fs->lfs_version < 2) { 916 fs->lfs_sumsize = LFS_V1_SUMMARY_SIZE; 917 fs->lfs_ibsize = fs->lfs_bsize; 918 fs->lfs_start = fs->lfs_sboffs[0]; 919 fs->lfs_tstamp = fs->lfs_otstamp; 920 fs->lfs_fsbtodb = 0; 921 } 922 923 /* Before rolling forward, lock so vget will sleep for other procs */ 924 fs->lfs_flags = LFS_NOTYET; 925 fs->lfs_rfpid = p->p_pid; 926 927 ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK); 928 memset((caddr_t)ump, 0, sizeof *ump); 929 ump->um_lfs = fs; 930 if (sizeof(struct lfs) < LFS_SBPAD) { /* XXX why? */ 931 bp->b_flags |= B_INVAL; 932 abp->b_flags |= B_INVAL; 933 } 934 brelse(bp); 935 bp = NULL; 936 brelse(abp); 937 abp = NULL; 938 939 /* Set up the I/O information */ 940 fs->lfs_devbsize = secsize; 941 fs->lfs_iocount = 0; 942 fs->lfs_diropwait = 0; 943 fs->lfs_activesb = 0; 944 fs->lfs_uinodes = 0; 945 fs->lfs_ravail = 0; 946 fs->lfs_sbactive = 0; 947 #ifdef LFS_TRACK_IOS 948 for (i = 0; i < LFS_THROTTLE; i++) 949 fs->lfs_pending[i] = LFS_UNUSED_DADDR; 950 #endif 951 952 /* Set up the ifile and lock aflags */ 953 fs->lfs_doifile = 0; 954 fs->lfs_writer = 0; 955 fs->lfs_dirops = 0; 956 fs->lfs_nadirop = 0; 957 fs->lfs_seglock = 0; 958 lockinit(&fs->lfs_freelock, PINOD, "lfs_freelock", 0, 0); 959 960 /* Set the file system readonly/modify bits. */ 961 fs->lfs_ronly = ronly; 962 if (ronly == 0) 963 fs->lfs_fmod = 1; 964 965 /* Initialize the mount structure. */ 966 dev = devvp->v_rdev; 967 mp->mnt_data = (qaddr_t)ump; 968 mp->mnt_stat.f_fsid.val[0] = (long)dev; 969 mp->mnt_stat.f_fsid.val[1] = makefstype(MOUNT_LFS); 970 mp->mnt_stat.f_iosize = fs->lfs_bsize; 971 mp->mnt_maxsymlinklen = fs->lfs_maxsymlinklen; 972 mp->mnt_flag |= MNT_LOCAL; 973 ump->um_flags = 0; 974 ump->um_mountp = mp; 975 ump->um_dev = dev; 976 ump->um_devvp = devvp; 977 ump->um_bptrtodb = fs->lfs_fsbtodb; 978 ump->um_seqinc = fragstofsb(fs, fs->lfs_frag); 979 ump->um_nindir = fs->lfs_nindir; 980 ump->um_lognindir = ffs(fs->lfs_nindir) - 1; 981 for (i = 0; i < MAXQUOTAS; i++) 982 ump->um_quotas[i] = NULLVP; 983 devvp->v_specmountpoint = mp; 984 985 /* 986 * We use the ifile vnode for almost every operation. Instead of 987 * retrieving it from the hash table each time we retrieve it here, 988 * artificially increment the reference count and keep a pointer 989 * to it in the incore copy of the superblock. 990 */ 991 if ((error = VFS_VGET(mp, LFS_IFILE_INUM, &vp)) != 0) { 992 #ifdef DEBUG 993 printf("lfs_mountfs: ifile vget failed, error=%d\n", error); 994 #endif 995 goto out; 996 } 997 fs->lfs_ivnode = vp; 998 VREF(vp); 999 vput(vp); 1000 1001 /* 1002 * Roll forward. 1003 * 1004 * We don't automatically roll forward for v1 filesystems, because 1005 * of the danger that the clock was turned back between the last 1006 * checkpoint and crash. This would roll forward garbage. 1007 * 1008 * v2 filesystems don't have this problem because they use a 1009 * monotonically increasing serial number instead of a timestamp. 1010 */ 1011 #ifdef LFS_DO_ROLLFORWARD 1012 do_rollforward = !fs->lfs_ronly; 1013 #else 1014 do_rollforward = (fs->lfs_version > 1 && !fs->lfs_ronly && 1015 !(fs->lfs_pflags & LFS_PF_CLEAN)); 1016 #endif 1017 if (do_rollforward) { 1018 /* 1019 * Phase I: Find the address of the last good partial 1020 * segment that was written after the checkpoint. Mark 1021 * the segments in question dirty, so they won't be 1022 * reallocated. 1023 */ 1024 lastgoodpseg = oldoffset = offset = fs->lfs_offset; 1025 flags = 0x0; 1026 #ifdef DEBUG_LFS_RFW 1027 printf("LFS roll forward phase 1: starting at offset 0x%x\n", 1028 offset); 1029 #endif 1030 LFS_SEGENTRY(sup, fs, dtosn(fs, offset), bp); 1031 if (!(sup->su_flags & SEGUSE_DIRTY)) 1032 --fs->lfs_nclean; 1033 sup->su_flags |= SEGUSE_DIRTY; 1034 (void) VOP_BWRITE(bp); 1035 while ((offset = check_segsum(fs, offset, cred, CHECK_CKSUM, 1036 &flags, p)) > 0) 1037 { 1038 if (sntod(fs, oldoffset) != sntod(fs, offset)) { 1039 LFS_SEGENTRY(sup, fs, dtosn(fs, oldoffset), 1040 bp); 1041 if (!(sup->su_flags & SEGUSE_DIRTY)) 1042 --fs->lfs_nclean; 1043 sup->su_flags |= SEGUSE_DIRTY; 1044 (void) VOP_BWRITE(bp); 1045 } 1046 1047 #ifdef DEBUG_LFS_RFW 1048 printf("LFS roll forward phase 1: offset=0x%x\n", 1049 offset); 1050 if (flags & SS_DIROP) { 1051 printf("lfs_mountfs: dirops at 0x%x\n", 1052 oldoffset); 1053 if (!(flags & SS_CONT)) 1054 printf("lfs_mountfs: dirops end " 1055 "at 0x%x\n", oldoffset); 1056 } 1057 #endif 1058 if (!(flags & SS_CONT)) 1059 lastgoodpseg = offset; 1060 oldoffset = offset; 1061 } 1062 #ifdef DEBUG_LFS_RFW 1063 if (flags & SS_CONT) { 1064 printf("LFS roll forward: warning: incomplete " 1065 "dirops discarded\n"); 1066 } 1067 printf("LFS roll forward phase 1: completed: " 1068 "lastgoodpseg=0x%x\n", lastgoodpseg); 1069 #endif 1070 oldoffset = fs->lfs_offset; 1071 if (fs->lfs_offset != lastgoodpseg) { 1072 /* Don't overwrite what we're trying to preserve */ 1073 offset = fs->lfs_offset; 1074 fs->lfs_offset = lastgoodpseg; 1075 fs->lfs_curseg = sntod(fs, dtosn(fs, fs->lfs_offset)); 1076 for (sn = curseg = dtosn(fs, fs->lfs_curseg);;) { 1077 sn = (sn + 1) % fs->lfs_nseg; 1078 if (sn == curseg) 1079 panic("lfs_mountfs: no clean segments"); 1080 LFS_SEGENTRY(sup, fs, sn, bp); 1081 dirty = (sup->su_flags & SEGUSE_DIRTY); 1082 brelse(bp); 1083 if (!dirty) 1084 break; 1085 } 1086 fs->lfs_nextseg = sntod(fs, sn); 1087 1088 /* 1089 * Phase II: Roll forward from the first superblock. 1090 */ 1091 while (offset != lastgoodpseg) { 1092 #ifdef DEBUG_LFS_RFW 1093 printf("LFS roll forward phase 2: 0x%x\n", 1094 offset); 1095 #endif 1096 offset = check_segsum(fs, offset, cred, 1097 CHECK_UPDATE, NULL, p); 1098 } 1099 1100 /* 1101 * Finish: flush our changes to disk. 1102 */ 1103 lfs_segwrite(mp, SEGM_CKP | SEGM_SYNC); 1104 printf("lfs_mountfs: roll forward recovered %d blocks\n", 1105 lastgoodpseg - oldoffset); 1106 } 1107 #ifdef DEBUG_LFS_RFW 1108 printf("LFS roll forward complete\n"); 1109 #endif 1110 } 1111 /* If writing, sb is not clean; record in case of immediate crash */ 1112 if (!fs->lfs_ronly) { 1113 fs->lfs_pflags &= ~LFS_PF_CLEAN; 1114 lfs_writesuper(fs, fs->lfs_sboffs[0]); 1115 } 1116 1117 /* Allow vget now that roll-forward is complete */ 1118 fs->lfs_flags &= ~(LFS_NOTYET); 1119 wakeup(&fs->lfs_flags); 1120 1121 /* 1122 * Initialize the ifile cleaner info with information from 1123 * the superblock. 1124 */ 1125 LFS_CLEANERINFO(cip, fs, bp); 1126 cip->clean = fs->lfs_nclean; 1127 cip->dirty = fs->lfs_nseg - fs->lfs_nclean; 1128 cip->avail = fs->lfs_avail; 1129 cip->bfree = fs->lfs_bfree; 1130 (void) VOP_BWRITE(bp); /* Ifile */ 1131 1132 /* 1133 * Mark the current segment as ACTIVE, since we're going to 1134 * be writing to it. 1135 */ 1136 LFS_SEGENTRY(sup, fs, dtosn(fs, fs->lfs_offset), bp); 1137 sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE; 1138 (void) VOP_BWRITE(bp); /* Ifile */ 1139 1140 return (0); 1141 out: 1142 if (bp) 1143 brelse(bp); 1144 if (abp) 1145 brelse(abp); 1146 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 1147 (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p); 1148 VOP_UNLOCK(devvp, 0); 1149 if (ump) { 1150 free(ump->um_lfs, M_UFSMNT); 1151 free(ump, M_UFSMNT); 1152 mp->mnt_data = (qaddr_t)0; 1153 } 1154 return (error); 1155 } 1156 1157 /* 1158 * unmount system call 1159 */ 1160 int 1161 lfs_unmount(struct mount *mp, int mntflags, struct proc *p) 1162 { 1163 struct ufsmount *ump; 1164 struct lfs *fs; 1165 int error, flags, ronly, s; 1166 extern int lfs_allclean_wakeup; 1167 1168 flags = 0; 1169 if (mntflags & MNT_FORCE) 1170 flags |= FORCECLOSE; 1171 1172 ump = VFSTOUFS(mp); 1173 fs = ump->um_lfs; 1174 #ifdef QUOTA 1175 if (mp->mnt_flag & MNT_QUOTA) { 1176 int i; 1177 error = vflush(mp, fs->lfs_ivnode, SKIPSYSTEM|flags); 1178 if (error) 1179 return (error); 1180 for (i = 0; i < MAXQUOTAS; i++) { 1181 if (ump->um_quotas[i] == NULLVP) 1182 continue; 1183 quotaoff(p, mp, i); 1184 } 1185 /* 1186 * Here we fall through to vflush again to ensure 1187 * that we have gotten rid of all the system vnodes. 1188 */ 1189 } 1190 #endif 1191 if ((error = vflush(mp, fs->lfs_ivnode, flags)) != 0) 1192 return (error); 1193 if ((error = VFS_SYNC(mp, 1, p->p_ucred, p)) != 0) 1194 return (error); 1195 if (fs->lfs_ivnode->v_dirtyblkhd.lh_first) 1196 panic("lfs_unmount: still dirty blocks on ifile vnode\n"); 1197 1198 /* Explicitly write the superblock, to update serial and pflags */ 1199 fs->lfs_pflags |= LFS_PF_CLEAN; 1200 lfs_writesuper(fs, fs->lfs_sboffs[0]); 1201 lfs_writesuper(fs, fs->lfs_sboffs[1]); 1202 1203 /* Finish with the Ifile, now that we're done with it */ 1204 vrele(fs->lfs_ivnode); 1205 vgone(fs->lfs_ivnode); 1206 1207 /* Wait for superblock writes to complete */ 1208 s = splbio(); 1209 while (fs->lfs_iocount) 1210 tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs_umount", 0); 1211 splx(s); 1212 1213 ronly = !fs->lfs_ronly; 1214 if (ump->um_devvp->v_type != VBAD) 1215 ump->um_devvp->v_specmountpoint = NULL; 1216 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 1217 error = VOP_CLOSE(ump->um_devvp, 1218 ronly ? FREAD : FREAD|FWRITE, NOCRED, p); 1219 vput(ump->um_devvp); 1220 1221 /* XXX KS - wake up the cleaner so it can die */ 1222 wakeup(&fs->lfs_nextseg); 1223 wakeup(&lfs_allclean_wakeup); 1224 1225 free(fs, M_UFSMNT); 1226 free(ump, M_UFSMNT); 1227 mp->mnt_data = (qaddr_t)0; 1228 mp->mnt_flag &= ~MNT_LOCAL; 1229 return (error); 1230 } 1231 1232 /* 1233 * Get file system statistics. 1234 */ 1235 int 1236 lfs_statfs(struct mount *mp, struct statfs *sbp, struct proc *p) 1237 { 1238 struct lfs *fs; 1239 struct ufsmount *ump; 1240 1241 ump = VFSTOUFS(mp); 1242 fs = ump->um_lfs; 1243 if (fs->lfs_magic != LFS_MAGIC) 1244 panic("lfs_statfs: magic"); 1245 1246 sbp->f_type = 0; 1247 sbp->f_bsize = fs->lfs_fsize; 1248 sbp->f_iosize = fs->lfs_bsize; 1249 sbp->f_blocks = fsbtofrags(fs, LFS_EST_NONMETA(fs)); 1250 sbp->f_bfree = fsbtofrags(fs, LFS_EST_BFREE(fs)); 1251 sbp->f_bavail = fsbtofrags(fs, (long)LFS_EST_BFREE(fs) - 1252 (long)LFS_EST_RSVD(fs)); 1253 1254 sbp->f_files = fs->lfs_bfree / btofsb(fs, fs->lfs_ibsize) * INOPB(fs); 1255 sbp->f_ffree = sbp->f_files - fs->lfs_nfiles; 1256 if (sbp != &mp->mnt_stat) { 1257 bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); 1258 bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); 1259 } 1260 strncpy(sbp->f_fstypename, mp->mnt_op->vfs_name, MFSNAMELEN); 1261 return (0); 1262 } 1263 1264 /* 1265 * Go through the disk queues to initiate sandbagged IO; 1266 * go through the inodes to write those that have been modified; 1267 * initiate the writing of the super block if it has been modified. 1268 * 1269 * Note: we are always called with the filesystem marked `MPBUSY'. 1270 */ 1271 int 1272 lfs_sync(struct mount *mp, int waitfor, struct ucred *cred, struct proc *p) 1273 { 1274 int error; 1275 struct lfs *fs; 1276 1277 fs = ((struct ufsmount *)mp->mnt_data)->ufsmount_u.lfs; 1278 if (fs->lfs_ronly) 1279 return 0; 1280 while (fs->lfs_dirops) 1281 error = tsleep(&fs->lfs_dirops, PRIBIO + 1, "lfs_dirops", 0); 1282 fs->lfs_writer++; 1283 1284 /* All syncs must be checkpoints until roll-forward is implemented. */ 1285 error = lfs_segwrite(mp, SEGM_CKP | (waitfor ? SEGM_SYNC : 0)); 1286 if (--fs->lfs_writer == 0) 1287 wakeup(&fs->lfs_dirops); 1288 #ifdef QUOTA 1289 qsync(mp); 1290 #endif 1291 return (error); 1292 } 1293 1294 extern struct lock ufs_hashlock; 1295 1296 /* 1297 * Look up an LFS dinode number to find its incore vnode. If not already 1298 * in core, read it in from the specified device. Return the inode locked. 1299 * Detection and handling of mount points must be done by the calling routine. 1300 */ 1301 int 1302 lfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp) 1303 { 1304 struct lfs *fs; 1305 struct inode *ip; 1306 struct buf *bp; 1307 struct ifile *ifp; 1308 struct vnode *vp; 1309 struct ufsmount *ump; 1310 ufs_daddr_t daddr; 1311 dev_t dev; 1312 int error; 1313 struct timespec ts; 1314 1315 ump = VFSTOUFS(mp); 1316 dev = ump->um_dev; 1317 fs = ump->um_lfs; 1318 1319 /* 1320 * If the filesystem is not completely mounted yet, suspend 1321 * any access requests (wait for roll-forward to complete). 1322 */ 1323 while ((fs->lfs_flags & LFS_NOTYET) && curproc->p_pid != fs->lfs_rfpid) 1324 tsleep(&fs->lfs_flags, PRIBIO+1, "lfs_notyet", 0); 1325 1326 if ((*vpp = ufs_ihashget(dev, ino, LK_EXCLUSIVE)) != NULL) 1327 return (0); 1328 1329 if ((error = getnewvnode(VT_LFS, mp, lfs_vnodeop_p, &vp)) != 0) { 1330 *vpp = NULL; 1331 return (error); 1332 } 1333 1334 do { 1335 if ((*vpp = ufs_ihashget(dev, ino, LK_EXCLUSIVE)) != NULL) { 1336 ungetnewvnode(vp); 1337 return (0); 1338 } 1339 } while (lockmgr(&ufs_hashlock, LK_EXCLUSIVE|LK_SLEEPFAIL, 0)); 1340 1341 /* Translate the inode number to a disk address. */ 1342 if (ino == LFS_IFILE_INUM) 1343 daddr = fs->lfs_idaddr; 1344 else { 1345 /* XXX bounds-check this too */ 1346 LFS_IENTRY(ifp, fs, ino, bp); 1347 daddr = ifp->if_daddr; 1348 if (fs->lfs_version > 1) { 1349 ts.tv_sec = ifp->if_atime_sec; 1350 ts.tv_nsec = ifp->if_atime_nsec; 1351 } 1352 1353 brelse(bp); 1354 if (daddr == LFS_UNUSED_DADDR) { 1355 *vpp = NULLVP; 1356 ungetnewvnode(vp); 1357 lockmgr(&ufs_hashlock, LK_RELEASE, 0); 1358 return (ENOENT); 1359 } 1360 } 1361 1362 /* Allocate/init new vnode/inode. */ 1363 lfs_vcreate(mp, ino, vp); 1364 1365 /* 1366 * Put it onto its hash chain and lock it so that other requests for 1367 * this inode will block if they arrive while we are sleeping waiting 1368 * for old data structures to be purged or for the contents of the 1369 * disk portion of this inode to be read. 1370 */ 1371 ip = VTOI(vp); 1372 ufs_ihashins(ip); 1373 lockmgr(&ufs_hashlock, LK_RELEASE, 0); 1374 1375 /* 1376 * XXX 1377 * This may not need to be here, logically it should go down with 1378 * the i_devvp initialization. 1379 * Ask Kirk. 1380 */ 1381 ip->i_lfs = ump->um_lfs; 1382 1383 /* Read in the disk contents for the inode, copy into the inode. */ 1384 error = bread(ump->um_devvp, fsbtodb(fs, daddr), 1385 (fs->lfs_version == 1 ? fs->lfs_bsize : fs->lfs_fsize), 1386 NOCRED, &bp); 1387 if (error) { 1388 /* 1389 * The inode does not contain anything useful, so it would 1390 * be misleading to leave it on its hash chain. With mode 1391 * still zero, it will be unlinked and returned to the free 1392 * list by vput(). 1393 */ 1394 vput(vp); 1395 brelse(bp); 1396 *vpp = NULL; 1397 return (error); 1398 } 1399 ip->i_din.ffs_din = *lfs_ifind(fs, ino, bp); 1400 ip->i_ffs_effnlink = ip->i_ffs_nlink; 1401 ip->i_lfs_effnblks = ip->i_ffs_blocks; 1402 if (fs->lfs_version > 1) { 1403 ip->i_ffs_atime = ts.tv_sec; 1404 ip->i_ffs_atimensec = ts.tv_nsec; 1405 } 1406 brelse(bp); 1407 1408 /* 1409 * Initialize the vnode from the inode, check for aliases. In all 1410 * cases re-init ip, the underlying vnode/inode may have changed. 1411 */ 1412 ufs_vinit(mp, lfs_specop_p, lfs_fifoop_p, &vp); 1413 #ifdef DIAGNOSTIC 1414 if (vp->v_type == VNON) { 1415 panic("lfs_vget: ino %d is type VNON! (ifmt %o)\n", 1416 ip->i_number, (ip->i_ffs_mode & IFMT) >> 12); 1417 } 1418 #endif 1419 /* 1420 * Finish inode initialization now that aliasing has been resolved. 1421 */ 1422 1423 genfs_node_init(vp, &lfs_genfsops); 1424 ip->i_devvp = ump->um_devvp; 1425 VREF(ip->i_devvp); 1426 *vpp = vp; 1427 1428 uvm_vnp_setsize(vp, ip->i_ffs_size); 1429 1430 return (0); 1431 } 1432 1433 /* 1434 * File handle to vnode 1435 * 1436 * Have to be really careful about stale file handles: 1437 * - check that the inode number is valid 1438 * - call lfs_vget() to get the locked inode 1439 * - check for an unallocated inode (i_mode == 0) 1440 * 1441 * XXX 1442 * use ifile to see if inode is allocated instead of reading off disk 1443 * what is the relationship between my generational number and the NFS 1444 * generational number. 1445 */ 1446 int 1447 lfs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp) 1448 { 1449 struct ufid *ufhp; 1450 1451 ufhp = (struct ufid *)fhp; 1452 if (ufhp->ufid_ino < ROOTINO) 1453 return (ESTALE); 1454 return (ufs_fhtovp(mp, ufhp, vpp)); 1455 } 1456 1457 /* 1458 * Vnode pointer to File handle 1459 */ 1460 /* ARGSUSED */ 1461 int 1462 lfs_vptofh(struct vnode *vp, struct fid *fhp) 1463 { 1464 struct inode *ip; 1465 struct ufid *ufhp; 1466 1467 ip = VTOI(vp); 1468 ufhp = (struct ufid *)fhp; 1469 ufhp->ufid_len = sizeof(struct ufid); 1470 ufhp->ufid_ino = ip->i_number; 1471 ufhp->ufid_gen = ip->i_ffs_gen; 1472 return (0); 1473 } 1474 1475 int 1476 lfs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, size_t newlen, struct proc *p) 1477 { 1478 extern int lfs_writeindir, lfs_dostats, lfs_clean_vnhead; 1479 extern struct lfs_stats lfs_stats; 1480 int error; 1481 1482 /* all sysctl names at this level are terminal */ 1483 if (namelen != 1) 1484 return (ENOTDIR); 1485 1486 switch (name[0]) { 1487 case LFS_WRITEINDIR: 1488 return (sysctl_int(oldp, oldlenp, newp, newlen, 1489 &lfs_writeindir)); 1490 case LFS_CLEAN_VNHEAD: 1491 return (sysctl_int(oldp, oldlenp, newp, newlen, 1492 &lfs_clean_vnhead)); 1493 case LFS_DOSTATS: 1494 if ((error = sysctl_int(oldp, oldlenp, newp, newlen, 1495 &lfs_dostats))) 1496 return error; 1497 if (lfs_dostats == 0) 1498 memset(&lfs_stats,0,sizeof(lfs_stats)); 1499 return 0; 1500 case LFS_STATS: 1501 return (sysctl_rdstruct(oldp, oldlenp, newp, 1502 &lfs_stats, sizeof(lfs_stats))); 1503 default: 1504 return (EOPNOTSUPP); 1505 } 1506 /* NOTREACHED */ 1507 } 1508