1 /*- 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7 /*- 8 * SPDX-License-Identifier: BSD-3-Clause 9 * 10 * Copyright (c) 1989, 1991, 1993, 1994 11 * The Regents of the University of California. All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 38 * $FreeBSD$ 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/namei.h> 44 #include <sys/priv.h> 45 #include <sys/proc.h> 46 #include <sys/kernel.h> 47 #include <sys/vnode.h> 48 #include <sys/mount.h> 49 #include <sys/bio.h> 50 #include <sys/buf2.h> 51 #include <sys/conf.h> 52 #include <sys/endian.h> 53 #include <sys/fcntl.h> 54 #include <sys/malloc.h> 55 #include <sys/stat.h> 56 #include <sys/mutex2.h> 57 #include <sys/nlookup.h> 58 59 #include <vfs/ext2fs/fs.h> 60 #include <vfs/ext2fs/ext2_mount.h> 61 #include <vfs/ext2fs/inode.h> 62 63 #include <vfs/ext2fs/ext2fs.h> 64 #include <vfs/ext2fs/ext2_dinode.h> 65 #include <vfs/ext2fs/ext2_extern.h> 66 #include <vfs/ext2fs/ext2_extents.h> 67 68 SDT_PROVIDER_DECLARE(ext2fs); 69 /* 70 * ext2fs trace probe: 71 * arg0: verbosity. Higher numbers give more verbose messages 72 * arg1: Textual message 73 */ 74 SDT_PROBE_DEFINE2(ext2fs, , vfsops, trace, "int", "char*"); 75 SDT_PROBE_DEFINE2(ext2fs, , vfsops, ext2_cg_validate_error, "char*", "int"); 76 SDT_PROBE_DEFINE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "char*"); 77 78 79 static int ext2_flushfiles(struct mount *mp, int flags); 80 static int ext2_mountfs(struct vnode *, struct mount *); 81 static int ext2_reload(struct mount *mp); 82 static int ext2_sbupdate(struct ext2mount *, int); 83 static int ext2_cgupdate(struct ext2mount *, int); 84 static int ext2_init(struct vfsconf *); 85 static int ext2_uninit(struct vfsconf *); 86 static vfs_unmount_t ext2_unmount; 87 static vfs_root_t ext2_root; 88 static vfs_statfs_t ext2_statfs; 89 static vfs_statvfs_t ext2_statvfs; 90 static vfs_sync_t ext2_sync; 91 static vfs_vget_t ext2_vget; 92 static vfs_fhtovp_t ext2_fhtovp; 93 static vfs_vptofh_t ext2_vptofh; 94 static vfs_checkexp_t ext2_check_export; 95 static vfs_mount_t ext2_mount; 96 97 MALLOC_DEFINE(M_EXT2NODE, "ext2_node", "EXT2 vnode private part"); 98 static MALLOC_DEFINE(M_EXT2MNT, "ext2_mount", "EXT2 mount structure"); 99 100 static struct vfsops ext2fs_vfsops = { 101 .vfs_flags = 0, 102 .vfs_mount = ext2_mount, 103 .vfs_unmount = ext2_unmount, 104 .vfs_root = ext2_root, /* root inode via vget */ 105 .vfs_statfs = ext2_statfs, 106 .vfs_statvfs = ext2_statvfs, 107 .vfs_sync = ext2_sync, 108 .vfs_vget = ext2_vget, 109 .vfs_fhtovp = ext2_fhtovp, 110 .vfs_vptofh = ext2_vptofh, 111 .vfs_checkexp = ext2_check_export, 112 .vfs_init = ext2_init, 113 .vfs_uninit = ext2_uninit 114 }; 115 116 VFS_SET(ext2fs_vfsops, ext2fs, 0); 117 MODULE_VERSION(ext2fs, 1); 118 119 static int ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, 120 int ronly); 121 static int ext2_compute_sb_data(struct vnode * devvp, 122 struct ext2fs * es, struct m_ext2fs * fs); 123 124 static int ext2fs_inode_hash_lock; 125 126 /* 127 * VFS Operations. 128 * 129 * mount system call 130 */ 131 static int 132 ext2_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred) 133 { 134 struct ext2_args args; 135 struct vnode *devvp; 136 struct ext2mount *ump = NULL; 137 struct m_ext2fs *fs; 138 struct nlookupdata nd; 139 mode_t accmode; 140 int error, flags; 141 size_t size; 142 143 if ((error = copyin(data, (caddr_t)&args, sizeof (struct ext2_args))) != 0) 144 return (error); 145 146 /* 147 * If updating, check whether changing from read-only to 148 * read/write; if there is no device name, that's all we do. 149 */ 150 if (mp->mnt_flag & MNT_UPDATE) { 151 ump = VFSTOEXT2(mp); 152 fs = ump->um_e2fs; 153 devvp = ump->um_devvp; 154 error = 0; 155 if (fs->e2fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { 156 error = VFS_SYNC(mp, MNT_WAIT); 157 if (error) 158 return (error); 159 flags = WRITECLOSE; 160 if (mp->mnt_flag & MNT_FORCE) 161 flags |= FORCECLOSE; 162 if (vfs_busy(mp, LK_NOWAIT)) 163 return (EBUSY); 164 error = ext2_flushfiles(mp, flags); 165 vfs_unbusy(mp); 166 if (error == 0 && fs->e2fs_wasvalid && 167 ext2_cgupdate(ump, MNT_WAIT) == 0) { 168 fs->e2fs->e2fs_state = 169 htole16((le16toh(fs->e2fs->e2fs_state) | 170 E2FS_ISCLEAN)); 171 ext2_sbupdate(ump, MNT_WAIT); 172 } 173 fs->e2fs_ronly = 1; 174 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 175 VOP_OPEN(devvp, FREAD, FSCRED, NULL); 176 VOP_CLOSE(devvp, FREAD | FWRITE, NULL); 177 vn_unlock(devvp); 178 } 179 if (!error && (mp->mnt_flag & MNT_RELOAD)) 180 error = ext2_reload(mp); 181 if (error) 182 return (error); 183 devvp = ump->um_devvp; 184 if (fs->e2fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { 185 if (ext2_check_sb_compat(fs->e2fs, devvp->v_rdev, 0)) 186 return (EPERM); 187 188 /* 189 * If upgrade to read-write by non-root, then verify 190 * that user has necessary permissions on the device. 191 */ 192 if (cred->cr_uid != 0) { 193 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 194 error = VOP_EACCESS(devvp, VREAD | VWRITE, cred); 195 if (error) { 196 vn_unlock(devvp); 197 return (error); 198 } 199 vn_unlock(devvp); 200 } 201 202 if ((le16toh(fs->e2fs->e2fs_state) & E2FS_ISCLEAN) == 0 || 203 (le16toh(fs->e2fs->e2fs_state) & E2FS_ERRORS)) { 204 if (mp->mnt_flag & MNT_FORCE) { 205 printf( 206 "WARNING: %s was not properly dismounted\n", fs->e2fs_fsmnt); 207 } else { 208 printf( 209 "WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", 210 fs->e2fs_fsmnt); 211 return (EPERM); 212 } 213 } 214 fs->e2fs->e2fs_state = 215 htole16(le16toh(fs->e2fs->e2fs_state) & ~E2FS_ISCLEAN); 216 (void)ext2_cgupdate(ump, MNT_WAIT); 217 fs->e2fs_ronly = 0; 218 mp->mnt_flag &= ~MNT_RDONLY; 219 220 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 221 VOP_OPEN(devvp, FREAD | FWRITE, FSCRED, NULL); 222 VOP_CLOSE(devvp, FREAD, NULL); 223 vn_unlock(devvp); 224 } 225 if (args.fspec == NULL) { 226 /* 227 * Process export requests. 228 */ 229 return (vfs_export(mp, &ump->um_export, &args.export)); 230 } 231 } 232 233 /* 234 * Not an update, or updating the name: look up the name 235 * and verify that it refers to a sensible disk device. 236 */ 237 devvp = NULL; 238 error = nlookup_init(&nd, args.fspec, UIO_USERSPACE, NLC_FOLLOW); 239 if (error == 0) 240 error = nlookup(&nd); 241 if (error == 0) 242 error = cache_vref(&nd.nl_nch, nd.nl_cred, &devvp); 243 nlookup_done(&nd); 244 if (error) 245 return (error); 246 247 if (!vn_isdisk(devvp, &error)) { 248 vrele(devvp); 249 return (error); 250 } 251 252 /* 253 * If mount by non-root, then verify that user has necessary 254 * permissions on the device. 255 * 256 * XXXRW: VOP_ACCESS() enough? 257 */ 258 if (cred->cr_uid != 0) { 259 accmode = VREAD; 260 if ((mp->mnt_flag & MNT_RDONLY) == 0) 261 accmode |= VWRITE; 262 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 263 if ((error = VOP_EACCESS(devvp, accmode, cred)) != 0) { 264 vput(devvp); 265 return (error); 266 } 267 vn_unlock(devvp); 268 } 269 270 if ((mp->mnt_flag & MNT_UPDATE) == 0) { 271 error = ext2_mountfs(devvp, mp); 272 } else { 273 if (devvp != ump->um_devvp) 274 error = EINVAL; /* needs translation */ 275 else 276 vrele(devvp); 277 } 278 if (error) { 279 vrele(devvp); 280 return (error); 281 } 282 ump = VFSTOEXT2(mp); 283 fs = ump->um_e2fs; 284 285 /* 286 * Note that this strncpy() is ok because of a check at the start 287 * of ext2_mount(). 288 */ 289 copyinstr(path, fs->e2fs_fsmnt, sizeof(fs->e2fs_fsmnt) - 1, &size); 290 bzero(fs->e2fs_fsmnt + size, sizeof(fs->e2fs_fsmnt) - size); 291 copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); 292 bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 293 ext2_statfs(mp, &mp->mnt_stat, cred); 294 return (0); 295 } 296 297 static int 298 ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, int ronly) 299 { 300 uint32_t i, mask; 301 302 if (le16toh(es->e2fs_magic) != E2FS_MAGIC) { 303 printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n", 304 devtoname(dev), le16toh(es->e2fs_magic), E2FS_MAGIC); 305 return (1); 306 } 307 if (le32toh(es->e2fs_rev) > E2FS_REV0) { 308 mask = le32toh(es->e2fs_features_incompat) & ~(EXT2F_INCOMPAT_SUPP); 309 if (mask) { 310 printf("WARNING: mount of %s denied due to " 311 "unsupported optional features:\n", devtoname(dev)); 312 for (i = 0; 313 i < sizeof(incompat)/sizeof(struct ext2_feature); 314 i++) 315 if (mask & incompat[i].mask) 316 printf("%s ", incompat[i].name); 317 printf("\n"); 318 return (1); 319 } 320 mask = le32toh(es->e2fs_features_rocompat) & ~EXT2F_ROCOMPAT_SUPP; 321 if (!ronly && mask) { 322 printf("WARNING: R/W mount of %s denied due to " 323 "unsupported optional features:\n", devtoname(dev)); 324 for (i = 0; 325 i < sizeof(ro_compat)/sizeof(struct ext2_feature); 326 i++) 327 if (mask & ro_compat[i].mask) 328 printf("%s ", ro_compat[i].name); 329 printf("\n"); 330 return (1); 331 } 332 } 333 return (0); 334 } 335 336 static e4fs_daddr_t 337 ext2_cg_location(struct m_ext2fs *fs, int number) 338 { 339 int cg, descpb, logical_sb, has_super = 0; 340 341 /* 342 * Adjust logical superblock block number. 343 * Godmar thinks: if the blocksize is greater than 1024, then 344 * the superblock is logically part of block zero. 345 */ 346 logical_sb = fs->e2fs_bsize > SBSIZE ? 0 : 1; 347 348 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) || 349 number < le32toh(fs->e2fs->e3fs_first_meta_bg)) 350 return (logical_sb + number + 1); 351 352 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) 353 descpb = fs->e2fs_bsize / sizeof(struct ext2_gd); 354 else 355 descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE; 356 357 cg = descpb * number; 358 359 if (ext2_cg_has_sb(fs, cg)) 360 has_super = 1; 361 362 return (has_super + cg * (e4fs_daddr_t)EXT2_BLOCKS_PER_GROUP(fs) + 363 le32toh(fs->e2fs->e2fs_first_dblock)); 364 } 365 366 static int 367 ext2_cg_validate(struct m_ext2fs *fs) 368 { 369 uint64_t b_bitmap; 370 uint64_t i_bitmap; 371 uint64_t i_tables; 372 uint64_t first_block, last_block, last_cg_block; 373 struct ext2_gd *gd; 374 unsigned int i, cg_count; 375 376 first_block = le32toh(fs->e2fs->e2fs_first_dblock); 377 last_cg_block = ext2_cg_number_gdb(fs, 0); 378 cg_count = fs->e2fs_gcount; 379 380 for (i = 0; i < fs->e2fs_gcount; i++) { 381 gd = &fs->e2fs_gd[i]; 382 383 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 384 i == fs->e2fs_gcount - 1) { 385 last_block = fs->e2fs_bcount - 1; 386 } else { 387 last_block = first_block + 388 (EXT2_BLOCKS_PER_GROUP(fs) - 1); 389 } 390 391 if ((cg_count == fs->e2fs_gcount) && 392 !(le16toh(gd->ext4bgd_flags) & EXT2_BG_INODE_ZEROED)) 393 cg_count = i; 394 395 b_bitmap = e2fs_gd_get_b_bitmap(gd); 396 if (b_bitmap == 0) { 397 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 398 "block bitmap is zero", i); 399 return (EINVAL); 400 401 } 402 if (b_bitmap <= last_cg_block) { 403 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 404 "block bitmap overlaps gds", i); 405 return (EINVAL); 406 } 407 if (b_bitmap < first_block || b_bitmap > last_block) { 408 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 409 "block bitmap not in group", i); 410 return (EINVAL); 411 } 412 413 i_bitmap = e2fs_gd_get_i_bitmap(gd); 414 if (i_bitmap == 0) { 415 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 416 "inode bitmap is zero", i); 417 return (EINVAL); 418 } 419 if (i_bitmap <= last_cg_block) { 420 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 421 "inode bitmap overlaps gds", i); 422 return (EINVAL); 423 } 424 if (i_bitmap < first_block || i_bitmap > last_block) { 425 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 426 "inode bitmap not in group blk", i); 427 return (EINVAL); 428 } 429 430 i_tables = e2fs_gd_get_i_tables(gd); 431 if (i_tables == 0) { 432 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 433 "inode table is zero", i); 434 return (EINVAL); 435 } 436 if (i_tables <= last_cg_block) { 437 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 438 "inode talbes overlaps gds", i); 439 return (EINVAL); 440 } 441 if (i_tables < first_block || 442 i_tables + fs->e2fs_itpg - 1 > last_block) { 443 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 444 "inode tables not in group blk", i); 445 return (EINVAL); 446 } 447 448 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG)) 449 first_block += EXT2_BLOCKS_PER_GROUP(fs); 450 } 451 452 return (0); 453 } 454 455 /* 456 * This computes the fields of the m_ext2fs structure from the 457 * data in the ext2fs structure read in. 458 */ 459 static int 460 ext2_compute_sb_data(struct vnode *devvp, struct ext2fs *es, 461 struct m_ext2fs *fs) 462 { 463 struct buf *bp; 464 uint32_t e2fs_descpb, e2fs_gdbcount_alloc; 465 int i, j; 466 int g_count = 0; 467 int error; 468 469 /* Check checksum features */ 470 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) && 471 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 472 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 473 "incorrect checksum features combination"); 474 return (EINVAL); 475 } 476 477 /* Precompute checksum seed for all metadata */ 478 ext2_sb_csum_set_seed(fs); 479 480 /* Verify sb csum if possible */ 481 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 482 error = ext2_sb_csum_verify(fs); 483 if (error) { 484 return (error); 485 } 486 } 487 488 /* Check for block size = 1K|2K|4K */ 489 if (le32toh(es->e2fs_log_bsize) > 2) { 490 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 491 "bad block size"); 492 return (EINVAL); 493 } 494 495 fs->e2fs_bshift = EXT2_MIN_BLOCK_LOG_SIZE + le32toh(es->e2fs_log_bsize); 496 fs->e2fs_bsize = 1U << fs->e2fs_bshift; 497 fs->e2fs_fsbtodb = le32toh(es->e2fs_log_bsize) + 1; 498 fs->e2fs_qbmask = fs->e2fs_bsize - 1; 499 500 /* Check for fragment size */ 501 if (le32toh(es->e2fs_log_fsize) > 502 (EXT2_MAX_FRAG_LOG_SIZE - EXT2_MIN_BLOCK_LOG_SIZE)) { 503 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 504 "invalid log cluster size"); 505 return (EINVAL); 506 } 507 508 fs->e2fs_fsize = EXT2_MIN_FRAG_SIZE << le32toh(es->e2fs_log_fsize); 509 if (fs->e2fs_fsize != fs->e2fs_bsize) { 510 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 511 "fragment size != block size"); 512 return (EINVAL); 513 } 514 515 fs->e2fs_fpb = fs->e2fs_bsize / fs->e2fs_fsize; 516 517 /* Check reserved gdt blocks for future filesystem expansion */ 518 if (le16toh(es->e2fs_reserved_ngdb) > (fs->e2fs_bsize / 4)) { 519 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 520 "number of reserved GDT blocks too large"); 521 return (EINVAL); 522 } 523 524 if (le32toh(es->e2fs_rev) == E2FS_REV0) { 525 fs->e2fs_isize = E2FS_REV0_INODE_SIZE; 526 } else { 527 fs->e2fs_isize = le16toh(es->e2fs_inode_size); 528 529 /* 530 * Check first ino. 531 */ 532 if (le32toh(es->e2fs_first_ino) < EXT2_FIRSTINO) { 533 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 534 "invalid first ino"); 535 return (EINVAL); 536 } 537 538 /* 539 * Simple sanity check for superblock inode size value. 540 */ 541 if (EXT2_INODE_SIZE(fs) < E2FS_REV0_INODE_SIZE || 542 EXT2_INODE_SIZE(fs) > fs->e2fs_bsize || 543 (fs->e2fs_isize & (fs->e2fs_isize - 1)) != 0) { 544 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 545 "invalid inode size"); 546 return (EINVAL); 547 } 548 } 549 550 /* Check group descriptors */ 551 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT) && 552 le16toh(es->e3fs_desc_size) != E2FS_64BIT_GD_SIZE) { 553 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 554 "unsupported 64bit descriptor size"); 555 return (EINVAL); 556 } 557 558 fs->e2fs_bpg = le32toh(es->e2fs_bpg); 559 fs->e2fs_fpg = le32toh(es->e2fs_fpg); 560 if (fs->e2fs_bpg == 0 || fs->e2fs_fpg == 0) { 561 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 562 "zero blocks/fragments per group"); 563 return (EINVAL); 564 } else if (fs->e2fs_bpg != fs->e2fs_fpg) { 565 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 566 "blocks per group not equal fragments per group"); 567 return (EINVAL); 568 } 569 570 if (fs->e2fs_bpg != fs->e2fs_bsize * 8) { 571 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 572 "non-standard group size unsupported"); 573 return (EINVAL); 574 } 575 576 fs->e2fs_ipb = fs->e2fs_bsize / EXT2_INODE_SIZE(fs); 577 if (fs->e2fs_ipb == 0 || 578 fs->e2fs_ipb > fs->e2fs_bsize / E2FS_REV0_INODE_SIZE) { 579 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 580 "bad inodes per block size"); 581 return (EINVAL); 582 } 583 584 fs->e2fs_ipg = le32toh(es->e2fs_ipg); 585 if (fs->e2fs_ipg < fs->e2fs_ipb || fs->e2fs_ipg > fs->e2fs_bsize * 8) { 586 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 587 "invalid inodes per group"); 588 return (EINVAL); 589 } 590 591 fs->e2fs_itpg = fs->e2fs_ipg / fs->e2fs_ipb; 592 593 fs->e2fs_bcount = le32toh(es->e2fs_bcount); 594 fs->e2fs_rbcount = le32toh(es->e2fs_rbcount); 595 fs->e2fs_fbcount = le32toh(es->e2fs_fbcount); 596 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 597 fs->e2fs_bcount |= (uint64_t)(le32toh(es->e4fs_bcount_hi)) << 32; 598 fs->e2fs_rbcount |= (uint64_t)(le32toh(es->e4fs_rbcount_hi)) << 32; 599 fs->e2fs_fbcount |= (uint64_t)(le32toh(es->e4fs_fbcount_hi)) << 32; 600 } 601 if (fs->e2fs_rbcount > fs->e2fs_bcount || 602 fs->e2fs_fbcount > fs->e2fs_bcount) { 603 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 604 "invalid block count"); 605 return (EINVAL); 606 } 607 608 fs->e2fs_ficount = le32toh(es->e2fs_ficount); 609 if (fs->e2fs_ficount > le32toh(es->e2fs_icount)) { 610 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 611 "invalid number of free inodes"); 612 return (EINVAL); 613 } 614 615 if (le32toh(es->e2fs_first_dblock) >= fs->e2fs_bcount) { 616 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 617 "first data block out of range"); 618 return (EINVAL); 619 } 620 621 fs->e2fs_gcount = howmany(fs->e2fs_bcount - 622 le32toh(es->e2fs_first_dblock), EXT2_BLOCKS_PER_GROUP(fs)); 623 if (fs->e2fs_gcount > ((uint64_t)1 << 32) - EXT2_DESCS_PER_BLOCK(fs)) { 624 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 625 "groups count too large"); 626 return (EINVAL); 627 } 628 629 /* Check for extra isize in big inodes. */ 630 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_EXTRA_ISIZE) && 631 EXT2_INODE_SIZE(fs) < sizeof(struct ext2fs_dinode)) { 632 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 633 "no space for extra inode timestamps"); 634 return (EINVAL); 635 } 636 637 /* s_resuid / s_resgid ? */ 638 639 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 640 e2fs_descpb = fs->e2fs_bsize / E2FS_64BIT_GD_SIZE; 641 e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount, e2fs_descpb); 642 } else { 643 e2fs_descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE; 644 e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount, 645 fs->e2fs_bsize / sizeof(struct ext2_gd)); 646 } 647 fs->e2fs_gdbcount = howmany(fs->e2fs_gcount, e2fs_descpb); 648 fs->e2fs_gd = malloc(e2fs_gdbcount_alloc * fs->e2fs_bsize, 649 M_EXT2MNT, M_WAITOK | M_ZERO); 650 fs->e2fs_contigdirs = malloc(fs->e2fs_gcount * 651 sizeof(*fs->e2fs_contigdirs), M_EXT2MNT, M_WAITOK | M_ZERO); 652 653 for (i = 0; i < fs->e2fs_gdbcount; i++) { 654 error = bread(devvp, fsbtodoff(fs, ext2_cg_location(fs, i)), 655 fs->e2fs_bsize, &bp); 656 if (error) { 657 /* 658 * fs->e2fs_gd and fs->e2fs_contigdirs 659 * will be freed later by the caller, 660 * because this function could be called from 661 * MNT_UPDATE path. 662 */ 663 brelse(bp); 664 return (error); 665 } 666 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 667 memcpy(&fs->e2fs_gd[ 668 i * fs->e2fs_bsize / sizeof(struct ext2_gd)], 669 bp->b_data, fs->e2fs_bsize); 670 } else { 671 for (j = 0; j < e2fs_descpb && 672 g_count < fs->e2fs_gcount; j++, g_count++) 673 memcpy(&fs->e2fs_gd[g_count], 674 bp->b_data + j * E2FS_REV0_GD_SIZE, 675 E2FS_REV0_GD_SIZE); 676 } 677 brelse(bp); 678 bp = NULL; 679 } 680 681 /* Validate cgs consistency */ 682 error = ext2_cg_validate(fs); 683 if (error) 684 return (error); 685 686 /* Verfy cgs csum */ 687 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 688 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 689 error = ext2_gd_csum_verify(fs, devvp->v_rdev); 690 if (error) 691 return (error); 692 } 693 /* Initialization for the ext2 Orlov allocator variant. */ 694 fs->e2fs_total_dir = 0; 695 for (i = 0; i < fs->e2fs_gcount; i++) 696 fs->e2fs_total_dir += e2fs_gd_get_ndirs(&fs->e2fs_gd[i]); 697 698 if (le32toh(es->e2fs_rev) == E2FS_REV0 || 699 !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_LARGEFILE)) 700 fs->e2fs_maxfilesize = 0x7fffffff; 701 else { 702 fs->e2fs_maxfilesize = 0xffffffffffff; 703 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_HUGE_FILE)) 704 fs->e2fs_maxfilesize = 0x7fffffffffffffff; 705 } 706 if (le32toh(es->e4fs_flags) & E2FS_UNSIGNED_HASH) { 707 fs->e2fs_uhash = 3; 708 } else if ((le32toh(es->e4fs_flags) & E2FS_SIGNED_HASH) == 0) { 709 #ifdef __CHAR_UNSIGNED__ 710 es->e4fs_flags = htole32(le32toh(es->e4fs_flags) | E2FS_UNSIGNED_HASH); 711 fs->e2fs_uhash = 3; 712 #else 713 es->e4fs_flags = htole32(le32toh(es->e4fs_flags) | E2FS_SIGNED_HASH); 714 #endif 715 } 716 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 717 error = ext2_sb_csum_verify(fs); 718 719 return (error); 720 } 721 722 struct scaninfo { 723 int rescan; 724 int allerror; 725 int waitfor; 726 struct vnode *devvp; 727 struct m_ext2fs *fs; 728 }; 729 730 static int 731 ext2_reload_scan(struct mount *mp, struct vnode *vp, void *data) 732 { 733 struct scaninfo *info = data; 734 struct inode *ip; 735 struct buf *bp; 736 int error; 737 738 /* 739 * Try to recycle 740 */ 741 if (vrecycle(vp)) 742 return (0); 743 744 /* 745 * Step 1: invalidate all cached file data. 746 */ 747 if (vinvalbuf(vp, 0, 0, 0)) 748 panic("ext2_reload: dirty2"); 749 /* 750 * Step 2: re-read inode data for all active vnodes. 751 */ 752 ip = VTOI(vp); 753 error = bread(info->devvp, 754 fsbtodoff(info->fs, ino_to_fsba(info->fs, ip->i_number)), 755 (int)info->fs->e2fs_bsize, &bp); 756 if (error) { 757 brelse(bp); 758 return (error); 759 } 760 761 error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data + 762 EXT2_INODE_SIZE(info->fs) * ino_to_fsbo(info->fs, ip->i_number)), 763 ip); 764 765 brelse(bp); 766 return (error); 767 } 768 769 /* 770 * Reload all incore data for a filesystem (used after running fsck on 771 * the root filesystem and finding things to fix). The filesystem must 772 * be mounted read-only. 773 * 774 * Things to do to update the mount: 775 * 1) invalidate all cached meta-data. 776 * 2) re-read superblock from disk. 777 * 3) invalidate all cluster summary information. 778 * 4) invalidate all inactive vnodes. 779 * 5) invalidate all cached file data. 780 * 6) re-read inode data for all active vnodes. 781 * XXX we are missing some steps, in particular # 3, this has to be reviewed. 782 */ 783 static int 784 ext2_reload(struct mount *mp) 785 { 786 struct vnode *devvp; 787 struct buf *bp; 788 struct ext2fs *es; 789 struct m_ext2fs *fs; 790 struct csum *sump; 791 struct scaninfo scaninfo; 792 int error, i; 793 int32_t *lp; 794 795 if ((mp->mnt_flag & MNT_RDONLY) == 0) 796 return (EINVAL); 797 /* 798 * Step 1: invalidate all cached meta-data. 799 */ 800 devvp = VFSTOEXT2(mp)->um_devvp; 801 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 802 if (vinvalbuf(devvp, 0, 0, 0) != 0) 803 panic("ext2_reload: dirty1"); 804 vn_unlock(devvp); 805 806 /* 807 * Step 2: re-read superblock from disk. 808 * constants have been adjusted for ext2 809 */ 810 if ((error = bread(devvp, SBOFF, SBSIZE, &bp)) != 0) { 811 brelse(bp); 812 return (error); 813 } 814 es = (struct ext2fs *)bp->b_data; 815 if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) { 816 brelse(bp); 817 return (EIO); /* XXX needs translation */ 818 } 819 fs = VFSTOEXT2(mp)->um_e2fs; 820 bcopy(bp->b_data, fs->e2fs, sizeof(struct ext2fs)); 821 822 if ((error = ext2_compute_sb_data(devvp, es, fs)) != 0) { 823 brelse(bp); 824 return (error); 825 } 826 #ifdef UNKLAR 827 if (fs->fs_sbsize < SBSIZE) 828 bp->b_flags |= B_INVAL; 829 #endif 830 brelse(bp); 831 832 /* 833 * Step 3: invalidate all cluster summary information. 834 */ 835 if (fs->e2fs_contigsumsize > 0) { 836 lp = fs->e2fs_maxcluster; 837 sump = fs->e2fs_clustersum; 838 for (i = 0; i < fs->e2fs_gcount; i++, sump++) { 839 *lp++ = fs->e2fs_contigsumsize; 840 sump->cs_init = 0; 841 bzero(sump->cs_sum, fs->e2fs_contigsumsize + 1); 842 } 843 } 844 845 scaninfo.rescan = 1; 846 scaninfo.devvp = devvp; 847 scaninfo.fs = fs; 848 while (error == 0 && scaninfo.rescan) { 849 scaninfo.rescan = 0; 850 error = vmntvnodescan(mp, VMSC_GETVX, NULL, ext2_reload_scan, 851 &scaninfo); 852 } 853 return (error); 854 } 855 856 /* 857 * Common code for mount and mountroot. 858 */ 859 static int 860 ext2_mountfs(struct vnode *devvp, struct mount *mp) 861 { 862 struct ext2mount *ump; 863 struct buf *bp; 864 struct m_ext2fs *fs; 865 struct ext2fs *es; 866 struct cdev *dev = devvp->v_rdev; 867 struct csum *sump; 868 int error; 869 int ronly; 870 int i; 871 u_long size; 872 int32_t *lp; 873 int32_t e2fs_maxcontig; 874 875 /* 876 * Disallow multiple mounts of the same device. 877 * Disallow mounting of a device that is currently in use 878 * (except for root, which might share swap device for miniroot). 879 * Flush out any old buffers remaining from a previous use. 880 */ 881 if ((error = vfs_mountedon(devvp)) != 0) 882 return (error); 883 if (vcount(devvp) > 0) 884 return (EBUSY); 885 if ((error = vinvalbuf(devvp, V_SAVE, 0, 0)) != 0) 886 return (error); 887 #ifdef READONLY 888 /* Turn on this to force it to be read-only. */ 889 mp->mnt_flag |= MNT_RDONLY; 890 #endif 891 ronly = (mp->mnt_flag & MNT_RDONLY) != 0; 892 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 893 error = VOP_OPEN(devvp, ronly ? FREAD : FREAD | FWRITE, FSCRED, NULL); 894 vn_unlock(devvp); 895 if (error) 896 return (error); 897 898 if (devvp->v_rdev->si_iosize_max != 0) 899 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; 900 if (mp->mnt_iosize_max > MAXPHYS) 901 mp->mnt_iosize_max = MAXPHYS; 902 903 bp = NULL; 904 ump = NULL; 905 if ((error = bread(devvp, SBOFF, SBSIZE, &bp)) != 0) 906 goto out; 907 es = (struct ext2fs *)bp->b_data; 908 if (ext2_check_sb_compat(es, dev, ronly) != 0) { 909 error = EINVAL; /* XXX needs translation */ 910 goto out; 911 } 912 if ((le16toh(es->e2fs_state) & E2FS_ISCLEAN) == 0 || 913 (le16toh(es->e2fs_state) & E2FS_ERRORS)) { 914 if (ronly || (mp->mnt_flag & MNT_FORCE)) { 915 printf( 916 "WARNING: Filesystem was not properly dismounted\n"); 917 } else { 918 printf( 919 "WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); 920 error = EPERM; 921 goto out; 922 } 923 } 924 ump = malloc(sizeof(*ump), M_EXT2MNT, M_WAITOK | M_ZERO); 925 926 /* 927 * I don't know whether this is the right strategy. Note that 928 * we dynamically allocate both an m_ext2fs and an ext2fs 929 * while Linux keeps the super block in a locked buffer. 930 */ 931 ump->um_e2fs = malloc(sizeof(struct m_ext2fs), 932 M_EXT2MNT, M_WAITOK | M_ZERO); 933 ump->um_e2fs->e2fs = malloc(sizeof(struct ext2fs), 934 M_EXT2MNT, M_WAITOK); 935 mtx_init(EXT2_MTX(ump), "EXT2FS Lock"); 936 bcopy(es, ump->um_e2fs->e2fs, (u_int)sizeof(struct ext2fs)); 937 if ((error = ext2_compute_sb_data(devvp, ump->um_e2fs->e2fs, ump->um_e2fs))) 938 goto out; 939 940 /* 941 * Calculate the maximum contiguous blocks and size of cluster summary 942 * array. In FFS this is done by newfs; however, the superblock 943 * in ext2fs doesn't have these variables, so we can calculate 944 * them here. 945 */ 946 e2fs_maxcontig = MAX(1, MAXPHYS / ump->um_e2fs->e2fs_bsize); 947 ump->um_e2fs->e2fs_contigsumsize = MIN(e2fs_maxcontig, EXT2_MAXCONTIG); 948 if (ump->um_e2fs->e2fs_contigsumsize > 0) { 949 size = ump->um_e2fs->e2fs_gcount * sizeof(int32_t); 950 ump->um_e2fs->e2fs_maxcluster = malloc(size, M_EXT2MNT, M_WAITOK); 951 size = ump->um_e2fs->e2fs_gcount * sizeof(struct csum); 952 ump->um_e2fs->e2fs_clustersum = malloc(size, M_EXT2MNT, M_WAITOK); 953 lp = ump->um_e2fs->e2fs_maxcluster; 954 sump = ump->um_e2fs->e2fs_clustersum; 955 for (i = 0; i < ump->um_e2fs->e2fs_gcount; i++, sump++) { 956 *lp++ = ump->um_e2fs->e2fs_contigsumsize; 957 sump->cs_init = 0; 958 sump->cs_sum = malloc((ump->um_e2fs->e2fs_contigsumsize + 1) * 959 sizeof(int32_t), M_EXT2MNT, M_WAITOK | M_ZERO); 960 } 961 } 962 963 brelse(bp); 964 bp = NULL; 965 fs = ump->um_e2fs; 966 fs->e2fs_ronly = ronly; /* ronly is set according to mnt_flags */ 967 968 /* 969 * If the fs is not mounted read-only, make sure the super block is 970 * always written back on a sync(). 971 */ 972 fs->e2fs_wasvalid = le16toh(fs->e2fs->e2fs_state) & E2FS_ISCLEAN ? 1 : 0; 973 if (ronly == 0) { 974 fs->e2fs_fmod = 1; /* mark it modified and set fs invalid */ 975 fs->e2fs->e2fs_state = 976 htole16(le16toh(fs->e2fs->e2fs_state) & ~E2FS_ISCLEAN); 977 } 978 mp->mnt_data = (qaddr_t)ump; 979 mp->mnt_stat.f_fsid.val[0] = devid_from_dev(dev); 980 mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; 981 mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; 982 mp->mnt_flag |= MNT_LOCAL; 983 ump->um_mountp = mp; 984 ump->um_dev = dev; 985 ump->um_devvp = devvp; 986 987 /* 988 * Setting those two parameters allowed us to use 989 * ufs_bmap w/o changse! 990 */ 991 ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); 992 ump->um_bptrtodb = le32toh(fs->e2fs->e2fs_log_bsize) + 1; 993 ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); 994 dev->si_mountpoint = mp; 995 996 vfs_add_vnodeops(mp, &ext2_vnodeops, &mp->mnt_vn_norm_ops); 997 vfs_add_vnodeops(mp, &ext2_specops, &mp->mnt_vn_spec_ops); 998 vfs_add_vnodeops(mp, &ext2_fifoops, &mp->mnt_vn_fifo_ops); 999 1000 if (ronly == 0) 1001 ext2_sbupdate(ump, MNT_WAIT); 1002 return (0); 1003 out: 1004 if (bp) 1005 brelse(bp); 1006 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 1007 VOP_CLOSE(devvp, ronly ? FREAD : FREAD | FWRITE, NULL); 1008 vn_unlock(devvp); 1009 if (ump) { 1010 mtx_uninit(EXT2_MTX(ump)); 1011 free(ump->um_e2fs->e2fs_gd, M_EXT2MNT); 1012 free(ump->um_e2fs->e2fs_contigdirs, M_EXT2MNT); 1013 free(ump->um_e2fs->e2fs, M_EXT2MNT); 1014 free(ump->um_e2fs, M_EXT2MNT); 1015 free(ump, M_EXT2MNT); 1016 mp->mnt_data = NULL; 1017 } 1018 return (error); 1019 } 1020 1021 /* 1022 * Unmount system call. 1023 */ 1024 static int 1025 ext2_unmount(struct mount *mp, int mntflags) 1026 { 1027 struct ext2mount *ump; 1028 struct m_ext2fs *fs; 1029 struct csum *sump; 1030 int error, flags, i, ronly; 1031 1032 flags = 0; 1033 if (mntflags & MNT_FORCE) { 1034 if (mp->mnt_flag & MNT_ROOTFS) 1035 return (EINVAL); 1036 flags |= FORCECLOSE; 1037 } 1038 if ((error = ext2_flushfiles(mp, flags)) != 0) 1039 return (error); 1040 ump = VFSTOEXT2(mp); 1041 fs = ump->um_e2fs; 1042 ronly = fs->e2fs_ronly; 1043 if (ronly == 0 && ext2_cgupdate(ump, MNT_WAIT) == 0) { 1044 if (fs->e2fs_wasvalid) 1045 fs->e2fs->e2fs_state = 1046 htole16(le16toh(fs->e2fs->e2fs_state) | E2FS_ISCLEAN); 1047 ext2_sbupdate(ump, MNT_WAIT); 1048 } 1049 1050 ump->um_devvp->v_rdev->si_mountpoint = NULL; 1051 1052 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 1053 error = VOP_CLOSE(ump->um_devvp, ronly ? FREAD : FREAD | FWRITE, NULL); 1054 vn_unlock(ump->um_devvp); 1055 1056 vrele(ump->um_devvp); 1057 sump = fs->e2fs_clustersum; 1058 for (i = 0; i < fs->e2fs_gcount; i++, sump++) 1059 free(sump->cs_sum, M_EXT2MNT); 1060 free(fs->e2fs_clustersum, M_EXT2MNT); 1061 free(fs->e2fs_maxcluster, M_EXT2MNT); 1062 free(fs->e2fs_gd, M_EXT2MNT); 1063 free(fs->e2fs_contigdirs, M_EXT2MNT); 1064 free(fs->e2fs, M_EXT2MNT); 1065 free(fs, M_EXT2MNT); 1066 free(ump, M_EXT2MNT); 1067 mp->mnt_data = NULL; 1068 mp->mnt_flag &= ~MNT_LOCAL; 1069 return (error); 1070 } 1071 1072 /* 1073 * Flush out all the files in a filesystem. 1074 */ 1075 static int 1076 ext2_flushfiles(struct mount *mp, int flags) 1077 { 1078 int error; 1079 1080 error = vflush(mp, 0, flags); 1081 return (error); 1082 } 1083 1084 /* 1085 * Get filesystem statistics. 1086 */ 1087 static int 1088 ext2_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred) 1089 { 1090 struct ext2mount *ump; 1091 struct m_ext2fs *fs; 1092 uint32_t overhead, overhead_per_group, ngdb; 1093 int i, ngroups; 1094 1095 ump = VFSTOEXT2(mp); 1096 fs = ump->um_e2fs; 1097 if (le16toh(fs->e2fs->e2fs_magic) != E2FS_MAGIC) 1098 panic("ext2_statfs"); 1099 1100 /* 1101 * Compute the overhead (FS structures) 1102 */ 1103 overhead_per_group = 1104 1 /* block bitmap */ + 1105 1 /* inode bitmap */ + 1106 fs->e2fs_itpg; 1107 overhead = le32toh(fs->e2fs->e2fs_first_dblock) + 1108 fs->e2fs_gcount * overhead_per_group; 1109 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1110 le32toh(fs->e2fs->e2fs_features_rocompat) & EXT2F_ROCOMPAT_SPARSESUPER) { 1111 for (i = 0, ngroups = 0; i < fs->e2fs_gcount; i++) { 1112 if (ext2_cg_has_sb(fs, i)) 1113 ngroups++; 1114 } 1115 } else { 1116 ngroups = fs->e2fs_gcount; 1117 } 1118 ngdb = fs->e2fs_gdbcount; 1119 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1120 le32toh(fs->e2fs->e2fs_features_compat) & EXT2F_COMPAT_RESIZE) 1121 ngdb += le16toh(fs->e2fs->e2fs_reserved_ngdb); 1122 overhead += ngroups * (1 /* superblock */ + ngdb); 1123 1124 sbp->f_type = mp->mnt_vfc->vfc_typenum; 1125 sbp->f_bsize = EXT2_FRAG_SIZE(fs); 1126 sbp->f_iosize = EXT2_BLOCK_SIZE(fs); 1127 sbp->f_blocks = fs->e2fs_bcount - overhead; 1128 sbp->f_bfree = fs->e2fs_fbcount; 1129 sbp->f_bavail = sbp->f_bfree - fs->e2fs_rbcount; 1130 sbp->f_files = le32toh(fs->e2fs->e2fs_icount); 1131 sbp->f_ffree = fs->e2fs_ficount; 1132 if (sbp != &mp->mnt_stat) { 1133 bcopy((caddr_t)mp->mnt_stat.f_mntfromname, 1134 (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); 1135 } 1136 return (0); 1137 } 1138 1139 static int 1140 ext2_statvfs(struct mount *mp, struct statvfs *sbp, struct ucred *cred) 1141 { 1142 struct ext2mount *ump; 1143 struct m_ext2fs *fs; 1144 uint32_t overhead, overhead_per_group, ngdb; 1145 int i, ngroups; 1146 1147 ump = VFSTOEXT2(mp); 1148 fs = ump->um_e2fs; 1149 if (le16toh(fs->e2fs->e2fs_magic) != E2FS_MAGIC) 1150 panic("ext2_statfs"); 1151 1152 /* 1153 * Compute the overhead (FS structures) 1154 */ 1155 overhead_per_group = 1156 1 /* block bitmap */ + 1157 1 /* inode bitmap */ + 1158 fs->e2fs_itpg; 1159 overhead = le32toh(fs->e2fs->e2fs_first_dblock) + 1160 fs->e2fs_gcount * overhead_per_group; 1161 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1162 le32toh(fs->e2fs->e2fs_features_rocompat) & EXT2F_ROCOMPAT_SPARSESUPER) { 1163 for (i = 0, ngroups = 0; i < fs->e2fs_gcount; i++) { 1164 if (ext2_cg_has_sb(fs, i)) 1165 ngroups++; 1166 } 1167 } else { 1168 ngroups = fs->e2fs_gcount; 1169 } 1170 ngdb = fs->e2fs_gdbcount; 1171 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1172 le32toh(fs->e2fs->e2fs_features_compat) & EXT2F_COMPAT_RESIZE) 1173 ngdb += le16toh(fs->e2fs->e2fs_reserved_ngdb); 1174 overhead += ngroups * (1 /* superblock */ + ngdb); 1175 1176 sbp->f_type = mp->mnt_vfc->vfc_typenum; 1177 sbp->f_bsize = EXT2_FRAG_SIZE(fs); 1178 sbp->f_frsize = EXT2_BLOCK_SIZE(fs); 1179 sbp->f_blocks = fs->e2fs_bcount - overhead; 1180 sbp->f_bfree = fs->e2fs_fbcount; 1181 sbp->f_bavail = sbp->f_bfree - fs->e2fs_rbcount; 1182 sbp->f_files = le32toh(fs->e2fs->e2fs_icount); 1183 sbp->f_ffree = fs->e2fs_ficount; 1184 return (0); 1185 } 1186 1187 static int 1188 ext2_sync_scan(struct mount *mp, struct vnode *vp, void *data) 1189 { 1190 struct scaninfo *info = data; 1191 struct inode *ip; 1192 int error; 1193 1194 ip = VTOI(vp); 1195 if (vp->v_type == VNON || 1196 ((ip->i_flag & 1197 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && 1198 (RB_EMPTY(&vp->v_rbdirty_tree) || (info->waitfor & MNT_LAZY)))) { 1199 return (0); 1200 } 1201 if ((error = VOP_FSYNC(vp, info->waitfor, 0)) != 0) 1202 info->allerror = error; 1203 return (0); 1204 } 1205 1206 /* 1207 * Go through the disk queues to initiate sandbagged IO; 1208 * go through the inodes to write those that have been modified; 1209 * initiate the writing of the super block if it has been modified. 1210 * 1211 * Note: we are always called with the filesystem marked `MPBUSY'. 1212 */ 1213 static int 1214 ext2_sync(struct mount *mp, int waitfor) 1215 { 1216 struct ext2mount *ump = VFSTOEXT2(mp); 1217 struct m_ext2fs *fs; 1218 struct scaninfo scaninfo; 1219 int error; 1220 1221 fs = ump->um_e2fs; 1222 if (fs->e2fs_fmod != 0 && fs->e2fs_ronly != 0) { /* XXX */ 1223 panic("ext2_sync: rofs mod fs=%s", fs->e2fs_fsmnt); 1224 } 1225 1226 /* 1227 * Write back each (modified) inode. 1228 */ 1229 scaninfo.allerror = 0; 1230 scaninfo.rescan = 1; 1231 scaninfo.waitfor = waitfor; 1232 while (scaninfo.rescan) { 1233 scaninfo.rescan = 0; 1234 vmntvnodescan(mp, VMSC_GETVP | VMSC_NOWAIT, 1235 NULL, ext2_sync_scan, &scaninfo); 1236 } 1237 1238 /* 1239 * Force stale filesystem control information to be flushed. 1240 */ 1241 if ((waitfor & MNT_LAZY) == 0) { 1242 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 1243 if ((error = VOP_FSYNC(ump->um_devvp, waitfor, 0)) != 0) 1244 scaninfo.allerror = error; 1245 vn_unlock(ump->um_devvp); 1246 } 1247 1248 /* 1249 * Write back modified superblock. 1250 */ 1251 if (fs->e2fs_fmod != 0) { 1252 fs->e2fs_fmod = 0; 1253 fs->e2fs->e2fs_wtime = htole32(time_second); 1254 if ((error = ext2_cgupdate(ump, waitfor)) != 0) 1255 scaninfo.allerror = error; 1256 } 1257 return (scaninfo.allerror); 1258 } 1259 1260 int 1261 ext2_alloc_vnode(struct mount *mp, ino_t ino, struct vnode **vpp) 1262 { 1263 struct ext2mount *ump; 1264 struct vnode *vp; 1265 struct inode *ip; 1266 int error; 1267 1268 ump = VFSTOEXT2(mp); 1269 /* 1270 * Lock out the creation of new entries in the FFS hash table in 1271 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate 1272 * may occur! 1273 */ 1274 if (ext2fs_inode_hash_lock) { 1275 while (ext2fs_inode_hash_lock) { 1276 ext2fs_inode_hash_lock = -1; 1277 tsleep(&ext2fs_inode_hash_lock, 0, "e2vget", 0); 1278 } 1279 return (-1); 1280 } 1281 ext2fs_inode_hash_lock = 1; 1282 1283 ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO); 1284 if (ip == NULL) 1285 return (ENOMEM); 1286 1287 /* Allocate a new vnode/inode. */ 1288 if ((error = getnewvnode(VT_EXT2FS, mp, &vp, VLKTIMEOUT, 1289 LK_CANRECURSE)) != 0) { 1290 if (ext2fs_inode_hash_lock < 0) 1291 wakeup(&ext2fs_inode_hash_lock); 1292 ext2fs_inode_hash_lock = 0; 1293 *vpp = NULL; 1294 free(ip, M_EXT2NODE); 1295 return (error); 1296 } 1297 //lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); 1298 vp->v_data = ip; 1299 ip->i_vnode = vp; 1300 ip->i_e2fs = ump->um_e2fs; 1301 ip->i_dev = ump->um_dev; 1302 ip->i_ump = ump; 1303 ip->i_number = ino; 1304 ip->i_block_group = ino_to_cg(ip->i_e2fs, ino); 1305 ip->i_next_alloc_block = 0; 1306 ip->i_next_alloc_goal = 0; 1307 1308 /* 1309 * Put it onto its hash chain. Since our vnode is locked, other 1310 * requests for this inode will block if they arrive while we are 1311 * sleeping waiting for old data structures to be purged or for the 1312 * contents of the disk portion of this inode to be read. 1313 */ 1314 if (ext2_ihashins(ip)) { 1315 printf("ext2_alloc_vnode: ihashins collision, retrying inode %ld\n", 1316 (long)ip->i_number); 1317 *vpp = NULL; 1318 vp->v_type = VBAD; 1319 vx_put(vp); 1320 free(ip, M_EXT2NODE); 1321 return (-1); 1322 } 1323 1324 if (ext2fs_inode_hash_lock < 0) 1325 wakeup(&ext2fs_inode_hash_lock); 1326 ext2fs_inode_hash_lock = 0; 1327 *vpp = vp; 1328 1329 return (0); 1330 } 1331 1332 /* 1333 * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it 1334 * in from disk. If it is in core, wait for the lock bit to clear, then 1335 * return the inode locked. Detection and handling of mount points must be 1336 * done by the calling routine. 1337 */ 1338 static int 1339 ext2_vget(struct mount *mp, struct vnode *dvp, ino_t ino, struct vnode **vpp) 1340 { 1341 struct m_ext2fs *fs; 1342 struct inode *ip; 1343 struct ext2mount *ump; 1344 struct buf *bp; 1345 struct vnode *vp; 1346 unsigned int i, used_blocks; 1347 int error; 1348 1349 ump = VFSTOEXT2(mp); 1350 restart: 1351 if ((*vpp = ext2_ihashget(ump->um_dev, ino)) != NULL) 1352 return (0); 1353 if (ext2_alloc_vnode(mp, ino, &vp) == -1) 1354 goto restart; 1355 ip = VTOI(vp); 1356 fs = ip->i_e2fs; 1357 1358 /* Read in the disk contents for the inode, copy into the inode. */ 1359 if ((error = bread(ump->um_devvp, fsbtodoff(fs, ino_to_fsba(fs, ino)), 1360 (int)fs->e2fs_bsize, &bp)) != 0) { 1361 /* 1362 * The inode does not contain anything useful, so it would 1363 * be misleading to leave it on its hash chain. With mode 1364 * still zero, it will be unlinked and returned to the free 1365 * list by vput(). 1366 */ 1367 vp->v_type = VBAD; 1368 brelse(bp); 1369 vx_put(vp); 1370 *vpp = NULL; 1371 return (error); 1372 } 1373 /* convert ext2 inode to dinode */ 1374 error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data + 1375 EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ino)), ip); 1376 if (error) { 1377 brelse(bp); 1378 vx_put(vp); 1379 *vpp = NULL; 1380 return (error); 1381 } 1382 1383 /* 1384 * Now we want to make sure that block pointers for unused 1385 * blocks are zeroed out - ext2_balloc depends on this 1386 * although for regular files and directories only 1387 * 1388 * If IN_E4EXTENTS is enabled, unused blocks are not zeroed 1389 * out because we could corrupt the extent tree. 1390 */ 1391 if (!(ip->i_flag & IN_E4EXTENTS) && 1392 (S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode))) { 1393 used_blocks = howmany(ip->i_size, fs->e2fs_bsize); 1394 for (i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) 1395 ip->i_db[i] = 0; 1396 } 1397 #ifdef EXT2FS_PRINT_EXTENTS 1398 ext2_print_inode(ip); 1399 ext4_ext_print_extent_tree_status(ip); 1400 #endif 1401 bqrelse(bp); 1402 1403 /* 1404 * Initialize the vnode from the inode, check for aliases. 1405 * Note that the underlying vnode may have changed. 1406 */ 1407 if ((error = ext2_vinit(mp, &vp)) != 0) { 1408 vx_put(vp); 1409 *vpp = NULL; 1410 return (error); 1411 } 1412 1413 /* 1414 * Finish inode initialization now that aliasing has been resolved. 1415 */ 1416 vref(ip->i_devvp); 1417 /* 1418 * Set up a generation number for this inode if it does not 1419 * already have one. This should only happen on old filesystems. 1420 */ 1421 if (ip->i_gen == 0) { 1422 ip->i_gen = krandom() / 2 + 1; 1423 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) 1424 ip->i_flag |= IN_MODIFIED; 1425 } 1426 /* 1427 * Return the locked and refd vnode. 1428 */ 1429 vx_downgrade(vp); /* downgrade VX lock to VN lock */ 1430 *vpp = vp; 1431 1432 return (0); 1433 } 1434 1435 /* 1436 * File handle to vnode 1437 * 1438 * Have to be really careful about stale file handles: 1439 * - check that the inode number is valid 1440 * - call ext2_vget() to get the locked inode 1441 * - check for an unallocated inode (i_mode == 0) 1442 * - check that the given client host has export rights and return 1443 * those rights via. exflagsp and credanonp 1444 */ 1445 static int 1446 ext2_fhtovp(struct mount *mp, struct vnode *rootvp, struct fid *fhp, 1447 struct vnode **vpp) 1448 { 1449 struct inode *ip; 1450 struct ufid *ufhp; 1451 struct vnode *nvp; 1452 struct m_ext2fs *fs; 1453 int error; 1454 1455 ufhp = (struct ufid *)fhp; 1456 fs = VFSTOEXT2(mp)->um_e2fs; 1457 if (ufhp->ufid_ino < EXT2_ROOTINO || 1458 ufhp->ufid_ino > fs->e2fs_gcount * fs->e2fs_ipg) 1459 return (ESTALE); 1460 1461 error = VFS_VGET(mp, NULL, LK_EXCLUSIVE, &nvp); 1462 if (error) { 1463 *vpp = NULLVP; 1464 return (error); 1465 } 1466 ip = VTOI(nvp); 1467 if (ip->i_mode == 0 || 1468 ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) { 1469 vput(nvp); 1470 *vpp = NULLVP; 1471 return (ESTALE); 1472 } 1473 *vpp = nvp; 1474 return (0); 1475 } 1476 1477 /* 1478 * Vnode pointer to File handle 1479 */ 1480 /* ARGSUSED */ 1481 static int 1482 ext2_vptofh(struct vnode *vp, struct fid *fhp) 1483 { 1484 struct inode *ip; 1485 struct ufid *ufhp; 1486 1487 ip = VTOI(vp); 1488 ufhp = (struct ufid *)fhp; 1489 ufhp->ufid_len = sizeof(struct ufid); 1490 ufhp->ufid_ino = ip->i_number; 1491 ufhp->ufid_gen = ip->i_gen; 1492 return (0); 1493 } 1494 1495 /* 1496 * This is the generic part of fhtovp called after the underlying 1497 * filesystem has validated the file handle. 1498 * 1499 * Verify that a host should have access to a filesystem. 1500 */ 1501 static int 1502 ext2_check_export(struct mount *mp, struct sockaddr *nam, int *exflagsp, 1503 struct ucred **credanonp) 1504 { 1505 struct netcred *np; 1506 struct ext2mount *ump; 1507 1508 ump = VFSTOEXT2(mp); 1509 /* 1510 * Get the export permission structure for this <mp, client> tuple. 1511 */ 1512 np = vfs_export_lookup(mp, &ump->um_export, nam); 1513 if (np == NULL) 1514 return (EACCES); 1515 1516 *exflagsp = np->netc_exflags; 1517 *credanonp = &np->netc_anon; 1518 return (0); 1519 } 1520 1521 /* 1522 * Write a superblock and associated information back to disk. 1523 */ 1524 static int 1525 ext2_sbupdate(struct ext2mount *mp, int waitfor) 1526 { 1527 struct m_ext2fs *fs = mp->um_e2fs; 1528 struct ext2fs *es = fs->e2fs; 1529 struct buf *bp; 1530 int error = 0; 1531 1532 es->e2fs_bcount = htole32(fs->e2fs_bcount & 0xffffffff); 1533 es->e2fs_rbcount = htole32(fs->e2fs_rbcount & 0xffffffff); 1534 es->e2fs_fbcount = htole32(fs->e2fs_fbcount & 0xffffffff); 1535 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 1536 es->e4fs_bcount_hi = htole32(fs->e2fs_bcount >> 32); 1537 es->e4fs_rbcount_hi = htole32(fs->e2fs_rbcount >> 32); 1538 es->e4fs_fbcount_hi = htole32(fs->e2fs_fbcount >> 32); 1539 } 1540 1541 es->e2fs_ficount = htole32(fs->e2fs_ficount); 1542 1543 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 1544 ext2_sb_csum_set(fs); 1545 1546 bp = getblk(mp->um_devvp, SBOFF, SBSIZE, 0, 0); 1547 bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2fs)); 1548 if (waitfor == MNT_WAIT) 1549 error = bwrite(bp); 1550 else 1551 bawrite(bp); 1552 1553 /* 1554 * The buffers for group descriptors, inode bitmaps and block bitmaps 1555 * are not busy at this point and are (hopefully) written by the 1556 * usual sync mechanism. No need to write them here. 1557 */ 1558 return (error); 1559 } 1560 1561 static int 1562 ext2_cgupdate(struct ext2mount *mp, int waitfor) 1563 { 1564 struct m_ext2fs *fs = mp->um_e2fs; 1565 struct buf *bp; 1566 int i, j, g_count = 0, error = 0, allerror = 0; 1567 1568 allerror = ext2_sbupdate(mp, waitfor); 1569 1570 /* Update gd csums */ 1571 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1572 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 1573 ext2_gd_csum_set(fs); 1574 1575 for (i = 0; i < fs->e2fs_gdbcount; i++) { 1576 bp = getblk(mp->um_devvp, fsbtodoff(fs, 1577 ext2_cg_location(fs, i)), 1578 fs->e2fs_bsize, 0, 0); 1579 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 1580 memcpy(bp->b_data, &fs->e2fs_gd[ 1581 i * fs->e2fs_bsize / sizeof(struct ext2_gd)], 1582 fs->e2fs_bsize); 1583 } else { 1584 for (j = 0; j < fs->e2fs_bsize / E2FS_REV0_GD_SIZE && 1585 g_count < fs->e2fs_gcount; j++, g_count++) 1586 memcpy(bp->b_data + j * E2FS_REV0_GD_SIZE, 1587 &fs->e2fs_gd[g_count], E2FS_REV0_GD_SIZE); 1588 } 1589 if (waitfor == MNT_WAIT) 1590 error = bwrite(bp); 1591 else 1592 bawrite(bp); 1593 } 1594 1595 if (!allerror && error) 1596 allerror = error; 1597 return (allerror); 1598 } 1599 1600 /* 1601 * Return the root of a filesystem. 1602 */ 1603 static int 1604 ext2_root(struct mount *mp, struct vnode **vpp) 1605 { 1606 struct vnode *nvp; 1607 int error; 1608 1609 error = VFS_VGET(mp, NULL, (ino_t)EXT2_ROOTINO, &nvp); 1610 if (error) 1611 return (error); 1612 *vpp = nvp; 1613 return (0); 1614 } 1615 1616 /* 1617 * Initialize ext2 filesystems, done only once. 1618 */ 1619 static int 1620 ext2_init(struct vfsconf *vfsp) 1621 { 1622 static int done; 1623 1624 if (done) 1625 return (0); 1626 done = 1; 1627 ext2_ihashinit(); 1628 1629 return (0); 1630 } 1631 1632 static int 1633 ext2_uninit(struct vfsconf *vfsp) 1634 { 1635 1636 ext2_ihashuninit(); 1637 1638 return (0); 1639 } 1640