1 /*- 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7 /*- 8 * SPDX-License-Identifier: BSD-3-Clause 9 * 10 * Copyright (c) 1989, 1991, 1993, 1994 11 * The Regents of the University of California. All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 38 * $FreeBSD$ 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/namei.h> 44 #include <sys/priv.h> 45 #include <sys/proc.h> 46 #include <sys/kernel.h> 47 #include <sys/vnode.h> 48 #include <sys/mount.h> 49 #include <sys/bio.h> 50 #include <sys/buf2.h> 51 #include <sys/conf.h> 52 #include <sys/endian.h> 53 #include <sys/fcntl.h> 54 #include <sys/malloc.h> 55 #include <sys/stat.h> 56 #include <sys/mutex2.h> 57 #include <sys/nlookup.h> 58 59 #include <vfs/ext2fs/fs.h> 60 #include <vfs/ext2fs/ext2_mount.h> 61 #include <vfs/ext2fs/inode.h> 62 63 #include <vfs/ext2fs/ext2fs.h> 64 #include <vfs/ext2fs/ext2_dinode.h> 65 #include <vfs/ext2fs/ext2_extern.h> 66 #include <vfs/ext2fs/ext2_extents.h> 67 68 SDT_PROVIDER_DECLARE(ext2fs); 69 /* 70 * ext2fs trace probe: 71 * arg0: verbosity. Higher numbers give more verbose messages 72 * arg1: Textual message 73 */ 74 SDT_PROBE_DEFINE2(ext2fs, , vfsops, trace, "int", "char*"); 75 SDT_PROBE_DEFINE2(ext2fs, , vfsops, ext2_cg_validate_error, "char*", "int"); 76 SDT_PROBE_DEFINE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "char*"); 77 78 79 static int ext2_flushfiles(struct mount *mp, int flags); 80 static int ext2_mountfs(struct vnode *, struct mount *); 81 static int ext2_reload(struct mount *mp); 82 static int ext2_sbupdate(struct ext2mount *, int); 83 static int ext2_cgupdate(struct ext2mount *, int); 84 static int ext2_init(struct vfsconf *); 85 static int ext2_uninit(struct vfsconf *); 86 static vfs_unmount_t ext2_unmount; 87 static vfs_root_t ext2_root; 88 static vfs_statfs_t ext2_statfs; 89 static vfs_statvfs_t ext2_statvfs; 90 static vfs_sync_t ext2_sync; 91 static vfs_vget_t ext2_vget; 92 static vfs_fhtovp_t ext2_fhtovp; 93 static vfs_vptofh_t ext2_vptofh; 94 static vfs_checkexp_t ext2_check_export; 95 static vfs_mount_t ext2_mount; 96 97 MALLOC_DEFINE(M_EXT2NODE, "ext2_node", "EXT2 vnode private part"); 98 static MALLOC_DEFINE(M_EXT2MNT, "ext2_mount", "EXT2 mount structure"); 99 100 static struct vfsops ext2fs_vfsops = { 101 .vfs_flags = 0, 102 .vfs_mount = ext2_mount, 103 .vfs_unmount = ext2_unmount, 104 .vfs_root = ext2_root, /* root inode via vget */ 105 .vfs_statfs = ext2_statfs, 106 .vfs_statvfs = ext2_statvfs, 107 .vfs_sync = ext2_sync, 108 .vfs_vget = ext2_vget, 109 .vfs_fhtovp = ext2_fhtovp, 110 .vfs_vptofh = ext2_vptofh, 111 .vfs_checkexp = ext2_check_export, 112 .vfs_init = ext2_init, 113 .vfs_uninit = ext2_uninit 114 }; 115 116 VFS_SET(ext2fs_vfsops, ext2fs, 0); 117 MODULE_VERSION(ext2fs, 1); 118 119 static int ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, 120 int ronly); 121 static int ext2_compute_sb_data(struct vnode * devvp, 122 struct ext2fs * es, struct m_ext2fs * fs); 123 124 int ext2fs_inode_hash_lock; 125 126 /* 127 * VFS Operations. 128 * 129 * mount system call 130 */ 131 static int 132 ext2_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred) 133 { 134 struct ext2_args args; 135 struct vnode *devvp; 136 struct ext2mount *ump = NULL; 137 struct m_ext2fs *fs; 138 struct nlookupdata nd; 139 mode_t accmode; 140 int error, flags; 141 size_t size; 142 143 if ((error = copyin(data, (caddr_t)&args, sizeof (struct ext2_args))) != 0) 144 return (error); 145 146 /* 147 * If updating, check whether changing from read-only to 148 * read/write; if there is no device name, that's all we do. 149 */ 150 if (mp->mnt_flag & MNT_UPDATE) { 151 ump = VFSTOEXT2(mp); 152 fs = ump->um_e2fs; 153 devvp = ump->um_devvp; 154 error = 0; 155 if (fs->e2fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { 156 error = VFS_SYNC(mp, MNT_WAIT); 157 if (error) 158 return (error); 159 flags = WRITECLOSE; 160 if (mp->mnt_flag & MNT_FORCE) 161 flags |= FORCECLOSE; 162 if (vfs_busy(mp, LK_NOWAIT)) 163 return (EBUSY); 164 error = ext2_flushfiles(mp, flags); 165 vfs_unbusy(mp); 166 if (error == 0 && fs->e2fs_wasvalid && 167 ext2_cgupdate(ump, MNT_WAIT) == 0) { 168 fs->e2fs->e2fs_state = 169 htole16((le16toh(fs->e2fs->e2fs_state) | 170 E2FS_ISCLEAN)); 171 ext2_sbupdate(ump, MNT_WAIT); 172 } 173 fs->e2fs_ronly = 1; 174 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 175 VOP_OPEN(devvp, FREAD, FSCRED, NULL); 176 VOP_CLOSE(devvp, FREAD | FWRITE, NULL); 177 vn_unlock(devvp); 178 } 179 if (!error && (mp->mnt_flag & MNT_RELOAD)) 180 error = ext2_reload(mp); 181 if (error) 182 return (error); 183 devvp = ump->um_devvp; 184 if (fs->e2fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { 185 if (ext2_check_sb_compat(fs->e2fs, devvp->v_rdev, 0)) 186 return (EPERM); 187 188 /* 189 * If upgrade to read-write by non-root, then verify 190 * that user has necessary permissions on the device. 191 */ 192 if (cred->cr_uid != 0) { 193 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 194 error = VOP_EACCESS(devvp, VREAD | VWRITE, cred); 195 if (error) { 196 vn_unlock(devvp); 197 return (error); 198 } 199 vn_unlock(devvp); 200 } 201 202 if ((le16toh(fs->e2fs->e2fs_state) & E2FS_ISCLEAN) == 0 || 203 (le16toh(fs->e2fs->e2fs_state) & E2FS_ERRORS)) { 204 if (mp->mnt_flag & MNT_FORCE) { 205 printf( 206 "WARNING: %s was not properly dismounted\n", fs->e2fs_fsmnt); 207 } else { 208 printf( 209 "WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", 210 fs->e2fs_fsmnt); 211 return (EPERM); 212 } 213 } 214 fs->e2fs->e2fs_state = 215 htole16(le16toh(fs->e2fs->e2fs_state) & ~E2FS_ISCLEAN); 216 (void)ext2_cgupdate(ump, MNT_WAIT); 217 fs->e2fs_ronly = 0; 218 mp->mnt_flag &= ~MNT_RDONLY; 219 220 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 221 VOP_OPEN(devvp, FREAD | FWRITE, FSCRED, NULL); 222 VOP_CLOSE(devvp, FREAD, NULL); 223 vn_unlock(devvp); 224 } 225 if (args.fspec == NULL) { 226 /* 227 * Process export requests. 228 */ 229 return (vfs_export(mp, &ump->um_export, &args.export)); 230 } 231 } 232 233 /* 234 * Not an update, or updating the name: look up the name 235 * and verify that it refers to a sensible disk device. 236 */ 237 devvp = NULL; 238 error = nlookup_init(&nd, args.fspec, UIO_USERSPACE, NLC_FOLLOW); 239 if (error == 0) 240 error = nlookup(&nd); 241 if (error == 0) 242 error = cache_vref(&nd.nl_nch, nd.nl_cred, &devvp); 243 nlookup_done(&nd); 244 if (error) 245 return (error); 246 247 if (!vn_isdisk(devvp, &error)) { 248 vrele(devvp); 249 return (error); 250 } 251 252 /* 253 * If mount by non-root, then verify that user has necessary 254 * permissions on the device. 255 * 256 * XXXRW: VOP_ACCESS() enough? 257 */ 258 if (cred->cr_uid != 0) { 259 accmode = VREAD; 260 if ((mp->mnt_flag & MNT_RDONLY) == 0) 261 accmode |= VWRITE; 262 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 263 if ((error = VOP_EACCESS(devvp, accmode, cred)) != 0) { 264 vput(devvp); 265 return (error); 266 } 267 vn_unlock(devvp); 268 } 269 270 if ((mp->mnt_flag & MNT_UPDATE) == 0) { 271 error = ext2_mountfs(devvp, mp); 272 } else { 273 if (devvp != ump->um_devvp) 274 error = EINVAL; /* needs translation */ 275 else 276 vrele(devvp); 277 } 278 if (error) { 279 vrele(devvp); 280 return (error); 281 } 282 ump = VFSTOEXT2(mp); 283 fs = ump->um_e2fs; 284 285 /* 286 * Note that this strncpy() is ok because of a check at the start 287 * of ext2_mount(). 288 */ 289 copyinstr(path, fs->e2fs_fsmnt, sizeof(fs->e2fs_fsmnt) - 1, &size); 290 bzero(fs->e2fs_fsmnt + size, sizeof(fs->e2fs_fsmnt) - size); 291 copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); 292 bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 293 ext2_statfs(mp, &mp->mnt_stat, cred); 294 return (0); 295 } 296 297 static int 298 ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, int ronly) 299 { 300 uint32_t i, mask; 301 302 if (le16toh(es->e2fs_magic) != E2FS_MAGIC) { 303 printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n", 304 devtoname(dev), le16toh(es->e2fs_magic), E2FS_MAGIC); 305 return (1); 306 } 307 if (le32toh(es->e2fs_rev) > E2FS_REV0) { 308 mask = le32toh(es->e2fs_features_incompat) & ~(EXT2F_INCOMPAT_SUPP); 309 if (mask) { 310 printf("WARNING: mount of %s denied due to " 311 "unsupported optional features:\n", devtoname(dev)); 312 for (i = 0; 313 i < sizeof(incompat)/sizeof(struct ext2_feature); 314 i++) 315 if (mask & incompat[i].mask) 316 printf("%s ", incompat[i].name); 317 printf("\n"); 318 return (1); 319 } 320 mask = le32toh(es->e2fs_features_rocompat) & ~EXT2F_ROCOMPAT_SUPP; 321 if (!ronly && mask) { 322 printf("WARNING: R/W mount of %s denied due to " 323 "unsupported optional features:\n", devtoname(dev)); 324 for (i = 0; 325 i < sizeof(ro_compat)/sizeof(struct ext2_feature); 326 i++) 327 if (mask & ro_compat[i].mask) 328 printf("%s ", ro_compat[i].name); 329 printf("\n"); 330 return (1); 331 } 332 } 333 return (0); 334 } 335 336 static e4fs_daddr_t 337 ext2_cg_location(struct m_ext2fs *fs, int number) 338 { 339 int cg, descpb, logical_sb, has_super = 0; 340 341 /* 342 * Adjust logical superblock block number. 343 * Godmar thinks: if the blocksize is greater than 1024, then 344 * the superblock is logically part of block zero. 345 */ 346 logical_sb = fs->e2fs_bsize > SBSIZE ? 0 : 1; 347 348 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) || 349 number < le32toh(fs->e2fs->e3fs_first_meta_bg)) 350 return (logical_sb + number + 1); 351 352 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) 353 descpb = fs->e2fs_bsize / sizeof(struct ext2_gd); 354 else 355 descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE; 356 357 cg = descpb * number; 358 359 if (ext2_cg_has_sb(fs, cg)) 360 has_super = 1; 361 362 return (has_super + cg * (e4fs_daddr_t)EXT2_BLOCKS_PER_GROUP(fs) + 363 le32toh(fs->e2fs->e2fs_first_dblock)); 364 } 365 366 static int 367 ext2_cg_validate(struct m_ext2fs *fs) 368 { 369 uint64_t b_bitmap; 370 uint64_t i_bitmap; 371 uint64_t i_tables; 372 uint64_t first_block, last_block, last_cg_block; 373 struct ext2_gd *gd; 374 unsigned int i, cg_count; 375 376 first_block = le32toh(fs->e2fs->e2fs_first_dblock); 377 last_cg_block = ext2_cg_number_gdb(fs, 0); 378 cg_count = fs->e2fs_gcount; 379 380 for (i = 0; i < fs->e2fs_gcount; i++) { 381 gd = &fs->e2fs_gd[i]; 382 383 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 384 i == fs->e2fs_gcount - 1) { 385 last_block = fs->e2fs_bcount - 1; 386 } else { 387 last_block = first_block + 388 (EXT2_BLOCKS_PER_GROUP(fs) - 1); 389 } 390 391 if ((cg_count == fs->e2fs_gcount) && 392 !(le16toh(gd->ext4bgd_flags) & EXT2_BG_INODE_ZEROED)) 393 cg_count = i; 394 395 b_bitmap = e2fs_gd_get_b_bitmap(gd); 396 if (b_bitmap == 0) { 397 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 398 "block bitmap is zero", i); 399 return (EINVAL); 400 401 } 402 if (b_bitmap <= last_cg_block) { 403 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 404 "block bitmap overlaps gds", i); 405 return (EINVAL); 406 } 407 if (b_bitmap < first_block || b_bitmap > last_block) { 408 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 409 "block bitmap not in group", i); 410 return (EINVAL); 411 } 412 413 i_bitmap = e2fs_gd_get_i_bitmap(gd); 414 if (i_bitmap == 0) { 415 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 416 "inode bitmap is zero", i); 417 return (EINVAL); 418 } 419 if (i_bitmap <= last_cg_block) { 420 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 421 "inode bitmap overlaps gds", i); 422 return (EINVAL); 423 } 424 if (i_bitmap < first_block || i_bitmap > last_block) { 425 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 426 "inode bitmap not in group blk", i); 427 return (EINVAL); 428 } 429 430 i_tables = e2fs_gd_get_i_tables(gd); 431 if (i_tables == 0) { 432 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 433 "inode table is zero", i); 434 return (EINVAL); 435 } 436 if (i_tables <= last_cg_block) { 437 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 438 "inode talbes overlaps gds", i); 439 return (EINVAL); 440 } 441 if (i_tables < first_block || 442 i_tables + fs->e2fs_itpg - 1 > last_block) { 443 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 444 "inode tables not in group blk", i); 445 return (EINVAL); 446 } 447 448 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG)) 449 first_block += EXT2_BLOCKS_PER_GROUP(fs); 450 } 451 452 return (0); 453 } 454 455 /* 456 * This computes the fields of the m_ext2fs structure from the 457 * data in the ext2fs structure read in. 458 */ 459 static int 460 ext2_compute_sb_data(struct vnode *devvp, struct ext2fs *es, 461 struct m_ext2fs *fs) 462 { 463 struct buf *bp; 464 uint32_t e2fs_descpb, e2fs_gdbcount_alloc; 465 int i, j; 466 int g_count = 0; 467 int error; 468 469 /* Check checksum features */ 470 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) && 471 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 472 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 473 "incorrect checksum features combination"); 474 return (EINVAL); 475 } 476 477 /* Precompute checksum seed for all metadata */ 478 ext2_sb_csum_set_seed(fs); 479 480 /* Verify sb csum if possible */ 481 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 482 error = ext2_sb_csum_verify(fs); 483 if (error) { 484 return (error); 485 } 486 } 487 488 /* Check for block size = 1K|2K|4K */ 489 if (le32toh(es->e2fs_log_bsize) > 2) { 490 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 491 "bad block size"); 492 return (EINVAL); 493 } 494 495 fs->e2fs_bshift = EXT2_MIN_BLOCK_LOG_SIZE + le32toh(es->e2fs_log_bsize); 496 fs->e2fs_bsize = 1U << fs->e2fs_bshift; 497 fs->e2fs_fsbtodb = le32toh(es->e2fs_log_bsize) + 1; 498 fs->e2fs_qbmask = fs->e2fs_bsize - 1; 499 500 /* Check for fragment size */ 501 if (le32toh(es->e2fs_log_fsize) > 502 (EXT2_MAX_FRAG_LOG_SIZE - EXT2_MIN_BLOCK_LOG_SIZE)) { 503 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 504 "invalid log cluster size"); 505 return (EINVAL); 506 } 507 508 fs->e2fs_fsize = EXT2_MIN_FRAG_SIZE << le32toh(es->e2fs_log_fsize); 509 if (fs->e2fs_fsize != fs->e2fs_bsize) { 510 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 511 "fragment size != block size"); 512 return (EINVAL); 513 } 514 515 fs->e2fs_fpb = fs->e2fs_bsize / fs->e2fs_fsize; 516 517 /* Check reserved gdt blocks for future filesystem expansion */ 518 if (le16toh(es->e2fs_reserved_ngdb) > (fs->e2fs_bsize / 4)) { 519 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 520 "number of reserved GDT blocks too large"); 521 return (EINVAL); 522 } 523 524 if (le32toh(es->e2fs_rev) == E2FS_REV0) { 525 fs->e2fs_isize = E2FS_REV0_INODE_SIZE; 526 } else { 527 fs->e2fs_isize = le16toh(es->e2fs_inode_size); 528 529 /* 530 * Check first ino. 531 */ 532 if (le32toh(es->e2fs_first_ino) < EXT2_FIRSTINO) { 533 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 534 "invalid first ino"); 535 return (EINVAL); 536 } 537 538 /* 539 * Simple sanity check for superblock inode size value. 540 */ 541 if (EXT2_INODE_SIZE(fs) < E2FS_REV0_INODE_SIZE || 542 EXT2_INODE_SIZE(fs) > fs->e2fs_bsize || 543 (fs->e2fs_isize & (fs->e2fs_isize - 1)) != 0) { 544 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 545 "invalid inode size"); 546 return (EINVAL); 547 } 548 } 549 550 /* Check group descriptors */ 551 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT) && 552 le16toh(es->e3fs_desc_size) != E2FS_64BIT_GD_SIZE) { 553 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 554 "unsupported 64bit descriptor size"); 555 return (EINVAL); 556 } 557 558 fs->e2fs_bpg = le32toh(es->e2fs_bpg); 559 fs->e2fs_fpg = le32toh(es->e2fs_fpg); 560 if (fs->e2fs_bpg == 0 || fs->e2fs_fpg == 0) { 561 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 562 "zero blocks/fragments per group"); 563 return (EINVAL); 564 } else if (fs->e2fs_bpg != fs->e2fs_fpg) { 565 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 566 "blocks per group not equal fragments per group"); 567 return (EINVAL); 568 } 569 570 if (fs->e2fs_bpg != fs->e2fs_bsize * 8) { 571 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 572 "non-standard group size unsupported"); 573 return (EINVAL); 574 } 575 576 fs->e2fs_ipb = fs->e2fs_bsize / EXT2_INODE_SIZE(fs); 577 if (fs->e2fs_ipb == 0 || 578 fs->e2fs_ipb > fs->e2fs_bsize / E2FS_REV0_INODE_SIZE) { 579 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 580 "bad inodes per block size"); 581 return (EINVAL); 582 } 583 584 fs->e2fs_ipg = le32toh(es->e2fs_ipg); 585 if (fs->e2fs_ipg < fs->e2fs_ipb || fs->e2fs_ipg > fs->e2fs_bsize * 8) { 586 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 587 "invalid inodes per group"); 588 return (EINVAL); 589 } 590 591 fs->e2fs_itpg = fs->e2fs_ipg / fs->e2fs_ipb; 592 593 fs->e2fs_bcount = le32toh(es->e2fs_bcount); 594 fs->e2fs_rbcount = le32toh(es->e2fs_rbcount); 595 fs->e2fs_fbcount = le32toh(es->e2fs_fbcount); 596 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 597 fs->e2fs_bcount |= (uint64_t)(le32toh(es->e4fs_bcount_hi)) << 32; 598 fs->e2fs_rbcount |= (uint64_t)(le32toh(es->e4fs_rbcount_hi)) << 32; 599 fs->e2fs_fbcount |= (uint64_t)(le32toh(es->e4fs_fbcount_hi)) << 32; 600 } 601 if (fs->e2fs_rbcount > fs->e2fs_bcount || 602 fs->e2fs_fbcount > fs->e2fs_bcount) { 603 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 604 "invalid block count"); 605 return (EINVAL); 606 } 607 608 fs->e2fs_ficount = le32toh(es->e2fs_ficount); 609 if (fs->e2fs_ficount > le32toh(es->e2fs_icount)) { 610 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 611 "invalid number of free inodes"); 612 return (EINVAL); 613 } 614 615 if (le32toh(es->e2fs_first_dblock) >= fs->e2fs_bcount) { 616 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 617 "first data block out of range"); 618 return (EINVAL); 619 } 620 621 fs->e2fs_gcount = howmany(fs->e2fs_bcount - 622 le32toh(es->e2fs_first_dblock), EXT2_BLOCKS_PER_GROUP(fs)); 623 if (fs->e2fs_gcount > ((uint64_t)1 << 32) - EXT2_DESCS_PER_BLOCK(fs)) { 624 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 625 "groups count too large"); 626 return (EINVAL); 627 } 628 629 /* Check for extra isize in big inodes. */ 630 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_EXTRA_ISIZE) && 631 EXT2_INODE_SIZE(fs) < sizeof(struct ext2fs_dinode)) { 632 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 633 "no space for extra inode timestamps"); 634 return (EINVAL); 635 } 636 637 /* s_resuid / s_resgid ? */ 638 639 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 640 e2fs_descpb = fs->e2fs_bsize / E2FS_64BIT_GD_SIZE; 641 e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount, e2fs_descpb); 642 } else { 643 e2fs_descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE; 644 e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount, 645 fs->e2fs_bsize / sizeof(struct ext2_gd)); 646 } 647 fs->e2fs_gdbcount = howmany(fs->e2fs_gcount, e2fs_descpb); 648 fs->e2fs_gd = malloc(e2fs_gdbcount_alloc * fs->e2fs_bsize, 649 M_EXT2MNT, M_WAITOK | M_ZERO); 650 fs->e2fs_contigdirs = malloc(fs->e2fs_gcount * 651 sizeof(*fs->e2fs_contigdirs), M_EXT2MNT, M_WAITOK | M_ZERO); 652 653 for (i = 0; i < fs->e2fs_gdbcount; i++) { 654 error = ext2_bread(devvp, 655 fsbtodoff(fs, ext2_cg_location(fs, i)), 656 fs->e2fs_bsize, &bp); 657 if (error) { 658 /* 659 * fs->e2fs_gd and fs->e2fs_contigdirs 660 * will be freed later by the caller, 661 * because this function could be called from 662 * MNT_UPDATE path. 663 */ 664 return (error); 665 } 666 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 667 memcpy(&fs->e2fs_gd[ 668 i * fs->e2fs_bsize / sizeof(struct ext2_gd)], 669 bp->b_data, fs->e2fs_bsize); 670 } else { 671 for (j = 0; j < e2fs_descpb && 672 g_count < fs->e2fs_gcount; j++, g_count++) 673 memcpy(&fs->e2fs_gd[g_count], 674 bp->b_data + j * E2FS_REV0_GD_SIZE, 675 E2FS_REV0_GD_SIZE); 676 } 677 ext2_brelse(bp); 678 bp = NULL; 679 } 680 681 /* Validate cgs consistency */ 682 error = ext2_cg_validate(fs); 683 if (error) 684 return (error); 685 686 /* Verfy cgs csum */ 687 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 688 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 689 error = ext2_gd_csum_verify(fs, devvp->v_rdev); 690 if (error) 691 return (error); 692 } 693 /* Initialization for the ext2 Orlov allocator variant. */ 694 fs->e2fs_total_dir = 0; 695 for (i = 0; i < fs->e2fs_gcount; i++) 696 fs->e2fs_total_dir += e2fs_gd_get_ndirs(&fs->e2fs_gd[i]); 697 698 if (le32toh(es->e2fs_rev) == E2FS_REV0 || 699 !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_LARGEFILE)) 700 fs->e2fs_maxfilesize = 0x7fffffff; 701 else { 702 fs->e2fs_maxfilesize = 0xffffffffffff; 703 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_HUGE_FILE)) 704 fs->e2fs_maxfilesize = 0x7fffffffffffffff; 705 } 706 if (le32toh(es->e4fs_flags) & E2FS_UNSIGNED_HASH) { 707 fs->e2fs_uhash = 3; 708 } else if ((le32toh(es->e4fs_flags) & E2FS_SIGNED_HASH) == 0) { 709 #ifdef __CHAR_UNSIGNED__ 710 es->e4fs_flags = htole32(le32toh(es->e4fs_flags) | E2FS_UNSIGNED_HASH); 711 fs->e2fs_uhash = 3; 712 #else 713 es->e4fs_flags = htole32(le32toh(es->e4fs_flags) | E2FS_SIGNED_HASH); 714 #endif 715 } 716 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 717 error = ext2_sb_csum_verify(fs); 718 719 return (error); 720 } 721 722 struct scaninfo { 723 int rescan; 724 int allerror; 725 int waitfor; 726 struct vnode *devvp; 727 struct m_ext2fs *fs; 728 }; 729 730 static int 731 ext2_reload_scan(struct mount *mp, struct vnode *vp, void *data) 732 { 733 struct scaninfo *info = data; 734 struct inode *ip; 735 struct buf *bp; 736 int error; 737 738 /* 739 * Try to recycle 740 */ 741 if (vrecycle(vp)) 742 return (0); 743 744 /* 745 * Step 1: invalidate all cached file data. 746 */ 747 if (vinvalbuf(vp, 0, 0, 0)) 748 panic("ext2_reload: dirty2"); 749 /* 750 * Step 2: re-read inode data for all active vnodes. 751 */ 752 ip = VTOI(vp); 753 error = ext2_bread(info->devvp, 754 fsbtodoff(info->fs, ino_to_fsba(info->fs, ip->i_number)), 755 (int)info->fs->e2fs_bsize, &bp); 756 if (error) 757 return (error); 758 759 error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data + 760 EXT2_INODE_SIZE(info->fs) * ino_to_fsbo(info->fs, ip->i_number)), 761 ip); 762 763 ext2_brelse(bp); 764 return (error); 765 } 766 767 /* 768 * Reload all incore data for a filesystem (used after running fsck on 769 * the root filesystem and finding things to fix). The filesystem must 770 * be mounted read-only. 771 * 772 * Things to do to update the mount: 773 * 1) invalidate all cached meta-data. 774 * 2) re-read superblock from disk. 775 * 3) invalidate all cluster summary information. 776 * 4) invalidate all inactive vnodes. 777 * 5) invalidate all cached file data. 778 * 6) re-read inode data for all active vnodes. 779 * XXX we are missing some steps, in particular # 3, this has to be reviewed. 780 */ 781 static int 782 ext2_reload(struct mount *mp) 783 { 784 struct vnode *devvp; 785 struct buf *bp; 786 struct ext2fs *es; 787 struct m_ext2fs *fs; 788 struct csum *sump; 789 struct scaninfo scaninfo; 790 int error, i; 791 int32_t *lp; 792 793 if ((mp->mnt_flag & MNT_RDONLY) == 0) 794 return (EINVAL); 795 /* 796 * Step 1: invalidate all cached meta-data. 797 */ 798 devvp = VFSTOEXT2(mp)->um_devvp; 799 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 800 if (vinvalbuf(devvp, 0, 0, 0) != 0) 801 panic("ext2_reload: dirty1"); 802 vn_unlock(devvp); 803 804 /* 805 * Step 2: re-read superblock from disk. 806 * constants have been adjusted for ext2 807 */ 808 if ((error = ext2_bread(devvp, SBOFF, SBSIZE, &bp)) != 0) 809 return (error); 810 es = (struct ext2fs *)bp->b_data; 811 if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) { 812 ext2_brelse(bp); 813 return (EIO); /* XXX needs translation */ 814 } 815 fs = VFSTOEXT2(mp)->um_e2fs; 816 bcopy(bp->b_data, fs->e2fs, sizeof(struct ext2fs)); 817 818 if ((error = ext2_compute_sb_data(devvp, es, fs)) != 0) { 819 ext2_brelse(bp); 820 return (error); 821 } 822 #ifdef UNKLAR 823 if (fs->fs_sbsize < SBSIZE) 824 bp->b_flags |= B_INVAL; 825 #endif 826 ext2_brelse(bp); 827 828 /* 829 * Step 3: invalidate all cluster summary information. 830 */ 831 if (fs->e2fs_contigsumsize > 0) { 832 lp = fs->e2fs_maxcluster; 833 sump = fs->e2fs_clustersum; 834 for (i = 0; i < fs->e2fs_gcount; i++, sump++) { 835 *lp++ = fs->e2fs_contigsumsize; 836 sump->cs_init = 0; 837 bzero(sump->cs_sum, fs->e2fs_contigsumsize + 1); 838 } 839 } 840 841 scaninfo.rescan = 1; 842 scaninfo.devvp = devvp; 843 scaninfo.fs = fs; 844 while (error == 0 && scaninfo.rescan) { 845 scaninfo.rescan = 0; 846 error = vmntvnodescan(mp, VMSC_GETVX, NULL, ext2_reload_scan, 847 &scaninfo); 848 } 849 return (error); 850 } 851 852 /* 853 * Common code for mount and mountroot. 854 */ 855 static int 856 ext2_mountfs(struct vnode *devvp, struct mount *mp) 857 { 858 struct ext2mount *ump; 859 struct buf *bp; 860 struct m_ext2fs *fs; 861 struct ext2fs *es; 862 struct cdev *dev = devvp->v_rdev; 863 struct csum *sump; 864 int error; 865 int ronly; 866 int i; 867 u_long size; 868 int32_t *lp; 869 int32_t e2fs_maxcontig; 870 871 /* 872 * Disallow multiple mounts of the same device. 873 * Disallow mounting of a device that is currently in use 874 * (except for root, which might share swap device for miniroot). 875 * Flush out any old buffers remaining from a previous use. 876 */ 877 if ((error = vfs_mountedon(devvp)) != 0) 878 return (error); 879 if (vcount(devvp) > 0) 880 return (EBUSY); 881 if ((error = vinvalbuf(devvp, V_SAVE, 0, 0)) != 0) 882 return (error); 883 #ifdef READONLY 884 /* Turn on this to force it to be read-only. */ 885 mp->mnt_flag |= MNT_RDONLY; 886 #endif 887 ronly = (mp->mnt_flag & MNT_RDONLY) != 0; 888 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 889 error = VOP_OPEN(devvp, ronly ? FREAD : FREAD | FWRITE, FSCRED, NULL); 890 vn_unlock(devvp); 891 if (error) 892 return (error); 893 894 if (devvp->v_rdev->si_iosize_max != 0) 895 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; 896 if (mp->mnt_iosize_max > MAXPHYS) 897 mp->mnt_iosize_max = MAXPHYS; 898 899 bp = NULL; 900 ump = NULL; 901 if ((error = ext2_bread(devvp, SBOFF, SBSIZE, &bp)) != 0) 902 goto out; 903 es = (struct ext2fs *)bp->b_data; 904 if (ext2_check_sb_compat(es, dev, ronly) != 0) { 905 error = EINVAL; /* XXX needs translation */ 906 goto out; 907 } 908 if ((le16toh(es->e2fs_state) & E2FS_ISCLEAN) == 0 || 909 (le16toh(es->e2fs_state) & E2FS_ERRORS)) { 910 if (ronly || (mp->mnt_flag & MNT_FORCE)) { 911 printf( 912 "WARNING: Filesystem was not properly dismounted\n"); 913 } else { 914 printf( 915 "WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); 916 error = EPERM; 917 goto out; 918 } 919 } 920 ump = malloc(sizeof(*ump), M_EXT2MNT, M_WAITOK | M_ZERO); 921 922 /* 923 * I don't know whether this is the right strategy. Note that 924 * we dynamically allocate both an m_ext2fs and an ext2fs 925 * while Linux keeps the super block in a locked buffer. 926 */ 927 ump->um_e2fs = malloc(sizeof(struct m_ext2fs), 928 M_EXT2MNT, M_WAITOK | M_ZERO); 929 ump->um_e2fs->e2fs = malloc(sizeof(struct ext2fs), 930 M_EXT2MNT, M_WAITOK); 931 mtx_init(EXT2_MTX(ump), "EXT2FS Lock"); 932 bcopy(es, ump->um_e2fs->e2fs, (u_int)sizeof(struct ext2fs)); 933 if ((error = ext2_compute_sb_data(devvp, ump->um_e2fs->e2fs, ump->um_e2fs))) 934 goto out; 935 936 /* 937 * Calculate the maximum contiguous blocks and size of cluster summary 938 * array. In FFS this is done by newfs; however, the superblock 939 * in ext2fs doesn't have these variables, so we can calculate 940 * them here. 941 */ 942 e2fs_maxcontig = MAX(1, MAXPHYS / ump->um_e2fs->e2fs_bsize); 943 ump->um_e2fs->e2fs_contigsumsize = MIN(e2fs_maxcontig, EXT2_MAXCONTIG); 944 if (ump->um_e2fs->e2fs_contigsumsize > 0) { 945 size = ump->um_e2fs->e2fs_gcount * sizeof(int32_t); 946 ump->um_e2fs->e2fs_maxcluster = malloc(size, M_EXT2MNT, M_WAITOK); 947 size = ump->um_e2fs->e2fs_gcount * sizeof(struct csum); 948 ump->um_e2fs->e2fs_clustersum = malloc(size, M_EXT2MNT, M_WAITOK); 949 lp = ump->um_e2fs->e2fs_maxcluster; 950 sump = ump->um_e2fs->e2fs_clustersum; 951 for (i = 0; i < ump->um_e2fs->e2fs_gcount; i++, sump++) { 952 *lp++ = ump->um_e2fs->e2fs_contigsumsize; 953 sump->cs_init = 0; 954 sump->cs_sum = malloc((ump->um_e2fs->e2fs_contigsumsize + 1) * 955 sizeof(int32_t), M_EXT2MNT, M_WAITOK | M_ZERO); 956 } 957 } 958 959 ext2_brelse(bp); 960 bp = NULL; 961 fs = ump->um_e2fs; 962 fs->e2fs_ronly = ronly; /* ronly is set according to mnt_flags */ 963 964 /* 965 * If the fs is not mounted read-only, make sure the super block is 966 * always written back on a sync(). 967 */ 968 fs->e2fs_wasvalid = le16toh(fs->e2fs->e2fs_state) & E2FS_ISCLEAN ? 1 : 0; 969 if (ronly == 0) { 970 fs->e2fs_fmod = 1; /* mark it modified and set fs invalid */ 971 fs->e2fs->e2fs_state = 972 htole16(le16toh(fs->e2fs->e2fs_state) & ~E2FS_ISCLEAN); 973 } 974 mp->mnt_data = (qaddr_t)ump; 975 mp->mnt_stat.f_fsid.val[0] = devid_from_dev(dev); 976 mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; 977 mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; 978 mp->mnt_flag |= MNT_LOCAL; 979 ump->um_mountp = mp; 980 ump->um_dev = dev; 981 ump->um_devvp = devvp; 982 983 /* 984 * Setting those two parameters allowed us to use 985 * ufs_bmap w/o changse! 986 */ 987 ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); 988 ump->um_bptrtodb = le32toh(fs->e2fs->e2fs_log_bsize) + 1; 989 ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); 990 dev->si_mountpoint = mp; 991 992 vfs_add_vnodeops(mp, &ext2_vnodeops, &mp->mnt_vn_norm_ops); 993 vfs_add_vnodeops(mp, &ext2_specops, &mp->mnt_vn_spec_ops); 994 vfs_add_vnodeops(mp, &ext2_fifoops, &mp->mnt_vn_fifo_ops); 995 996 if (ronly == 0) 997 ext2_sbupdate(ump, MNT_WAIT); 998 return (0); 999 out: 1000 if (bp) 1001 ext2_brelse(bp); 1002 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 1003 VOP_CLOSE(devvp, ronly ? FREAD : FREAD | FWRITE, NULL); 1004 vn_unlock(devvp); 1005 if (ump) { 1006 mtx_uninit(EXT2_MTX(ump)); 1007 free(ump->um_e2fs->e2fs_gd, M_EXT2MNT); 1008 free(ump->um_e2fs->e2fs_contigdirs, M_EXT2MNT); 1009 free(ump->um_e2fs->e2fs, M_EXT2MNT); 1010 free(ump->um_e2fs, M_EXT2MNT); 1011 free(ump, M_EXT2MNT); 1012 mp->mnt_data = NULL; 1013 } 1014 return (error); 1015 } 1016 1017 /* 1018 * Unmount system call. 1019 */ 1020 static int 1021 ext2_unmount(struct mount *mp, int mntflags) 1022 { 1023 struct ext2mount *ump; 1024 struct m_ext2fs *fs; 1025 struct csum *sump; 1026 int error, flags, i, ronly; 1027 1028 flags = 0; 1029 if (mntflags & MNT_FORCE) { 1030 if (mp->mnt_flag & MNT_ROOTFS) 1031 return (EINVAL); 1032 flags |= FORCECLOSE; 1033 } 1034 if ((error = ext2_flushfiles(mp, flags)) != 0) 1035 return (error); 1036 ump = VFSTOEXT2(mp); 1037 fs = ump->um_e2fs; 1038 ronly = fs->e2fs_ronly; 1039 if (ronly == 0 && ext2_cgupdate(ump, MNT_WAIT) == 0) { 1040 if (fs->e2fs_wasvalid) 1041 fs->e2fs->e2fs_state = 1042 htole16(le16toh(fs->e2fs->e2fs_state) | E2FS_ISCLEAN); 1043 ext2_sbupdate(ump, MNT_WAIT); 1044 } 1045 1046 ump->um_devvp->v_rdev->si_mountpoint = NULL; 1047 1048 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 1049 error = VOP_CLOSE(ump->um_devvp, ronly ? FREAD : FREAD | FWRITE, NULL); 1050 vn_unlock(ump->um_devvp); 1051 1052 vrele(ump->um_devvp); 1053 sump = fs->e2fs_clustersum; 1054 for (i = 0; i < fs->e2fs_gcount; i++, sump++) 1055 free(sump->cs_sum, M_EXT2MNT); 1056 free(fs->e2fs_clustersum, M_EXT2MNT); 1057 free(fs->e2fs_maxcluster, M_EXT2MNT); 1058 free(fs->e2fs_gd, M_EXT2MNT); 1059 free(fs->e2fs_contigdirs, M_EXT2MNT); 1060 free(fs->e2fs, M_EXT2MNT); 1061 free(fs, M_EXT2MNT); 1062 free(ump, M_EXT2MNT); 1063 mp->mnt_data = NULL; 1064 mp->mnt_flag &= ~MNT_LOCAL; 1065 return (error); 1066 } 1067 1068 /* 1069 * Flush out all the files in a filesystem. 1070 */ 1071 static int 1072 ext2_flushfiles(struct mount *mp, int flags) 1073 { 1074 int error; 1075 1076 error = vflush(mp, 0, flags); 1077 return (error); 1078 } 1079 1080 /* 1081 * Get filesystem statistics. 1082 */ 1083 static int 1084 ext2_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred) 1085 { 1086 struct ext2mount *ump; 1087 struct m_ext2fs *fs; 1088 uint32_t overhead, overhead_per_group, ngdb; 1089 int i, ngroups; 1090 1091 ump = VFSTOEXT2(mp); 1092 fs = ump->um_e2fs; 1093 if (le16toh(fs->e2fs->e2fs_magic) != E2FS_MAGIC) 1094 panic("ext2_statfs"); 1095 1096 /* 1097 * Compute the overhead (FS structures) 1098 */ 1099 overhead_per_group = 1100 1 /* block bitmap */ + 1101 1 /* inode bitmap */ + 1102 fs->e2fs_itpg; 1103 overhead = le32toh(fs->e2fs->e2fs_first_dblock) + 1104 fs->e2fs_gcount * overhead_per_group; 1105 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1106 le32toh(fs->e2fs->e2fs_features_rocompat) & EXT2F_ROCOMPAT_SPARSESUPER) { 1107 for (i = 0, ngroups = 0; i < fs->e2fs_gcount; i++) { 1108 if (ext2_cg_has_sb(fs, i)) 1109 ngroups++; 1110 } 1111 } else { 1112 ngroups = fs->e2fs_gcount; 1113 } 1114 ngdb = fs->e2fs_gdbcount; 1115 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1116 le32toh(fs->e2fs->e2fs_features_compat) & EXT2F_COMPAT_RESIZE) 1117 ngdb += le16toh(fs->e2fs->e2fs_reserved_ngdb); 1118 overhead += ngroups * (1 /* superblock */ + ngdb); 1119 1120 sbp->f_type = mp->mnt_vfc->vfc_typenum; 1121 sbp->f_bsize = EXT2_FRAG_SIZE(fs); 1122 sbp->f_iosize = EXT2_BLOCK_SIZE(fs); 1123 sbp->f_blocks = fs->e2fs_bcount - overhead; 1124 sbp->f_bfree = fs->e2fs_fbcount; 1125 sbp->f_bavail = sbp->f_bfree - fs->e2fs_rbcount; 1126 sbp->f_files = le32toh(fs->e2fs->e2fs_icount); 1127 sbp->f_ffree = fs->e2fs_ficount; 1128 if (sbp != &mp->mnt_stat) { 1129 bcopy((caddr_t)mp->mnt_stat.f_mntfromname, 1130 (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); 1131 } 1132 return (0); 1133 } 1134 1135 static int 1136 ext2_statvfs(struct mount *mp, struct statvfs *sbp, struct ucred *cred) 1137 { 1138 struct ext2mount *ump; 1139 struct m_ext2fs *fs; 1140 uint32_t overhead, overhead_per_group, ngdb; 1141 int i, ngroups; 1142 1143 ump = VFSTOEXT2(mp); 1144 fs = ump->um_e2fs; 1145 if (le16toh(fs->e2fs->e2fs_magic) != E2FS_MAGIC) 1146 panic("ext2_statfs"); 1147 1148 /* 1149 * Compute the overhead (FS structures) 1150 */ 1151 overhead_per_group = 1152 1 /* block bitmap */ + 1153 1 /* inode bitmap */ + 1154 fs->e2fs_itpg; 1155 overhead = le32toh(fs->e2fs->e2fs_first_dblock) + 1156 fs->e2fs_gcount * overhead_per_group; 1157 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1158 le32toh(fs->e2fs->e2fs_features_rocompat) & EXT2F_ROCOMPAT_SPARSESUPER) { 1159 for (i = 0, ngroups = 0; i < fs->e2fs_gcount; i++) { 1160 if (ext2_cg_has_sb(fs, i)) 1161 ngroups++; 1162 } 1163 } else { 1164 ngroups = fs->e2fs_gcount; 1165 } 1166 ngdb = fs->e2fs_gdbcount; 1167 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1168 le32toh(fs->e2fs->e2fs_features_compat) & EXT2F_COMPAT_RESIZE) 1169 ngdb += le16toh(fs->e2fs->e2fs_reserved_ngdb); 1170 overhead += ngroups * (1 /* superblock */ + ngdb); 1171 1172 sbp->f_type = mp->mnt_vfc->vfc_typenum; 1173 sbp->f_bsize = EXT2_FRAG_SIZE(fs); 1174 sbp->f_frsize = EXT2_BLOCK_SIZE(fs); 1175 sbp->f_blocks = fs->e2fs_bcount - overhead; 1176 sbp->f_bfree = fs->e2fs_fbcount; 1177 sbp->f_bavail = sbp->f_bfree - fs->e2fs_rbcount; 1178 sbp->f_files = le32toh(fs->e2fs->e2fs_icount); 1179 sbp->f_ffree = fs->e2fs_ficount; 1180 return (0); 1181 } 1182 1183 static int 1184 ext2_sync_scan(struct mount *mp, struct vnode *vp, void *data) 1185 { 1186 struct scaninfo *info = data; 1187 struct inode *ip; 1188 int error; 1189 1190 ip = VTOI(vp); 1191 if (vp->v_type == VNON || 1192 ((ip->i_flag & 1193 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && 1194 (RB_EMPTY(&vp->v_rbdirty_tree) || (info->waitfor & MNT_LAZY)))) { 1195 return (0); 1196 } 1197 if ((error = VOP_FSYNC(vp, info->waitfor, 0)) != 0) 1198 info->allerror = error; 1199 return (0); 1200 } 1201 1202 /* 1203 * Go through the disk queues to initiate sandbagged IO; 1204 * go through the inodes to write those that have been modified; 1205 * initiate the writing of the super block if it has been modified. 1206 * 1207 * Note: we are always called with the filesystem marked `MPBUSY'. 1208 */ 1209 static int 1210 ext2_sync(struct mount *mp, int waitfor) 1211 { 1212 struct ext2mount *ump = VFSTOEXT2(mp); 1213 struct m_ext2fs *fs; 1214 struct scaninfo scaninfo; 1215 int error; 1216 1217 fs = ump->um_e2fs; 1218 if (fs->e2fs_fmod != 0 && fs->e2fs_ronly != 0) { /* XXX */ 1219 panic("ext2_sync: rofs mod fs=%s", fs->e2fs_fsmnt); 1220 } 1221 1222 /* 1223 * Write back each (modified) inode. 1224 */ 1225 scaninfo.allerror = 0; 1226 scaninfo.rescan = 1; 1227 scaninfo.waitfor = waitfor; 1228 while (scaninfo.rescan) { 1229 scaninfo.rescan = 0; 1230 vmntvnodescan(mp, VMSC_GETVP | VMSC_NOWAIT, 1231 NULL, ext2_sync_scan, &scaninfo); 1232 } 1233 1234 /* 1235 * Force stale filesystem control information to be flushed. 1236 */ 1237 if ((waitfor & MNT_LAZY) == 0) { 1238 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 1239 if ((error = VOP_FSYNC(ump->um_devvp, waitfor, 0)) != 0) 1240 scaninfo.allerror = error; 1241 vn_unlock(ump->um_devvp); 1242 } 1243 1244 /* 1245 * Write back modified superblock. 1246 */ 1247 if (fs->e2fs_fmod != 0) { 1248 fs->e2fs_fmod = 0; 1249 fs->e2fs->e2fs_wtime = htole32(time_second); 1250 if ((error = ext2_cgupdate(ump, waitfor)) != 0) 1251 scaninfo.allerror = error; 1252 } 1253 return (scaninfo.allerror); 1254 } 1255 1256 /* 1257 * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it 1258 * in from disk. If it is in core, wait for the lock bit to clear, then 1259 * return the inode locked. Detection and handling of mount points must be 1260 * done by the calling routine. 1261 */ 1262 static int 1263 ext2_vget(struct mount *mp, struct vnode *dvp, ino_t ino, struct vnode **vpp) 1264 { 1265 struct m_ext2fs *fs; 1266 struct inode *ip; 1267 struct ext2mount *ump; 1268 struct buf *bp; 1269 struct vnode *vp; 1270 cdev_t dev; 1271 unsigned int i, used_blocks; 1272 int error; 1273 1274 ump = VFSTOEXT2(mp); 1275 dev = ump->um_dev; 1276 restart: 1277 if ((*vpp = ext2_ihashget(dev, ino)) != NULL) 1278 return (0); 1279 1280 /* 1281 * Lock out the creation of new entries in the FFS hash table in 1282 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate 1283 * may occur! 1284 */ 1285 if (ext2fs_inode_hash_lock) { 1286 while (ext2fs_inode_hash_lock) { 1287 ext2fs_inode_hash_lock = -1; 1288 tsleep(&ext2fs_inode_hash_lock, 0, "e2vget", 0); 1289 } 1290 goto restart; 1291 } 1292 ext2fs_inode_hash_lock = 1; 1293 1294 /* 1295 * If this MALLOC() is performed after the getnewvnode() 1296 * it might block, leaving a vnode with a NULL v_data to be 1297 * found by ext2_sync() if a sync happens to fire right then, 1298 * which will cause a panic because ext2_sync() blindly 1299 * dereferences vp->v_data (as well it should). 1300 * 1301 * XXX this may no longer be true since getnewvnode returns a 1302 * VX locked vnode now. 1303 */ 1304 ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO); 1305 if (ip == NULL) { 1306 return (ENOMEM); 1307 } 1308 1309 /* Allocate a new vnode/inode. */ 1310 if ((error = getnewvnode(VT_EXT2FS, mp, &vp, VLKTIMEOUT, 1311 LK_CANRECURSE)) != 0) { 1312 if (ext2fs_inode_hash_lock < 0) 1313 wakeup(&ext2fs_inode_hash_lock); 1314 ext2fs_inode_hash_lock = 0; 1315 *vpp = NULL; 1316 free(ip, M_EXT2NODE); 1317 return (error); 1318 } 1319 vp->v_data = ip; 1320 ip->i_vnode = vp; 1321 ip->i_e2fs = fs = ump->um_e2fs; 1322 ip->i_dev = dev; 1323 ip->i_ump = ump; 1324 ip->i_number = ino; 1325 1326 /* 1327 * Put it onto its hash chain. Since our vnode is locked, other 1328 * requests for this inode will block if they arrive while we are 1329 * sleeping waiting for old data structures to be purged or for the 1330 * contents of the disk portion of this inode to be read. 1331 */ 1332 if (ext2_ihashins(ip)) { 1333 printf("debug: ext2fs ihashins collision, retrying inode %ld\n", 1334 (long)ip->i_number); 1335 *vpp = NULL; 1336 vp->v_type = VBAD; 1337 vx_put(vp); 1338 free(ip, M_EXT2NODE); 1339 goto restart; 1340 } 1341 1342 if (ext2fs_inode_hash_lock < 0) 1343 wakeup(&ext2fs_inode_hash_lock); 1344 ext2fs_inode_hash_lock = 0; 1345 1346 /* Read in the disk contents for the inode, copy into the inode. */ 1347 if ((error = ext2_bread(ump->um_devvp, fsbtodoff(fs, ino_to_fsba(fs, ino)), 1348 (int)fs->e2fs_bsize, &bp)) != 0) { 1349 /* 1350 * The inode does not contain anything useful, so it would 1351 * be misleading to leave it on its hash chain. With mode 1352 * still zero, it will be unlinked and returned to the free 1353 * list by vput(). 1354 */ 1355 vp->v_type = VBAD; 1356 ext2_brelse(bp); 1357 vx_put(vp); 1358 *vpp = NULL; 1359 return (error); 1360 } 1361 /* convert ext2 inode to dinode */ 1362 error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data + 1363 EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ino)), ip); 1364 if (error) { 1365 ext2_brelse(bp); 1366 vx_put(vp); 1367 *vpp = NULL; 1368 return (error); 1369 } 1370 ip->i_block_group = ino_to_cg(fs, ino); 1371 ip->i_next_alloc_block = 0; 1372 ip->i_next_alloc_goal = 0; 1373 1374 /* 1375 * Now we want to make sure that block pointers for unused 1376 * blocks are zeroed out - ext2_balloc depends on this 1377 * although for regular files and directories only 1378 * 1379 * If IN_E4EXTENTS is enabled, unused blocks are not zeroed 1380 * out because we could corrupt the extent tree. 1381 */ 1382 if (!(ip->i_flag & IN_E4EXTENTS) && 1383 (S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode))) { 1384 used_blocks = howmany(ip->i_size, fs->e2fs_bsize); 1385 for (i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) 1386 ip->i_db[i] = 0; 1387 } 1388 #ifdef EXT2FS_PRINT_EXTENTS 1389 ext2_print_inode(ip); 1390 ext4_ext_print_extent_tree_status(ip); 1391 #endif 1392 ext2_bqrelse(bp); 1393 1394 /* 1395 * Initialize the vnode from the inode, check for aliases. 1396 * Note that the underlying vnode may have changed. 1397 */ 1398 if ((error = ext2_vinit(mp, &vp)) != 0) { 1399 vx_put(vp); 1400 *vpp = NULL; 1401 return (error); 1402 } 1403 1404 /* 1405 * Finish inode initialization now that aliasing has been resolved. 1406 */ 1407 ip->i_devvp = ump->um_devvp; 1408 vref(ip->i_devvp); 1409 /* 1410 * Set up a generation number for this inode if it does not 1411 * already have one. This should only happen on old filesystems. 1412 */ 1413 if (ip->i_gen == 0) { 1414 ip->i_gen = krandom() / 2 + 1; 1415 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) 1416 ip->i_flag |= IN_MODIFIED; 1417 } 1418 /* 1419 * Return the locked and refd vnode. 1420 */ 1421 vx_downgrade(vp); /* downgrade VX lock to VN lock */ 1422 *vpp = vp; 1423 1424 return (0); 1425 } 1426 1427 /* 1428 * File handle to vnode 1429 * 1430 * Have to be really careful about stale file handles: 1431 * - check that the inode number is valid 1432 * - call ext2_vget() to get the locked inode 1433 * - check for an unallocated inode (i_mode == 0) 1434 * - check that the given client host has export rights and return 1435 * those rights via. exflagsp and credanonp 1436 */ 1437 static int 1438 ext2_fhtovp(struct mount *mp, struct vnode *rootvp, struct fid *fhp, 1439 struct vnode **vpp) 1440 { 1441 struct inode *ip; 1442 struct ufid *ufhp; 1443 struct vnode *nvp; 1444 struct m_ext2fs *fs; 1445 int error; 1446 1447 ufhp = (struct ufid *)fhp; 1448 fs = VFSTOEXT2(mp)->um_e2fs; 1449 if (ufhp->ufid_ino < EXT2_ROOTINO || 1450 ufhp->ufid_ino > fs->e2fs_gcount * fs->e2fs_ipg) 1451 return (ESTALE); 1452 1453 error = VFS_VGET(mp, NULL, LK_EXCLUSIVE, &nvp); 1454 if (error) { 1455 *vpp = NULLVP; 1456 return (error); 1457 } 1458 ip = VTOI(nvp); 1459 if (ip->i_mode == 0 || 1460 ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) { 1461 vput(nvp); 1462 *vpp = NULLVP; 1463 return (ESTALE); 1464 } 1465 *vpp = nvp; 1466 return (0); 1467 } 1468 1469 /* 1470 * Vnode pointer to File handle 1471 */ 1472 /* ARGSUSED */ 1473 static int 1474 ext2_vptofh(struct vnode *vp, struct fid *fhp) 1475 { 1476 struct inode *ip; 1477 struct ufid *ufhp; 1478 1479 ip = VTOI(vp); 1480 ufhp = (struct ufid *)fhp; 1481 ufhp->ufid_len = sizeof(struct ufid); 1482 ufhp->ufid_ino = ip->i_number; 1483 ufhp->ufid_gen = ip->i_gen; 1484 return (0); 1485 } 1486 1487 /* 1488 * This is the generic part of fhtovp called after the underlying 1489 * filesystem has validated the file handle. 1490 * 1491 * Verify that a host should have access to a filesystem. 1492 */ 1493 static int 1494 ext2_check_export(struct mount *mp, struct sockaddr *nam, int *exflagsp, 1495 struct ucred **credanonp) 1496 { 1497 struct netcred *np; 1498 struct ext2mount *ump; 1499 1500 ump = VFSTOEXT2(mp); 1501 /* 1502 * Get the export permission structure for this <mp, client> tuple. 1503 */ 1504 np = vfs_export_lookup(mp, &ump->um_export, nam); 1505 if (np == NULL) 1506 return (EACCES); 1507 1508 *exflagsp = np->netc_exflags; 1509 *credanonp = &np->netc_anon; 1510 return (0); 1511 } 1512 1513 /* 1514 * Write a superblock and associated information back to disk. 1515 */ 1516 static int 1517 ext2_sbupdate(struct ext2mount *mp, int waitfor) 1518 { 1519 struct m_ext2fs *fs = mp->um_e2fs; 1520 struct ext2fs *es = fs->e2fs; 1521 struct buf *bp; 1522 int error = 0; 1523 1524 es->e2fs_bcount = htole32(fs->e2fs_bcount & 0xffffffff); 1525 es->e2fs_rbcount = htole32(fs->e2fs_rbcount & 0xffffffff); 1526 es->e2fs_fbcount = htole32(fs->e2fs_fbcount & 0xffffffff); 1527 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 1528 es->e4fs_bcount_hi = htole32(fs->e2fs_bcount >> 32); 1529 es->e4fs_rbcount_hi = htole32(fs->e2fs_rbcount >> 32); 1530 es->e4fs_fbcount_hi = htole32(fs->e2fs_fbcount >> 32); 1531 } 1532 1533 es->e2fs_ficount = htole32(fs->e2fs_ficount); 1534 1535 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 1536 ext2_sb_csum_set(fs); 1537 1538 bp = getblk(mp->um_devvp, SBOFF, SBSIZE, 0, 0); 1539 bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2fs)); 1540 if (waitfor == MNT_WAIT) 1541 error = bwrite(bp); 1542 else 1543 bawrite(bp); 1544 1545 /* 1546 * The buffers for group descriptors, inode bitmaps and block bitmaps 1547 * are not busy at this point and are (hopefully) written by the 1548 * usual sync mechanism. No need to write them here. 1549 */ 1550 return (error); 1551 } 1552 1553 static int 1554 ext2_cgupdate(struct ext2mount *mp, int waitfor) 1555 { 1556 struct m_ext2fs *fs = mp->um_e2fs; 1557 struct buf *bp; 1558 int i, j, g_count = 0, error = 0, allerror = 0; 1559 1560 allerror = ext2_sbupdate(mp, waitfor); 1561 1562 /* Update gd csums */ 1563 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1564 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 1565 ext2_gd_csum_set(fs); 1566 1567 for (i = 0; i < fs->e2fs_gdbcount; i++) { 1568 bp = getblk(mp->um_devvp, fsbtodoff(fs, 1569 ext2_cg_location(fs, i)), 1570 fs->e2fs_bsize, 0, 0); 1571 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 1572 memcpy(bp->b_data, &fs->e2fs_gd[ 1573 i * fs->e2fs_bsize / sizeof(struct ext2_gd)], 1574 fs->e2fs_bsize); 1575 } else { 1576 for (j = 0; j < fs->e2fs_bsize / E2FS_REV0_GD_SIZE && 1577 g_count < fs->e2fs_gcount; j++, g_count++) 1578 memcpy(bp->b_data + j * E2FS_REV0_GD_SIZE, 1579 &fs->e2fs_gd[g_count], E2FS_REV0_GD_SIZE); 1580 } 1581 if (waitfor == MNT_WAIT) 1582 error = bwrite(bp); 1583 else 1584 bawrite(bp); 1585 } 1586 1587 if (!allerror && error) 1588 allerror = error; 1589 return (allerror); 1590 } 1591 1592 /* 1593 * Return the root of a filesystem. 1594 */ 1595 static int 1596 ext2_root(struct mount *mp, struct vnode **vpp) 1597 { 1598 struct vnode *nvp; 1599 int error; 1600 1601 error = VFS_VGET(mp, NULL, (ino_t)EXT2_ROOTINO, &nvp); 1602 if (error) 1603 return (error); 1604 *vpp = nvp; 1605 return (0); 1606 } 1607 1608 /* 1609 * Initialize ext2 filesystems, done only once. 1610 */ 1611 static int 1612 ext2_init(struct vfsconf *vfsp) 1613 { 1614 static int done; 1615 1616 if (done) 1617 return (0); 1618 done = 1; 1619 ext2_ihashinit(); 1620 1621 return (0); 1622 } 1623 1624 static int 1625 ext2_uninit(struct vfsconf *vfsp) 1626 { 1627 1628 ext2_ihashuninit(); 1629 1630 return (0); 1631 } 1632