1 /*- 2 * modified for Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7 /*- 8 * SPDX-License-Identifier: BSD-3-Clause 9 * 10 * Copyright (c) 1982, 1986, 1989, 1993 11 * The Regents of the University of California. All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)ffs_alloc.c 8.8 (Berkeley) 2/21/94 38 * $FreeBSD$ 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/conf.h> 44 #include <sys/vnode.h> 45 #include <sys/stat.h> 46 #include <sys/mount.h> 47 #include <sys/sysctl.h> 48 #include <sys/syslog.h> 49 #include <sys/buf2.h> 50 #include <sys/endian.h> 51 #include <sys/malloc.h> 52 #include <sys/mutex2.h> 53 54 #include <vfs/ext2fs/fs.h> 55 #include <vfs/ext2fs/inode.h> 56 #include <vfs/ext2fs/ext2_mount.h> 57 #include <vfs/ext2fs/ext2fs.h> 58 #include <vfs/ext2fs/ext2_extern.h> 59 60 SDT_PROVIDER_DEFINE(ext2fs); 61 /* 62 * ext2fs trace probe: 63 * arg0: verbosity. Higher numbers give more verbose messages 64 * arg1: Textual message 65 */ 66 SDT_PROBE_DEFINE2(ext2fs, , alloc, trace, "int", "char*"); 67 SDT_PROBE_DEFINE3(ext2fs, , alloc, ext2_reallocblks_realloc, 68 "ino_t", "e2fs_lbn_t", "e2fs_lbn_t"); 69 SDT_PROBE_DEFINE1(ext2fs, , alloc, ext2_reallocblks_bap, "uint32_t"); 70 SDT_PROBE_DEFINE1(ext2fs, , alloc, ext2_reallocblks_blkno, "e2fs_daddr_t"); 71 SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, "char*", "int"); 72 SDT_PROBE_DEFINE3(ext2fs, , alloc, ext2_nodealloccg_bmap_corrupted, 73 "int", "daddr_t", "char*"); 74 SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_blkfree_bad_block, "ino_t", "e4fs_daddr_t"); 75 SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_vfree_doublefree, "char*", "ino_t"); 76 77 static daddr_t ext2_alloccg(struct inode *, int, daddr_t, int); 78 static daddr_t ext2_clusteralloc(struct inode *, int, daddr_t, int); 79 static u_long ext2_dirpref(struct inode *); 80 static e4fs_daddr_t ext2_hashalloc(struct inode *, int, long, int, 81 daddr_t (*)(struct inode *, int, daddr_t, 82 int)); 83 static daddr_t ext2_nodealloccg(struct inode *, int, daddr_t, int); 84 static daddr_t ext2_mapsearch(struct m_ext2fs *, char *, daddr_t); 85 86 /* 87 * Allocate a block in the filesystem. 88 * 89 * A preference may be optionally specified. If a preference is given 90 * the following hierarchy is used to allocate a block: 91 * 1) allocate the requested block. 92 * 2) allocate a rotationally optimal block in the same cylinder. 93 * 3) allocate a block in the same cylinder group. 94 * 4) quadradically rehash into other cylinder groups, until an 95 * available block is located. 96 * If no block preference is given the following hierarchy is used 97 * to allocate a block: 98 * 1) allocate a block in the cylinder group that contains the 99 * inode for the file. 100 * 2) quadradically rehash into other cylinder groups, until an 101 * available block is located. 102 */ 103 int 104 ext2_alloc(struct inode *ip, daddr_t lbn, e4fs_daddr_t bpref, int size, 105 struct ucred *cred, e4fs_daddr_t *bnp) 106 { 107 struct m_ext2fs *fs; 108 struct ext2mount *ump; 109 e4fs_daddr_t bno; 110 int cg; 111 112 *bnp = 0; 113 fs = ip->i_e2fs; 114 ump = ip->i_ump; 115 mtx_assert(EXT2_MTX(ump), MA_OWNED); 116 #ifdef INVARIANTS 117 if ((u_int)size > fs->e2fs_bsize || blkoff(fs, size) != 0) { 118 printf("bsize = %lu, size = %d, fs = %s\n", 119 (long unsigned int)fs->e2fs_bsize, size, fs->e2fs_fsmnt); 120 panic("ext2_alloc: bad size"); 121 } 122 if (cred == NOCRED) 123 panic("ext2_alloc: missing credential"); 124 #endif /* INVARIANTS */ 125 if (size == fs->e2fs_bsize && fs->e2fs_fbcount == 0) 126 goto nospace; 127 if (cred->cr_uid != 0 && 128 fs->e2fs_fbcount < fs->e2fs_rbcount) 129 goto nospace; 130 if (bpref >= fs->e2fs_bcount) 131 bpref = 0; 132 if (bpref == 0) 133 cg = ino_to_cg(fs, ip->i_number); 134 else 135 cg = dtog(fs, bpref); 136 bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize, 137 ext2_alloccg); 138 if (bno > 0) { 139 /* set next_alloc fields as done in block_getblk */ 140 ip->i_next_alloc_block = lbn; 141 ip->i_next_alloc_goal = bno; 142 143 ip->i_blocks += btodb(fs->e2fs_bsize); 144 ip->i_flag |= IN_CHANGE | IN_UPDATE; 145 *bnp = bno; 146 return (0); 147 } 148 nospace: 149 EXT2_UNLOCK(ump); 150 SDT_PROBE2(ext2fs, , alloc, trace, 1, "cannot allocate data block"); 151 return (ENOSPC); 152 } 153 154 /* 155 * Allocate EA's block for inode. 156 */ 157 e4fs_daddr_t 158 ext2_alloc_meta(struct inode *ip) 159 { 160 struct m_ext2fs *fs; 161 daddr_t blk; 162 163 fs = ip->i_e2fs; 164 165 EXT2_LOCK(ip->i_ump); 166 blk = ext2_hashalloc(ip, ino_to_cg(fs, ip->i_number), 0, fs->e2fs_bsize, 167 ext2_alloccg); 168 if (0 == blk) { 169 EXT2_UNLOCK(ip->i_ump); 170 SDT_PROBE2(ext2fs, , alloc, trace, 1, "cannot allocate meta block"); 171 } 172 173 return (blk); 174 } 175 176 /* 177 * Reallocate a sequence of blocks into a contiguous sequence of blocks. 178 * 179 * The vnode and an array of buffer pointers for a range of sequential 180 * logical blocks to be made contiguous is given. The allocator attempts 181 * to find a range of sequential blocks starting as close as possible to 182 * an fs_rotdelay offset from the end of the allocation for the logical 183 * block immediately preceding the current range. If successful, the 184 * physical block numbers in the buffer pointers and in the inode are 185 * changed to reflect the new allocation. If unsuccessful, the allocation 186 * is left unchanged. The success in doing the reallocation is returned. 187 * Note that the error return is not reflected back to the user. Rather 188 * the previous block allocation will be used. 189 */ 190 191 static SYSCTL_NODE(_vfs, OID_AUTO, ext2fs, CTLFLAG_RW, 0, "EXT2FS filesystem"); 192 193 static int doasyncfree = 1; 194 195 SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, 196 "Use asychronous writes to update block pointers when freeing blocks"); 197 198 static int doreallocblks = 0; 199 200 SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, ""); 201 202 int 203 ext2_reallocblks(struct vop_reallocblks_args *ap) 204 { 205 struct m_ext2fs *fs; 206 struct inode *ip; 207 struct vnode *vp; 208 struct buf *sbp, *ebp; 209 uint32_t *bap, *sbap, *ebap; 210 struct ext2mount *ump; 211 struct cluster_save *buflist; 212 struct indir start_ap[EXT2_NIADDR + 1], end_ap[EXT2_NIADDR + 1], *idp; 213 e2fs_lbn_t start_lbn, end_lbn; 214 int soff; 215 e2fs_daddr_t newblk, blkno; 216 int i, len, start_lvl, end_lvl, pref, ssize; 217 218 if (doreallocblks == 0) 219 return (ENOSPC); 220 221 vp = ap->a_vp; 222 ip = VTOI(vp); 223 fs = ip->i_e2fs; 224 ump = ip->i_ump; 225 226 if (fs->e2fs_contigsumsize <= 0 || ip->i_flag & IN_E4EXTENTS) 227 return (ENOSPC); 228 229 buflist = ap->a_buflist; 230 len = buflist->bs_nchildren; 231 start_lbn = lblkno(fs, buflist->bs_children[0]->b_loffset); 232 end_lbn = start_lbn + len - 1; 233 #ifdef INVARIANTS 234 for (i = 1; i < len; i++) 235 if (buflist->bs_children[i]->b_loffset != lblktodoff(fs, start_lbn + i)) 236 panic("ext2_reallocblks: non-cluster"); 237 #endif 238 /* 239 * If the cluster crosses the boundary for the first indirect 240 * block, leave space for the indirect block. Indirect blocks 241 * are initially laid out in a position after the last direct 242 * block. Block reallocation would usually destroy locality by 243 * moving the indirect block out of the way to make room for 244 * data blocks if we didn't compensate here. We should also do 245 * this for other indirect block boundaries, but it is only 246 * important for the first one. 247 */ 248 if (start_lbn < EXT2_NDADDR && end_lbn >= EXT2_NDADDR) 249 return (ENOSPC); 250 /* 251 * If the latest allocation is in a new cylinder group, assume that 252 * the filesystem has decided to move and do not force it back to 253 * the previous cylinder group. 254 */ 255 if (dtog(fs, dofftofsb(fs, buflist->bs_children[0]->b_bio2.bio_offset)) != 256 dtog(fs, dofftofsb(fs, buflist->bs_children[len - 1]->b_bio2.bio_offset))) 257 return (ENOSPC); 258 if (ext2_getlbns(vp, start_lbn, start_ap, &start_lvl) || 259 ext2_getlbns(vp, end_lbn, end_ap, &end_lvl)) 260 return (ENOSPC); 261 /* 262 * Get the starting offset and block map for the first block. 263 */ 264 if (start_lvl == 0) { 265 sbap = &ip->i_db[0]; 266 soff = start_lbn; 267 } else { 268 idp = &start_ap[start_lvl - 1]; 269 if (ext2_bread(vp, lblktodoff(fs, idp->in_lbn), 270 (int)fs->e2fs_bsize, &sbp)) { 271 ext2_brelse(sbp); 272 return (ENOSPC); 273 } 274 sbap = (u_int *)sbp->b_data; 275 soff = idp->in_off; 276 } 277 /* 278 * If the block range spans two block maps, get the second map. 279 */ 280 ebap = NULL; 281 if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { 282 ssize = len; 283 } else { 284 #ifdef INVARIANTS 285 if (start_ap[start_lvl - 1].in_lbn == idp->in_lbn) 286 panic("ext2_reallocblks: start == end"); 287 #endif 288 ssize = len - (idp->in_off + 1); 289 if (ext2_bread(vp, lblktodoff(fs, idp->in_lbn), 290 (int)fs->e2fs_bsize, &ebp)) 291 goto fail; 292 ebap = (u_int *)ebp->b_data; 293 } 294 /* 295 * Find the preferred location for the cluster. 296 */ 297 EXT2_LOCK(ump); 298 pref = ext2_blkpref(ip, start_lbn, soff, sbap, 0); 299 /* 300 * Search the block map looking for an allocation of the desired size. 301 */ 302 if ((newblk = (e2fs_daddr_t)ext2_hashalloc(ip, dtog(fs, pref), pref, 303 len, ext2_clusteralloc)) == 0) { 304 EXT2_UNLOCK(ump); 305 goto fail; 306 } 307 /* 308 * We have found a new contiguous block. 309 * 310 * First we have to replace the old block pointers with the new 311 * block pointers in the inode and indirect blocks associated 312 * with the file. 313 */ 314 SDT_PROBE3(ext2fs, , alloc, ext2_reallocblks_realloc, 315 ip->i_number, start_lbn, end_lbn); 316 blkno = newblk; 317 for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) { 318 if (i == ssize) { 319 bap = ebap; 320 soff = -i; 321 } 322 #ifdef INVARIANTS 323 if (buflist->bs_children[i]->b_bio2.bio_offset != 324 fsbtodoff(fs, *bap)) 325 panic("ext2_reallocblks: alloc mismatch"); 326 #endif 327 SDT_PROBE1(ext2fs, , alloc, ext2_reallocblks_bap, *bap); 328 *bap++ = blkno; 329 } 330 /* 331 * Next we must write out the modified inode and indirect blocks. 332 * For strict correctness, the writes should be synchronous since 333 * the old block values may have been written to disk. In practise 334 * they are almost never written, but if we are concerned about 335 * strict correctness, the `doasyncfree' flag should be set to zero. 336 * 337 * The test on `doasyncfree' should be changed to test a flag 338 * that shows whether the associated buffers and inodes have 339 * been written. The flag should be set when the cluster is 340 * started and cleared whenever the buffer or inode is flushed. 341 * We can then check below to see if it is set, and do the 342 * synchronous write only when it has been cleared. 343 */ 344 if (sbap != &ip->i_db[0]) { 345 if (doasyncfree) 346 bdwrite(sbp); 347 else 348 bwrite(sbp); 349 } else { 350 ip->i_flag |= IN_CHANGE | IN_UPDATE; 351 if (!doasyncfree) 352 ext2_update(vp, 1); 353 } 354 if (ssize < len) { 355 if (doasyncfree) 356 bdwrite(ebp); 357 else 358 bwrite(ebp); 359 } 360 /* 361 * Last, free the old blocks and assign the new blocks to the buffers. 362 */ 363 for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) { 364 ext2_blkfree(ip, dofftofsb(fs, buflist->bs_children[i]->b_bio2.bio_offset), 365 fs->e2fs_bsize); 366 buflist->bs_children[i]->b_bio2.bio_offset = fsbtodoff(fs, blkno); 367 SDT_PROBE1(ext2fs, , alloc, ext2_reallocblks_blkno, blkno); 368 } 369 370 return (0); 371 372 fail: 373 if (ssize < len) 374 ext2_brelse(ebp); 375 if (sbap != &ip->i_db[0]) 376 ext2_brelse(sbp); 377 return (ENOSPC); 378 } 379 380 /* 381 * Allocate an inode in the filesystem. 382 * 383 */ 384 int 385 ext2_valloc(struct vnode *pvp, int mode, struct ucred *cred, struct vnode **vpp) 386 { 387 struct timespec ts; 388 struct m_ext2fs *fs; 389 struct ext2mount *ump; 390 struct inode *pip; 391 struct inode *ip; 392 struct vnode *vp; 393 ino_t ino, ipref; 394 int error, cg; 395 396 *vpp = NULL; 397 pip = VTOI(pvp); 398 fs = pip->i_e2fs; 399 ump = pip->i_ump; 400 401 EXT2_LOCK(ump); 402 if (fs->e2fs_ficount == 0) 403 goto noinodes; 404 /* 405 * If it is a directory then obtain a cylinder group based on 406 * ext2_dirpref else obtain it using ino_to_cg. The preferred inode is 407 * always the next inode. 408 */ 409 if ((mode & IFMT) == IFDIR) { 410 cg = ext2_dirpref(pip); 411 if (fs->e2fs_contigdirs[cg] < 255) 412 fs->e2fs_contigdirs[cg]++; 413 } else { 414 cg = ino_to_cg(fs, pip->i_number); 415 if (fs->e2fs_contigdirs[cg] > 0) 416 fs->e2fs_contigdirs[cg]--; 417 } 418 ipref = cg * fs->e2fs_ipg + 1; 419 ino = (ino_t)ext2_hashalloc(pip, cg, (long)ipref, mode, ext2_nodealloccg); 420 if (ino == 0) 421 goto noinodes; 422 restart: 423 if ((vp = ext2_ihashget(ump->um_dev, ino)) != NULL) { 424 printf("ext2_valloc: vp %p exists for inode %lu\n", vp, ino); 425 return (EEXIST); 426 } 427 428 /* 429 * Lock out the creation of new entries in the FFS hash table in 430 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate 431 * may occur! 432 */ 433 if (ext2fs_inode_hash_lock) { 434 while (ext2fs_inode_hash_lock) { 435 ext2fs_inode_hash_lock = -1; 436 tsleep(&ext2fs_inode_hash_lock, 0, "e2vget", 0); 437 } 438 goto restart; 439 } 440 ext2fs_inode_hash_lock = 1; 441 442 ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO); 443 if (ip == NULL) { 444 return (ENOMEM); 445 } 446 447 /* Allocate a new vnode/inode. */ 448 if ((error = getnewvnode(VT_EXT2FS, ump->um_mountp, &vp, VLKTIMEOUT, 449 LK_CANRECURSE)) != 0) { 450 if (ext2fs_inode_hash_lock < 0) 451 wakeup(&ext2fs_inode_hash_lock); 452 ext2fs_inode_hash_lock = 0; 453 *vpp = NULL; 454 free(ip, M_EXT2NODE); 455 return (error); 456 } 457 //lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); 458 vp->v_data = ip; 459 ip->i_vnode = vp; 460 ip->i_e2fs = fs = ump->um_e2fs; 461 ip->i_dev = ump->um_dev; 462 ip->i_ump = ump; 463 ip->i_number = ino; 464 ip->i_block_group = ino_to_cg(fs, ino); 465 ip->i_next_alloc_block = 0; 466 ip->i_next_alloc_goal = 0; 467 468 /* 469 * Put it onto its hash chain. Since our vnode is locked, other 470 * requests for this inode will block if they arrive while we are 471 * sleeping waiting for old data structures to be purged or for the 472 * contents of the disk portion of this inode to be read. 473 */ 474 if ((error = ext2_ihashins(ip)) != 0) { 475 printf("ext2_valloc: ihashins collision, retrying inode %ld\n", 476 (long)ip->i_number); 477 *vpp = NULL; 478 vp->v_type = VBAD; 479 vx_put(vp); 480 free(ip, M_EXT2NODE); 481 goto restart; 482 } 483 484 if (ext2fs_inode_hash_lock < 0) 485 wakeup(&ext2fs_inode_hash_lock); 486 ext2fs_inode_hash_lock = 0; 487 488 if ((error = ext2_vinit(vp->v_mount, &vp)) != 0) { 489 *vpp = NULL; 490 vp->v_type = VBAD; 491 vx_put(vp); 492 return (error); 493 } 494 495 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_EXTENTS) 496 && (S_ISREG(mode) || S_ISDIR(mode))) 497 ext4_ext_tree_init(ip); 498 else 499 memset(ip->i_data, 0, sizeof(ip->i_data)); 500 501 /* 502 * Set up a new generation number for this inode. 503 * Avoid zero values. 504 */ 505 do { 506 ip->i_gen = karc4random(); 507 } while (ip->i_gen == 0); 508 509 vfs_timestamp(&ts); 510 ip->i_birthtime = ts.tv_sec; 511 ip->i_birthnsec = ts.tv_nsec; 512 513 /* 514 * Finish inode initialization now that aliasing has been resolved. 515 */ 516 vref(ip->i_devvp); 517 /* 518 * Return the locked and refd vnode. 519 */ 520 vx_downgrade(vp); /* downgrade VX lock to VN lock */ 521 *vpp = vp; 522 523 return (0); 524 525 noinodes: 526 EXT2_UNLOCK(ump); 527 SDT_PROBE2(ext2fs, , alloc, trace, 1, "out of inodes"); 528 return (ENOSPC); 529 } 530 531 /* 532 * 64-bit compatible getters and setters for struct ext2_gd from ext2fs.h 533 */ 534 uint64_t 535 e2fs_gd_get_b_bitmap(struct ext2_gd *gd) 536 { 537 538 return (((uint64_t)(le32toh(gd->ext4bgd_b_bitmap_hi)) << 32) | 539 le32toh(gd->ext2bgd_b_bitmap)); 540 } 541 542 uint64_t 543 e2fs_gd_get_i_bitmap(struct ext2_gd *gd) 544 { 545 546 return (((uint64_t)(le32toh(gd->ext4bgd_i_bitmap_hi)) << 32) | 547 le32toh(gd->ext2bgd_i_bitmap)); 548 } 549 550 uint64_t 551 e2fs_gd_get_i_tables(struct ext2_gd *gd) 552 { 553 554 return (((uint64_t)(le32toh(gd->ext4bgd_i_tables_hi)) << 32) | 555 le32toh(gd->ext2bgd_i_tables)); 556 } 557 558 static uint32_t 559 e2fs_gd_get_nbfree(struct ext2_gd *gd) 560 { 561 562 return (((uint32_t)(le16toh(gd->ext4bgd_nbfree_hi)) << 16) | 563 le16toh(gd->ext2bgd_nbfree)); 564 } 565 566 static void 567 e2fs_gd_set_nbfree(struct ext2_gd *gd, uint32_t val) 568 { 569 570 gd->ext2bgd_nbfree = htole16(val & 0xffff); 571 gd->ext4bgd_nbfree_hi = htole16(val >> 16); 572 } 573 574 static uint32_t 575 e2fs_gd_get_nifree(struct ext2_gd *gd) 576 { 577 578 return (((uint32_t)(le16toh(gd->ext4bgd_nifree_hi)) << 16) | 579 le16toh(gd->ext2bgd_nifree)); 580 } 581 582 static void 583 e2fs_gd_set_nifree(struct ext2_gd *gd, uint32_t val) 584 { 585 586 gd->ext2bgd_nifree = htole16(val & 0xffff); 587 gd->ext4bgd_nifree_hi = htole16(val >> 16); 588 } 589 590 uint32_t 591 e2fs_gd_get_ndirs(struct ext2_gd *gd) 592 { 593 594 return (((uint32_t)(le16toh(gd->ext4bgd_ndirs_hi)) << 16) | 595 le16toh(gd->ext2bgd_ndirs)); 596 } 597 598 static void 599 e2fs_gd_set_ndirs(struct ext2_gd *gd, uint32_t val) 600 { 601 602 gd->ext2bgd_ndirs = htole16(val & 0xffff); 603 gd->ext4bgd_ndirs_hi = htole16(val >> 16); 604 } 605 606 static uint32_t 607 e2fs_gd_get_i_unused(struct ext2_gd *gd) 608 { 609 return ((uint32_t)(le16toh(gd->ext4bgd_i_unused_hi) << 16) | 610 le16toh(gd->ext4bgd_i_unused)); 611 } 612 613 static void 614 e2fs_gd_set_i_unused(struct ext2_gd *gd, uint32_t val) 615 { 616 617 gd->ext4bgd_i_unused = htole16(val & 0xffff); 618 gd->ext4bgd_i_unused_hi = htole16(val >> 16); 619 } 620 621 /* 622 * Find a cylinder to place a directory. 623 * 624 * The policy implemented by this algorithm is to allocate a 625 * directory inode in the same cylinder group as its parent 626 * directory, but also to reserve space for its files inodes 627 * and data. Restrict the number of directories which may be 628 * allocated one after another in the same cylinder group 629 * without intervening allocation of files. 630 * 631 * If we allocate a first level directory then force allocation 632 * in another cylinder group. 633 * 634 */ 635 static u_long 636 ext2_dirpref(struct inode *pip) 637 { 638 struct m_ext2fs *fs; 639 int cg, prefcg, cgsize; 640 uint64_t avgbfree, minbfree; 641 u_int avgifree, avgndir, curdirsize; 642 u_int minifree, maxndir; 643 u_int mincg, minndir; 644 u_int dirsize, maxcontigdirs; 645 646 mtx_assert(EXT2_MTX(pip->i_ump), MA_OWNED); 647 fs = pip->i_e2fs; 648 649 avgifree = fs->e2fs_ficount / fs->e2fs_gcount; 650 avgbfree = fs->e2fs_fbcount / fs->e2fs_gcount; 651 avgndir = fs->e2fs_total_dir / fs->e2fs_gcount; 652 653 /* 654 * Force allocation in another cg if creating a first level dir. 655 */ 656 ASSERT_VOP_LOCKED(ITOV(pip), "ext2fs_dirpref"); 657 if (ITOV(pip)->v_flag & VROOT) { 658 prefcg = karc4random() % fs->e2fs_gcount; 659 mincg = prefcg; 660 minndir = fs->e2fs_ipg; 661 for (cg = prefcg; cg < fs->e2fs_gcount; cg++) 662 if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < minndir && 663 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree && 664 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= avgbfree) { 665 mincg = cg; 666 minndir = e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]); 667 } 668 for (cg = 0; cg < prefcg; cg++) 669 if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < minndir && 670 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree && 671 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= avgbfree) { 672 mincg = cg; 673 minndir = e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]); 674 } 675 return (mincg); 676 } 677 /* 678 * Count various limits which used for 679 * optimal allocation of a directory inode. 680 */ 681 maxndir = min(avgndir + fs->e2fs_ipg / 16, fs->e2fs_ipg); 682 minifree = avgifree - avgifree / 4; 683 if (minifree < 1) 684 minifree = 1; 685 minbfree = avgbfree - avgbfree / 4; 686 if (minbfree < 1) 687 minbfree = 1; 688 cgsize = fs->e2fs_fsize * fs->e2fs_fpg; 689 dirsize = AVGDIRSIZE; 690 curdirsize = avgndir ? 691 (cgsize - avgbfree * fs->e2fs_bsize) / avgndir : 0; 692 if (dirsize < curdirsize) 693 dirsize = curdirsize; 694 maxcontigdirs = min((avgbfree * fs->e2fs_bsize) / dirsize, 255); 695 maxcontigdirs = min(maxcontigdirs, fs->e2fs_ipg / AFPDIR); 696 if (maxcontigdirs == 0) 697 maxcontigdirs = 1; 698 699 /* 700 * Limit number of dirs in one cg and reserve space for 701 * regular files, but only if we have no deficit in 702 * inodes or space. 703 */ 704 prefcg = ino_to_cg(fs, pip->i_number); 705 for (cg = prefcg; cg < fs->e2fs_gcount; cg++) 706 if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < maxndir && 707 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= minifree && 708 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= minbfree) { 709 if (fs->e2fs_contigdirs[cg] < maxcontigdirs) 710 return (cg); 711 } 712 for (cg = 0; cg < prefcg; cg++) 713 if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < maxndir && 714 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= minifree && 715 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= minbfree) { 716 if (fs->e2fs_contigdirs[cg] < maxcontigdirs) 717 return (cg); 718 } 719 /* 720 * This is a backstop when we have deficit in space. 721 */ 722 for (cg = prefcg; cg < fs->e2fs_gcount; cg++) 723 if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree) 724 return (cg); 725 for (cg = 0; cg < prefcg; cg++) 726 if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree) 727 break; 728 return (cg); 729 } 730 731 /* 732 * Select the desired position for the next block in a file. 733 * 734 * we try to mimic what Remy does in inode_getblk/block_getblk 735 * 736 * we note: blocknr == 0 means that we're about to allocate either 737 * a direct block or a pointer block at the first level of indirection 738 * (In other words, stuff that will go in i_db[] or i_ib[]) 739 * 740 * blocknr != 0 means that we're allocating a block that is none 741 * of the above. Then, blocknr tells us the number of the block 742 * that will hold the pointer 743 */ 744 e4fs_daddr_t 745 ext2_blkpref(struct inode *ip, e2fs_lbn_t lbn, int indx, e2fs_daddr_t *bap, 746 e2fs_daddr_t blocknr) 747 { 748 struct m_ext2fs *fs; 749 int tmp; 750 751 fs = ip->i_e2fs; 752 753 mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED); 754 755 /* 756 * If the next block is actually what we thought it is, then set the 757 * goal to what we thought it should be. 758 */ 759 if (ip->i_next_alloc_block == lbn && ip->i_next_alloc_goal != 0) 760 return ip->i_next_alloc_goal; 761 762 /* 763 * Now check whether we were provided with an array that basically 764 * tells us previous blocks to which we want to stay close. 765 */ 766 if (bap) 767 for (tmp = indx - 1; tmp >= 0; tmp--) 768 if (bap[tmp]) 769 return (le32toh(bap[tmp])); 770 771 /* 772 * Else lets fall back to the blocknr or, if there is none, follow 773 * the rule that a block should be allocated near its inode. 774 */ 775 return (blocknr ? blocknr : 776 (e2fs_daddr_t)(ip->i_block_group * 777 EXT2_BLOCKS_PER_GROUP(fs)) + le32toh(fs->e2fs->e2fs_first_dblock)); 778 } 779 780 /* 781 * Implement the cylinder overflow algorithm. 782 * 783 * The policy implemented by this algorithm is: 784 * 1) allocate the block in its requested cylinder group. 785 * 2) quadradically rehash on the cylinder group number. 786 * 3) brute force search for a free block. 787 */ 788 static e4fs_daddr_t 789 ext2_hashalloc(struct inode *ip, int cg, long pref, int size, 790 daddr_t (*allocator) (struct inode *, int, daddr_t, int)) 791 { 792 struct m_ext2fs *fs; 793 e4fs_daddr_t result; 794 int i, icg = cg; 795 796 mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED); 797 fs = ip->i_e2fs; 798 /* 799 * 1: preferred cylinder group 800 */ 801 result = (*allocator)(ip, cg, pref, size); 802 if (result) 803 return (result); 804 /* 805 * 2: quadratic rehash 806 */ 807 for (i = 1; i < fs->e2fs_gcount; i *= 2) { 808 cg += i; 809 if (cg >= fs->e2fs_gcount) 810 cg -= fs->e2fs_gcount; 811 result = (*allocator)(ip, cg, 0, size); 812 if (result) 813 return (result); 814 } 815 /* 816 * 3: brute force search 817 * Note that we start at i == 2, since 0 was checked initially, 818 * and 1 is always checked in the quadratic rehash. 819 */ 820 cg = (icg + 2) % fs->e2fs_gcount; 821 for (i = 2; i < fs->e2fs_gcount; i++) { 822 result = (*allocator)(ip, cg, 0, size); 823 if (result) 824 return (result); 825 cg++; 826 if (cg == fs->e2fs_gcount) 827 cg = 0; 828 } 829 return (0); 830 } 831 832 static uint64_t 833 ext2_cg_number_gdb_nometa(struct m_ext2fs *fs, int cg) 834 { 835 836 if (!ext2_cg_has_sb(fs, cg)) 837 return (0); 838 839 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG)) 840 return (le32toh(fs->e2fs->e3fs_first_meta_bg)); 841 842 return ((fs->e2fs_gcount + EXT2_DESCS_PER_BLOCK(fs) - 1) / 843 EXT2_DESCS_PER_BLOCK(fs)); 844 } 845 846 static uint64_t 847 ext2_cg_number_gdb_meta(struct m_ext2fs *fs, int cg) 848 { 849 unsigned long metagroup; 850 int first, last; 851 852 metagroup = cg / EXT2_DESCS_PER_BLOCK(fs); 853 first = metagroup * EXT2_DESCS_PER_BLOCK(fs); 854 last = first + EXT2_DESCS_PER_BLOCK(fs) - 1; 855 856 if (cg == first || cg == first + 1 || cg == last) 857 return (1); 858 859 return (0); 860 } 861 862 uint64_t 863 ext2_cg_number_gdb(struct m_ext2fs *fs, int cg) 864 { 865 unsigned long first_meta_bg, metagroup; 866 867 first_meta_bg = le32toh(fs->e2fs->e3fs_first_meta_bg); 868 metagroup = cg / EXT2_DESCS_PER_BLOCK(fs); 869 870 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) || 871 metagroup < first_meta_bg) 872 return (ext2_cg_number_gdb_nometa(fs, cg)); 873 874 return ext2_cg_number_gdb_meta(fs, cg); 875 } 876 877 static int 878 ext2_number_base_meta_blocks(struct m_ext2fs *fs, int cg) 879 { 880 int number; 881 882 number = ext2_cg_has_sb(fs, cg); 883 884 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) || 885 cg < le32toh(fs->e2fs->e3fs_first_meta_bg) * 886 EXT2_DESCS_PER_BLOCK(fs)) { 887 if (number) { 888 number += ext2_cg_number_gdb(fs, cg); 889 number += le16toh(fs->e2fs->e2fs_reserved_ngdb); 890 } 891 } else { 892 number += ext2_cg_number_gdb(fs, cg); 893 } 894 895 return (number); 896 } 897 898 static void 899 ext2_mark_bitmap_end(int start_bit, int end_bit, char *bitmap) 900 { 901 int i; 902 903 if (start_bit >= end_bit) 904 return; 905 906 for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) 907 setbit(bitmap, i); 908 if (i < end_bit) 909 memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); 910 } 911 912 static int 913 ext2_get_group_number(struct m_ext2fs *fs, e4fs_daddr_t block) 914 { 915 916 return ((block - le32toh(fs->e2fs->e2fs_first_dblock)) / 917 fs->e2fs_bsize); 918 } 919 920 static int 921 ext2_block_in_group(struct m_ext2fs *fs, e4fs_daddr_t block, int cg) 922 { 923 924 return ((ext2_get_group_number(fs, block) == cg) ? 1 : 0); 925 } 926 927 static int 928 ext2_cg_block_bitmap_init(struct m_ext2fs *fs, int cg, struct buf *bp) 929 { 930 int bit, bit_max, inodes_per_block; 931 uint64_t start, tmp; 932 933 if (!(le16toh(fs->e2fs_gd[cg].ext4bgd_flags) & EXT2_BG_BLOCK_UNINIT)) 934 return (0); 935 936 memset(bp->b_data, 0, fs->e2fs_bsize); 937 938 bit_max = ext2_number_base_meta_blocks(fs, cg); 939 if ((bit_max >> 3) >= fs->e2fs_bsize) 940 return (EINVAL); 941 942 for (bit = 0; bit < bit_max; bit++) 943 setbit(bp->b_data, bit); 944 945 start = (uint64_t)cg * fs->e2fs_bpg + 946 le32toh(fs->e2fs->e2fs_first_dblock); 947 948 /* Set bits for block and inode bitmaps, and inode table. */ 949 tmp = e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg]); 950 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 951 ext2_block_in_group(fs, tmp, cg)) 952 setbit(bp->b_data, tmp - start); 953 954 tmp = e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg]); 955 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 956 ext2_block_in_group(fs, tmp, cg)) 957 setbit(bp->b_data, tmp - start); 958 959 tmp = e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]); 960 inodes_per_block = fs->e2fs_bsize/EXT2_INODE_SIZE(fs); 961 while( tmp < e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]) + 962 fs->e2fs_ipg / inodes_per_block ) { 963 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 964 ext2_block_in_group(fs, tmp, cg)) 965 setbit(bp->b_data, tmp - start); 966 tmp++; 967 } 968 969 /* 970 * Also if the number of blocks within the group is less than 971 * the blocksize * 8 ( which is the size of bitmap ), set rest 972 * of the block bitmap to 1 973 */ 974 ext2_mark_bitmap_end(fs->e2fs_bpg, fs->e2fs_bsize * 8, 975 bp->b_data); 976 977 /* Clean the flag */ 978 fs->e2fs_gd[cg].ext4bgd_flags = htole16(le16toh( 979 fs->e2fs_gd[cg].ext4bgd_flags) & ~EXT2_BG_BLOCK_UNINIT); 980 981 return (0); 982 } 983 984 static int 985 ext2_b_bitmap_validate(struct m_ext2fs *fs, struct buf *bp, int cg) 986 { 987 struct ext2_gd *gd; 988 uint64_t group_first_block; 989 unsigned int offset, max_bit; 990 991 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG)) { 992 /* 993 * It is not possible to check block bitmap in case of this 994 * feature, because the inode and block bitmaps and inode table 995 * blocks may not be in the group at all. 996 * So, skip check in this case. 997 */ 998 return (0); 999 } 1000 1001 gd = &fs->e2fs_gd[cg]; 1002 max_bit = fs->e2fs_fpg; 1003 group_first_block = ((uint64_t)cg) * fs->e2fs_fpg + 1004 le32toh(fs->e2fs->e2fs_first_dblock); 1005 1006 /* Check block bitmap block number */ 1007 offset = e2fs_gd_get_b_bitmap(gd) - group_first_block; 1008 if (offset >= max_bit || !isset(bp->b_data, offset)) { 1009 SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, 1010 "bad block bitmap, group", cg); 1011 return (EINVAL); 1012 } 1013 1014 /* Check inode bitmap block number */ 1015 offset = e2fs_gd_get_i_bitmap(gd) - group_first_block; 1016 if (offset >= max_bit || !isset(bp->b_data, offset)) { 1017 SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, 1018 "bad inode bitmap", cg); 1019 return (EINVAL); 1020 } 1021 1022 /* Check inode table */ 1023 offset = e2fs_gd_get_i_tables(gd) - group_first_block; 1024 if (offset >= max_bit || offset + fs->e2fs_itpg >= max_bit) { 1025 SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, 1026 "bad inode table, group", cg); 1027 return (EINVAL); 1028 } 1029 1030 return (0); 1031 } 1032 1033 /* 1034 * Determine whether a block can be allocated. 1035 * 1036 * Check to see if a block of the appropriate size is available, 1037 * and if it is, allocate it. 1038 */ 1039 static daddr_t 1040 ext2_alloccg(struct inode *ip, int cg, daddr_t bpref, int size) 1041 { 1042 struct m_ext2fs *fs; 1043 struct buf *bp; 1044 struct ext2mount *ump; 1045 daddr_t bno, runstart, runlen; 1046 int bit, loc, end, error, start; 1047 char *bbp; 1048 /* XXX ondisk32 */ 1049 fs = ip->i_e2fs; 1050 ump = ip->i_ump; 1051 if (e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) == 0) 1052 return (0); 1053 1054 EXT2_UNLOCK(ump); 1055 error = ext2_bread(ip->i_devvp, fsbtodoff(fs, 1056 e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])), 1057 (int)fs->e2fs_bsize, &bp); 1058 if (error) 1059 goto fail; 1060 1061 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1062 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 1063 error = ext2_cg_block_bitmap_init(fs, cg, bp); 1064 if (error) 1065 goto fail; 1066 1067 ext2_gd_b_bitmap_csum_set(fs, cg, bp); 1068 } 1069 error = ext2_gd_b_bitmap_csum_verify(fs, cg, bp); 1070 if (error) 1071 goto fail; 1072 1073 error = ext2_b_bitmap_validate(fs,bp, cg); 1074 if (error) 1075 goto fail; 1076 1077 /* 1078 * Check, that another thread did not not allocate the last block in 1079 * this group while we were waiting for the buffer. 1080 */ 1081 if (e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) == 0) 1082 goto fail; 1083 1084 bbp = (char *)bp->b_data; 1085 1086 if (dtog(fs, bpref) != cg) 1087 bpref = 0; 1088 if (bpref != 0) { 1089 bpref = dtogd(fs, bpref); 1090 /* 1091 * if the requested block is available, use it 1092 */ 1093 if (isclr(bbp, bpref)) { 1094 bno = bpref; 1095 goto gotit; 1096 } 1097 } 1098 /* 1099 * no blocks in the requested cylinder, so take next 1100 * available one in this cylinder group. 1101 * first try to get 8 contigous blocks, then fall back to a single 1102 * block. 1103 */ 1104 if (bpref) 1105 start = dtogd(fs, bpref) / NBBY; 1106 else 1107 start = 0; 1108 end = howmany(fs->e2fs_fpg, NBBY) - start; 1109 retry: 1110 runlen = 0; 1111 runstart = 0; 1112 for (loc = start; loc < end; loc++) { 1113 if (bbp[loc] == (char)0xff) { 1114 runlen = 0; 1115 continue; 1116 } 1117 1118 /* Start of a run, find the number of high clear bits. */ 1119 if (runlen == 0) { 1120 bit = fls(bbp[loc]); 1121 runlen = NBBY - bit; 1122 runstart = loc * NBBY + bit; 1123 } else if (bbp[loc] == 0) { 1124 /* Continue a run. */ 1125 runlen += NBBY; 1126 } else { 1127 /* 1128 * Finish the current run. If it isn't long 1129 * enough, start a new one. 1130 */ 1131 bit = ffs(bbp[loc]) - 1; 1132 runlen += bit; 1133 if (runlen >= 8) { 1134 bno = runstart; 1135 goto gotit; 1136 } 1137 1138 /* Run was too short, start a new one. */ 1139 bit = fls(bbp[loc]); 1140 runlen = NBBY - bit; 1141 runstart = loc * NBBY + bit; 1142 } 1143 1144 /* If the current run is long enough, use it. */ 1145 if (runlen >= 8) { 1146 bno = runstart; 1147 goto gotit; 1148 } 1149 } 1150 if (start != 0) { 1151 end = start; 1152 start = 0; 1153 goto retry; 1154 } 1155 bno = ext2_mapsearch(fs, bbp, bpref); 1156 if (bno < 0) 1157 goto fail; 1158 1159 gotit: 1160 #ifdef INVARIANTS 1161 if (isset(bbp, bno)) { 1162 printf("ext2fs_alloccgblk: cg=%d bno=%jd fs=%s\n", 1163 cg, (intmax_t)bno, fs->e2fs_fsmnt); 1164 panic("ext2fs_alloccg: dup alloc"); 1165 } 1166 #endif 1167 setbit(bbp, bno); 1168 EXT2_LOCK(ump); 1169 ext2_clusteracct(fs, bbp, cg, bno, -1); 1170 fs->e2fs_fbcount--; 1171 e2fs_gd_set_nbfree(&fs->e2fs_gd[cg], 1172 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) - 1); 1173 fs->e2fs_fmod = 1; 1174 EXT2_UNLOCK(ump); 1175 ext2_gd_b_bitmap_csum_set(fs, cg, bp); 1176 bdwrite(bp); 1177 return (((uint64_t)cg) * fs->e2fs_fpg + 1178 le32toh(fs->e2fs->e2fs_first_dblock) + bno); 1179 1180 fail: 1181 ext2_brelse(bp); 1182 EXT2_LOCK(ump); 1183 return (0); 1184 } 1185 1186 /* 1187 * Determine whether a cluster can be allocated. 1188 */ 1189 static daddr_t 1190 ext2_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len) 1191 { 1192 struct m_ext2fs *fs; 1193 struct ext2mount *ump; 1194 struct buf *bp; 1195 char *bbp; 1196 int bit, error, got, i, loc, run; 1197 int32_t *lp; 1198 daddr_t bno; 1199 1200 fs = ip->i_e2fs; 1201 ump = ip->i_ump; 1202 1203 if (fs->e2fs_maxcluster[cg] < len) 1204 return (0); 1205 1206 EXT2_UNLOCK(ump); 1207 error = ext2_bread(ip->i_devvp, 1208 fsbtodoff(fs, e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])), 1209 (int)fs->e2fs_bsize, &bp); 1210 if (error) 1211 goto fail_lock; 1212 1213 bbp = (char *)bp->b_data; 1214 EXT2_LOCK(ump); 1215 /* 1216 * Check to see if a cluster of the needed size (or bigger) is 1217 * available in this cylinder group. 1218 */ 1219 lp = &fs->e2fs_clustersum[cg].cs_sum[len]; 1220 for (i = len; i <= fs->e2fs_contigsumsize; i++) 1221 if (*lp++ > 0) 1222 break; 1223 if (i > fs->e2fs_contigsumsize) { 1224 /* 1225 * Update the cluster summary information to reflect 1226 * the true maximum-sized cluster so that future cluster 1227 * allocation requests can avoid reading the bitmap only 1228 * to find no cluster. 1229 */ 1230 lp = &fs->e2fs_clustersum[cg].cs_sum[len - 1]; 1231 for (i = len - 1; i > 0; i--) 1232 if (*lp-- > 0) 1233 break; 1234 fs->e2fs_maxcluster[cg] = i; 1235 goto fail; 1236 } 1237 EXT2_UNLOCK(ump); 1238 1239 /* Search the bitmap to find a big enough cluster like in FFS. */ 1240 if (dtog(fs, bpref) != cg) 1241 bpref = 0; 1242 if (bpref != 0) 1243 bpref = dtogd(fs, bpref); 1244 loc = bpref / NBBY; 1245 bit = 1 << (bpref % NBBY); 1246 for (run = 0, got = bpref; got < fs->e2fs_fpg; got++) { 1247 if ((bbp[loc] & bit) != 0) 1248 run = 0; 1249 else { 1250 run++; 1251 if (run == len) 1252 break; 1253 } 1254 if ((got & (NBBY - 1)) != (NBBY - 1)) 1255 bit <<= 1; 1256 else { 1257 loc++; 1258 bit = 1; 1259 } 1260 } 1261 1262 if (got >= fs->e2fs_fpg) 1263 goto fail_lock; 1264 1265 /* Allocate the cluster that we found. */ 1266 for (i = 1; i < len; i++) 1267 if (!isclr(bbp, got - run + i)) 1268 panic("ext2_clusteralloc: map mismatch"); 1269 1270 bno = got - run + 1; 1271 if (bno >= fs->e2fs_fpg) 1272 panic("ext2_clusteralloc: allocated out of group"); 1273 1274 EXT2_LOCK(ump); 1275 for (i = 0; i < len; i += fs->e2fs_fpb) { 1276 setbit(bbp, bno + i); 1277 ext2_clusteracct(fs, bbp, cg, bno + i, -1); 1278 fs->e2fs_fbcount--; 1279 e2fs_gd_set_nbfree(&fs->e2fs_gd[cg], 1280 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) - 1); 1281 } 1282 fs->e2fs_fmod = 1; 1283 EXT2_UNLOCK(ump); 1284 1285 bdwrite(bp); 1286 return (cg * fs->e2fs_fpg + le32toh(fs->e2fs->e2fs_first_dblock) 1287 + bno); 1288 1289 fail_lock: 1290 EXT2_LOCK(ump); 1291 fail: 1292 ext2_brelse(bp); 1293 return (0); 1294 } 1295 1296 static int 1297 ext2_zero_inode_table(struct inode *ip, int cg) 1298 { 1299 struct m_ext2fs *fs; 1300 struct buf *bp; 1301 int i, all_blks, used_blks; 1302 1303 fs = ip->i_e2fs; 1304 1305 if (le16toh(fs->e2fs_gd[cg].ext4bgd_flags) & EXT2_BG_INODE_ZEROED) 1306 return (0); 1307 1308 all_blks = le16toh(fs->e2fs->e2fs_inode_size) * fs->e2fs_ipg / 1309 fs->e2fs_bsize; 1310 1311 used_blks = howmany(fs->e2fs_ipg - 1312 e2fs_gd_get_i_unused(&fs->e2fs_gd[cg]), 1313 fs->e2fs_bsize / EXT2_INODE_SIZE(fs)); 1314 1315 for (i = 0; i < all_blks - used_blks; i++) { 1316 bp = getblk(ip->i_devvp, fsbtodoff(fs, 1317 e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]) + used_blks + i), 1318 fs->e2fs_bsize, 0, 0); 1319 if (!bp) 1320 return (EIO); 1321 1322 vfs_bio_clrbuf(bp); 1323 bawrite(bp); 1324 } 1325 1326 fs->e2fs_gd[cg].ext4bgd_flags = htole16(le16toh( 1327 fs->e2fs_gd[cg].ext4bgd_flags) | EXT2_BG_INODE_ZEROED); 1328 1329 return (0); 1330 } 1331 1332 static void 1333 ext2_fix_bitmap_tail(unsigned char *bitmap, int first, int last) 1334 { 1335 int i; 1336 1337 for (i = first; i <= last; i++) 1338 bitmap[i] = 0xff; 1339 } 1340 1341 1342 /* 1343 * Determine whether an inode can be allocated. 1344 * 1345 * Check to see if an inode is available, and if it is, 1346 * allocate it using tode in the specified cylinder group. 1347 */ 1348 static daddr_t 1349 ext2_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode) 1350 { 1351 struct m_ext2fs *fs; 1352 struct buf *bp; 1353 struct ext2mount *ump; 1354 int error, start, len, ifree, ibytes; 1355 char *ibp, *loc; 1356 1357 ipref--; /* to avoid a lot of (ipref -1) */ 1358 if (ipref == -1) 1359 ipref = 0; 1360 fs = ip->i_e2fs; 1361 ump = ip->i_ump; 1362 if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) == 0) 1363 return (0); 1364 EXT2_UNLOCK(ump); 1365 error = ext2_bread(ip->i_devvp, fsbtodoff(fs, 1366 e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg])), 1367 (int)fs->e2fs_bsize, &bp); 1368 if (error) { 1369 EXT2_LOCK(ump); 1370 return (0); 1371 } 1372 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1373 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 1374 if (le16toh(fs->e2fs_gd[cg].ext4bgd_flags) & 1375 EXT2_BG_INODE_UNINIT) { 1376 ibytes = fs->e2fs_ipg / 8; 1377 memset(bp->b_data, 0, ibytes - 1); 1378 ext2_fix_bitmap_tail(bp->b_data, ibytes, 1379 fs->e2fs_bsize - 1); 1380 fs->e2fs_gd[cg].ext4bgd_flags = htole16(le16toh( 1381 fs->e2fs_gd[cg].ext4bgd_flags) & 1382 ~EXT2_BG_INODE_UNINIT); 1383 } 1384 ext2_gd_i_bitmap_csum_set(fs, cg, bp); 1385 error = ext2_zero_inode_table(ip, cg); 1386 if (error) { 1387 ext2_brelse(bp); 1388 EXT2_LOCK(ump); 1389 return (0); 1390 } 1391 } 1392 error = ext2_gd_i_bitmap_csum_verify(fs, cg, bp); 1393 if (error) { 1394 ext2_brelse(bp); 1395 EXT2_LOCK(ump); 1396 return (0); 1397 } 1398 if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) == 0) { 1399 /* 1400 * Another thread allocated the last i-node in this 1401 * group while we were waiting for the buffer. 1402 */ 1403 ext2_brelse(bp); 1404 EXT2_LOCK(ump); 1405 return (0); 1406 } 1407 ibp = (char *)bp->b_data; 1408 if (ipref) { 1409 ipref %= fs->e2fs_ipg; 1410 if (isclr(ibp, ipref)) 1411 goto gotit; 1412 } 1413 start = ipref / NBBY; 1414 len = howmany(fs->e2fs_ipg - ipref, NBBY); 1415 loc = memcchr(&ibp[start], 0xff, len); 1416 if (loc == NULL) { 1417 len = start + 1; 1418 start = 0; 1419 loc = memcchr(&ibp[start], 0xff, len); 1420 if (loc == NULL) { 1421 SDT_PROBE3(ext2fs, , alloc, 1422 ext2_nodealloccg_bmap_corrupted, cg, ipref, 1423 fs->e2fs_fsmnt); 1424 ext2_brelse(bp); 1425 EXT2_LOCK(ump); 1426 return (0); 1427 } 1428 } 1429 ipref = (loc - ibp) * NBBY + ffs(~*loc) - 1; 1430 gotit: 1431 setbit(ibp, ipref); 1432 EXT2_LOCK(ump); 1433 e2fs_gd_set_nifree(&fs->e2fs_gd[cg], 1434 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) - 1); 1435 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1436 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 1437 ifree = fs->e2fs_ipg - e2fs_gd_get_i_unused(&fs->e2fs_gd[cg]); 1438 if (ipref + 1 > ifree) 1439 e2fs_gd_set_i_unused(&fs->e2fs_gd[cg], 1440 fs->e2fs_ipg - (ipref + 1)); 1441 } 1442 fs->e2fs_ficount--; 1443 fs->e2fs_fmod = 1; 1444 if ((mode & IFMT) == IFDIR) { 1445 e2fs_gd_set_ndirs(&fs->e2fs_gd[cg], 1446 e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) + 1); 1447 fs->e2fs_total_dir++; 1448 } 1449 EXT2_UNLOCK(ump); 1450 ext2_gd_i_bitmap_csum_set(fs, cg, bp); 1451 bdwrite(bp); 1452 return ((uint64_t)cg * fs->e2fs_ipg + ipref + 1); 1453 } 1454 1455 /* 1456 * Free a block or fragment. 1457 * 1458 */ 1459 void 1460 ext2_blkfree(struct inode *ip, e4fs_daddr_t bno, long size) 1461 { 1462 struct m_ext2fs *fs; 1463 struct buf *bp; 1464 struct ext2mount *ump; 1465 int cg, error; 1466 char *bbp; 1467 1468 fs = ip->i_e2fs; 1469 ump = ip->i_ump; 1470 cg = dtog(fs, bno); 1471 if (bno >= fs->e2fs_bcount) { 1472 SDT_PROBE2(ext2fs, , alloc, ext2_blkfree_bad_block, 1473 ip->i_number, bno); 1474 return; 1475 } 1476 error = ext2_bread(ip->i_devvp, 1477 fsbtodoff(fs, e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])), 1478 (int)fs->e2fs_bsize, &bp); 1479 if (error) { 1480 return; 1481 } 1482 bbp = (char *)bp->b_data; 1483 bno = dtogd(fs, bno); 1484 if (isclr(bbp, bno)) { 1485 panic("ext2_blkfree: freeing free block %lld, fs=%s", 1486 (long long)bno, fs->e2fs_fsmnt); 1487 } 1488 clrbit(bbp, bno); 1489 EXT2_LOCK(ump); 1490 ext2_clusteracct(fs, bbp, cg, bno, 1); 1491 fs->e2fs_fbcount++; 1492 e2fs_gd_set_nbfree(&fs->e2fs_gd[cg], 1493 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) + 1); 1494 fs->e2fs_fmod = 1; 1495 EXT2_UNLOCK(ump); 1496 ext2_gd_b_bitmap_csum_set(fs, cg, bp); 1497 bdwrite(bp); 1498 } 1499 1500 /* 1501 * Free an inode. 1502 * 1503 */ 1504 int 1505 ext2_vfree(struct vnode *pvp, ino_t ino, int mode) 1506 { 1507 struct m_ext2fs *fs; 1508 struct inode *pip; 1509 struct buf *bp; 1510 struct ext2mount *ump; 1511 int error, cg; 1512 char *ibp; 1513 1514 pip = VTOI(pvp); 1515 fs = pip->i_e2fs; 1516 ump = pip->i_ump; 1517 if ((u_int)ino > fs->e2fs_ipg * fs->e2fs_gcount) 1518 panic("ext2_vfree: range: devvp = %p, ino = %ju, fs = %s", 1519 pip->i_devvp, (uintmax_t)ino, fs->e2fs_fsmnt); 1520 1521 cg = ino_to_cg(fs, ino); 1522 error = ext2_bread(pip->i_devvp, 1523 fsbtodoff(fs, e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg])), 1524 (int)fs->e2fs_bsize, &bp); 1525 if (error) { 1526 return (0); 1527 } 1528 ibp = (char *)bp->b_data; 1529 ino = (ino - 1) % fs->e2fs_ipg; 1530 if (isclr(ibp, ino)) { 1531 SDT_PROBE2(ext2fs, , alloc, ext2_vfree_doublefree, 1532 fs->e2fs_fsmnt, ino); 1533 if (fs->e2fs_ronly == 0) 1534 panic("ext2_vfree: freeing free inode"); 1535 } 1536 clrbit(ibp, ino); 1537 EXT2_LOCK(ump); 1538 fs->e2fs_ficount++; 1539 e2fs_gd_set_nifree(&fs->e2fs_gd[cg], 1540 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) + 1); 1541 if ((mode & IFMT) == IFDIR) { 1542 e2fs_gd_set_ndirs(&fs->e2fs_gd[cg], 1543 e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) - 1); 1544 fs->e2fs_total_dir--; 1545 } 1546 fs->e2fs_fmod = 1; 1547 EXT2_UNLOCK(ump); 1548 ext2_gd_i_bitmap_csum_set(fs, cg, bp); 1549 bdwrite(bp); 1550 return (0); 1551 } 1552 1553 /* 1554 * Find a block in the specified cylinder group. 1555 * 1556 * It is a panic if a request is made to find a block if none are 1557 * available. 1558 */ 1559 static daddr_t 1560 ext2_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref) 1561 { 1562 char *loc; 1563 int start, len; 1564 1565 /* 1566 * find the fragment by searching through the free block 1567 * map for an appropriate bit pattern 1568 */ 1569 if (bpref) 1570 start = dtogd(fs, bpref) / NBBY; 1571 else 1572 start = 0; 1573 len = howmany(fs->e2fs_fpg, NBBY) - start; 1574 loc = memcchr(&bbp[start], 0xff, len); 1575 if (loc == NULL) { 1576 len = start + 1; 1577 start = 0; 1578 loc = memcchr(&bbp[start], 0xff, len); 1579 if (loc == NULL) { 1580 panic("ext2_mapsearch: map corrupted: start=%d, len=%d," 1581 "fs=%s", start, len, fs->e2fs_fsmnt); 1582 /* NOTREACHED */ 1583 } 1584 } 1585 return ((loc - bbp) * NBBY + ffs(~*loc) - 1); 1586 } 1587 1588 int 1589 ext2_cg_has_sb(struct m_ext2fs *fs, int cg) 1590 { 1591 int a3, a5, a7; 1592 1593 if (cg == 0) 1594 return (1); 1595 1596 if (EXT2_HAS_COMPAT_FEATURE(fs, EXT2F_COMPAT_SPARSESUPER2)) { 1597 if (cg == le32toh(fs->e2fs->e4fs_backup_bgs[0]) || 1598 cg == le32toh(fs->e2fs->e4fs_backup_bgs[1])) 1599 return (1); 1600 return (0); 1601 } 1602 1603 if ((cg <= 1) || 1604 !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_SPARSESUPER)) 1605 return (1); 1606 1607 if (!(cg & 1)) 1608 return (0); 1609 1610 for (a3 = 3, a5 = 5, a7 = 7; 1611 a3 <= cg || a5 <= cg || a7 <= cg; 1612 a3 *= 3, a5 *= 5, a7 *= 7) 1613 if (cg == a3 || cg == a5 || cg == a7) 1614 return (1); 1615 return (0); 1616 } 1617