1 /* $OpenBSD: ext2fs_alloc.c,v 1.16 2004/07/13 21:04:29 millert Exp $ */ 2 /* $NetBSD: ext2fs_alloc.c,v 1.10 2001/07/05 08:38:27 toshii Exp $ */ 3 4 /* 5 * Copyright (c) 1997 Manuel Bouyer. 6 * Copyright (c) 1982, 1986, 1989, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)ffs_alloc.c 8.11 (Berkeley) 10/27/94 34 * Modified for ext2fs by Manuel Bouyer. 35 */ 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/buf.h> 40 #include <sys/proc.h> 41 #include <sys/vnode.h> 42 #include <sys/mount.h> 43 #include <sys/kernel.h> 44 #include <sys/syslog.h> 45 46 #include <ufs/ufs/quota.h> 47 #include <ufs/ufs/inode.h> 48 #include <ufs/ufs/extattr.h> 49 #include <ufs/ufs/ufsmount.h> 50 #include <ufs/ufs/ufs_extern.h> 51 52 #include <ufs/ext2fs/ext2fs.h> 53 #include <ufs/ext2fs/ext2fs_extern.h> 54 55 u_long ext2gennumber; 56 57 static ufs1_daddr_t ext2fs_alloccg(struct inode *, int, ufs1_daddr_t, int); 58 static u_long ext2fs_dirpref(struct m_ext2fs *); 59 static void ext2fs_fserr(struct m_ext2fs *, u_int, char *); 60 static u_long ext2fs_hashalloc(struct inode *, int, long, int, 61 ufs1_daddr_t (*)(struct inode *, int, ufs1_daddr_t, int)); 62 static ufs1_daddr_t ext2fs_nodealloccg(struct inode *, int, ufs1_daddr_t, int); 63 static ufs1_daddr_t ext2fs_mapsearch(struct m_ext2fs *, char *, ufs1_daddr_t); 64 65 /* 66 * Allocate a block in the file system. 67 * 68 * A preference may be optionally specified. If a preference is given 69 * the following hierarchy is used to allocate a block: 70 * 1) allocate the requested block. 71 * 2) allocate a rotationally optimal block in the same cylinder. 72 * 3) allocate a block in the same cylinder group. 73 * 4) quadradically rehash into other cylinder groups, until an 74 * available block is located. 75 * If no block preference is given the following heirarchy is used 76 * to allocate a block: 77 * 1) allocate a block in the cylinder group that contains the 78 * inode for the file. 79 * 2) quadradically rehash into other cylinder groups, until an 80 * available block is located. 81 */ 82 int 83 ext2fs_alloc(ip, lbn, bpref, cred, bnp) 84 struct inode *ip; 85 ufs1_daddr_t lbn, bpref; 86 struct ucred *cred; 87 ufs1_daddr_t *bnp; 88 { 89 struct m_ext2fs *fs; 90 ufs1_daddr_t bno; 91 int cg; 92 93 *bnp = 0; 94 fs = ip->i_e2fs; 95 #ifdef DIAGNOSTIC 96 if (cred == NOCRED) 97 panic("ext2fs_alloc: missing credential"); 98 #endif /* DIAGNOSTIC */ 99 if (fs->e2fs.e2fs_fbcount == 0) 100 goto nospace; 101 if (cred->cr_uid != 0 && freespace(fs) <= 0) 102 goto nospace; 103 if (bpref >= fs->e2fs.e2fs_bcount) 104 bpref = 0; 105 if (bpref == 0) 106 cg = ino_to_cg(fs, ip->i_number); 107 else 108 cg = dtog(fs, bpref); 109 bno = (ufs1_daddr_t)ext2fs_hashalloc(ip, cg, bpref, fs->e2fs_bsize, 110 ext2fs_alloccg); 111 if (bno > 0) { 112 ip->i_e2fs_nblock += btodb(fs->e2fs_bsize); 113 ip->i_flag |= IN_CHANGE | IN_UPDATE; 114 *bnp = bno; 115 return (0); 116 } 117 nospace: 118 ext2fs_fserr(fs, cred->cr_uid, "file system full"); 119 uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt); 120 return (ENOSPC); 121 } 122 123 /* 124 * Allocate an inode in the file system. 125 * 126 * If allocating a directory, use ext2fs_dirpref to select the inode. 127 * If allocating in a directory, the following hierarchy is followed: 128 * 1) allocate the preferred inode. 129 * 2) allocate an inode in the same cylinder group. 130 * 3) quadradically rehash into other cylinder groups, until an 131 * available inode is located. 132 * If no inode preference is given the following heirarchy is used 133 * to allocate an inode: 134 * 1) allocate an inode in cylinder group 0. 135 * 2) quadradically rehash into other cylinder groups, until an 136 * available inode is located. 137 */ 138 int 139 ext2fs_inode_alloc(struct inode *pip, mode_t mode, struct ucred *cred, 140 struct vnode **vpp) 141 { 142 struct vnode *pvp; 143 struct m_ext2fs *fs; 144 struct inode *ip; 145 ino_t ino, ipref; 146 int cg, error; 147 148 *vpp = NULL; 149 pvp = ITOV(pip); 150 fs = pip->i_e2fs; 151 if (fs->e2fs.e2fs_ficount == 0) 152 goto noinodes; 153 154 if ((mode & IFMT) == IFDIR) 155 cg = ext2fs_dirpref(fs); 156 else 157 cg = ino_to_cg(fs, pip->i_number); 158 ipref = cg * fs->e2fs.e2fs_ipg + 1; 159 ino = (ino_t)ext2fs_hashalloc(pip, cg, (long)ipref, mode, ext2fs_nodealloccg); 160 if (ino == 0) 161 goto noinodes; 162 error = VFS_VGET(pvp->v_mount, ino, vpp); 163 if (error) { 164 ext2fs_inode_free(pip, ino, mode); 165 return (error); 166 } 167 ip = VTOI(*vpp); 168 if (ip->i_e2fs_mode && ip->i_e2fs_nlink != 0) { 169 printf("mode = 0%o, nlinks %d, inum = %d, fs = %s\n", 170 ip->i_e2fs_mode, ip->i_e2fs_nlink, ip->i_number, fs->e2fs_fsmnt); 171 panic("ext2fs_valloc: dup alloc"); 172 } 173 174 bzero(&(ip->i_e2din), sizeof(struct ext2fs_dinode)); 175 176 /* 177 * Set up a new generation number for this inode. 178 */ 179 if (++ext2gennumber < (u_long)time_second) 180 ext2gennumber = time_second; 181 ip->i_e2fs_gen = ext2gennumber; 182 return (0); 183 noinodes: 184 ext2fs_fserr(fs, cred->cr_uid, "out of inodes"); 185 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt); 186 return (ENOSPC); 187 } 188 189 /* 190 * Find a cylinder to place a directory. 191 * 192 * The policy implemented by this algorithm is to select from 193 * among those cylinder groups with above the average number of 194 * free inodes, the one with the smallest number of directories. 195 */ 196 static u_long 197 ext2fs_dirpref(fs) 198 struct m_ext2fs *fs; 199 { 200 int cg, maxspace, mincg, avgifree; 201 202 avgifree = fs->e2fs.e2fs_ficount / fs->e2fs_ncg; 203 maxspace = 0; 204 mincg = -1; 205 for (cg = 0; cg < fs->e2fs_ncg; cg++) 206 if ( fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree) { 207 if (mincg == -1 || fs->e2fs_gd[cg].ext2bgd_nbfree > maxspace) { 208 mincg = cg; 209 maxspace = fs->e2fs_gd[cg].ext2bgd_nbfree; 210 } 211 } 212 return mincg; 213 } 214 215 /* 216 * Select the desired position for the next block in a file. The file is 217 * logically divided into sections. The first section is composed of the 218 * direct blocks. Each additional section contains fs_maxbpg blocks. 219 * 220 * If no blocks have been allocated in the first section, the policy is to 221 * request a block in the same cylinder group as the inode that describes 222 * the file. Otherwise, the policy is to try to allocate the blocks 223 * contigously. The two fields of the ext2 inode extension (see 224 * ufs/ufs/inode.h) help this. 225 */ 226 ufs1_daddr_t 227 ext2fs_blkpref(ip, lbn, indx, bap) 228 struct inode *ip; 229 ufs1_daddr_t lbn; 230 int indx; 231 ufs1_daddr_t *bap; 232 { 233 struct m_ext2fs *fs; 234 int cg, i; 235 236 fs = ip->i_e2fs; 237 /* 238 * if we are doing contigous lbn allocation, try to alloc blocks 239 * contigously on disk 240 */ 241 242 if ( ip->i_e2fs_last_blk && lbn == ip->i_e2fs_last_lblk + 1) { 243 return ip->i_e2fs_last_blk + 1; 244 } 245 246 /* 247 * bap, if provided, gives us a list of blocks to which we want to 248 * stay close 249 */ 250 251 if (bap) { 252 for (i = indx; i >= 0 ; i--) { 253 if (bap[i]) { 254 return fs2h32(bap[i]) + 1; 255 } 256 } 257 } 258 259 /* fall back to the first block of the cylinder containing the inode */ 260 261 cg = ino_to_cg(fs, ip->i_number); 262 return fs->e2fs.e2fs_bpg * cg + fs->e2fs.e2fs_first_dblock + 1; 263 } 264 265 /* 266 * Implement the cylinder overflow algorithm. 267 * 268 * The policy implemented by this algorithm is: 269 * 1) allocate the block in its requested cylinder group. 270 * 2) quadradically rehash on the cylinder group number. 271 * 3) brute force search for a free block. 272 */ 273 static u_long 274 ext2fs_hashalloc(ip, cg, pref, size, allocator) 275 struct inode *ip; 276 int cg; 277 long pref; 278 int size; /* size for data blocks, mode for inodes */ 279 ufs1_daddr_t (*allocator)(struct inode *, int, ufs1_daddr_t, int); 280 { 281 struct m_ext2fs *fs; 282 long result; 283 int i, icg = cg; 284 285 fs = ip->i_e2fs; 286 /* 287 * 1: preferred cylinder group 288 */ 289 result = (*allocator)(ip, cg, pref, size); 290 if (result) 291 return (result); 292 /* 293 * 2: quadratic rehash 294 */ 295 for (i = 1; i < fs->e2fs_ncg; i *= 2) { 296 cg += i; 297 if (cg >= fs->e2fs_ncg) 298 cg -= fs->e2fs_ncg; 299 result = (*allocator)(ip, cg, 0, size); 300 if (result) 301 return (result); 302 } 303 /* 304 * 3: brute force search 305 * Note that we start at i == 2, since 0 was checked initially, 306 * and 1 is always checked in the quadratic rehash. 307 */ 308 cg = (icg + 2) % fs->e2fs_ncg; 309 for (i = 2; i < fs->e2fs_ncg; i++) { 310 result = (*allocator)(ip, cg, 0, size); 311 if (result) 312 return (result); 313 cg++; 314 if (cg == fs->e2fs_ncg) 315 cg = 0; 316 } 317 return (0); 318 } 319 320 /* 321 * Determine whether a block can be allocated. 322 * 323 * Check to see if a block of the appropriate size is available, 324 * and if it is, allocate it. 325 */ 326 327 static ufs1_daddr_t 328 ext2fs_alloccg(ip, cg, bpref, size) 329 struct inode *ip; 330 int cg; 331 ufs1_daddr_t bpref; 332 int size; 333 { 334 struct m_ext2fs *fs; 335 char *bbp; 336 struct buf *bp; 337 int error, bno, start, end, loc; 338 339 fs = ip->i_e2fs; 340 if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0) 341 return (0); 342 error = bread(ip->i_devvp, fsbtodb(fs, 343 fs->e2fs_gd[cg].ext2bgd_b_bitmap), 344 (int)fs->e2fs_bsize, NOCRED, &bp); 345 if (error) { 346 brelse(bp); 347 return (0); 348 } 349 bbp = (char *)bp->b_data; 350 351 if (dtog(fs, bpref) != cg) 352 bpref = 0; 353 if (bpref != 0) { 354 bpref = dtogd(fs, bpref); 355 /* 356 * if the requested block is available, use it 357 */ 358 if (isclr(bbp, bpref)) { 359 bno = bpref; 360 goto gotit; 361 } 362 } 363 /* 364 * no blocks in the requested cylinder, so take next 365 * available one in this cylinder group. 366 * first try to get 8 contigous blocks, then fall back to a single 367 * block. 368 */ 369 if (bpref) 370 start = dtogd(fs, bpref) / NBBY; 371 else 372 start = 0; 373 end = howmany(fs->e2fs.e2fs_fpg, NBBY) - start; 374 for (loc = start; loc < end; loc++) { 375 if (bbp[loc] == 0) { 376 bno = loc * NBBY; 377 goto gotit; 378 } 379 } 380 for (loc = 0; loc < start; loc++) { 381 if (bbp[loc] == 0) { 382 bno = loc * NBBY; 383 goto gotit; 384 } 385 } 386 387 bno = ext2fs_mapsearch(fs, bbp, bpref); 388 if (bno < 0) 389 return (0); 390 gotit: 391 #ifdef DIAGNOSTIC 392 if (isset(bbp, (long)bno)) { 393 printf("ext2fs_alloccgblk: cg=%d bno=%d fs=%s\n", 394 cg, bno, fs->e2fs_fsmnt); 395 panic("ext2fs_alloccg: dup alloc"); 396 } 397 #endif 398 setbit(bbp, (long)bno); 399 fs->e2fs.e2fs_fbcount--; 400 fs->e2fs_gd[cg].ext2bgd_nbfree--; 401 fs->e2fs_fmod = 1; 402 bdwrite(bp); 403 return (cg * fs->e2fs.e2fs_fpg + fs->e2fs.e2fs_first_dblock + bno); 404 } 405 406 /* 407 * Determine whether an inode can be allocated. 408 * 409 * Check to see if an inode is available, and if it is, 410 * allocate it using the following policy: 411 * 1) allocate the requested inode. 412 * 2) allocate the next available inode after the requested 413 * inode in the specified cylinder group. 414 */ 415 static ufs1_daddr_t 416 ext2fs_nodealloccg(ip, cg, ipref, mode) 417 struct inode *ip; 418 int cg; 419 ufs1_daddr_t ipref; 420 int mode; 421 { 422 struct m_ext2fs *fs; 423 char *ibp; 424 struct buf *bp; 425 int error, start, len, loc, map, i; 426 427 ipref--; /* to avoid a lot of (ipref -1) */ 428 fs = ip->i_e2fs; 429 if (fs->e2fs_gd[cg].ext2bgd_nifree == 0) 430 return (0); 431 error = bread(ip->i_devvp, fsbtodb(fs, 432 fs->e2fs_gd[cg].ext2bgd_i_bitmap), 433 (int)fs->e2fs_bsize, NOCRED, &bp); 434 if (error) { 435 brelse(bp); 436 return (0); 437 } 438 ibp = (char *)bp->b_data; 439 if (ipref) { 440 ipref %= fs->e2fs.e2fs_ipg; 441 if (isclr(ibp, ipref)) 442 goto gotit; 443 } 444 start = ipref / NBBY; 445 len = howmany(fs->e2fs.e2fs_ipg - ipref, NBBY); 446 loc = skpc(0xff, len, &ibp[start]); 447 if (loc == 0) { 448 len = start + 1; 449 start = 0; 450 loc = skpc(0xff, len, &ibp[0]); 451 if (loc == 0) { 452 printf("cg = %d, ipref = %d, fs = %s\n", 453 cg, ipref, fs->e2fs_fsmnt); 454 panic("ext2fs_nodealloccg: map corrupted"); 455 /* NOTREACHED */ 456 } 457 } 458 i = start + len - loc; 459 map = ibp[i]; 460 ipref = i * NBBY; 461 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) { 462 if ((map & i) == 0) { 463 goto gotit; 464 } 465 } 466 printf("fs = %s\n", fs->e2fs_fsmnt); 467 panic("ext2fs_nodealloccg: block not in map"); 468 /* NOTREACHED */ 469 gotit: 470 setbit(ibp, ipref); 471 fs->e2fs.e2fs_ficount--; 472 fs->e2fs_gd[cg].ext2bgd_nifree--; 473 fs->e2fs_fmod = 1; 474 if ((mode & IFMT) == IFDIR) { 475 fs->e2fs_gd[cg].ext2bgd_ndirs++; 476 } 477 bdwrite(bp); 478 return (cg * fs->e2fs.e2fs_ipg + ipref +1); 479 } 480 481 /* 482 * Free a block. 483 * 484 * The specified block is placed back in the 485 * free map. 486 */ 487 void 488 ext2fs_blkfree(ip, bno) 489 struct inode *ip; 490 ufs1_daddr_t bno; 491 { 492 struct m_ext2fs *fs; 493 char *bbp; 494 struct buf *bp; 495 int error, cg; 496 497 fs = ip->i_e2fs; 498 cg = dtog(fs, bno); 499 if ((u_int)bno >= fs->e2fs.e2fs_bcount) { 500 printf("bad block %d, ino %d\n", bno, ip->i_number); 501 ext2fs_fserr(fs, ip->i_e2fs_uid, "bad block"); 502 return; 503 } 504 error = bread(ip->i_devvp, 505 fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap), 506 (int)fs->e2fs_bsize, NOCRED, &bp); 507 if (error) { 508 brelse(bp); 509 return; 510 } 511 bbp = (char *)bp->b_data; 512 bno = dtogd(fs, bno); 513 if (isclr(bbp, bno)) { 514 printf("dev = 0x%x, block = %d, fs = %s\n", 515 ip->i_dev, bno, fs->e2fs_fsmnt); 516 panic("blkfree: freeing free block"); 517 } 518 clrbit(bbp, bno); 519 fs->e2fs.e2fs_fbcount++; 520 fs->e2fs_gd[cg].ext2bgd_nbfree++; 521 522 fs->e2fs_fmod = 1; 523 bdwrite(bp); 524 } 525 526 /* 527 * Free an inode. 528 * 529 * The specified inode is placed back in the free map. 530 */ 531 int 532 ext2fs_inode_free(struct inode *pip, ino_t ino, mode_t mode) 533 { 534 register struct m_ext2fs *fs; 535 register char *ibp; 536 struct buf *bp; 537 int error, cg; 538 539 fs = pip->i_e2fs; 540 if ((u_int)ino > fs->e2fs.e2fs_icount || (u_int)ino < EXT2_FIRSTINO) 541 panic("ifree: range: dev = 0x%x, ino = %d, fs = %s", 542 pip->i_dev, ino, fs->e2fs_fsmnt); 543 cg = ino_to_cg(fs, ino); 544 error = bread(pip->i_devvp, 545 fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_i_bitmap), 546 (int)fs->e2fs_bsize, NOCRED, &bp); 547 if (error) { 548 brelse(bp); 549 return (0); 550 } 551 ibp = (char *)bp->b_data; 552 ino = (ino - 1) % fs->e2fs.e2fs_ipg; 553 if (isclr(ibp, ino)) { 554 printf("dev = 0x%x, ino = %d, fs = %s\n", 555 pip->i_dev, ino, fs->e2fs_fsmnt); 556 if (fs->e2fs_ronly == 0) 557 panic("ifree: freeing free inode"); 558 } 559 clrbit(ibp, ino); 560 fs->e2fs.e2fs_ficount++; 561 fs->e2fs_gd[cg].ext2bgd_nifree++; 562 if ((mode & IFMT) == IFDIR) { 563 fs->e2fs_gd[cg].ext2bgd_ndirs--; 564 } 565 fs->e2fs_fmod = 1; 566 bdwrite(bp); 567 return (0); 568 } 569 570 /* 571 * Find a block in the specified cylinder group. 572 * 573 * It is a panic if a request is made to find a block if none are 574 * available. 575 */ 576 577 static ufs1_daddr_t 578 ext2fs_mapsearch(fs, bbp, bpref) 579 struct m_ext2fs *fs; 580 char *bbp; 581 ufs1_daddr_t bpref; 582 { 583 ufs1_daddr_t bno; 584 int start, len, loc, i, map; 585 586 /* 587 * find the fragment by searching through the free block 588 * map for an appropriate bit pattern 589 */ 590 if (bpref) 591 start = dtogd(fs, bpref) / NBBY; 592 else 593 start = 0; 594 len = howmany(fs->e2fs.e2fs_fpg, NBBY) - start; 595 loc = skpc(0xff, len, &bbp[start]); 596 if (loc == 0) { 597 len = start + 1; 598 start = 0; 599 loc = skpc(0xff, len, &bbp[start]); 600 if (loc == 0) { 601 printf("start = %d, len = %d, fs = %s\n", 602 start, len, fs->e2fs_fsmnt); 603 panic("ext2fs_alloccg: map corrupted"); 604 /* NOTREACHED */ 605 } 606 } 607 i = start + len - loc; 608 map = bbp[i]; 609 bno = i * NBBY; 610 for (i = 1; i < (1 << NBBY); i <<= 1, bno++) { 611 if ((map & i) == 0) 612 return (bno); 613 } 614 printf("fs = %s\n", fs->e2fs_fsmnt); 615 panic("ext2fs_mapsearch: block not in map"); 616 /* NOTREACHED */ 617 } 618 619 /* 620 * Fserr prints the name of a file system with an error diagnostic. 621 * 622 * The form of the error message is: 623 * fs: error message 624 */ 625 static void 626 ext2fs_fserr(fs, uid, cp) 627 struct m_ext2fs *fs; 628 u_int uid; 629 char *cp; 630 { 631 632 log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->e2fs_fsmnt, cp); 633 } 634