1 /* $OpenBSD: ext2fs_alloc.c,v 1.7 2001/09/17 23:25:04 art Exp $ */ 2 /* $NetBSD: ext2fs_alloc.c,v 1.10 2001/07/05 08:38:27 toshii Exp $ */ 3 4 /* 5 * Copyright (c) 1997 Manuel Bouyer. 6 * Copyright (c) 1982, 1986, 1989, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)ffs_alloc.c 8.11 (Berkeley) 10/27/94 38 * Modified for ext2fs by Manuel Bouyer. 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/buf.h> 44 #include <sys/proc.h> 45 #include <sys/vnode.h> 46 #include <sys/mount.h> 47 #include <sys/kernel.h> 48 #include <sys/syslog.h> 49 50 #include <ufs/ufs/quota.h> 51 #include <ufs/ufs/inode.h> 52 #include <ufs/ufs/ufs_extern.h> 53 54 #include <ufs/ext2fs/ext2fs.h> 55 #include <ufs/ext2fs/ext2fs_extern.h> 56 57 u_long ext2gennumber; 58 59 static ufs_daddr_t ext2fs_alloccg __P((struct inode *, int, ufs_daddr_t, int)); 60 static u_long ext2fs_dirpref __P((struct m_ext2fs *)); 61 static void ext2fs_fserr __P((struct m_ext2fs *, u_int, char *)); 62 static u_long ext2fs_hashalloc __P((struct inode *, int, long, int, 63 ufs_daddr_t (*)(struct inode *, int, ufs_daddr_t, 64 int))); 65 static ufs_daddr_t ext2fs_nodealloccg __P((struct inode *, int, ufs_daddr_t, int)); 66 static ufs_daddr_t ext2fs_mapsearch __P((struct m_ext2fs *, char *, ufs_daddr_t)); 67 68 /* 69 * Allocate a block in the file system. 70 * 71 * A preference may be optionally specified. If a preference is given 72 * the following hierarchy is used to allocate a block: 73 * 1) allocate the requested block. 74 * 2) allocate a rotationally optimal block in the same cylinder. 75 * 3) allocate a block in the same cylinder group. 76 * 4) quadradically rehash into other cylinder groups, until an 77 * available block is located. 78 * If no block preference is given the following heirarchy is used 79 * to allocate a block: 80 * 1) allocate a block in the cylinder group that contains the 81 * inode for the file. 82 * 2) quadradically rehash into other cylinder groups, until an 83 * available block is located. 84 */ 85 int 86 ext2fs_alloc(ip, lbn, bpref, cred, bnp) 87 struct inode *ip; 88 ufs_daddr_t lbn, bpref; 89 struct ucred *cred; 90 ufs_daddr_t *bnp; 91 { 92 struct m_ext2fs *fs; 93 ufs_daddr_t bno; 94 int cg; 95 96 *bnp = 0; 97 fs = ip->i_e2fs; 98 #ifdef DIAGNOSTIC 99 if (cred == NOCRED) 100 panic("ext2fs_alloc: missing credential"); 101 #endif /* DIAGNOSTIC */ 102 if (fs->e2fs.e2fs_fbcount == 0) 103 goto nospace; 104 if (cred->cr_uid != 0 && freespace(fs) <= 0) 105 goto nospace; 106 if (bpref >= fs->e2fs.e2fs_bcount) 107 bpref = 0; 108 if (bpref == 0) 109 cg = ino_to_cg(fs, ip->i_number); 110 else 111 cg = dtog(fs, bpref); 112 bno = (ufs_daddr_t)ext2fs_hashalloc(ip, cg, bpref, fs->e2fs_bsize, 113 ext2fs_alloccg); 114 if (bno > 0) { 115 ip->i_e2fs_nblock += btodb(fs->e2fs_bsize); 116 ip->i_flag |= IN_CHANGE | IN_UPDATE; 117 *bnp = bno; 118 return (0); 119 } 120 nospace: 121 ext2fs_fserr(fs, cred->cr_uid, "file system full"); 122 uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt); 123 return (ENOSPC); 124 } 125 126 /* 127 * Allocate an inode in the file system. 128 * 129 * If allocating a directory, use ext2fs_dirpref to select the inode. 130 * If allocating in a directory, the following hierarchy is followed: 131 * 1) allocate the preferred inode. 132 * 2) allocate an inode in the same cylinder group. 133 * 3) quadradically rehash into other cylinder groups, until an 134 * available inode is located. 135 * If no inode preference is given the following heirarchy is used 136 * to allocate an inode: 137 * 1) allocate an inode in cylinder group 0. 138 * 2) quadradically rehash into other cylinder groups, until an 139 * available inode is located. 140 */ 141 int 142 ext2fs_inode_alloc(struct inode *pip, int mode, struct ucred *cred, 143 struct vnode **vpp) 144 { 145 struct vnode *pvp; 146 struct m_ext2fs *fs; 147 struct inode *ip; 148 ino_t ino, ipref; 149 int cg, error; 150 151 *vpp = NULL; 152 pvp = ITOV(pip); 153 fs = pip->i_e2fs; 154 if (fs->e2fs.e2fs_ficount == 0) 155 goto noinodes; 156 157 if ((mode & IFMT) == IFDIR) 158 cg = ext2fs_dirpref(fs); 159 else 160 cg = ino_to_cg(fs, pip->i_number); 161 ipref = cg * fs->e2fs.e2fs_ipg + 1; 162 ino = (ino_t)ext2fs_hashalloc(pip, cg, (long)ipref, mode, ext2fs_nodealloccg); 163 if (ino == 0) 164 goto noinodes; 165 error = VFS_VGET(pvp->v_mount, ino, vpp); 166 if (error) { 167 ext2fs_inode_free(pip, ino, mode); 168 return (error); 169 } 170 ip = VTOI(*vpp); 171 if (ip->i_e2fs_mode && ip->i_e2fs_nlink != 0) { 172 printf("mode = 0%o, nlinks %d, inum = %d, fs = %s\n", 173 ip->i_e2fs_mode, ip->i_e2fs_nlink, ip->i_number, fs->e2fs_fsmnt); 174 panic("ext2fs_valloc: dup alloc"); 175 } 176 177 bzero(&(ip->i_din.e2fs_din), sizeof(struct ext2fs_dinode)); 178 179 /* 180 * Set up a new generation number for this inode. 181 */ 182 if (++ext2gennumber < (u_long)time.tv_sec) 183 ext2gennumber = time.tv_sec; 184 ip->i_e2fs_gen = ext2gennumber; 185 return (0); 186 noinodes: 187 ext2fs_fserr(fs, cred->cr_uid, "out of inodes"); 188 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt); 189 return (ENOSPC); 190 } 191 192 /* 193 * Find a cylinder to place a directory. 194 * 195 * The policy implemented by this algorithm is to select from 196 * among those cylinder groups with above the average number of 197 * free inodes, the one with the smallest number of directories. 198 */ 199 static u_long 200 ext2fs_dirpref(fs) 201 struct m_ext2fs *fs; 202 { 203 int cg, maxspace, mincg, avgifree; 204 205 avgifree = fs->e2fs.e2fs_ficount / fs->e2fs_ncg; 206 maxspace = 0; 207 mincg = -1; 208 for (cg = 0; cg < fs->e2fs_ncg; cg++) 209 if ( fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree) { 210 if (mincg == -1 || fs->e2fs_gd[cg].ext2bgd_nbfree > maxspace) { 211 mincg = cg; 212 maxspace = fs->e2fs_gd[cg].ext2bgd_nbfree; 213 } 214 } 215 return mincg; 216 } 217 218 /* 219 * Select the desired position for the next block in a file. The file is 220 * logically divided into sections. The first section is composed of the 221 * direct blocks. Each additional section contains fs_maxbpg blocks. 222 * 223 * If no blocks have been allocated in the first section, the policy is to 224 * request a block in the same cylinder group as the inode that describes 225 * the file. Otherwise, the policy is to try to allocate the blocks 226 * contigously. The two fields of the ext2 inode extension (see 227 * ufs/ufs/inode.h) help this. 228 */ 229 ufs_daddr_t 230 ext2fs_blkpref(ip, lbn, indx, bap) 231 struct inode *ip; 232 ufs_daddr_t lbn; 233 int indx; 234 ufs_daddr_t *bap; 235 { 236 struct m_ext2fs *fs; 237 int cg, i; 238 239 fs = ip->i_e2fs; 240 /* 241 * if we are doing contigous lbn allocation, try to alloc blocks 242 * contigously on disk 243 */ 244 245 if ( ip->i_e2fs_last_blk && lbn == ip->i_e2fs_last_lblk + 1) { 246 return ip->i_e2fs_last_blk + 1; 247 } 248 249 /* 250 * bap, if provided, gives us a list of blocks to which we want to 251 * stay close 252 */ 253 254 if (bap) { 255 for (i = indx; i >= 0 ; i--) { 256 if (bap[i]) { 257 return fs2h32(bap[i]) + 1; 258 } 259 } 260 } 261 262 /* fall back to the first block of the cylinder containing the inode */ 263 264 cg = ino_to_cg(fs, ip->i_number); 265 return fs->e2fs.e2fs_bpg * cg + fs->e2fs.e2fs_first_dblock + 1; 266 } 267 268 /* 269 * Implement the cylinder overflow algorithm. 270 * 271 * The policy implemented by this algorithm is: 272 * 1) allocate the block in its requested cylinder group. 273 * 2) quadradically rehash on the cylinder group number. 274 * 3) brute force search for a free block. 275 */ 276 static u_long 277 ext2fs_hashalloc(ip, cg, pref, size, allocator) 278 struct inode *ip; 279 int cg; 280 long pref; 281 int size; /* size for data blocks, mode for inodes */ 282 ufs_daddr_t (*allocator) __P((struct inode *, int, ufs_daddr_t, int)); 283 { 284 struct m_ext2fs *fs; 285 long result; 286 int i, icg = cg; 287 288 fs = ip->i_e2fs; 289 /* 290 * 1: preferred cylinder group 291 */ 292 result = (*allocator)(ip, cg, pref, size); 293 if (result) 294 return (result); 295 /* 296 * 2: quadratic rehash 297 */ 298 for (i = 1; i < fs->e2fs_ncg; i *= 2) { 299 cg += i; 300 if (cg >= fs->e2fs_ncg) 301 cg -= fs->e2fs_ncg; 302 result = (*allocator)(ip, cg, 0, size); 303 if (result) 304 return (result); 305 } 306 /* 307 * 3: brute force search 308 * Note that we start at i == 2, since 0 was checked initially, 309 * and 1 is always checked in the quadratic rehash. 310 */ 311 cg = (icg + 2) % fs->e2fs_ncg; 312 for (i = 2; i < fs->e2fs_ncg; i++) { 313 result = (*allocator)(ip, cg, 0, size); 314 if (result) 315 return (result); 316 cg++; 317 if (cg == fs->e2fs_ncg) 318 cg = 0; 319 } 320 return (0); 321 } 322 323 /* 324 * Determine whether a block can be allocated. 325 * 326 * Check to see if a block of the appropriate size is available, 327 * and if it is, allocate it. 328 */ 329 330 static ufs_daddr_t 331 ext2fs_alloccg(ip, cg, bpref, size) 332 struct inode *ip; 333 int cg; 334 ufs_daddr_t bpref; 335 int size; 336 { 337 struct m_ext2fs *fs; 338 char *bbp; 339 struct buf *bp; 340 int error, bno, start, end, loc; 341 342 fs = ip->i_e2fs; 343 if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0) 344 return (0); 345 error = bread(ip->i_devvp, fsbtodb(fs, 346 fs->e2fs_gd[cg].ext2bgd_b_bitmap), 347 (int)fs->e2fs_bsize, NOCRED, &bp); 348 if (error) { 349 brelse(bp); 350 return (0); 351 } 352 bbp = (char *)bp->b_data; 353 354 if (dtog(fs, bpref) != cg) 355 bpref = 0; 356 if (bpref != 0) { 357 bpref = dtogd(fs, bpref); 358 /* 359 * if the requested block is available, use it 360 */ 361 if (isclr(bbp, bpref)) { 362 bno = bpref; 363 goto gotit; 364 } 365 } 366 /* 367 * no blocks in the requested cylinder, so take next 368 * available one in this cylinder group. 369 * first try to get 8 contigous blocks, then fall back to a single 370 * block. 371 */ 372 if (bpref) 373 start = dtogd(fs, bpref) / NBBY; 374 else 375 start = 0; 376 end = howmany(fs->e2fs.e2fs_fpg, NBBY) - start; 377 for (loc = start; loc < end; loc++) { 378 if (bbp[loc] == 0) { 379 bno = loc * NBBY; 380 goto gotit; 381 } 382 } 383 for (loc = 0; loc < start; loc++) { 384 if (bbp[loc] == 0) { 385 bno = loc * NBBY; 386 goto gotit; 387 } 388 } 389 390 bno = ext2fs_mapsearch(fs, bbp, bpref); 391 if (bno < 0) 392 return (0); 393 gotit: 394 #ifdef DIAGNOSTIC 395 if (isset(bbp, (long)bno)) { 396 printf("ext2fs_alloccgblk: cg=%d bno=%d fs=%s\n", 397 cg, bno, fs->e2fs_fsmnt); 398 panic("ext2fs_alloccg: dup alloc"); 399 } 400 #endif 401 setbit(bbp, (long)bno); 402 fs->e2fs.e2fs_fbcount--; 403 fs->e2fs_gd[cg].ext2bgd_nbfree--; 404 fs->e2fs_fmod = 1; 405 bdwrite(bp); 406 return (cg * fs->e2fs.e2fs_fpg + fs->e2fs.e2fs_first_dblock + bno); 407 } 408 409 /* 410 * Determine whether an inode can be allocated. 411 * 412 * Check to see if an inode is available, and if it is, 413 * allocate it using the following policy: 414 * 1) allocate the requested inode. 415 * 2) allocate the next available inode after the requested 416 * inode in the specified cylinder group. 417 */ 418 static ufs_daddr_t 419 ext2fs_nodealloccg(ip, cg, ipref, mode) 420 struct inode *ip; 421 int cg; 422 ufs_daddr_t ipref; 423 int mode; 424 { 425 struct m_ext2fs *fs; 426 char *ibp; 427 struct buf *bp; 428 int error, start, len, loc, map, i; 429 430 ipref--; /* to avoid a lot of (ipref -1) */ 431 fs = ip->i_e2fs; 432 if (fs->e2fs_gd[cg].ext2bgd_nifree == 0) 433 return (0); 434 error = bread(ip->i_devvp, fsbtodb(fs, 435 fs->e2fs_gd[cg].ext2bgd_i_bitmap), 436 (int)fs->e2fs_bsize, NOCRED, &bp); 437 if (error) { 438 brelse(bp); 439 return (0); 440 } 441 ibp = (char *)bp->b_data; 442 if (ipref) { 443 ipref %= fs->e2fs.e2fs_ipg; 444 if (isclr(ibp, ipref)) 445 goto gotit; 446 } 447 start = ipref / NBBY; 448 len = howmany(fs->e2fs.e2fs_ipg - ipref, NBBY); 449 loc = skpc(0xff, len, &ibp[start]); 450 if (loc == 0) { 451 len = start + 1; 452 start = 0; 453 loc = skpc(0xff, len, &ibp[0]); 454 if (loc == 0) { 455 printf("cg = %d, ipref = %d, fs = %s\n", 456 cg, ipref, fs->e2fs_fsmnt); 457 panic("ext2fs_nodealloccg: map corrupted"); 458 /* NOTREACHED */ 459 } 460 } 461 i = start + len - loc; 462 map = ibp[i]; 463 ipref = i * NBBY; 464 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) { 465 if ((map & i) == 0) { 466 goto gotit; 467 } 468 } 469 printf("fs = %s\n", fs->e2fs_fsmnt); 470 panic("ext2fs_nodealloccg: block not in map"); 471 /* NOTREACHED */ 472 gotit: 473 setbit(ibp, ipref); 474 fs->e2fs.e2fs_ficount--; 475 fs->e2fs_gd[cg].ext2bgd_nifree--; 476 fs->e2fs_fmod = 1; 477 if ((mode & IFMT) == IFDIR) { 478 fs->e2fs_gd[cg].ext2bgd_ndirs++; 479 } 480 bdwrite(bp); 481 return (cg * fs->e2fs.e2fs_ipg + ipref +1); 482 } 483 484 /* 485 * Free a block. 486 * 487 * The specified block is placed back in the 488 * free map. 489 */ 490 void 491 ext2fs_blkfree(ip, bno) 492 struct inode *ip; 493 ufs_daddr_t bno; 494 { 495 struct m_ext2fs *fs; 496 char *bbp; 497 struct buf *bp; 498 int error, cg; 499 500 fs = ip->i_e2fs; 501 cg = dtog(fs, bno); 502 if ((u_int)bno >= fs->e2fs.e2fs_bcount) { 503 printf("bad block %d, ino %d\n", bno, ip->i_number); 504 ext2fs_fserr(fs, ip->i_e2fs_uid, "bad block"); 505 return; 506 } 507 error = bread(ip->i_devvp, 508 fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap), 509 (int)fs->e2fs_bsize, NOCRED, &bp); 510 if (error) { 511 brelse(bp); 512 return; 513 } 514 bbp = (char *)bp->b_data; 515 bno = dtogd(fs, bno); 516 if (isclr(bbp, bno)) { 517 printf("dev = 0x%x, block = %d, fs = %s\n", 518 ip->i_dev, bno, fs->e2fs_fsmnt); 519 panic("blkfree: freeing free block"); 520 } 521 clrbit(bbp, bno); 522 fs->e2fs.e2fs_fbcount++; 523 fs->e2fs_gd[cg].ext2bgd_nbfree++; 524 525 fs->e2fs_fmod = 1; 526 bdwrite(bp); 527 } 528 529 /* 530 * Free an inode. 531 * 532 * The specified inode is placed back in the free map. 533 */ 534 int 535 ext2fs_inode_free(struct inode *pip, ino_t ino, int mode) 536 { 537 register struct m_ext2fs *fs; 538 register char *ibp; 539 struct buf *bp; 540 int error, cg; 541 542 fs = pip->i_e2fs; 543 if ((u_int)ino >= fs->e2fs.e2fs_icount || (u_int)ino < EXT2_FIRSTINO) 544 panic("ifree: range: dev = 0x%x, ino = %d, fs = %s", 545 pip->i_dev, ino, fs->e2fs_fsmnt); 546 cg = ino_to_cg(fs, ino); 547 error = bread(pip->i_devvp, 548 fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_i_bitmap), 549 (int)fs->e2fs_bsize, NOCRED, &bp); 550 if (error) { 551 brelse(bp); 552 return (0); 553 } 554 ibp = (char *)bp->b_data; 555 ino = (ino - 1) % fs->e2fs.e2fs_ipg; 556 if (isclr(ibp, ino)) { 557 printf("dev = 0x%x, ino = %d, fs = %s\n", 558 pip->i_dev, ino, fs->e2fs_fsmnt); 559 if (fs->e2fs_ronly == 0) 560 panic("ifree: freeing free inode"); 561 } 562 clrbit(ibp, ino); 563 fs->e2fs.e2fs_ficount++; 564 fs->e2fs_gd[cg].ext2bgd_nifree++; 565 if ((mode & IFMT) == IFDIR) { 566 fs->e2fs_gd[cg].ext2bgd_ndirs--; 567 } 568 fs->e2fs_fmod = 1; 569 bdwrite(bp); 570 return (0); 571 } 572 573 /* 574 * Find a block in the specified cylinder group. 575 * 576 * It is a panic if a request is made to find a block if none are 577 * available. 578 */ 579 580 static ufs_daddr_t 581 ext2fs_mapsearch(fs, bbp, bpref) 582 struct m_ext2fs *fs; 583 char *bbp; 584 ufs_daddr_t bpref; 585 { 586 ufs_daddr_t bno; 587 int start, len, loc, i, map; 588 589 /* 590 * find the fragment by searching through the free block 591 * map for an appropriate bit pattern 592 */ 593 if (bpref) 594 start = dtogd(fs, bpref) / NBBY; 595 else 596 start = 0; 597 len = howmany(fs->e2fs.e2fs_fpg, NBBY) - start; 598 loc = skpc(0xff, len, &bbp[start]); 599 if (loc == 0) { 600 len = start + 1; 601 start = 0; 602 loc = skpc(0xff, len, &bbp[start]); 603 if (loc == 0) { 604 printf("start = %d, len = %d, fs = %s\n", 605 start, len, fs->e2fs_fsmnt); 606 panic("ext2fs_alloccg: map corrupted"); 607 /* NOTREACHED */ 608 } 609 } 610 i = start + len - loc; 611 map = bbp[i]; 612 bno = i * NBBY; 613 for (i = 1; i < (1 << NBBY); i <<= 1, bno++) { 614 if ((map & i) == 0) 615 return (bno); 616 } 617 printf("fs = %s\n", fs->e2fs_fsmnt); 618 panic("ext2fs_mapsearch: block not in map"); 619 /* NOTREACHED */ 620 } 621 622 /* 623 * Fserr prints the name of a file system with an error diagnostic. 624 * 625 * The form of the error message is: 626 * fs: error message 627 */ 628 static void 629 ext2fs_fserr(fs, uid, cp) 630 struct m_ext2fs *fs; 631 u_int uid; 632 char *cp; 633 { 634 635 log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->e2fs_fsmnt, cp); 636 } 637