1 /* $NetBSD: ext2fs_alloc.c,v 1.10 2001/07/05 08:38:27 toshii Exp $ */ 2 3 /* 4 * Copyright (c) 1997 Manuel Bouyer. 5 * Copyright (c) 1982, 1986, 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)ffs_alloc.c 8.11 (Berkeley) 10/27/94 37 * Modified for ext2fs by Manuel Bouyer. 38 */ 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/buf.h> 43 #include <sys/proc.h> 44 #include <sys/vnode.h> 45 #include <sys/mount.h> 46 #include <sys/kernel.h> 47 #include <sys/syslog.h> 48 49 #include <ufs/ufs/quota.h> 50 #include <ufs/ufs/inode.h> 51 #include <ufs/ufs/ufs_extern.h> 52 53 #include <ufs/ext2fs/ext2fs.h> 54 #include <ufs/ext2fs/ext2fs_extern.h> 55 56 u_long ext2gennumber; 57 58 static ufs_daddr_t ext2fs_alloccg __P((struct inode *, int, ufs_daddr_t, int)); 59 static u_long ext2fs_dirpref __P((struct m_ext2fs *)); 60 static void ext2fs_fserr __P((struct m_ext2fs *, u_int, char *)); 61 static u_long ext2fs_hashalloc __P((struct inode *, int, long, int, 62 ufs_daddr_t (*)(struct inode *, int, ufs_daddr_t, 63 int))); 64 static ufs_daddr_t ext2fs_nodealloccg __P((struct inode *, int, ufs_daddr_t, int)); 65 static ufs_daddr_t ext2fs_mapsearch __P((struct m_ext2fs *, char *, ufs_daddr_t)); 66 67 /* 68 * Allocate a block in the file system. 69 * 70 * A preference may be optionally specified. If a preference is given 71 * the following hierarchy is used to allocate a block: 72 * 1) allocate the requested block. 73 * 2) allocate a rotationally optimal block in the same cylinder. 74 * 3) allocate a block in the same cylinder group. 75 * 4) quadradically rehash into other cylinder groups, until an 76 * available block is located. 77 * If no block preference is given the following heirarchy is used 78 * to allocate a block: 79 * 1) allocate a block in the cylinder group that contains the 80 * inode for the file. 81 * 2) quadradically rehash into other cylinder groups, until an 82 * available block is located. 83 */ 84 int 85 ext2fs_alloc(ip, lbn, bpref, cred, bnp) 86 struct inode *ip; 87 ufs_daddr_t lbn, bpref; 88 struct ucred *cred; 89 ufs_daddr_t *bnp; 90 { 91 struct m_ext2fs *fs; 92 ufs_daddr_t bno; 93 int cg; 94 95 *bnp = 0; 96 fs = ip->i_e2fs; 97 #ifdef DIAGNOSTIC 98 if (cred == NOCRED) 99 panic("ext2fs_alloc: missing credential\n"); 100 #endif /* DIAGNOSTIC */ 101 if (fs->e2fs.e2fs_fbcount == 0) 102 goto nospace; 103 if (cred->cr_uid != 0 && freespace(fs) <= 0) 104 goto nospace; 105 if (bpref >= fs->e2fs.e2fs_bcount) 106 bpref = 0; 107 if (bpref == 0) 108 cg = ino_to_cg(fs, ip->i_number); 109 else 110 cg = dtog(fs, bpref); 111 bno = (ufs_daddr_t)ext2fs_hashalloc(ip, cg, bpref, fs->e2fs_bsize, 112 ext2fs_alloccg); 113 if (bno > 0) { 114 ip->i_e2fs_nblock += btodb(fs->e2fs_bsize); 115 ip->i_flag |= IN_CHANGE | IN_UPDATE; 116 *bnp = bno; 117 return (0); 118 } 119 nospace: 120 ext2fs_fserr(fs, cred->cr_uid, "file system full"); 121 uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt); 122 return (ENOSPC); 123 } 124 125 /* 126 * Allocate an inode in the file system. 127 * 128 * If allocating a directory, use ext2fs_dirpref to select the inode. 129 * If allocating in a directory, the following hierarchy is followed: 130 * 1) allocate the preferred inode. 131 * 2) allocate an inode in the same cylinder group. 132 * 3) quadradically rehash into other cylinder groups, until an 133 * available inode is located. 134 * If no inode preference is given the following heirarchy is used 135 * to allocate an inode: 136 * 1) allocate an inode in cylinder group 0. 137 * 2) quadradically rehash into other cylinder groups, until an 138 * available inode is located. 139 */ 140 int 141 ext2fs_valloc(v) 142 void *v; 143 { 144 struct vop_valloc_args /* { 145 struct vnode *a_pvp; 146 int a_mode; 147 struct ucred *a_cred; 148 struct vnode **a_vpp; 149 } */ *ap = v; 150 struct vnode *pvp = ap->a_pvp; 151 struct inode *pip; 152 struct m_ext2fs *fs; 153 struct inode *ip; 154 mode_t mode = ap->a_mode; 155 ino_t ino, ipref; 156 int cg, error; 157 158 *ap->a_vpp = NULL; 159 pip = VTOI(pvp); 160 fs = pip->i_e2fs; 161 if (fs->e2fs.e2fs_ficount == 0) 162 goto noinodes; 163 164 if ((mode & IFMT) == IFDIR) 165 cg = ext2fs_dirpref(fs); 166 else 167 cg = ino_to_cg(fs, pip->i_number); 168 ipref = cg * fs->e2fs.e2fs_ipg + 1; 169 ino = (ino_t)ext2fs_hashalloc(pip, cg, (long)ipref, mode, ext2fs_nodealloccg); 170 if (ino == 0) 171 goto noinodes; 172 error = VFS_VGET(pvp->v_mount, ino, ap->a_vpp); 173 if (error) { 174 VOP_VFREE(pvp, ino, mode); 175 return (error); 176 } 177 ip = VTOI(*ap->a_vpp); 178 if (ip->i_e2fs_mode && ip->i_e2fs_nlink != 0) { 179 printf("mode = 0%o, nlinks %d, inum = %d, fs = %s\n", 180 ip->i_e2fs_mode, ip->i_e2fs_nlink, ip->i_number, fs->e2fs_fsmnt); 181 panic("ext2fs_valloc: dup alloc"); 182 } 183 184 memset(&ip->i_din, 0, sizeof(ip->i_din)); 185 186 /* 187 * Set up a new generation number for this inode. 188 */ 189 if (++ext2gennumber < (u_long)time.tv_sec) 190 ext2gennumber = time.tv_sec; 191 ip->i_e2fs_gen = ext2gennumber; 192 return (0); 193 noinodes: 194 ext2fs_fserr(fs, ap->a_cred->cr_uid, "out of inodes"); 195 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt); 196 return (ENOSPC); 197 } 198 199 /* 200 * Find a cylinder to place a directory. 201 * 202 * The policy implemented by this algorithm is to select from 203 * among those cylinder groups with above the average number of 204 * free inodes, the one with the smallest number of directories. 205 */ 206 static u_long 207 ext2fs_dirpref(fs) 208 struct m_ext2fs *fs; 209 { 210 int cg, maxspace, mincg, avgifree; 211 212 avgifree = fs->e2fs.e2fs_ficount / fs->e2fs_ncg; 213 maxspace = 0; 214 mincg = -1; 215 for (cg = 0; cg < fs->e2fs_ncg; cg++) 216 if ( fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree) { 217 if (mincg == -1 || fs->e2fs_gd[cg].ext2bgd_nbfree > maxspace) { 218 mincg = cg; 219 maxspace = fs->e2fs_gd[cg].ext2bgd_nbfree; 220 } 221 } 222 return mincg; 223 } 224 225 /* 226 * Select the desired position for the next block in a file. The file is 227 * logically divided into sections. The first section is composed of the 228 * direct blocks. Each additional section contains fs_maxbpg blocks. 229 * 230 * If no blocks have been allocated in the first section, the policy is to 231 * request a block in the same cylinder group as the inode that describes 232 * the file. Otherwise, the policy is to try to allocate the blocks 233 * contigously. The two fields of the ext2 inode extension (see 234 * ufs/ufs/inode.h) help this. 235 */ 236 ufs_daddr_t 237 ext2fs_blkpref(ip, lbn, indx, bap) 238 struct inode *ip; 239 ufs_daddr_t lbn; 240 int indx; 241 ufs_daddr_t *bap; 242 { 243 struct m_ext2fs *fs; 244 int cg, i; 245 246 fs = ip->i_e2fs; 247 /* 248 * if we are doing contigous lbn allocation, try to alloc blocks 249 * contigously on disk 250 */ 251 252 if ( ip->i_e2fs_last_blk && lbn == ip->i_e2fs_last_lblk + 1) { 253 return ip->i_e2fs_last_blk + 1; 254 } 255 256 /* 257 * bap, if provided, gives us a list of blocks to which we want to 258 * stay close 259 */ 260 261 if (bap) { 262 for (i = indx; i >= 0 ; i--) { 263 if (bap[i]) { 264 return fs2h32(bap[i]) + 1; 265 } 266 } 267 } 268 269 /* fall back to the first block of the cylinder containing the inode */ 270 271 cg = ino_to_cg(fs, ip->i_number); 272 return fs->e2fs.e2fs_bpg * cg + fs->e2fs.e2fs_first_dblock + 1; 273 } 274 275 /* 276 * Implement the cylinder overflow algorithm. 277 * 278 * The policy implemented by this algorithm is: 279 * 1) allocate the block in its requested cylinder group. 280 * 2) quadradically rehash on the cylinder group number. 281 * 3) brute force search for a free block. 282 */ 283 static u_long 284 ext2fs_hashalloc(ip, cg, pref, size, allocator) 285 struct inode *ip; 286 int cg; 287 long pref; 288 int size; /* size for data blocks, mode for inodes */ 289 ufs_daddr_t (*allocator) __P((struct inode *, int, ufs_daddr_t, int)); 290 { 291 struct m_ext2fs *fs; 292 long result; 293 int i, icg = cg; 294 295 fs = ip->i_e2fs; 296 /* 297 * 1: preferred cylinder group 298 */ 299 result = (*allocator)(ip, cg, pref, size); 300 if (result) 301 return (result); 302 /* 303 * 2: quadratic rehash 304 */ 305 for (i = 1; i < fs->e2fs_ncg; i *= 2) { 306 cg += i; 307 if (cg >= fs->e2fs_ncg) 308 cg -= fs->e2fs_ncg; 309 result = (*allocator)(ip, cg, 0, size); 310 if (result) 311 return (result); 312 } 313 /* 314 * 3: brute force search 315 * Note that we start at i == 2, since 0 was checked initially, 316 * and 1 is always checked in the quadratic rehash. 317 */ 318 cg = (icg + 2) % fs->e2fs_ncg; 319 for (i = 2; i < fs->e2fs_ncg; i++) { 320 result = (*allocator)(ip, cg, 0, size); 321 if (result) 322 return (result); 323 cg++; 324 if (cg == fs->e2fs_ncg) 325 cg = 0; 326 } 327 return (0); 328 } 329 330 /* 331 * Determine whether a block can be allocated. 332 * 333 * Check to see if a block of the appropriate size is available, 334 * and if it is, allocate it. 335 */ 336 337 static ufs_daddr_t 338 ext2fs_alloccg(ip, cg, bpref, size) 339 struct inode *ip; 340 int cg; 341 ufs_daddr_t bpref; 342 int size; 343 { 344 struct m_ext2fs *fs; 345 char *bbp; 346 struct buf *bp; 347 int error, bno, start, end, loc; 348 349 fs = ip->i_e2fs; 350 if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0) 351 return (0); 352 error = bread(ip->i_devvp, fsbtodb(fs, 353 fs->e2fs_gd[cg].ext2bgd_b_bitmap), 354 (int)fs->e2fs_bsize, NOCRED, &bp); 355 if (error) { 356 brelse(bp); 357 return (0); 358 } 359 bbp = (char *)bp->b_data; 360 361 if (dtog(fs, bpref) != cg) 362 bpref = 0; 363 if (bpref != 0) { 364 bpref = dtogd(fs, bpref); 365 /* 366 * if the requested block is available, use it 367 */ 368 if (isclr(bbp, bpref)) { 369 bno = bpref; 370 goto gotit; 371 } 372 } 373 /* 374 * no blocks in the requested cylinder, so take next 375 * available one in this cylinder group. 376 * first try to get 8 contigous blocks, then fall back to a single 377 * block. 378 */ 379 if (bpref) 380 start = dtogd(fs, bpref) / NBBY; 381 else 382 start = 0; 383 end = howmany(fs->e2fs.e2fs_fpg, NBBY) - start; 384 for (loc = start; loc < end; loc++) { 385 if (bbp[loc] == 0) { 386 bno = loc * NBBY; 387 goto gotit; 388 } 389 } 390 for (loc = 0; loc < start; loc++) { 391 if (bbp[loc] == 0) { 392 bno = loc * NBBY; 393 goto gotit; 394 } 395 } 396 397 bno = ext2fs_mapsearch(fs, bbp, bpref); 398 if (bno < 0) 399 return (0); 400 gotit: 401 #ifdef DIAGNOSTIC 402 if (isset(bbp, (long)bno)) { 403 printf("ext2fs_alloccgblk: cg=%d bno=%d fs=%s\n", 404 cg, bno, fs->e2fs_fsmnt); 405 panic("ext2fs_alloccg: dup alloc"); 406 } 407 #endif 408 setbit(bbp, (long)bno); 409 fs->e2fs.e2fs_fbcount--; 410 fs->e2fs_gd[cg].ext2bgd_nbfree--; 411 fs->e2fs_fmod = 1; 412 bdwrite(bp); 413 return (cg * fs->e2fs.e2fs_fpg + fs->e2fs.e2fs_first_dblock + bno); 414 } 415 416 /* 417 * Determine whether an inode can be allocated. 418 * 419 * Check to see if an inode is available, and if it is, 420 * allocate it using the following policy: 421 * 1) allocate the requested inode. 422 * 2) allocate the next available inode after the requested 423 * inode in the specified cylinder group. 424 */ 425 static ufs_daddr_t 426 ext2fs_nodealloccg(ip, cg, ipref, mode) 427 struct inode *ip; 428 int cg; 429 ufs_daddr_t ipref; 430 int mode; 431 { 432 struct m_ext2fs *fs; 433 char *ibp; 434 struct buf *bp; 435 int error, start, len, loc, map, i; 436 437 ipref--; /* to avoid a lot of (ipref -1) */ 438 fs = ip->i_e2fs; 439 if (fs->e2fs_gd[cg].ext2bgd_nifree == 0) 440 return (0); 441 error = bread(ip->i_devvp, fsbtodb(fs, 442 fs->e2fs_gd[cg].ext2bgd_i_bitmap), 443 (int)fs->e2fs_bsize, NOCRED, &bp); 444 if (error) { 445 brelse(bp); 446 return (0); 447 } 448 ibp = (char *)bp->b_data; 449 if (ipref) { 450 ipref %= fs->e2fs.e2fs_ipg; 451 if (isclr(ibp, ipref)) 452 goto gotit; 453 } 454 start = ipref / NBBY; 455 len = howmany(fs->e2fs.e2fs_ipg - ipref, NBBY); 456 loc = skpc(0xff, len, &ibp[start]); 457 if (loc == 0) { 458 len = start + 1; 459 start = 0; 460 loc = skpc(0xff, len, &ibp[0]); 461 if (loc == 0) { 462 printf("cg = %d, ipref = %d, fs = %s\n", 463 cg, ipref, fs->e2fs_fsmnt); 464 panic("ext2fs_nodealloccg: map corrupted"); 465 /* NOTREACHED */ 466 } 467 } 468 i = start + len - loc; 469 map = ibp[i]; 470 ipref = i * NBBY; 471 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) { 472 if ((map & i) == 0) { 473 goto gotit; 474 } 475 } 476 printf("fs = %s\n", fs->e2fs_fsmnt); 477 panic("ext2fs_nodealloccg: block not in map"); 478 /* NOTREACHED */ 479 gotit: 480 setbit(ibp, ipref); 481 fs->e2fs.e2fs_ficount--; 482 fs->e2fs_gd[cg].ext2bgd_nifree--; 483 fs->e2fs_fmod = 1; 484 if ((mode & IFMT) == IFDIR) { 485 fs->e2fs_gd[cg].ext2bgd_ndirs++; 486 } 487 bdwrite(bp); 488 return (cg * fs->e2fs.e2fs_ipg + ipref +1); 489 } 490 491 /* 492 * Free a block. 493 * 494 * The specified block is placed back in the 495 * free map. 496 */ 497 void 498 ext2fs_blkfree(ip, bno) 499 struct inode *ip; 500 ufs_daddr_t bno; 501 { 502 struct m_ext2fs *fs; 503 char *bbp; 504 struct buf *bp; 505 int error, cg; 506 507 fs = ip->i_e2fs; 508 cg = dtog(fs, bno); 509 if ((u_int)bno >= fs->e2fs.e2fs_bcount) { 510 printf("bad block %d, ino %d\n", bno, ip->i_number); 511 ext2fs_fserr(fs, ip->i_e2fs_uid, "bad block"); 512 return; 513 } 514 error = bread(ip->i_devvp, 515 fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap), 516 (int)fs->e2fs_bsize, NOCRED, &bp); 517 if (error) { 518 brelse(bp); 519 return; 520 } 521 bbp = (char *)bp->b_data; 522 bno = dtogd(fs, bno); 523 if (isclr(bbp, bno)) { 524 printf("dev = 0x%x, block = %d, fs = %s\n", 525 ip->i_dev, bno, fs->e2fs_fsmnt); 526 panic("blkfree: freeing free block"); 527 } 528 clrbit(bbp, bno); 529 fs->e2fs.e2fs_fbcount++; 530 fs->e2fs_gd[cg].ext2bgd_nbfree++; 531 532 fs->e2fs_fmod = 1; 533 bdwrite(bp); 534 } 535 536 /* 537 * Free an inode. 538 * 539 * The specified inode is placed back in the free map. 540 */ 541 int 542 ext2fs_vfree(v) 543 void *v; 544 { 545 struct vop_vfree_args /* { 546 struct vnode *a_pvp; 547 ino_t a_ino; 548 int a_mode; 549 } */ *ap = v; 550 struct m_ext2fs *fs; 551 char *ibp; 552 struct inode *pip; 553 ino_t ino = ap->a_ino; 554 struct buf *bp; 555 int error, cg; 556 557 pip = VTOI(ap->a_pvp); 558 fs = pip->i_e2fs; 559 if ((u_int)ino >= fs->e2fs.e2fs_icount || (u_int)ino < EXT2_FIRSTINO) 560 panic("ifree: range: dev = 0x%x, ino = %d, fs = %s\n", 561 pip->i_dev, ino, fs->e2fs_fsmnt); 562 cg = ino_to_cg(fs, ino); 563 error = bread(pip->i_devvp, 564 fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_i_bitmap), 565 (int)fs->e2fs_bsize, NOCRED, &bp); 566 if (error) { 567 brelse(bp); 568 return (0); 569 } 570 ibp = (char *)bp->b_data; 571 ino = (ino - 1) % fs->e2fs.e2fs_ipg; 572 if (isclr(ibp, ino)) { 573 printf("dev = 0x%x, ino = %d, fs = %s\n", 574 pip->i_dev, ino, fs->e2fs_fsmnt); 575 if (fs->e2fs_ronly == 0) 576 panic("ifree: freeing free inode"); 577 } 578 clrbit(ibp, ino); 579 fs->e2fs.e2fs_ficount++; 580 fs->e2fs_gd[cg].ext2bgd_nifree++; 581 if ((ap->a_mode & IFMT) == IFDIR) { 582 fs->e2fs_gd[cg].ext2bgd_ndirs--; 583 } 584 fs->e2fs_fmod = 1; 585 bdwrite(bp); 586 return (0); 587 } 588 589 /* 590 * Find a block in the specified cylinder group. 591 * 592 * It is a panic if a request is made to find a block if none are 593 * available. 594 */ 595 596 static ufs_daddr_t 597 ext2fs_mapsearch(fs, bbp, bpref) 598 struct m_ext2fs *fs; 599 char *bbp; 600 ufs_daddr_t bpref; 601 { 602 ufs_daddr_t bno; 603 int start, len, loc, i, map; 604 605 /* 606 * find the fragment by searching through the free block 607 * map for an appropriate bit pattern 608 */ 609 if (bpref) 610 start = dtogd(fs, bpref) / NBBY; 611 else 612 start = 0; 613 len = howmany(fs->e2fs.e2fs_fpg, NBBY) - start; 614 loc = skpc(0xff, len, &bbp[start]); 615 if (loc == 0) { 616 len = start + 1; 617 start = 0; 618 loc = skpc(0xff, len, &bbp[start]); 619 if (loc == 0) { 620 printf("start = %d, len = %d, fs = %s\n", 621 start, len, fs->e2fs_fsmnt); 622 panic("ext2fs_alloccg: map corrupted"); 623 /* NOTREACHED */ 624 } 625 } 626 i = start + len - loc; 627 map = bbp[i]; 628 bno = i * NBBY; 629 for (i = 1; i < (1 << NBBY); i <<= 1, bno++) { 630 if ((map & i) == 0) 631 return (bno); 632 } 633 printf("fs = %s\n", fs->e2fs_fsmnt); 634 panic("ext2fs_mapsearch: block not in map"); 635 /* NOTREACHED */ 636 } 637 638 /* 639 * Fserr prints the name of a file system with an error diagnostic. 640 * 641 * The form of the error message is: 642 * fs: error message 643 */ 644 static void 645 ext2fs_fserr(fs, uid, cp) 646 struct m_ext2fs *fs; 647 u_int uid; 648 char *cp; 649 { 650 651 log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->e2fs_fsmnt, cp); 652 } 653