1 /* $NetBSD: ext2fs_alloc.c,v 1.13 2001/11/08 02:39:06 lukem Exp $ */ 2 3 /* 4 * Copyright (c) 1997 Manuel Bouyer. 5 * Copyright (c) 1982, 1986, 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)ffs_alloc.c 8.11 (Berkeley) 10/27/94 37 * Modified for ext2fs by Manuel Bouyer. 38 */ 39 40 #include <sys/cdefs.h> 41 __KERNEL_RCSID(0, "$NetBSD: ext2fs_alloc.c,v 1.13 2001/11/08 02:39:06 lukem Exp $"); 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/buf.h> 46 #include <sys/proc.h> 47 #include <sys/vnode.h> 48 #include <sys/mount.h> 49 #include <sys/kernel.h> 50 #include <sys/syslog.h> 51 52 #include <ufs/ufs/inode.h> 53 #include <ufs/ufs/ufs_extern.h> 54 55 #include <ufs/ext2fs/ext2fs.h> 56 #include <ufs/ext2fs/ext2fs_extern.h> 57 58 u_long ext2gennumber; 59 60 static ufs_daddr_t ext2fs_alloccg __P((struct inode *, int, ufs_daddr_t, int)); 61 static u_long ext2fs_dirpref __P((struct m_ext2fs *)); 62 static void ext2fs_fserr __P((struct m_ext2fs *, u_int, char *)); 63 static u_long ext2fs_hashalloc __P((struct inode *, int, long, int, 64 ufs_daddr_t (*)(struct inode *, int, ufs_daddr_t, 65 int))); 66 static ufs_daddr_t ext2fs_nodealloccg __P((struct inode *, int, ufs_daddr_t, int)); 67 static ufs_daddr_t ext2fs_mapsearch __P((struct m_ext2fs *, char *, ufs_daddr_t)); 68 69 /* 70 * Allocate a block in the file system. 71 * 72 * A preference may be optionally specified. If a preference is given 73 * the following hierarchy is used to allocate a block: 74 * 1) allocate the requested block. 75 * 2) allocate a rotationally optimal block in the same cylinder. 76 * 3) allocate a block in the same cylinder group. 77 * 4) quadradically rehash into other cylinder groups, until an 78 * available block is located. 79 * If no block preference is given the following hierarchy is used 80 * to allocate a block: 81 * 1) allocate a block in the cylinder group that contains the 82 * inode for the file. 83 * 2) quadradically rehash into other cylinder groups, until an 84 * available block is located. 85 */ 86 int 87 ext2fs_alloc(ip, lbn, bpref, cred, bnp) 88 struct inode *ip; 89 ufs_daddr_t lbn, bpref; 90 struct ucred *cred; 91 ufs_daddr_t *bnp; 92 { 93 struct m_ext2fs *fs; 94 ufs_daddr_t bno; 95 int cg; 96 97 *bnp = 0; 98 fs = ip->i_e2fs; 99 #ifdef DIAGNOSTIC 100 if (cred == NOCRED) 101 panic("ext2fs_alloc: missing credential\n"); 102 #endif /* DIAGNOSTIC */ 103 if (fs->e2fs.e2fs_fbcount == 0) 104 goto nospace; 105 if (cred->cr_uid != 0 && freespace(fs) <= 0) 106 goto nospace; 107 if (bpref >= fs->e2fs.e2fs_bcount) 108 bpref = 0; 109 if (bpref == 0) 110 cg = ino_to_cg(fs, ip->i_number); 111 else 112 cg = dtog(fs, bpref); 113 bno = (ufs_daddr_t)ext2fs_hashalloc(ip, cg, bpref, fs->e2fs_bsize, 114 ext2fs_alloccg); 115 if (bno > 0) { 116 ip->i_e2fs_nblock += btodb(fs->e2fs_bsize); 117 ip->i_flag |= IN_CHANGE | IN_UPDATE; 118 *bnp = bno; 119 return (0); 120 } 121 nospace: 122 ext2fs_fserr(fs, cred->cr_uid, "file system full"); 123 uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt); 124 return (ENOSPC); 125 } 126 127 /* 128 * Allocate an inode in the file system. 129 * 130 * If allocating a directory, use ext2fs_dirpref to select the inode. 131 * If allocating in a directory, the following hierarchy is followed: 132 * 1) allocate the preferred inode. 133 * 2) allocate an inode in the same cylinder group. 134 * 3) quadradically rehash into other cylinder groups, until an 135 * available inode is located. 136 * If no inode preference is given the following hierarchy is used 137 * to allocate an inode: 138 * 1) allocate an inode in cylinder group 0. 139 * 2) quadradically rehash into other cylinder groups, until an 140 * available inode is located. 141 */ 142 int 143 ext2fs_valloc(v) 144 void *v; 145 { 146 struct vop_valloc_args /* { 147 struct vnode *a_pvp; 148 int a_mode; 149 struct ucred *a_cred; 150 struct vnode **a_vpp; 151 } */ *ap = v; 152 struct vnode *pvp = ap->a_pvp; 153 struct inode *pip; 154 struct m_ext2fs *fs; 155 struct inode *ip; 156 mode_t mode = ap->a_mode; 157 ino_t ino, ipref; 158 int cg, error; 159 160 *ap->a_vpp = NULL; 161 pip = VTOI(pvp); 162 fs = pip->i_e2fs; 163 if (fs->e2fs.e2fs_ficount == 0) 164 goto noinodes; 165 166 if ((mode & IFMT) == IFDIR) 167 cg = ext2fs_dirpref(fs); 168 else 169 cg = ino_to_cg(fs, pip->i_number); 170 ipref = cg * fs->e2fs.e2fs_ipg + 1; 171 ino = (ino_t)ext2fs_hashalloc(pip, cg, (long)ipref, mode, ext2fs_nodealloccg); 172 if (ino == 0) 173 goto noinodes; 174 error = VFS_VGET(pvp->v_mount, ino, ap->a_vpp); 175 if (error) { 176 VOP_VFREE(pvp, ino, mode); 177 return (error); 178 } 179 ip = VTOI(*ap->a_vpp); 180 if (ip->i_e2fs_mode && ip->i_e2fs_nlink != 0) { 181 printf("mode = 0%o, nlinks %d, inum = %d, fs = %s\n", 182 ip->i_e2fs_mode, ip->i_e2fs_nlink, ip->i_number, fs->e2fs_fsmnt); 183 panic("ext2fs_valloc: dup alloc"); 184 } 185 186 memset(&ip->i_din, 0, sizeof(ip->i_din)); 187 188 /* 189 * Set up a new generation number for this inode. 190 */ 191 if (++ext2gennumber < (u_long)time.tv_sec) 192 ext2gennumber = time.tv_sec; 193 ip->i_e2fs_gen = ext2gennumber; 194 return (0); 195 noinodes: 196 ext2fs_fserr(fs, ap->a_cred->cr_uid, "out of inodes"); 197 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt); 198 return (ENOSPC); 199 } 200 201 /* 202 * Find a cylinder to place a directory. 203 * 204 * The policy implemented by this algorithm is to select from 205 * among those cylinder groups with above the average number of 206 * free inodes, the one with the smallest number of directories. 207 */ 208 static u_long 209 ext2fs_dirpref(fs) 210 struct m_ext2fs *fs; 211 { 212 int cg, maxspace, mincg, avgifree; 213 214 avgifree = fs->e2fs.e2fs_ficount / fs->e2fs_ncg; 215 maxspace = 0; 216 mincg = -1; 217 for (cg = 0; cg < fs->e2fs_ncg; cg++) 218 if ( fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree) { 219 if (mincg == -1 || fs->e2fs_gd[cg].ext2bgd_nbfree > maxspace) { 220 mincg = cg; 221 maxspace = fs->e2fs_gd[cg].ext2bgd_nbfree; 222 } 223 } 224 return mincg; 225 } 226 227 /* 228 * Select the desired position for the next block in a file. The file is 229 * logically divided into sections. The first section is composed of the 230 * direct blocks. Each additional section contains fs_maxbpg blocks. 231 * 232 * If no blocks have been allocated in the first section, the policy is to 233 * request a block in the same cylinder group as the inode that describes 234 * the file. Otherwise, the policy is to try to allocate the blocks 235 * contigously. The two fields of the ext2 inode extension (see 236 * ufs/ufs/inode.h) help this. 237 */ 238 ufs_daddr_t 239 ext2fs_blkpref(ip, lbn, indx, bap) 240 struct inode *ip; 241 ufs_daddr_t lbn; 242 int indx; 243 ufs_daddr_t *bap; 244 { 245 struct m_ext2fs *fs; 246 int cg, i; 247 248 fs = ip->i_e2fs; 249 /* 250 * if we are doing contigous lbn allocation, try to alloc blocks 251 * contigously on disk 252 */ 253 254 if ( ip->i_e2fs_last_blk && lbn == ip->i_e2fs_last_lblk + 1) { 255 return ip->i_e2fs_last_blk + 1; 256 } 257 258 /* 259 * bap, if provided, gives us a list of blocks to which we want to 260 * stay close 261 */ 262 263 if (bap) { 264 for (i = indx; i >= 0 ; i--) { 265 if (bap[i]) { 266 return fs2h32(bap[i]) + 1; 267 } 268 } 269 } 270 271 /* fall back to the first block of the cylinder containing the inode */ 272 273 cg = ino_to_cg(fs, ip->i_number); 274 return fs->e2fs.e2fs_bpg * cg + fs->e2fs.e2fs_first_dblock + 1; 275 } 276 277 /* 278 * Implement the cylinder overflow algorithm. 279 * 280 * The policy implemented by this algorithm is: 281 * 1) allocate the block in its requested cylinder group. 282 * 2) quadradically rehash on the cylinder group number. 283 * 3) brute force search for a free block. 284 */ 285 static u_long 286 ext2fs_hashalloc(ip, cg, pref, size, allocator) 287 struct inode *ip; 288 int cg; 289 long pref; 290 int size; /* size for data blocks, mode for inodes */ 291 ufs_daddr_t (*allocator) __P((struct inode *, int, ufs_daddr_t, int)); 292 { 293 struct m_ext2fs *fs; 294 long result; 295 int i, icg = cg; 296 297 fs = ip->i_e2fs; 298 /* 299 * 1: preferred cylinder group 300 */ 301 result = (*allocator)(ip, cg, pref, size); 302 if (result) 303 return (result); 304 /* 305 * 2: quadratic rehash 306 */ 307 for (i = 1; i < fs->e2fs_ncg; i *= 2) { 308 cg += i; 309 if (cg >= fs->e2fs_ncg) 310 cg -= fs->e2fs_ncg; 311 result = (*allocator)(ip, cg, 0, size); 312 if (result) 313 return (result); 314 } 315 /* 316 * 3: brute force search 317 * Note that we start at i == 2, since 0 was checked initially, 318 * and 1 is always checked in the quadratic rehash. 319 */ 320 cg = (icg + 2) % fs->e2fs_ncg; 321 for (i = 2; i < fs->e2fs_ncg; i++) { 322 result = (*allocator)(ip, cg, 0, size); 323 if (result) 324 return (result); 325 cg++; 326 if (cg == fs->e2fs_ncg) 327 cg = 0; 328 } 329 return (0); 330 } 331 332 /* 333 * Determine whether a block can be allocated. 334 * 335 * Check to see if a block of the appropriate size is available, 336 * and if it is, allocate it. 337 */ 338 339 static ufs_daddr_t 340 ext2fs_alloccg(ip, cg, bpref, size) 341 struct inode *ip; 342 int cg; 343 ufs_daddr_t bpref; 344 int size; 345 { 346 struct m_ext2fs *fs; 347 char *bbp; 348 struct buf *bp; 349 int error, bno, start, end, loc; 350 351 fs = ip->i_e2fs; 352 if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0) 353 return (0); 354 error = bread(ip->i_devvp, fsbtodb(fs, 355 fs->e2fs_gd[cg].ext2bgd_b_bitmap), 356 (int)fs->e2fs_bsize, NOCRED, &bp); 357 if (error) { 358 brelse(bp); 359 return (0); 360 } 361 bbp = (char *)bp->b_data; 362 363 if (dtog(fs, bpref) != cg) 364 bpref = 0; 365 if (bpref != 0) { 366 bpref = dtogd(fs, bpref); 367 /* 368 * if the requested block is available, use it 369 */ 370 if (isclr(bbp, bpref)) { 371 bno = bpref; 372 goto gotit; 373 } 374 } 375 /* 376 * no blocks in the requested cylinder, so take next 377 * available one in this cylinder group. 378 * first try to get 8 contigous blocks, then fall back to a single 379 * block. 380 */ 381 if (bpref) 382 start = dtogd(fs, bpref) / NBBY; 383 else 384 start = 0; 385 end = howmany(fs->e2fs.e2fs_fpg, NBBY) - start; 386 for (loc = start; loc < end; loc++) { 387 if (bbp[loc] == 0) { 388 bno = loc * NBBY; 389 goto gotit; 390 } 391 } 392 for (loc = 0; loc < start; loc++) { 393 if (bbp[loc] == 0) { 394 bno = loc * NBBY; 395 goto gotit; 396 } 397 } 398 399 bno = ext2fs_mapsearch(fs, bbp, bpref); 400 if (bno < 0) 401 return (0); 402 gotit: 403 #ifdef DIAGNOSTIC 404 if (isset(bbp, (long)bno)) { 405 printf("ext2fs_alloccgblk: cg=%d bno=%d fs=%s\n", 406 cg, bno, fs->e2fs_fsmnt); 407 panic("ext2fs_alloccg: dup alloc"); 408 } 409 #endif 410 setbit(bbp, (long)bno); 411 fs->e2fs.e2fs_fbcount--; 412 fs->e2fs_gd[cg].ext2bgd_nbfree--; 413 fs->e2fs_fmod = 1; 414 bdwrite(bp); 415 return (cg * fs->e2fs.e2fs_fpg + fs->e2fs.e2fs_first_dblock + bno); 416 } 417 418 /* 419 * Determine whether an inode can be allocated. 420 * 421 * Check to see if an inode is available, and if it is, 422 * allocate it using the following policy: 423 * 1) allocate the requested inode. 424 * 2) allocate the next available inode after the requested 425 * inode in the specified cylinder group. 426 */ 427 static ufs_daddr_t 428 ext2fs_nodealloccg(ip, cg, ipref, mode) 429 struct inode *ip; 430 int cg; 431 ufs_daddr_t ipref; 432 int mode; 433 { 434 struct m_ext2fs *fs; 435 char *ibp; 436 struct buf *bp; 437 int error, start, len, loc, map, i; 438 439 ipref--; /* to avoid a lot of (ipref -1) */ 440 fs = ip->i_e2fs; 441 if (fs->e2fs_gd[cg].ext2bgd_nifree == 0) 442 return (0); 443 error = bread(ip->i_devvp, fsbtodb(fs, 444 fs->e2fs_gd[cg].ext2bgd_i_bitmap), 445 (int)fs->e2fs_bsize, NOCRED, &bp); 446 if (error) { 447 brelse(bp); 448 return (0); 449 } 450 ibp = (char *)bp->b_data; 451 if (ipref) { 452 ipref %= fs->e2fs.e2fs_ipg; 453 if (isclr(ibp, ipref)) 454 goto gotit; 455 } 456 start = ipref / NBBY; 457 len = howmany(fs->e2fs.e2fs_ipg - ipref, NBBY); 458 loc = skpc(0xff, len, &ibp[start]); 459 if (loc == 0) { 460 len = start + 1; 461 start = 0; 462 loc = skpc(0xff, len, &ibp[0]); 463 if (loc == 0) { 464 printf("cg = %d, ipref = %d, fs = %s\n", 465 cg, ipref, fs->e2fs_fsmnt); 466 panic("ext2fs_nodealloccg: map corrupted"); 467 /* NOTREACHED */ 468 } 469 } 470 i = start + len - loc; 471 map = ibp[i]; 472 ipref = i * NBBY; 473 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) { 474 if ((map & i) == 0) { 475 goto gotit; 476 } 477 } 478 printf("fs = %s\n", fs->e2fs_fsmnt); 479 panic("ext2fs_nodealloccg: block not in map"); 480 /* NOTREACHED */ 481 gotit: 482 setbit(ibp, ipref); 483 fs->e2fs.e2fs_ficount--; 484 fs->e2fs_gd[cg].ext2bgd_nifree--; 485 fs->e2fs_fmod = 1; 486 if ((mode & IFMT) == IFDIR) { 487 fs->e2fs_gd[cg].ext2bgd_ndirs++; 488 } 489 bdwrite(bp); 490 return (cg * fs->e2fs.e2fs_ipg + ipref +1); 491 } 492 493 /* 494 * Free a block. 495 * 496 * The specified block is placed back in the 497 * free map. 498 */ 499 void 500 ext2fs_blkfree(ip, bno) 501 struct inode *ip; 502 ufs_daddr_t bno; 503 { 504 struct m_ext2fs *fs; 505 char *bbp; 506 struct buf *bp; 507 int error, cg; 508 509 fs = ip->i_e2fs; 510 cg = dtog(fs, bno); 511 if ((u_int)bno >= fs->e2fs.e2fs_bcount) { 512 printf("bad block %d, ino %d\n", bno, ip->i_number); 513 ext2fs_fserr(fs, ip->i_e2fs_uid, "bad block"); 514 return; 515 } 516 error = bread(ip->i_devvp, 517 fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap), 518 (int)fs->e2fs_bsize, NOCRED, &bp); 519 if (error) { 520 brelse(bp); 521 return; 522 } 523 bbp = (char *)bp->b_data; 524 bno = dtogd(fs, bno); 525 if (isclr(bbp, bno)) { 526 printf("dev = 0x%x, block = %d, fs = %s\n", 527 ip->i_dev, bno, fs->e2fs_fsmnt); 528 panic("blkfree: freeing free block"); 529 } 530 clrbit(bbp, bno); 531 fs->e2fs.e2fs_fbcount++; 532 fs->e2fs_gd[cg].ext2bgd_nbfree++; 533 534 fs->e2fs_fmod = 1; 535 bdwrite(bp); 536 } 537 538 /* 539 * Free an inode. 540 * 541 * The specified inode is placed back in the free map. 542 */ 543 int 544 ext2fs_vfree(v) 545 void *v; 546 { 547 struct vop_vfree_args /* { 548 struct vnode *a_pvp; 549 ino_t a_ino; 550 int a_mode; 551 } */ *ap = v; 552 struct m_ext2fs *fs; 553 char *ibp; 554 struct inode *pip; 555 ino_t ino = ap->a_ino; 556 struct buf *bp; 557 int error, cg; 558 559 pip = VTOI(ap->a_pvp); 560 fs = pip->i_e2fs; 561 if ((u_int)ino >= fs->e2fs.e2fs_icount || (u_int)ino < EXT2_FIRSTINO) 562 panic("ifree: range: dev = 0x%x, ino = %d, fs = %s\n", 563 pip->i_dev, ino, fs->e2fs_fsmnt); 564 cg = ino_to_cg(fs, ino); 565 error = bread(pip->i_devvp, 566 fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_i_bitmap), 567 (int)fs->e2fs_bsize, NOCRED, &bp); 568 if (error) { 569 brelse(bp); 570 return (0); 571 } 572 ibp = (char *)bp->b_data; 573 ino = (ino - 1) % fs->e2fs.e2fs_ipg; 574 if (isclr(ibp, ino)) { 575 printf("dev = 0x%x, ino = %d, fs = %s\n", 576 pip->i_dev, ino, fs->e2fs_fsmnt); 577 if (fs->e2fs_ronly == 0) 578 panic("ifree: freeing free inode"); 579 } 580 clrbit(ibp, ino); 581 fs->e2fs.e2fs_ficount++; 582 fs->e2fs_gd[cg].ext2bgd_nifree++; 583 if ((ap->a_mode & IFMT) == IFDIR) { 584 fs->e2fs_gd[cg].ext2bgd_ndirs--; 585 } 586 fs->e2fs_fmod = 1; 587 bdwrite(bp); 588 return (0); 589 } 590 591 /* 592 * Find a block in the specified cylinder group. 593 * 594 * It is a panic if a request is made to find a block if none are 595 * available. 596 */ 597 598 static ufs_daddr_t 599 ext2fs_mapsearch(fs, bbp, bpref) 600 struct m_ext2fs *fs; 601 char *bbp; 602 ufs_daddr_t bpref; 603 { 604 ufs_daddr_t bno; 605 int start, len, loc, i, map; 606 607 /* 608 * find the fragment by searching through the free block 609 * map for an appropriate bit pattern 610 */ 611 if (bpref) 612 start = dtogd(fs, bpref) / NBBY; 613 else 614 start = 0; 615 len = howmany(fs->e2fs.e2fs_fpg, NBBY) - start; 616 loc = skpc(0xff, len, &bbp[start]); 617 if (loc == 0) { 618 len = start + 1; 619 start = 0; 620 loc = skpc(0xff, len, &bbp[start]); 621 if (loc == 0) { 622 printf("start = %d, len = %d, fs = %s\n", 623 start, len, fs->e2fs_fsmnt); 624 panic("ext2fs_alloccg: map corrupted"); 625 /* NOTREACHED */ 626 } 627 } 628 i = start + len - loc; 629 map = bbp[i]; 630 bno = i * NBBY; 631 for (i = 1; i < (1 << NBBY); i <<= 1, bno++) { 632 if ((map & i) == 0) 633 return (bno); 634 } 635 printf("fs = %s\n", fs->e2fs_fsmnt); 636 panic("ext2fs_mapsearch: block not in map"); 637 /* NOTREACHED */ 638 } 639 640 /* 641 * Fserr prints the name of a file system with an error diagnostic. 642 * 643 * The form of the error message is: 644 * fs: error message 645 */ 646 static void 647 ext2fs_fserr(fs, uid, cp) 648 struct m_ext2fs *fs; 649 u_int uid; 650 char *cp; 651 { 652 653 log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->e2fs_fsmnt, cp); 654 } 655