1 /* $OpenBSD: ext2fs_alloc.c,v 1.6 2001/06/23 02:07:50 csapuntz Exp $ */ 2 /* $NetBSD: ext2fs_alloc.c,v 1.1 1997/06/11 09:33:41 bouyer Exp $ */ 3 4 /* 5 * Copyright (c) 1997 Manuel Bouyer. 6 * Copyright (c) 1982, 1986, 1989, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)ffs_alloc.c 8.11 (Berkeley) 10/27/94 38 * Modified for ext2fs by Manuel Bouyer. 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/buf.h> 44 #include <sys/proc.h> 45 #include <sys/vnode.h> 46 #include <sys/mount.h> 47 #include <sys/kernel.h> 48 #include <sys/syslog.h> 49 50 #include <vm/vm.h> 51 52 #include <ufs/ufs/quota.h> 53 #include <ufs/ufs/inode.h> 54 #include <ufs/ufs/ufs_extern.h> 55 56 #include <ufs/ext2fs/ext2fs.h> 57 #include <ufs/ext2fs/ext2fs_extern.h> 58 59 u_long ext2gennumber; 60 61 static daddr_t ext2fs_alloccg __P((struct inode *, int, daddr_t, int)); 62 static u_long ext2fs_dirpref __P((struct m_ext2fs *)); 63 static void ext2fs_fserr __P((struct m_ext2fs *, u_int, char *)); 64 static u_long ext2fs_hashalloc __P((struct inode *, int, long, int, 65 daddr_t (*)(struct inode *, int, daddr_t, 66 int))); 67 static daddr_t ext2fs_nodealloccg __P((struct inode *, int, daddr_t, int)); 68 static daddr_t ext2fs_mapsearch __P((struct m_ext2fs *, char *, daddr_t)); 69 70 /* 71 * Allocate a block in the file system. 72 * 73 * A preference may be optionally specified. If a preference is given 74 * the following hierarchy is used to allocate a block: 75 * 1) allocate the requested block. 76 * 2) allocate a rotationally optimal block in the same cylinder. 77 * 3) allocate a block in the same cylinder group. 78 * 4) quadradically rehash into other cylinder groups, until an 79 * available block is located. 80 * If no block preference is given the following heirarchy is used 81 * to allocate a block: 82 * 1) allocate a block in the cylinder group that contains the 83 * inode for the file. 84 * 2) quadradically rehash into other cylinder groups, until an 85 * available block is located. 86 */ 87 int 88 ext2fs_alloc(ip, lbn, bpref, cred, bnp) 89 register struct inode *ip; 90 daddr_t lbn, bpref; 91 struct ucred *cred; 92 daddr_t *bnp; 93 { 94 register struct m_ext2fs *fs; 95 daddr_t bno; 96 int cg; 97 98 *bnp = 0; 99 fs = ip->i_e2fs; 100 #ifdef DIAGNOSTIC 101 if (cred == NOCRED) 102 panic("ext2fs_alloc: missing credential"); 103 #endif /* DIAGNOSTIC */ 104 if (fs->e2fs.e2fs_fbcount == 0) 105 goto nospace; 106 if (cred->cr_uid != 0 && freespace(fs) <= 0) 107 goto nospace; 108 if (bpref >= fs->e2fs.e2fs_bcount) 109 bpref = 0; 110 if (bpref == 0) 111 cg = ino_to_cg(fs, ip->i_number); 112 else 113 cg = dtog(fs, bpref); 114 bno = (daddr_t)ext2fs_hashalloc(ip, cg, bpref, fs->e2fs_bsize, 115 ext2fs_alloccg); 116 if (bno > 0) { 117 ip->i_e2fs_nblock += btodb(fs->e2fs_bsize); 118 ip->i_flag |= IN_CHANGE | IN_UPDATE; 119 *bnp = bno; 120 return (0); 121 } 122 nospace: 123 ext2fs_fserr(fs, cred->cr_uid, "file system full"); 124 uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt); 125 return (ENOSPC); 126 } 127 128 /* 129 * Allocate an inode in the file system. 130 * 131 * If allocating a directory, use ext2fs_dirpref to select the inode. 132 * If allocating in a directory, the following hierarchy is followed: 133 * 1) allocate the preferred inode. 134 * 2) allocate an inode in the same cylinder group. 135 * 3) quadradically rehash into other cylinder groups, until an 136 * available inode is located. 137 * If no inode preference is given the following heirarchy is used 138 * to allocate an inode: 139 * 1) allocate an inode in cylinder group 0. 140 * 2) quadradically rehash into other cylinder groups, until an 141 * available inode is located. 142 */ 143 int 144 ext2fs_inode_alloc(struct inode *pip, int mode, struct ucred *cred, 145 struct vnode **vpp) 146 { 147 struct vnode *pvp; 148 struct m_ext2fs *fs; 149 struct inode *ip; 150 ino_t ino, ipref; 151 int cg, error; 152 153 *vpp = NULL; 154 pvp = ITOV(pip); 155 fs = pip->i_e2fs; 156 if (fs->e2fs.e2fs_ficount == 0) 157 goto noinodes; 158 159 if ((mode & IFMT) == IFDIR) 160 cg = ext2fs_dirpref(fs); 161 else 162 cg = ino_to_cg(fs, pip->i_number); 163 ipref = cg * fs->e2fs.e2fs_ipg + 1; 164 ino = (ino_t)ext2fs_hashalloc(pip, cg, (long)ipref, mode, ext2fs_nodealloccg); 165 if (ino == 0) 166 goto noinodes; 167 error = VFS_VGET(pvp->v_mount, ino, vpp); 168 if (error) { 169 ext2fs_inode_free(pip, ino, mode); 170 return (error); 171 } 172 ip = VTOI(*vpp); 173 if (ip->i_e2fs_mode && ip->i_e2fs_nlink != 0) { 174 printf("mode = 0%o, nlinks %d, inum = %d, fs = %s\n", 175 ip->i_e2fs_mode, ip->i_e2fs_nlink, ip->i_number, fs->e2fs_fsmnt); 176 panic("ext2fs_valloc: dup alloc"); 177 } 178 179 bzero(&(ip->i_din.e2fs_din), sizeof(struct ext2fs_dinode)); 180 181 /* 182 * Set up a new generation number for this inode. 183 */ 184 if (++ext2gennumber < (u_long)time.tv_sec) 185 ext2gennumber = time.tv_sec; 186 ip->i_e2fs_gen = ext2gennumber; 187 return (0); 188 noinodes: 189 ext2fs_fserr(fs, cred->cr_uid, "out of inodes"); 190 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt); 191 return (ENOSPC); 192 } 193 194 /* 195 * Find a cylinder to place a directory. 196 * 197 * The policy implemented by this algorithm is to select from 198 * among those cylinder groups with above the average number of 199 * free inodes, the one with the smallest number of directories. 200 */ 201 static u_long 202 ext2fs_dirpref(fs) 203 register struct m_ext2fs *fs; 204 { 205 int cg, maxspace, mincg, avgifree; 206 207 avgifree = fs->e2fs.e2fs_ficount / fs->e2fs_ncg; 208 maxspace = 0; 209 mincg = -1; 210 for (cg = 0; cg < fs->e2fs_ncg; cg++) 211 if ( fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree) { 212 if (mincg == -1 || fs->e2fs_gd[cg].ext2bgd_nbfree > maxspace) { 213 mincg = cg; 214 maxspace = fs->e2fs_gd[cg].ext2bgd_nbfree; 215 } 216 } 217 return mincg; 218 } 219 220 /* 221 * Select the desired position for the next block in a file. The file is 222 * logically divided into sections. The first section is composed of the 223 * direct blocks. Each additional section contains fs_maxbpg blocks. 224 * 225 * If no blocks have been allocated in the first section, the policy is to 226 * request a block in the same cylinder group as the inode that describes 227 * the file. Otherwise, the policy is to try to allocate the blocks 228 * contigously. The two fields of the ext2 inode extention (see 229 * ufs/ufs/inode.h) help this. 230 */ 231 daddr_t 232 ext2fs_blkpref(ip, lbn, indx, bap) 233 struct inode *ip; 234 daddr_t lbn; 235 int indx; 236 daddr_t *bap; 237 { 238 register struct m_ext2fs *fs; 239 register int cg, i; 240 241 fs = ip->i_e2fs; 242 /* 243 * if we are doing contigous lbn allocation, try to alloc blocks 244 * contigously on disk 245 */ 246 247 if ( ip->i_e2fs_last_blk && lbn == ip->i_e2fs_last_lblk + 1) { 248 return ip->i_e2fs_last_blk + 1; 249 } 250 251 /* 252 * bap, if provided, gives us a list of blocks to which we want to 253 * stay close 254 */ 255 256 if (bap) { 257 for (i = indx; i >= 0 ; i--) { 258 if (bap[i]) { 259 return fs2h32(bap[i]) + 1; 260 } 261 } 262 } 263 264 /* fall back to the first block of the cylinder containing the inode */ 265 266 cg = ino_to_cg(fs, ip->i_number); 267 return fs->e2fs.e2fs_bpg * cg + fs->e2fs.e2fs_first_dblock + 1; 268 } 269 270 /* 271 * Implement the cylinder overflow algorithm. 272 * 273 * The policy implemented by this algorithm is: 274 * 1) allocate the block in its requested cylinder group. 275 * 2) quadradically rehash on the cylinder group number. 276 * 3) brute force search for a free block. 277 */ 278 static u_long 279 ext2fs_hashalloc(ip, cg, pref, size, allocator) 280 struct inode *ip; 281 int cg; 282 long pref; 283 int size; /* size for data blocks, mode for inodes */ 284 daddr_t (*allocator) __P((struct inode *, int, daddr_t, int)); 285 { 286 register struct m_ext2fs *fs; 287 long result; 288 int i, icg = cg; 289 290 fs = ip->i_e2fs; 291 /* 292 * 1: preferred cylinder group 293 */ 294 result = (*allocator)(ip, cg, pref, size); 295 if (result) 296 return (result); 297 /* 298 * 2: quadratic rehash 299 */ 300 for (i = 1; i < fs->e2fs_ncg; i *= 2) { 301 cg += i; 302 if (cg >= fs->e2fs_ncg) 303 cg -= fs->e2fs_ncg; 304 result = (*allocator)(ip, cg, 0, size); 305 if (result) 306 return (result); 307 } 308 /* 309 * 3: brute force search 310 * Note that we start at i == 2, since 0 was checked initially, 311 * and 1 is always checked in the quadratic rehash. 312 */ 313 cg = (icg + 2) % fs->e2fs_ncg; 314 for (i = 2; i < fs->e2fs_ncg; i++) { 315 result = (*allocator)(ip, cg, 0, size); 316 if (result) 317 return (result); 318 cg++; 319 if (cg == fs->e2fs_ncg) 320 cg = 0; 321 } 322 return (NULL); 323 } 324 325 /* 326 * Determine whether a block can be allocated. 327 * 328 * Check to see if a block of the appropriate size is available, 329 * and if it is, allocate it. 330 */ 331 332 static daddr_t 333 ext2fs_alloccg(ip, cg, bpref, size) 334 struct inode *ip; 335 int cg; 336 daddr_t bpref; 337 int size; 338 { 339 register struct m_ext2fs *fs; 340 register char *bbp; 341 struct buf *bp; 342 int error, bno, start, end, loc; 343 344 fs = ip->i_e2fs; 345 if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0) 346 return (NULL); 347 error = bread(ip->i_devvp, fsbtodb(fs, 348 fs->e2fs_gd[cg].ext2bgd_b_bitmap), 349 (int)fs->e2fs_bsize, NOCRED, &bp); 350 if (error) { 351 brelse(bp); 352 return (NULL); 353 } 354 bbp = (char *)bp->b_data; 355 356 if (dtog(fs, bpref) != cg) 357 bpref = 0; 358 if (bpref != 0) { 359 bpref = dtogd(fs, bpref); 360 /* 361 * if the requested block is available, use it 362 */ 363 if (isclr(bbp, bpref)) { 364 bno = bpref; 365 goto gotit; 366 } 367 } 368 /* 369 * no blocks in the requested cylinder, so take next 370 * available one in this cylinder group. 371 * first try to get 8 contigous blocks, then fall back to a single 372 * block. 373 */ 374 if (bpref) 375 start = dtogd(fs, bpref) / NBBY; 376 else 377 start = 0; 378 end = howmany(fs->e2fs.e2fs_fpg, NBBY) - start; 379 for (loc = start; loc < end; loc++) { 380 if (bbp[loc] == 0) { 381 bno = loc * NBBY; 382 goto gotit; 383 } 384 } 385 for (loc = 0; loc < start; loc++) { 386 if (bbp[loc] == 0) { 387 bno = loc * NBBY; 388 goto gotit; 389 } 390 } 391 392 bno = ext2fs_mapsearch(fs, bbp, bpref); 393 if (bno < 0) 394 return (NULL); 395 gotit: 396 #ifdef DIAGNOSTIC 397 if (isset(bbp, (long)bno)) { 398 printf("ext2fs_alloccgblk: cg=%d bno=%d fs=%s\n", 399 cg, bno, fs->e2fs_fsmnt); 400 panic("ext2fs_valloc: dup alloc"); 401 } 402 #endif 403 setbit(bbp, (long)bno); 404 fs->e2fs.e2fs_fbcount--; 405 fs->e2fs_gd[cg].ext2bgd_nbfree--; 406 fs->e2fs_fmod = 1; 407 bdwrite(bp); 408 return (cg * fs->e2fs.e2fs_fpg + fs->e2fs.e2fs_first_dblock + bno); 409 } 410 411 /* 412 * Determine whether an inode can be allocated. 413 * 414 * Check to see if an inode is available, and if it is, 415 * allocate it using the following policy: 416 * 1) allocate the requested inode. 417 * 2) allocate the next available inode after the requested 418 * inode in the specified cylinder group. 419 */ 420 static daddr_t 421 ext2fs_nodealloccg(ip, cg, ipref, mode) 422 struct inode *ip; 423 int cg; 424 daddr_t ipref; 425 int mode; 426 { 427 register struct m_ext2fs *fs; 428 register char *ibp; 429 struct buf *bp; 430 int error, start, len, loc, map, i; 431 432 ipref--; /* to avoid a lot of (ipref -1) */ 433 fs = ip->i_e2fs; 434 if (fs->e2fs_gd[cg].ext2bgd_nifree == 0) 435 return (NULL); 436 error = bread(ip->i_devvp, fsbtodb(fs, 437 fs->e2fs_gd[cg].ext2bgd_i_bitmap), 438 (int)fs->e2fs_bsize, NOCRED, &bp); 439 if (error) { 440 brelse(bp); 441 return (NULL); 442 } 443 ibp = (char *)bp->b_data; 444 if (ipref) { 445 ipref %= fs->e2fs.e2fs_ipg; 446 if (isclr(ibp, ipref)) 447 goto gotit; 448 } 449 start = ipref / NBBY; 450 len = howmany(fs->e2fs.e2fs_ipg - ipref, NBBY); 451 loc = skpc(0xff, len, &ibp[start]); 452 if (loc == 0) { 453 len = start + 1; 454 start = 0; 455 loc = skpc(0xff, len, &ibp[0]); 456 if (loc == 0) { 457 printf("cg = %d, ipref = %d, fs = %s\n", 458 cg, ipref, fs->e2fs_fsmnt); 459 panic("ext2fs_nodealloccg: map corrupted"); 460 /* NOTREACHED */ 461 } 462 } 463 i = start + len - loc; 464 map = ibp[i]; 465 ipref = i * NBBY; 466 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) { 467 if ((map & i) == 0) { 468 goto gotit; 469 } 470 } 471 printf("fs = %s\n", fs->e2fs_fsmnt); 472 panic("ext2fs_nodealloccg: block not in map"); 473 /* NOTREACHED */ 474 gotit: 475 setbit(ibp, ipref); 476 fs->e2fs.e2fs_ficount--; 477 fs->e2fs_gd[cg].ext2bgd_nifree--; 478 fs->e2fs_fmod = 1; 479 if ((mode & IFMT) == IFDIR) { 480 fs->e2fs_gd[cg].ext2bgd_ndirs++; 481 } 482 bdwrite(bp); 483 return (cg * fs->e2fs.e2fs_ipg + ipref +1); 484 } 485 486 /* 487 * Free a block. 488 * 489 * The specified block is placed back in the 490 * free map. 491 */ 492 void 493 ext2fs_blkfree(ip, bno) 494 register struct inode *ip; 495 daddr_t bno; 496 { 497 register struct m_ext2fs *fs; 498 register char *bbp; 499 struct buf *bp; 500 int error, cg; 501 502 fs = ip->i_e2fs; 503 cg = dtog(fs, bno); 504 if ((u_int)bno >= fs->e2fs.e2fs_bcount) { 505 printf("bad block %d, ino %d\n", bno, ip->i_number); 506 ext2fs_fserr(fs, ip->i_e2fs_uid, "bad block"); 507 return; 508 } 509 error = bread(ip->i_devvp, fsbtodb(fs, 510 fs->e2fs_gd[cg].ext2bgd_b_bitmap), 511 (int)fs->e2fs_bsize, NOCRED, &bp); 512 if (error) { 513 brelse(bp); 514 return; 515 } 516 bbp = (char *)bp->b_data; 517 bno = dtogd(fs, bno); 518 if (isclr(bbp, bno)) { 519 printf("dev = 0x%x, block = %d, fs = %s\n", 520 ip->i_dev, bno, fs->e2fs_fsmnt); 521 panic("blkfree: freeing free block"); 522 } 523 clrbit(bbp, bno); 524 fs->e2fs.e2fs_fbcount++; 525 fs->e2fs_gd[cg].ext2bgd_nbfree++; 526 527 fs->e2fs_fmod = 1; 528 bdwrite(bp); 529 } 530 531 /* 532 * Free an inode. 533 * 534 * The specified inode is placed back in the free map. 535 */ 536 int 537 ext2fs_inode_free(struct inode *pip, ino_t ino, int mode) 538 { 539 register struct m_ext2fs *fs; 540 register char *ibp; 541 struct buf *bp; 542 int error, cg; 543 544 fs = pip->i_e2fs; 545 if ((u_int)ino >= fs->e2fs.e2fs_icount || (u_int)ino < EXT2_FIRSTINO) 546 panic("ifree: range: dev = 0x%x, ino = %d, fs = %s", 547 pip->i_dev, ino, fs->e2fs_fsmnt); 548 cg = ino_to_cg(fs, ino); 549 error = bread(pip->i_devvp, 550 fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_i_bitmap), 551 (int)fs->e2fs_bsize, NOCRED, &bp); 552 if (error) { 553 brelse(bp); 554 return (0); 555 } 556 ibp = (char *)bp->b_data; 557 ino = (ino - 1) % fs->e2fs.e2fs_ipg; 558 if (isclr(ibp, ino)) { 559 printf("dev = 0x%x, ino = %d, fs = %s\n", 560 pip->i_dev, ino, fs->e2fs_fsmnt); 561 if (fs->e2fs_ronly == 0) 562 panic("ifree: freeing free inode"); 563 } 564 clrbit(ibp, ino); 565 fs->e2fs.e2fs_ficount++; 566 fs->e2fs_gd[cg].ext2bgd_nifree++; 567 if ((mode & IFMT) == IFDIR) { 568 fs->e2fs_gd[cg].ext2bgd_ndirs--; 569 } 570 fs->e2fs_fmod = 1; 571 bdwrite(bp); 572 return (0); 573 } 574 575 /* 576 * Find a block in the specified cylinder group. 577 * 578 * It is a panic if a request is made to find a block if none are 579 * available. 580 */ 581 582 static daddr_t 583 ext2fs_mapsearch(fs, bbp, bpref) 584 register struct m_ext2fs *fs; 585 register char *bbp; 586 daddr_t bpref; 587 { 588 daddr_t bno; 589 int start, len, loc, i, map; 590 591 /* 592 * find the fragment by searching through the free block 593 * map for an appropriate bit pattern 594 */ 595 if (bpref) 596 start = dtogd(fs, bpref) / NBBY; 597 else 598 start = 0; 599 len = howmany(fs->e2fs.e2fs_fpg, NBBY) - start; 600 loc = skpc(0xff, len, &bbp[start]); 601 if (loc == 0) { 602 len = start + 1; 603 start = 0; 604 loc = skpc(0xff, len, &bbp[start]); 605 if (loc == 0) { 606 printf("start = %d, len = %d, fs = %s\n", 607 start, len, fs->e2fs_fsmnt); 608 panic("ext2fs_alloccg: map corrupted"); 609 /* NOTREACHED */ 610 } 611 } 612 i = start + len - loc; 613 map = bbp[i]; 614 bno = i * NBBY; 615 for (i = 1; i < (1 << NBBY); i <<= 1, bno++) { 616 if ((map & i) == 0) 617 return (bno); 618 } 619 printf("fs = %s\n", fs->e2fs_fsmnt); 620 panic("ext2fs_mapsearch: block not in map"); 621 /* NOTREACHED */ 622 } 623 624 /* 625 * Fserr prints the name of a file system with an error diagnostic. 626 * 627 * The form of the error message is: 628 * fs: error message 629 */ 630 static void 631 ext2fs_fserr(fs, uid, cp) 632 struct m_ext2fs *fs; 633 u_int uid; 634 char *cp; 635 { 636 637 log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->e2fs_fsmnt, cp); 638 } 639