1 /* $NetBSD: ffs_alloc.c,v 1.129 2011/09/20 14:01:32 chs Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Wasabi Systems, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 2002 Networks Associates Technology, Inc. 34 * All rights reserved. 35 * 36 * This software was developed for the FreeBSD Project by Marshall 37 * Kirk McKusick and Network Associates Laboratories, the Security 38 * Research Division of Network Associates, Inc. under DARPA/SPAWAR 39 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS 40 * research program 41 * 42 * Copyright (c) 1982, 1986, 1989, 1993 43 * The Regents of the University of California. All rights reserved. 44 * 45 * Redistribution and use in source and binary forms, with or without 46 * modification, are permitted provided that the following conditions 47 * are met: 48 * 1. Redistributions of source code must retain the above copyright 49 * notice, this list of conditions and the following disclaimer. 50 * 2. Redistributions in binary form must reproduce the above copyright 51 * notice, this list of conditions and the following disclaimer in the 52 * documentation and/or other materials provided with the distribution. 53 * 3. Neither the name of the University nor the names of its contributors 54 * may be used to endorse or promote products derived from this software 55 * without specific prior written permission. 56 * 57 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 58 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 59 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 60 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 61 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 62 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 63 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 64 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 65 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 66 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 67 * SUCH DAMAGE. 68 * 69 * @(#)ffs_alloc.c 8.19 (Berkeley) 7/13/95 70 */ 71 72 #include <sys/cdefs.h> 73 __KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.129 2011/09/20 14:01:32 chs Exp $"); 74 75 #if defined(_KERNEL_OPT) 76 #include "opt_ffs.h" 77 #include "opt_quota.h" 78 #include "opt_uvm_page_trkown.h" 79 #endif 80 81 #include <sys/param.h> 82 #include <sys/systm.h> 83 #include <sys/buf.h> 84 #include <sys/fstrans.h> 85 #include <sys/kauth.h> 86 #include <sys/kernel.h> 87 #include <sys/mount.h> 88 #include <sys/proc.h> 89 #include <sys/syslog.h> 90 #include <sys/vnode.h> 91 #include <sys/wapbl.h> 92 93 #include <miscfs/specfs/specdev.h> 94 #include <ufs/ufs/quota.h> 95 #include <ufs/ufs/ufsmount.h> 96 #include <ufs/ufs/inode.h> 97 #include <ufs/ufs/ufs_extern.h> 98 #include <ufs/ufs/ufs_bswap.h> 99 #include <ufs/ufs/ufs_wapbl.h> 100 101 #include <ufs/ffs/fs.h> 102 #include <ufs/ffs/ffs_extern.h> 103 104 #ifdef UVM_PAGE_TRKOWN 105 #include <uvm/uvm.h> 106 #endif 107 108 static daddr_t ffs_alloccg(struct inode *, int, daddr_t, int, int); 109 static daddr_t ffs_alloccgblk(struct inode *, struct buf *, daddr_t, int); 110 static ino_t ffs_dirpref(struct inode *); 111 static daddr_t ffs_fragextend(struct inode *, int, daddr_t, int, int); 112 static void ffs_fserr(struct fs *, u_int, const char *); 113 static daddr_t ffs_hashalloc(struct inode *, int, daddr_t, int, int, 114 daddr_t (*)(struct inode *, int, daddr_t, int, int)); 115 static daddr_t ffs_nodealloccg(struct inode *, int, daddr_t, int, int); 116 static int32_t ffs_mapsearch(struct fs *, struct cg *, 117 daddr_t, int); 118 static void ffs_blkfree_common(struct ufsmount *, struct fs *, dev_t, struct buf *, 119 daddr_t, long, bool); 120 static void ffs_freefile_common(struct ufsmount *, struct fs *, dev_t, struct buf *, ino_t, 121 int, bool); 122 123 /* if 1, changes in optimalization strategy are logged */ 124 int ffs_log_changeopt = 0; 125 126 /* in ffs_tables.c */ 127 extern const int inside[], around[]; 128 extern const u_char * const fragtbl[]; 129 130 /* Basic consistency check for block allocations */ 131 static int 132 ffs_check_bad_allocation(const char *func, struct fs *fs, daddr_t bno, 133 long size, dev_t dev, ino_t inum) 134 { 135 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 || 136 fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) { 137 printf("dev = 0x%llx, bno = %" PRId64 " bsize = %d, " 138 "size = %ld, fs = %s\n", 139 (long long)dev, bno, fs->fs_bsize, size, fs->fs_fsmnt); 140 panic("%s: bad size", func); 141 } 142 143 if (bno >= fs->fs_size) { 144 printf("bad block %" PRId64 ", ino %llu\n", bno, 145 (unsigned long long)inum); 146 ffs_fserr(fs, inum, "bad block"); 147 return EINVAL; 148 } 149 return 0; 150 } 151 152 /* 153 * Allocate a block in the file system. 154 * 155 * The size of the requested block is given, which must be some 156 * multiple of fs_fsize and <= fs_bsize. 157 * A preference may be optionally specified. 
If a preference is given 158 * the following hierarchy is used to allocate a block: 159 * 1) allocate the requested block. 160 * 2) allocate a rotationally optimal block in the same cylinder. 161 * 3) allocate a block in the same cylinder group. 162 * 4) quadratically rehash into other cylinder groups, until an 163 * available block is located. 164 * If no block preference is given the following hierarchy is used 165 * to allocate a block: 166 * 1) allocate a block in the cylinder group that contains the 167 * inode for the file. 168 * 2) quadratically rehash into other cylinder groups, until an 169 * available block is located. 170 * 171 * => called with um_lock held 172 * => releases um_lock before returning 173 */ 174 int 175 ffs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref, int size, int flags, 176 kauth_cred_t cred, daddr_t *bnp) 177 { 178 struct ufsmount *ump; 179 struct fs *fs; 180 daddr_t bno; 181 int cg; 182 #if defined(QUOTA) || defined(QUOTA2) 183 int error; 184 #endif 185 186 fs = ip->i_fs; 187 ump = ip->i_ump; 188 189 KASSERT(mutex_owned(&ump->um_lock)); 190 191 #ifdef UVM_PAGE_TRKOWN 192 193 /* 194 * Sanity-check that allocations within the file size 195 * do not allow other threads to read the stale contents 196 * of newly allocated blocks. 197 * Usually pages will exist to cover the new allocation. 198 * There is an optimization in ffs_write() where we skip 199 * creating pages if several conditions are met: 200 * - the file must not be mapped (in any user address space). 201 * - the write must cover whole pages and whole blocks. 202 * If those conditions are not met then pages must exist and 203 * be locked by the current thread. 204 */ 205 206 if (ITOV(ip)->v_type == VREG && 207 lblktosize(fs, (voff_t)lbn) < round_page(ITOV(ip)->v_size)) { 208 struct vm_page *pg; 209 struct vnode *vp = ITOV(ip); 210 struct uvm_object *uobj = &vp->v_uobj; 211 voff_t off = trunc_page(lblktosize(fs, lbn)); 212 voff_t endoff = round_page(lblktosize(fs, lbn) + size); 213 214 mutex_enter(uobj->vmobjlock); 215 while (off < endoff) { 216 pg = uvm_pagelookup(uobj, off); 217 KASSERT((pg == NULL && (vp->v_vflag & VV_MAPPED) == 0 && 218 (size & PAGE_MASK) == 0 && 219 blkoff(fs, size) == 0) || 220 (pg != NULL && pg->owner == curproc->p_pid && 221 pg->lowner == curlwp->l_lid)); 222 off += PAGE_SIZE; 223 } 224 mutex_exit(uobj->vmobjlock); 225 } 226 #endif 227 228 *bnp = 0; 229 #ifdef DIAGNOSTIC 230 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { 231 printf("dev = 0x%llx, bsize = %d, size = %d, fs = %s\n", 232 (unsigned long long)ip->i_dev, fs->fs_bsize, size, 233 fs->fs_fsmnt); 234 panic("ffs_alloc: bad size"); 235 } 236 if (cred == NOCRED) 237 panic("ffs_alloc: missing credential"); 238 #endif /* DIAGNOSTIC */ 239 if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0) 240 goto nospace; 241 if (freespace(fs, fs->fs_minfree) <= 0 && 242 kauth_authorize_system(cred, KAUTH_SYSTEM_FS_RESERVEDSPACE, 0, NULL, 243 NULL, NULL) != 0) 244 goto nospace; 245 #if defined(QUOTA) || defined(QUOTA2) 246 mutex_exit(&ump->um_lock); 247 if ((error = chkdq(ip, btodb(size), cred, 0)) != 0) 248 return (error); 249 mutex_enter(&ump->um_lock); 250 #endif 251 252 if (bpref >= fs->fs_size) 253 bpref = 0; 254 if (bpref == 0) 255 cg = ino_to_cg(fs, ip->i_number); 256 else 257 cg = dtog(fs, bpref); 258 bno = ffs_hashalloc(ip, cg, bpref, size, flags, ffs_alloccg); 259 if (bno > 0) { 260 DIP_ADD(ip, blocks, btodb(size)); 261 ip->i_flag |= IN_CHANGE | IN_UPDATE; 262 *bnp = bno; 263 return (0); 264 } 265 #if
defined(QUOTA) || defined(QUOTA2) 266 /* 267 * Restore user's disk quota because allocation failed. 268 */ 269 (void) chkdq(ip, -btodb(size), cred, FORCE); 270 #endif 271 if (flags & B_CONTIG) { 272 /* 273 * XXX ump->um_lock handling is "suspect" at best. 274 * For the case where ffs_hashalloc() fails early 275 * in the B_CONTIG case we reach here with um_lock 276 * already unlocked, so we can't release it again 277 * like in the normal error path. See kern/39206. 278 * 279 * 280 * Fail silently - it's up to our caller to report 281 * errors. 282 */ 283 return (ENOSPC); 284 } 285 nospace: 286 mutex_exit(&ump->um_lock); 287 ffs_fserr(fs, kauth_cred_geteuid(cred), "file system full"); 288 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt); 289 return (ENOSPC); 290 } 291 292 /* 293 * Reallocate a fragment to a bigger size 294 * 295 * The number and size of the old block is given, and a preference 296 * and new size is also specified. The allocator attempts to extend 297 * the original block. Failing that, the regular block allocator is 298 * invoked to get an appropriate block. 299 * 300 * => called with um_lock held 301 * => return with um_lock released 302 */ 303 int 304 ffs_realloccg(struct inode *ip, daddr_t lbprev, daddr_t bpref, int osize, 305 int nsize, kauth_cred_t cred, struct buf **bpp, daddr_t *blknop) 306 { 307 struct ufsmount *ump; 308 struct fs *fs; 309 struct buf *bp; 310 int cg, request, error; 311 daddr_t bprev, bno; 312 313 fs = ip->i_fs; 314 ump = ip->i_ump; 315 316 KASSERT(mutex_owned(&ump->um_lock)); 317 318 #ifdef UVM_PAGE_TRKOWN 319 320 /* 321 * Sanity-check that allocations within the file size 322 * do not allow other threads to read the stale contents 323 * of newly allocated blocks. 324 * Unlike in ffs_alloc(), here pages must always exist 325 * for such allocations, because only the last block of a file 326 * can be a fragment and ffs_write() will reallocate the 327 * fragment to the new size using ufs_balloc_range(), 328 * which always creates pages to cover blocks it allocates. 
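 * The loop below therefore expects a page to exist at every offset of
 * the old fragment and asserts that it is owned by the current thread.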
329 */ 330 331 if (ITOV(ip)->v_type == VREG) { 332 struct vm_page *pg; 333 struct uvm_object *uobj = &ITOV(ip)->v_uobj; 334 voff_t off = trunc_page(lblktosize(fs, lbprev)); 335 voff_t endoff = round_page(lblktosize(fs, lbprev) + osize); 336 337 mutex_enter(uobj->vmobjlock); 338 while (off < endoff) { 339 pg = uvm_pagelookup(uobj, off); 340 KASSERT(pg->owner == curproc->p_pid && 341 pg->lowner == curlwp->l_lid); 342 off += PAGE_SIZE; 343 } 344 mutex_exit(uobj->vmobjlock); 345 } 346 #endif 347 348 #ifdef DIAGNOSTIC 349 if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 || 350 (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) { 351 printf( 352 "dev = 0x%llx, bsize = %d, osize = %d, nsize = %d, fs = %s\n", 353 (unsigned long long)ip->i_dev, fs->fs_bsize, osize, nsize, 354 fs->fs_fsmnt); 355 panic("ffs_realloccg: bad size"); 356 } 357 if (cred == NOCRED) 358 panic("ffs_realloccg: missing credential"); 359 #endif /* DIAGNOSTIC */ 360 if (freespace(fs, fs->fs_minfree) <= 0 && 361 kauth_authorize_system(cred, KAUTH_SYSTEM_FS_RESERVEDSPACE, 0, NULL, 362 NULL, NULL) != 0) { 363 mutex_exit(&ump->um_lock); 364 goto nospace; 365 } 366 if (fs->fs_magic == FS_UFS2_MAGIC) 367 bprev = ufs_rw64(ip->i_ffs2_db[lbprev], UFS_FSNEEDSWAP(fs)); 368 else 369 bprev = ufs_rw32(ip->i_ffs1_db[lbprev], UFS_FSNEEDSWAP(fs)); 370 371 if (bprev == 0) { 372 printf("dev = 0x%llx, bsize = %d, bprev = %" PRId64 ", fs = %s\n", 373 (unsigned long long)ip->i_dev, fs->fs_bsize, bprev, 374 fs->fs_fsmnt); 375 panic("ffs_realloccg: bad bprev"); 376 } 377 mutex_exit(&ump->um_lock); 378 379 /* 380 * Allocate the extra space in the buffer. 381 */ 382 if (bpp != NULL && 383 (error = bread(ITOV(ip), lbprev, osize, NOCRED, 0, &bp)) != 0) { 384 brelse(bp, 0); 385 return (error); 386 } 387 #if defined(QUOTA) || defined(QUOTA2) 388 if ((error = chkdq(ip, btodb(nsize - osize), cred, 0)) != 0) { 389 if (bpp != NULL) { 390 brelse(bp, 0); 391 } 392 return (error); 393 } 394 #endif 395 /* 396 * Check for extension in the existing location. 397 */ 398 cg = dtog(fs, bprev); 399 mutex_enter(&ump->um_lock); 400 if ((bno = ffs_fragextend(ip, cg, bprev, osize, nsize)) != 0) { 401 DIP_ADD(ip, blocks, btodb(nsize - osize)); 402 ip->i_flag |= IN_CHANGE | IN_UPDATE; 403 404 if (bpp != NULL) { 405 if (bp->b_blkno != fsbtodb(fs, bno)) 406 panic("bad blockno"); 407 allocbuf(bp, nsize, 1); 408 memset((char *)bp->b_data + osize, 0, nsize - osize); 409 mutex_enter(bp->b_objlock); 410 KASSERT(!cv_has_waiters(&bp->b_done)); 411 bp->b_oflags |= BO_DONE; 412 mutex_exit(bp->b_objlock); 413 *bpp = bp; 414 } 415 if (blknop != NULL) { 416 *blknop = bno; 417 } 418 return (0); 419 } 420 /* 421 * Allocate a new disk location. 422 */ 423 if (bpref >= fs->fs_size) 424 bpref = 0; 425 switch ((int)fs->fs_optim) { 426 case FS_OPTSPACE: 427 /* 428 * Allocate an exact sized fragment. Although this makes 429 * best use of space, we will waste time relocating it if 430 * the file continues to grow. If the fragmentation is 431 * less than half of the minimum free reserve, we choose 432 * to begin optimizing for time. 
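 * For example, with fs_minfree at 5% the switch below happens once
 * cs_nffree drops to about 2.5% of fs_dsize; file systems with
 * fs_minfree below 5% never switch automatically.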
433 */ 434 request = nsize; 435 if (fs->fs_minfree < 5 || 436 fs->fs_cstotal.cs_nffree > 437 fs->fs_dsize * fs->fs_minfree / (2 * 100)) 438 break; 439 440 if (ffs_log_changeopt) { 441 log(LOG_NOTICE, 442 "%s: optimization changed from SPACE to TIME\n", 443 fs->fs_fsmnt); 444 } 445 446 fs->fs_optim = FS_OPTTIME; 447 break; 448 case FS_OPTTIME: 449 /* 450 * At this point we have discovered a file that is trying to 451 * grow a small fragment to a larger fragment. To save time, 452 * we allocate a full sized block, then free the unused portion. 453 * If the file continues to grow, the `ffs_fragextend' call 454 * above will be able to grow it in place without further 455 * copying. If aberrant programs cause disk fragmentation to 456 * grow within 2% of the free reserve, we choose to begin 457 * optimizing for space. 458 */ 459 request = fs->fs_bsize; 460 if (fs->fs_cstotal.cs_nffree < 461 fs->fs_dsize * (fs->fs_minfree - 2) / 100) 462 break; 463 464 if (ffs_log_changeopt) { 465 log(LOG_NOTICE, 466 "%s: optimization changed from TIME to SPACE\n", 467 fs->fs_fsmnt); 468 } 469 470 fs->fs_optim = FS_OPTSPACE; 471 break; 472 default: 473 printf("dev = 0x%llx, optim = %d, fs = %s\n", 474 (unsigned long long)ip->i_dev, fs->fs_optim, fs->fs_fsmnt); 475 panic("ffs_realloccg: bad optim"); 476 /* NOTREACHED */ 477 } 478 bno = ffs_hashalloc(ip, cg, bpref, request, 0, ffs_alloccg); 479 if (bno > 0) { 480 if ((ip->i_ump->um_mountp->mnt_wapbl) && 481 (ITOV(ip)->v_type != VREG)) { 482 UFS_WAPBL_REGISTER_DEALLOCATION( 483 ip->i_ump->um_mountp, fsbtodb(fs, bprev), 484 osize); 485 } else { 486 ffs_blkfree(fs, ip->i_devvp, bprev, (long)osize, 487 ip->i_number); 488 } 489 if (nsize < request) { 490 if ((ip->i_ump->um_mountp->mnt_wapbl) && 491 (ITOV(ip)->v_type != VREG)) { 492 UFS_WAPBL_REGISTER_DEALLOCATION( 493 ip->i_ump->um_mountp, 494 fsbtodb(fs, (bno + numfrags(fs, nsize))), 495 request - nsize); 496 } else 497 ffs_blkfree(fs, ip->i_devvp, 498 bno + numfrags(fs, nsize), 499 (long)(request - nsize), ip->i_number); 500 } 501 DIP_ADD(ip, blocks, btodb(nsize - osize)); 502 ip->i_flag |= IN_CHANGE | IN_UPDATE; 503 if (bpp != NULL) { 504 bp->b_blkno = fsbtodb(fs, bno); 505 allocbuf(bp, nsize, 1); 506 memset((char *)bp->b_data + osize, 0, (u_int)nsize - osize); 507 mutex_enter(bp->b_objlock); 508 KASSERT(!cv_has_waiters(&bp->b_done)); 509 bp->b_oflags |= BO_DONE; 510 mutex_exit(bp->b_objlock); 511 *bpp = bp; 512 } 513 if (blknop != NULL) { 514 *blknop = bno; 515 } 516 return (0); 517 } 518 mutex_exit(&ump->um_lock); 519 520 #if defined(QUOTA) || defined(QUOTA2) 521 /* 522 * Restore user's disk quota because allocation failed. 523 */ 524 (void) chkdq(ip, -btodb(nsize - osize), cred, FORCE); 525 #endif 526 if (bpp != NULL) { 527 brelse(bp, 0); 528 } 529 530 nospace: 531 /* 532 * no space available 533 */ 534 ffs_fserr(fs, kauth_cred_geteuid(cred), "file system full"); 535 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt); 536 return (ENOSPC); 537 } 538 539 /* 540 * Allocate an inode in the file system. 541 * 542 * If allocating a directory, use ffs_dirpref to select the inode. 543 * If allocating in a directory, the following hierarchy is followed: 544 * 1) allocate the preferred inode. 545 * 2) allocate an inode in the same cylinder group. 546 * 3) quadratically rehash into other cylinder groups, until an 547 * available inode is located. 548 * If no inode preference is given the following hierarchy is used 549 * to allocate an inode: 550 * 1) allocate an inode in cylinder group 0.
551 * 2) quadratically rehash into other cylinder groups, until an 552 * available inode is located. 553 * 554 * => um_lock not held upon entry or return 555 */ 556 int 557 ffs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred, 558 struct vnode **vpp) 559 { 560 struct ufsmount *ump; 561 struct inode *pip; 562 struct fs *fs; 563 struct inode *ip; 564 struct timespec ts; 565 ino_t ino, ipref; 566 int cg, error; 567 568 UFS_WAPBL_JUNLOCK_ASSERT(pvp->v_mount); 569 570 *vpp = NULL; 571 pip = VTOI(pvp); 572 fs = pip->i_fs; 573 ump = pip->i_ump; 574 575 error = UFS_WAPBL_BEGIN(pvp->v_mount); 576 if (error) { 577 return error; 578 } 579 mutex_enter(&ump->um_lock); 580 if (fs->fs_cstotal.cs_nifree == 0) 581 goto noinodes; 582 583 if ((mode & IFMT) == IFDIR) 584 ipref = ffs_dirpref(pip); 585 else 586 ipref = pip->i_number; 587 if (ipref >= fs->fs_ncg * fs->fs_ipg) 588 ipref = 0; 589 cg = ino_to_cg(fs, ipref); 590 /* 591 * Track the number of dirs created one after another 592 * in the same cg without intervening files. 593 */ 594 if ((mode & IFMT) == IFDIR) { 595 if (fs->fs_contigdirs[cg] < 255) 596 fs->fs_contigdirs[cg]++; 597 } else { 598 if (fs->fs_contigdirs[cg] > 0) 599 fs->fs_contigdirs[cg]--; 600 } 601 ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode, 0, ffs_nodealloccg); 602 if (ino == 0) 603 goto noinodes; 604 UFS_WAPBL_END(pvp->v_mount); 605 error = VFS_VGET(pvp->v_mount, ino, vpp); 606 if (error) { 607 int err; 608 err = UFS_WAPBL_BEGIN(pvp->v_mount); 609 if (err == 0) 610 ffs_vfree(pvp, ino, mode); 611 if (err == 0) 612 UFS_WAPBL_END(pvp->v_mount); 613 return (error); 614 } 615 KASSERT((*vpp)->v_type == VNON); 616 ip = VTOI(*vpp); 617 if (ip->i_mode) { 618 #if 0 619 printf("mode = 0%o, inum = %d, fs = %s\n", 620 ip->i_mode, ip->i_number, fs->fs_fsmnt); 621 #else 622 printf("dmode %x mode %x dgen %x gen %x\n", 623 DIP(ip, mode), ip->i_mode, 624 DIP(ip, gen), ip->i_gen); 625 printf("size %llx blocks %llx\n", 626 (long long)DIP(ip, size), (long long)DIP(ip, blocks)); 627 printf("ino %llu ipref %llu\n", (unsigned long long)ino, 628 (unsigned long long)ipref); 629 #if 0 630 error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), 631 (int)fs->fs_bsize, NOCRED, 0, &bp); 632 #endif 633 634 #endif 635 panic("ffs_valloc: dup alloc"); 636 } 637 if (DIP(ip, blocks)) { /* XXX */ 638 printf("free inode %s/%llu had %" PRId64 " blocks\n", 639 fs->fs_fsmnt, (unsigned long long)ino, DIP(ip, blocks)); 640 DIP_ASSIGN(ip, blocks, 0); 641 } 642 ip->i_flag &= ~IN_SPACECOUNTED; 643 ip->i_flags = 0; 644 DIP_ASSIGN(ip, flags, 0); 645 /* 646 * Set up a new generation number for this inode. 647 */ 648 ip->i_gen++; 649 DIP_ASSIGN(ip, gen, ip->i_gen); 650 if (fs->fs_magic == FS_UFS2_MAGIC) { 651 vfs_timestamp(&ts); 652 ip->i_ffs2_birthtime = ts.tv_sec; 653 ip->i_ffs2_birthnsec = ts.tv_nsec; 654 } 655 return (0); 656 noinodes: 657 mutex_exit(&ump->um_lock); 658 UFS_WAPBL_END(pvp->v_mount); 659 ffs_fserr(fs, kauth_cred_geteuid(cred), "out of inodes"); 660 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt); 661 return (ENOSPC); 662 } 663 664 /* 665 * Find a cylinder group in which to place a directory. 666 * 667 * The policy implemented by this algorithm is to allocate a 668 * directory inode in the same cylinder group as its parent 669 * directory, but also to reserve space for its files' inodes 670 * and data. Restrict the number of directories which may be 671 * allocated one after another in the same cylinder group 672 * without intervening allocation of files.
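 * The limit is enforced through fs_contigdirs[] below: in the preferred
 * passes a cylinder group is skipped once fs_contigdirs[cg] reaches
 * maxcontigdirs, which is sized so that the expected file data of that
 * many directories (roughly fs_avgfilesize * fs_avgfpdir bytes each)
 * fills about one cylinder group.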
673 * 674 * If we allocate a first level directory then force allocation 675 * in another cylinder group. 676 */ 677 static ino_t 678 ffs_dirpref(struct inode *pip) 679 { 680 register struct fs *fs; 681 int cg, prefcg; 682 int64_t dirsize, cgsize, curdsz; 683 int avgifree, avgbfree, avgndir; 684 int minifree, minbfree, maxndir; 685 int mincg, minndir; 686 int maxcontigdirs; 687 688 KASSERT(mutex_owned(&pip->i_ump->um_lock)); 689 690 fs = pip->i_fs; 691 692 avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg; 693 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; 694 avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg; 695 696 /* 697 * Force allocation in another cg if creating a first level dir. 698 */ 699 if (ITOV(pip)->v_vflag & VV_ROOT) { 700 prefcg = random() % fs->fs_ncg; 701 mincg = prefcg; 702 minndir = fs->fs_ipg; 703 for (cg = prefcg; cg < fs->fs_ncg; cg++) 704 if (fs->fs_cs(fs, cg).cs_ndir < minndir && 705 fs->fs_cs(fs, cg).cs_nifree >= avgifree && 706 fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 707 mincg = cg; 708 minndir = fs->fs_cs(fs, cg).cs_ndir; 709 } 710 for (cg = 0; cg < prefcg; cg++) 711 if (fs->fs_cs(fs, cg).cs_ndir < minndir && 712 fs->fs_cs(fs, cg).cs_nifree >= avgifree && 713 fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 714 mincg = cg; 715 minndir = fs->fs_cs(fs, cg).cs_ndir; 716 } 717 return ((ino_t)(fs->fs_ipg * mincg)); 718 } 719 720 /* 721 * Count various limits which are used for 722 * optimal allocation of a directory inode. 723 */ 724 maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg); 725 minifree = avgifree - fs->fs_ipg / 4; 726 if (minifree < 0) 727 minifree = 0; 728 minbfree = avgbfree - fragstoblks(fs, fs->fs_fpg) / 4; 729 if (minbfree < 0) 730 minbfree = 0; 731 cgsize = (int64_t)fs->fs_fsize * fs->fs_fpg; 732 dirsize = (int64_t)fs->fs_avgfilesize * fs->fs_avgfpdir; 733 if (avgndir != 0) { 734 curdsz = (cgsize - (int64_t)avgbfree * fs->fs_bsize) / avgndir; 735 if (dirsize < curdsz) 736 dirsize = curdsz; 737 } 738 if (cgsize < dirsize * 255) 739 maxcontigdirs = cgsize / dirsize; 740 else 741 maxcontigdirs = 255; 742 if (fs->fs_avgfpdir > 0) 743 maxcontigdirs = min(maxcontigdirs, 744 fs->fs_ipg / fs->fs_avgfpdir); 745 if (maxcontigdirs == 0) 746 maxcontigdirs = 1; 747 748 /* 749 * Limit number of dirs in one cg and reserve space for 750 * regular files, but only if we have no deficit in 751 * inodes or space. 752 */ 753 prefcg = ino_to_cg(fs, pip->i_number); 754 for (cg = prefcg; cg < fs->fs_ncg; cg++) 755 if (fs->fs_cs(fs, cg).cs_ndir < maxndir && 756 fs->fs_cs(fs, cg).cs_nifree >= minifree && 757 fs->fs_cs(fs, cg).cs_nbfree >= minbfree) { 758 if (fs->fs_contigdirs[cg] < maxcontigdirs) 759 return ((ino_t)(fs->fs_ipg * cg)); 760 } 761 for (cg = 0; cg < prefcg; cg++) 762 if (fs->fs_cs(fs, cg).cs_ndir < maxndir && 763 fs->fs_cs(fs, cg).cs_nifree >= minifree && 764 fs->fs_cs(fs, cg).cs_nbfree >= minbfree) { 765 if (fs->fs_contigdirs[cg] < maxcontigdirs) 766 return ((ino_t)(fs->fs_ipg * cg)); 767 } 768 /* 769 * This is a backstop when we are deficient in space. 770 */ 771 for (cg = prefcg; cg < fs->fs_ncg; cg++) 772 if (fs->fs_cs(fs, cg).cs_nifree >= avgifree) 773 return ((ino_t)(fs->fs_ipg * cg)); 774 for (cg = 0; cg < prefcg; cg++) 775 if (fs->fs_cs(fs, cg).cs_nifree >= avgifree) 776 break; 777 return ((ino_t)(fs->fs_ipg * cg)); 778 } 779 780 /* 781 * Select the desired position for the next block in a file. The file is 782 * logically divided into sections. The first section is composed of the 783 * direct blocks. Each additional section contains fs_maxbpg blocks.
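 * (For example, with 16 KB blocks and fs_maxbpg of 2048, each section
 * after the direct blocks would cover 32 MB of the file.)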
784 * 785 * If no blocks have been allocated in the first section, the policy is to 786 * request a block in the same cylinder group as the inode that describes 787 * the file. If no blocks have been allocated in any other section, the 788 * policy is to place the section in a cylinder group with a greater than 789 * average number of free blocks. An appropriate cylinder group is found 790 * by using a rotor that sweeps the cylinder groups. When a new group of 791 * blocks is needed, the sweep begins in the cylinder group following the 792 * cylinder group from which the previous allocation was made. The sweep 793 * continues until a cylinder group with greater than the average number 794 * of free blocks is found. If the allocation is for the first block in an 795 * indirect block, the information on the previous allocation is unavailable; 796 * here a best guess is made based upon the logical block number being 797 * allocated. 798 * 799 * If a section is already partially allocated, the policy is to 800 * contiguously allocate fs_maxcontig blocks. The end of one of these 801 * contiguous blocks and the beginning of the next is laid out 802 * contiguously if possible. 803 * 804 * => um_lock held on entry and exit 805 */ 806 daddr_t 807 ffs_blkpref_ufs1(struct inode *ip, daddr_t lbn, int indx, int flags, 808 int32_t *bap /* XXX ondisk32 */) 809 { 810 struct fs *fs; 811 int cg; 812 int avgbfree, startcg; 813 814 KASSERT(mutex_owned(&ip->i_ump->um_lock)); 815 816 fs = ip->i_fs; 817 818 /* 819 * If allocating a contiguous file with B_CONTIG, use the hints 820 * in the inode extensions to return the desired block. 821 * 822 * For metadata (indirect blocks) return the address of where 823 * the first indirect block resides - we'll scan for the next 824 * available slot if we need to allocate more than one indirect 825 * block. For data, return the address of the actual block 826 * relative to the address of the first data block. 827 */ 828 if (flags & B_CONTIG) { 829 KASSERT(ip->i_ffs_first_data_blk != 0); 830 KASSERT(ip->i_ffs_first_indir_blk != 0); 831 if (flags & B_METAONLY) 832 return ip->i_ffs_first_indir_blk; 833 else 834 return ip->i_ffs_first_data_blk + blkstofrags(fs, lbn); 835 } 836 837 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { 838 if (lbn < NDADDR + NINDIR(fs)) { 839 cg = ino_to_cg(fs, ip->i_number); 840 return (cgbase(fs, cg) + fs->fs_frag); 841 } 842 /* 843 * Find a cylinder with greater than average number of 844 * unused data blocks. 845 */ 846 if (indx == 0 || bap[indx - 1] == 0) 847 startcg = 848 ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg; 849 else 850 startcg = dtog(fs, 851 ufs_rw32(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + 1); 852 startcg %= fs->fs_ncg; 853 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; 854 for (cg = startcg; cg < fs->fs_ncg; cg++) 855 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 856 return (cgbase(fs, cg) + fs->fs_frag); 857 } 858 for (cg = 0; cg < startcg; cg++) 859 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 860 return (cgbase(fs, cg) + fs->fs_frag); 861 } 862 return (0); 863 } 864 /* 865 * We just always try to lay things out contiguously.
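 * (i.e. the preferred address is simply the block that follows the
 * previous allocation: bap[indx - 1] plus fs_frag fragments.)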
866 */ 867 return ufs_rw32(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + fs->fs_frag; 868 } 869 870 daddr_t 871 ffs_blkpref_ufs2(struct inode *ip, daddr_t lbn, int indx, int flags, 872 int64_t *bap) 873 { 874 struct fs *fs; 875 int cg; 876 int avgbfree, startcg; 877 878 KASSERT(mutex_owned(&ip->i_ump->um_lock)); 879 880 fs = ip->i_fs; 881 882 /* 883 * If allocating a contiguous file with B_CONTIG, use the hints 884 * in the inode extensions to return the desired block. 885 * 886 * For metadata (indirect blocks) return the address of where 887 * the first indirect block resides - we'll scan for the next 888 * available slot if we need to allocate more than one indirect 889 * block. For data, return the address of the actual block 890 * relative to the address of the first data block. 891 */ 892 if (flags & B_CONTIG) { 893 KASSERT(ip->i_ffs_first_data_blk != 0); 894 KASSERT(ip->i_ffs_first_indir_blk != 0); 895 if (flags & B_METAONLY) 896 return ip->i_ffs_first_indir_blk; 897 else 898 return ip->i_ffs_first_data_blk + blkstofrags(fs, lbn); 899 } 900 901 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { 902 if (lbn < NDADDR + NINDIR(fs)) { 903 cg = ino_to_cg(fs, ip->i_number); 904 return (cgbase(fs, cg) + fs->fs_frag); 905 } 906 /* 907 * Find a cylinder with greater than average number of 908 * unused data blocks. 909 */ 910 if (indx == 0 || bap[indx - 1] == 0) 911 startcg = 912 ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg; 913 else 914 startcg = dtog(fs, 915 ufs_rw64(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + 1); 916 startcg %= fs->fs_ncg; 917 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; 918 for (cg = startcg; cg < fs->fs_ncg; cg++) 919 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 920 return (cgbase(fs, cg) + fs->fs_frag); 921 } 922 for (cg = 0; cg < startcg; cg++) 923 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 924 return (cgbase(fs, cg) + fs->fs_frag); 925 } 926 return (0); 927 } 928 /* 929 * We just always try to lay things out contiguously. 930 */ 931 return ufs_rw64(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + fs->fs_frag; 932 } 933 934 935 /* 936 * Implement the cylinder overflow algorithm. 937 * 938 * The policy implemented by this algorithm is: 939 * 1) allocate the block in its requested cylinder group. 940 * 2) quadratically rehash on the cylinder group number. 941 * 3) brute force search for a free block. 942 * 943 * => called with um_lock held 944 * => returns with um_lock released on success, held on failure 945 * (*allocator releases lock on success, retains lock on failure) 946 */ 947 /*VARARGS5*/ 948 static daddr_t 949 ffs_hashalloc(struct inode *ip, int cg, daddr_t pref, 950 int size /* size for data blocks, mode for inodes */, 951 int flags, daddr_t (*allocator)(struct inode *, int, daddr_t, int, int)) 952 { 953 struct fs *fs; 954 daddr_t result; 955 int i, icg = cg; 956 957 fs = ip->i_fs; 958 /* 959 * 1: preferred cylinder group 960 */ 961 result = (*allocator)(ip, cg, pref, size, flags); 962 if (result) 963 return (result); 964 965 if (flags & B_CONTIG) 966 return (result); 967 /* 968 * 2: quadratic rehash 969 */ 970 for (i = 1; i < fs->fs_ncg; i *= 2) { 971 cg += i; 972 if (cg >= fs->fs_ncg) 973 cg -= fs->fs_ncg; 974 result = (*allocator)(ip, cg, 0, size, flags); 975 if (result) 976 return (result); 977 } 978 /* 979 * 3: brute force search 980 * Note that we start at i == 2, since 0 was checked initially, 981 * and 1 is always checked in the quadratic rehash.
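 * For example, starting from cylinder group icg the quadratic pass
 * above probes icg+1, icg+3, icg+7, ... (mod fs_ncg), doubling the
 * step each time; the brute force pass then starts at icg + 2.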
982 */ 983 cg = (icg + 2) % fs->fs_ncg; 984 for (i = 2; i < fs->fs_ncg; i++) { 985 result = (*allocator)(ip, cg, 0, size, flags); 986 if (result) 987 return (result); 988 cg++; 989 if (cg == fs->fs_ncg) 990 cg = 0; 991 } 992 return (0); 993 } 994 995 /* 996 * Determine whether a fragment can be extended. 997 * 998 * Check to see if the necessary fragments are available, and 999 * if they are, allocate them. 1000 * 1001 * => called with um_lock held 1002 * => returns with um_lock released on success, held on failure 1003 */ 1004 static daddr_t 1005 ffs_fragextend(struct inode *ip, int cg, daddr_t bprev, int osize, int nsize) 1006 { 1007 struct ufsmount *ump; 1008 struct fs *fs; 1009 struct cg *cgp; 1010 struct buf *bp; 1011 daddr_t bno; 1012 int frags, bbase; 1013 int i, error; 1014 u_int8_t *blksfree; 1015 1016 fs = ip->i_fs; 1017 ump = ip->i_ump; 1018 1019 KASSERT(mutex_owned(&ump->um_lock)); 1020 1021 if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize)) 1022 return (0); 1023 frags = numfrags(fs, nsize); 1024 bbase = fragnum(fs, bprev); 1025 if (bbase > fragnum(fs, (bprev + frags - 1))) { 1026 /* cannot extend across a block boundary */ 1027 return (0); 1028 } 1029 mutex_exit(&ump->um_lock); 1030 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), 1031 (int)fs->fs_cgsize, NOCRED, B_MODIFY, &bp); 1032 if (error) 1033 goto fail; 1034 cgp = (struct cg *)bp->b_data; 1035 if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs))) 1036 goto fail; 1037 cgp->cg_old_time = ufs_rw32(time_second, UFS_FSNEEDSWAP(fs)); 1038 if ((fs->fs_magic != FS_UFS1_MAGIC) || 1039 (fs->fs_old_flags & FS_FLAGS_UPDATED)) 1040 cgp->cg_time = ufs_rw64(time_second, UFS_FSNEEDSWAP(fs)); 1041 bno = dtogd(fs, bprev); 1042 blksfree = cg_blksfree(cgp, UFS_FSNEEDSWAP(fs)); 1043 for (i = numfrags(fs, osize); i < frags; i++) 1044 if (isclr(blksfree, bno + i)) 1045 goto fail; 1046 /* 1047 * the current fragment can be extended 1048 * deduct the count on fragment being extended into 1049 * increase the count on the remaining fragment (if any) 1050 * allocate the extended piece 1051 */ 1052 for (i = frags; i < fs->fs_frag - bbase; i++) 1053 if (isclr(blksfree, bno + i)) 1054 break; 1055 ufs_add32(cgp->cg_frsum[i - numfrags(fs, osize)], -1, UFS_FSNEEDSWAP(fs)); 1056 if (i != frags) 1057 ufs_add32(cgp->cg_frsum[i - frags], 1, UFS_FSNEEDSWAP(fs)); 1058 mutex_enter(&ump->um_lock); 1059 for (i = numfrags(fs, osize); i < frags; i++) { 1060 clrbit(blksfree, bno + i); 1061 ufs_add32(cgp->cg_cs.cs_nffree, -1, UFS_FSNEEDSWAP(fs)); 1062 fs->fs_cstotal.cs_nffree--; 1063 fs->fs_cs(fs, cg).cs_nffree--; 1064 } 1065 fs->fs_fmod = 1; 1066 ACTIVECG_CLR(fs, cg); 1067 mutex_exit(&ump->um_lock); 1068 bdwrite(bp); 1069 return (bprev); 1070 1071 fail: 1072 brelse(bp, 0); 1073 mutex_enter(&ump->um_lock); 1074 return (0); 1075 } 1076 1077 /* 1078 * Determine whether a block can be allocated. 1079 * 1080 * Check to see if a block of the appropriate size is available, 1081 * and if it is, allocate it. 
1082 */ 1083 static daddr_t 1084 ffs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size, int flags) 1085 { 1086 struct ufsmount *ump; 1087 struct fs *fs = ip->i_fs; 1088 struct cg *cgp; 1089 struct buf *bp; 1090 int32_t bno; 1091 daddr_t blkno; 1092 int error, frags, allocsiz, i; 1093 u_int8_t *blksfree; 1094 #ifdef FFS_EI 1095 const int needswap = UFS_FSNEEDSWAP(fs); 1096 #endif 1097 1098 ump = ip->i_ump; 1099 1100 KASSERT(mutex_owned(&ump->um_lock)); 1101 1102 if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize) 1103 return (0); 1104 mutex_exit(&ump->um_lock); 1105 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), 1106 (int)fs->fs_cgsize, NOCRED, B_MODIFY, &bp); 1107 if (error) 1108 goto fail; 1109 cgp = (struct cg *)bp->b_data; 1110 if (!cg_chkmagic(cgp, needswap) || 1111 (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) 1112 goto fail; 1113 cgp->cg_old_time = ufs_rw32(time_second, needswap); 1114 if ((fs->fs_magic != FS_UFS1_MAGIC) || 1115 (fs->fs_old_flags & FS_FLAGS_UPDATED)) 1116 cgp->cg_time = ufs_rw64(time_second, needswap); 1117 if (size == fs->fs_bsize) { 1118 mutex_enter(&ump->um_lock); 1119 blkno = ffs_alloccgblk(ip, bp, bpref, flags); 1120 ACTIVECG_CLR(fs, cg); 1121 mutex_exit(&ump->um_lock); 1122 bdwrite(bp); 1123 return (blkno); 1124 } 1125 /* 1126 * check to see if any fragments are already available 1127 * allocsiz is the size which will be allocated, hacking 1128 * it down to a smaller size if necessary 1129 */ 1130 blksfree = cg_blksfree(cgp, needswap); 1131 frags = numfrags(fs, size); 1132 for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++) 1133 if (cgp->cg_frsum[allocsiz] != 0) 1134 break; 1135 if (allocsiz == fs->fs_frag) { 1136 /* 1137 * no fragments were available, so a block will be 1138 * allocated, and hacked up 1139 */ 1140 if (cgp->cg_cs.cs_nbfree == 0) 1141 goto fail; 1142 mutex_enter(&ump->um_lock); 1143 blkno = ffs_alloccgblk(ip, bp, bpref, flags); 1144 bno = dtogd(fs, blkno); 1145 for (i = frags; i < fs->fs_frag; i++) 1146 setbit(blksfree, bno + i); 1147 i = fs->fs_frag - frags; 1148 ufs_add32(cgp->cg_cs.cs_nffree, i, needswap); 1149 fs->fs_cstotal.cs_nffree += i; 1150 fs->fs_cs(fs, cg).cs_nffree += i; 1151 fs->fs_fmod = 1; 1152 ufs_add32(cgp->cg_frsum[i], 1, needswap); 1153 ACTIVECG_CLR(fs, cg); 1154 mutex_exit(&ump->um_lock); 1155 bdwrite(bp); 1156 return (blkno); 1157 } 1158 bno = ffs_mapsearch(fs, cgp, bpref, allocsiz); 1159 #if 0 1160 /* 1161 * XXX fvdl mapsearch will panic, and never return -1 1162 * also: returning NULL as daddr_t ? 1163 */ 1164 if (bno < 0) 1165 goto fail; 1166 #endif 1167 for (i = 0; i < frags; i++) 1168 clrbit(blksfree, bno + i); 1169 mutex_enter(&ump->um_lock); 1170 ufs_add32(cgp->cg_cs.cs_nffree, -frags, needswap); 1171 fs->fs_cstotal.cs_nffree -= frags; 1172 fs->fs_cs(fs, cg).cs_nffree -= frags; 1173 fs->fs_fmod = 1; 1174 ufs_add32(cgp->cg_frsum[allocsiz], -1, needswap); 1175 if (frags != allocsiz) 1176 ufs_add32(cgp->cg_frsum[allocsiz - frags], 1, needswap); 1177 blkno = cgbase(fs, cg) + bno; 1178 ACTIVECG_CLR(fs, cg); 1179 mutex_exit(&ump->um_lock); 1180 bdwrite(bp); 1181 return blkno; 1182 1183 fail: 1184 brelse(bp, 0); 1185 mutex_enter(&ump->um_lock); 1186 return (0); 1187 } 1188 1189 /* 1190 * Allocate a block in a cylinder group. 1191 * 1192 * This algorithm implements the following policy: 1193 * 1) allocate the requested block. 1194 * 2) allocate a rotationally optimal block in the same cylinder. 
1195 * 3) allocate the next available block on the block rotor for the 1196 * specified cylinder group. 1197 * Note that this routine only allocates fs_bsize blocks; these 1198 * blocks may be fragmented by the routine that allocates them. 1199 */ 1200 static daddr_t 1201 ffs_alloccgblk(struct inode *ip, struct buf *bp, daddr_t bpref, int flags) 1202 { 1203 struct ufsmount *ump; 1204 struct fs *fs = ip->i_fs; 1205 struct cg *cgp; 1206 int cg; 1207 daddr_t blkno; 1208 int32_t bno; 1209 u_int8_t *blksfree; 1210 #ifdef FFS_EI 1211 const int needswap = UFS_FSNEEDSWAP(fs); 1212 #endif 1213 1214 ump = ip->i_ump; 1215 1216 KASSERT(mutex_owned(&ump->um_lock)); 1217 1218 cgp = (struct cg *)bp->b_data; 1219 blksfree = cg_blksfree(cgp, needswap); 1220 if (bpref == 0 || dtog(fs, bpref) != ufs_rw32(cgp->cg_cgx, needswap)) { 1221 bpref = ufs_rw32(cgp->cg_rotor, needswap); 1222 } else { 1223 bpref = blknum(fs, bpref); 1224 bno = dtogd(fs, bpref); 1225 /* 1226 * if the requested block is available, use it 1227 */ 1228 if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno))) 1229 goto gotit; 1230 /* 1231 * if the requested data block isn't available and we are 1232 * trying to allocate a contiguous file, return an error. 1233 */ 1234 if ((flags & (B_CONTIG | B_METAONLY)) == B_CONTIG) 1235 return (0); 1236 } 1237 1238 /* 1239 * Take the next available block in this cylinder group. 1240 */ 1241 bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag); 1242 if (bno < 0) 1243 return (0); 1244 cgp->cg_rotor = ufs_rw32(bno, needswap); 1245 gotit: 1246 blkno = fragstoblks(fs, bno); 1247 ffs_clrblock(fs, blksfree, blkno); 1248 ffs_clusteracct(fs, cgp, blkno, -1); 1249 ufs_add32(cgp->cg_cs.cs_nbfree, -1, needswap); 1250 fs->fs_cstotal.cs_nbfree--; 1251 fs->fs_cs(fs, ufs_rw32(cgp->cg_cgx, needswap)).cs_nbfree--; 1252 if ((fs->fs_magic == FS_UFS1_MAGIC) && 1253 ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) { 1254 int cylno; 1255 cylno = old_cbtocylno(fs, bno); 1256 KASSERT(cylno >= 0); 1257 KASSERT(cylno < fs->fs_old_ncyl); 1258 KASSERT(old_cbtorpos(fs, bno) >= 0); 1259 KASSERT(fs->fs_old_nrpos == 0 || old_cbtorpos(fs, bno) < fs->fs_old_nrpos); 1260 ufs_add16(old_cg_blks(fs, cgp, cylno, needswap)[old_cbtorpos(fs, bno)], -1, 1261 needswap); 1262 ufs_add32(old_cg_blktot(cgp, needswap)[cylno], -1, needswap); 1263 } 1264 fs->fs_fmod = 1; 1265 cg = ufs_rw32(cgp->cg_cgx, needswap); 1266 blkno = cgbase(fs, cg) + bno; 1267 return (blkno); 1268 } 1269 1270 /* 1271 * Determine whether an inode can be allocated. 1272 * 1273 * Check to see if an inode is available, and if it is, 1274 * allocate it using the following policy: 1275 * 1) allocate the requested inode. 1276 * 2) allocate the next available inode after the requested 1277 * inode in the specified cylinder group. 
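 * The search below scans the cg_inosused bitmap with skpc(), starting
 * at cg_irotor, for the first byte with a clear bit, wrapping to the
 * start of the map if necessary, and then takes the first clear bit
 * within that byte.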
1278 */ 1279 static daddr_t 1280 ffs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode, int flags) 1281 { 1282 struct ufsmount *ump = ip->i_ump; 1283 struct fs *fs = ip->i_fs; 1284 struct cg *cgp; 1285 struct buf *bp, *ibp; 1286 u_int8_t *inosused; 1287 int error, start, len, loc, map, i; 1288 int32_t initediblk; 1289 daddr_t nalloc; 1290 struct ufs2_dinode *dp2; 1291 #ifdef FFS_EI 1292 const int needswap = UFS_FSNEEDSWAP(fs); 1293 #endif 1294 1295 KASSERT(mutex_owned(&ump->um_lock)); 1296 UFS_WAPBL_JLOCK_ASSERT(ip->i_ump->um_mountp); 1297 1298 if (fs->fs_cs(fs, cg).cs_nifree == 0) 1299 return (0); 1300 mutex_exit(&ump->um_lock); 1301 ibp = NULL; 1302 initediblk = -1; 1303 retry: 1304 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), 1305 (int)fs->fs_cgsize, NOCRED, B_MODIFY, &bp); 1306 if (error) 1307 goto fail; 1308 cgp = (struct cg *)bp->b_data; 1309 if (!cg_chkmagic(cgp, needswap) || cgp->cg_cs.cs_nifree == 0) 1310 goto fail; 1311 1312 if (ibp != NULL && 1313 initediblk != ufs_rw32(cgp->cg_initediblk, needswap)) { 1314 /* Another thread allocated more inodes so we retry the test. */ 1315 brelse(ibp, 0); 1316 ibp = NULL; 1317 } 1318 /* 1319 * Check to see if we need to initialize more inodes. 1320 */ 1321 if (fs->fs_magic == FS_UFS2_MAGIC && ibp == NULL) { 1322 initediblk = ufs_rw32(cgp->cg_initediblk, needswap); 1323 nalloc = fs->fs_ipg - ufs_rw32(cgp->cg_cs.cs_nifree, needswap); 1324 if (nalloc + INOPB(fs) > initediblk && 1325 initediblk < ufs_rw32(cgp->cg_niblk, needswap)) { 1326 /* 1327 * We have to release the cg buffer here to prevent 1328 * a deadlock when reading the inode block will 1329 * run a copy-on-write that might use this cg. 1330 */ 1331 brelse(bp, 0); 1332 bp = NULL; 1333 error = ffs_getblk(ip->i_devvp, fsbtodb(fs, 1334 ino_to_fsba(fs, cg * fs->fs_ipg + initediblk)), 1335 FFS_NOBLK, fs->fs_bsize, false, &ibp); 1336 if (error) 1337 goto fail; 1338 goto retry; 1339 } 1340 } 1341 1342 cgp->cg_old_time = ufs_rw32(time_second, needswap); 1343 if ((fs->fs_magic != FS_UFS1_MAGIC) || 1344 (fs->fs_old_flags & FS_FLAGS_UPDATED)) 1345 cgp->cg_time = ufs_rw64(time_second, needswap); 1346 inosused = cg_inosused(cgp, needswap); 1347 if (ipref) { 1348 ipref %= fs->fs_ipg; 1349 if (isclr(inosused, ipref)) 1350 goto gotit; 1351 } 1352 start = ufs_rw32(cgp->cg_irotor, needswap) / NBBY; 1353 len = howmany(fs->fs_ipg - ufs_rw32(cgp->cg_irotor, needswap), 1354 NBBY); 1355 loc = skpc(0xff, len, &inosused[start]); 1356 if (loc == 0) { 1357 len = start + 1; 1358 start = 0; 1359 loc = skpc(0xff, len, &inosused[0]); 1360 if (loc == 0) { 1361 printf("cg = %d, irotor = %d, fs = %s\n", 1362 cg, ufs_rw32(cgp->cg_irotor, needswap), 1363 fs->fs_fsmnt); 1364 panic("ffs_nodealloccg: map corrupted"); 1365 /* NOTREACHED */ 1366 } 1367 } 1368 i = start + len - loc; 1369 map = inosused[i] ^ 0xff; 1370 if (map == 0) { 1371 printf("fs = %s\n", fs->fs_fsmnt); 1372 panic("ffs_nodealloccg: block not in map"); 1373 } 1374 ipref = i * NBBY + ffs(map) - 1; 1375 cgp->cg_irotor = ufs_rw32(ipref, needswap); 1376 gotit: 1377 UFS_WAPBL_REGISTER_INODE(ip->i_ump->um_mountp, cg * fs->fs_ipg + ipref, 1378 mode); 1379 /* 1380 * Check to see if we need to initialize more inodes. 1381 */ 1382 if (ibp != NULL) { 1383 KASSERT(initediblk == ufs_rw32(cgp->cg_initediblk, needswap)); 1384 memset(ibp->b_data, 0, fs->fs_bsize); 1385 dp2 = (struct ufs2_dinode *)(ibp->b_data); 1386 for (i = 0; i < INOPB(fs); i++) { 1387 /* 1388 * Don't bother to swap, it's supposed to be 1389 * random, after all. 
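 * The + 1 below presumably just keeps the initial generation number
 * nonzero.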
1390 */ 1391 dp2->di_gen = (arc4random() & INT32_MAX) / 2 + 1; 1392 dp2++; 1393 } 1394 initediblk += INOPB(fs); 1395 cgp->cg_initediblk = ufs_rw32(initediblk, needswap); 1396 } 1397 1398 mutex_enter(&ump->um_lock); 1399 ACTIVECG_CLR(fs, cg); 1400 setbit(inosused, ipref); 1401 ufs_add32(cgp->cg_cs.cs_nifree, -1, needswap); 1402 fs->fs_cstotal.cs_nifree--; 1403 fs->fs_cs(fs, cg).cs_nifree--; 1404 fs->fs_fmod = 1; 1405 if ((mode & IFMT) == IFDIR) { 1406 ufs_add32(cgp->cg_cs.cs_ndir, 1, needswap); 1407 fs->fs_cstotal.cs_ndir++; 1408 fs->fs_cs(fs, cg).cs_ndir++; 1409 } 1410 mutex_exit(&ump->um_lock); 1411 if (ibp != NULL) { 1412 bwrite(bp); 1413 bawrite(ibp); 1414 } else 1415 bdwrite(bp); 1416 return (cg * fs->fs_ipg + ipref); 1417 fail: 1418 if (bp != NULL) 1419 brelse(bp, 0); 1420 if (ibp != NULL) 1421 brelse(ibp, 0); 1422 mutex_enter(&ump->um_lock); 1423 return (0); 1424 } 1425 1426 /* 1427 * Allocate a block or fragment. 1428 * 1429 * The specified block or fragment is removed from the 1430 * free map, possibly fragmenting a block in the process. 1431 * 1432 * This implementation should mirror fs_blkfree 1433 * 1434 * => um_lock not held on entry or exit 1435 */ 1436 int 1437 ffs_blkalloc(struct inode *ip, daddr_t bno, long size) 1438 { 1439 int error; 1440 1441 error = ffs_check_bad_allocation(__func__, ip->i_fs, bno, size, 1442 ip->i_dev, ip->i_uid); 1443 if (error) 1444 return error; 1445 1446 return ffs_blkalloc_ump(ip->i_ump, bno, size); 1447 } 1448 1449 int 1450 ffs_blkalloc_ump(struct ufsmount *ump, daddr_t bno, long size) 1451 { 1452 struct fs *fs = ump->um_fs; 1453 struct cg *cgp; 1454 struct buf *bp; 1455 int32_t fragno, cgbno; 1456 int i, error, cg, blk, frags, bbase; 1457 u_int8_t *blksfree; 1458 const int needswap = UFS_FSNEEDSWAP(fs); 1459 1460 KASSERT((u_int)size <= fs->fs_bsize && fragoff(fs, size) == 0 && 1461 fragnum(fs, bno) + numfrags(fs, size) <= fs->fs_frag); 1462 KASSERT(bno < fs->fs_size); 1463 1464 cg = dtog(fs, bno); 1465 error = bread(ump->um_devvp, fsbtodb(fs, cgtod(fs, cg)), 1466 (int)fs->fs_cgsize, NOCRED, B_MODIFY, &bp); 1467 if (error) { 1468 brelse(bp, 0); 1469 return error; 1470 } 1471 cgp = (struct cg *)bp->b_data; 1472 if (!cg_chkmagic(cgp, needswap)) { 1473 brelse(bp, 0); 1474 return EIO; 1475 } 1476 cgp->cg_old_time = ufs_rw32(time_second, needswap); 1477 cgp->cg_time = ufs_rw64(time_second, needswap); 1478 cgbno = dtogd(fs, bno); 1479 blksfree = cg_blksfree(cgp, needswap); 1480 1481 mutex_enter(&ump->um_lock); 1482 if (size == fs->fs_bsize) { 1483 fragno = fragstoblks(fs, cgbno); 1484 if (!ffs_isblock(fs, blksfree, fragno)) { 1485 mutex_exit(&ump->um_lock); 1486 brelse(bp, 0); 1487 return EBUSY; 1488 } 1489 ffs_clrblock(fs, blksfree, fragno); 1490 ffs_clusteracct(fs, cgp, fragno, -1); 1491 ufs_add32(cgp->cg_cs.cs_nbfree, -1, needswap); 1492 fs->fs_cstotal.cs_nbfree--; 1493 fs->fs_cs(fs, cg).cs_nbfree--; 1494 } else { 1495 bbase = cgbno - fragnum(fs, cgbno); 1496 1497 frags = numfrags(fs, size); 1498 for (i = 0; i < frags; i++) { 1499 if (isclr(blksfree, cgbno + i)) { 1500 mutex_exit(&ump->um_lock); 1501 brelse(bp, 0); 1502 return EBUSY; 1503 } 1504 } 1505 /* 1506 * if a complete block is being split, account for it 1507 */ 1508 fragno = fragstoblks(fs, bbase); 1509 if (ffs_isblock(fs, blksfree, fragno)) { 1510 ufs_add32(cgp->cg_cs.cs_nffree, fs->fs_frag, needswap); 1511 fs->fs_cstotal.cs_nffree += fs->fs_frag; 1512 fs->fs_cs(fs, cg).cs_nffree += fs->fs_frag; 1513 ffs_clusteracct(fs, cgp, fragno, -1); 1514 ufs_add32(cgp->cg_cs.cs_nbfree, -1, needswap); 
1515 fs->fs_cstotal.cs_nbfree--; 1516 fs->fs_cs(fs, cg).cs_nbfree--; 1517 } 1518 /* 1519 * decrement the counts associated with the old frags 1520 */ 1521 blk = blkmap(fs, blksfree, bbase); 1522 ffs_fragacct(fs, blk, cgp->cg_frsum, -1, needswap); 1523 /* 1524 * allocate the fragment 1525 */ 1526 for (i = 0; i < frags; i++) { 1527 clrbit(blksfree, cgbno + i); 1528 } 1529 ufs_add32(cgp->cg_cs.cs_nffree, -i, needswap); 1530 fs->fs_cstotal.cs_nffree -= i; 1531 fs->fs_cs(fs, cg).cs_nffree -= i; 1532 /* 1533 * add back in counts associated with the new frags 1534 */ 1535 blk = blkmap(fs, blksfree, bbase); 1536 ffs_fragacct(fs, blk, cgp->cg_frsum, 1, needswap); 1537 } 1538 fs->fs_fmod = 1; 1539 ACTIVECG_CLR(fs, cg); 1540 mutex_exit(&ump->um_lock); 1541 bdwrite(bp); 1542 return 0; 1543 } 1544 1545 /* 1546 * Free a block or fragment. 1547 * 1548 * The specified block or fragment is placed back in the 1549 * free map. If a fragment is deallocated, a possible 1550 * block reassembly is checked. 1551 * 1552 * => um_lock not held on entry or exit 1553 */ 1554 void 1555 ffs_blkfree(struct fs *fs, struct vnode *devvp, daddr_t bno, long size, 1556 ino_t inum) 1557 { 1558 struct cg *cgp; 1559 struct buf *bp; 1560 struct ufsmount *ump; 1561 daddr_t cgblkno; 1562 int error, cg; 1563 dev_t dev; 1564 const bool devvp_is_snapshot = (devvp->v_type != VBLK); 1565 #ifdef FFS_EI 1566 const int needswap = UFS_FSNEEDSWAP(fs); 1567 #endif 1568 1569 KASSERT(!devvp_is_snapshot); 1570 1571 cg = dtog(fs, bno); 1572 dev = devvp->v_rdev; 1573 ump = VFSTOUFS(devvp->v_specmountpoint); 1574 KASSERT(fs == ump->um_fs); 1575 cgblkno = fsbtodb(fs, cgtod(fs, cg)); 1576 if (ffs_snapblkfree(fs, devvp, bno, size, inum)) 1577 return; 1578 1579 error = ffs_check_bad_allocation(__func__, fs, bno, size, dev, inum); 1580 if (error) 1581 return; 1582 1583 error = bread(devvp, cgblkno, (int)fs->fs_cgsize, 1584 NOCRED, B_MODIFY, &bp); 1585 if (error) { 1586 brelse(bp, 0); 1587 return; 1588 } 1589 cgp = (struct cg *)bp->b_data; 1590 if (!cg_chkmagic(cgp, needswap)) { 1591 brelse(bp, 0); 1592 return; 1593 } 1594 1595 ffs_blkfree_common(ump, fs, dev, bp, bno, size, devvp_is_snapshot); 1596 1597 bdwrite(bp); 1598 } 1599 1600 /* 1601 * Free a block or fragment from a snapshot cg copy. 1602 * 1603 * The specified block or fragment is placed back in the 1604 * free map. If a fragment is deallocated, a possible 1605 * block reassembly is checked. 
1606 * 1607 * => um_lock not held on entry or exit 1608 */ 1609 void 1610 ffs_blkfree_snap(struct fs *fs, struct vnode *devvp, daddr_t bno, long size, 1611 ino_t inum) 1612 { 1613 struct cg *cgp; 1614 struct buf *bp; 1615 struct ufsmount *ump; 1616 daddr_t cgblkno; 1617 int error, cg; 1618 dev_t dev; 1619 const bool devvp_is_snapshot = (devvp->v_type != VBLK); 1620 #ifdef FFS_EI 1621 const int needswap = UFS_FSNEEDSWAP(fs); 1622 #endif 1623 1624 KASSERT(devvp_is_snapshot); 1625 1626 cg = dtog(fs, bno); 1627 dev = VTOI(devvp)->i_devvp->v_rdev; 1628 ump = VFSTOUFS(devvp->v_mount); 1629 cgblkno = fragstoblks(fs, cgtod(fs, cg)); 1630 1631 error = ffs_check_bad_allocation(__func__, fs, bno, size, dev, inum); 1632 if (error) 1633 return; 1634 1635 error = bread(devvp, cgblkno, (int)fs->fs_cgsize, 1636 NOCRED, B_MODIFY, &bp); 1637 if (error) { 1638 brelse(bp, 0); 1639 return; 1640 } 1641 cgp = (struct cg *)bp->b_data; 1642 if (!cg_chkmagic(cgp, needswap)) { 1643 brelse(bp, 0); 1644 return; 1645 } 1646 1647 ffs_blkfree_common(ump, fs, dev, bp, bno, size, devvp_is_snapshot); 1648 1649 bdwrite(bp); 1650 } 1651 1652 static void 1653 ffs_blkfree_common(struct ufsmount *ump, struct fs *fs, dev_t dev, 1654 struct buf *bp, daddr_t bno, long size, bool devvp_is_snapshot) 1655 { 1656 struct cg *cgp; 1657 int32_t fragno, cgbno; 1658 int i, cg, blk, frags, bbase; 1659 u_int8_t *blksfree; 1660 const int needswap = UFS_FSNEEDSWAP(fs); 1661 1662 cg = dtog(fs, bno); 1663 cgp = (struct cg *)bp->b_data; 1664 cgp->cg_old_time = ufs_rw32(time_second, needswap); 1665 if ((fs->fs_magic != FS_UFS1_MAGIC) || 1666 (fs->fs_old_flags & FS_FLAGS_UPDATED)) 1667 cgp->cg_time = ufs_rw64(time_second, needswap); 1668 cgbno = dtogd(fs, bno); 1669 blksfree = cg_blksfree(cgp, needswap); 1670 mutex_enter(&ump->um_lock); 1671 if (size == fs->fs_bsize) { 1672 fragno = fragstoblks(fs, cgbno); 1673 if (!ffs_isfreeblock(fs, blksfree, fragno)) { 1674 if (devvp_is_snapshot) { 1675 mutex_exit(&ump->um_lock); 1676 return; 1677 } 1678 printf("dev = 0x%llx, block = %" PRId64 ", fs = %s\n", 1679 (unsigned long long)dev, bno, fs->fs_fsmnt); 1680 panic("blkfree: freeing free block"); 1681 } 1682 ffs_setblock(fs, blksfree, fragno); 1683 ffs_clusteracct(fs, cgp, fragno, 1); 1684 ufs_add32(cgp->cg_cs.cs_nbfree, 1, needswap); 1685 fs->fs_cstotal.cs_nbfree++; 1686 fs->fs_cs(fs, cg).cs_nbfree++; 1687 if ((fs->fs_magic == FS_UFS1_MAGIC) && 1688 ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) { 1689 i = old_cbtocylno(fs, cgbno); 1690 KASSERT(i >= 0); 1691 KASSERT(i < fs->fs_old_ncyl); 1692 KASSERT(old_cbtorpos(fs, cgbno) >= 0); 1693 KASSERT(fs->fs_old_nrpos == 0 || old_cbtorpos(fs, cgbno) < fs->fs_old_nrpos); 1694 ufs_add16(old_cg_blks(fs, cgp, i, needswap)[old_cbtorpos(fs, cgbno)], 1, 1695 needswap); 1696 ufs_add32(old_cg_blktot(cgp, needswap)[i], 1, needswap); 1697 } 1698 } else { 1699 bbase = cgbno - fragnum(fs, cgbno); 1700 /* 1701 * decrement the counts associated with the old frags 1702 */ 1703 blk = blkmap(fs, blksfree, bbase); 1704 ffs_fragacct(fs, blk, cgp->cg_frsum, -1, needswap); 1705 /* 1706 * deallocate the fragment 1707 */ 1708 frags = numfrags(fs, size); 1709 for (i = 0; i < frags; i++) { 1710 if (isset(blksfree, cgbno + i)) { 1711 printf("dev = 0x%llx, block = %" PRId64 1712 ", fs = %s\n", 1713 (unsigned long long)dev, bno + i, 1714 fs->fs_fsmnt); 1715 panic("blkfree: freeing free frag"); 1716 } 1717 setbit(blksfree, cgbno + i); 1718 } 1719 ufs_add32(cgp->cg_cs.cs_nffree, i, needswap); 1720 fs->fs_cstotal.cs_nffree += i; 1721 fs->fs_cs(fs, 
cg).cs_nffree += i; 1722 /* 1723 * add back in counts associated with the new frags 1724 */ 1725 blk = blkmap(fs, blksfree, bbase); 1726 ffs_fragacct(fs, blk, cgp->cg_frsum, 1, needswap); 1727 /* 1728 * if a complete block has been reassembled, account for it 1729 */ 1730 fragno = fragstoblks(fs, bbase); 1731 if (ffs_isblock(fs, blksfree, fragno)) { 1732 ufs_add32(cgp->cg_cs.cs_nffree, -fs->fs_frag, needswap); 1733 fs->fs_cstotal.cs_nffree -= fs->fs_frag; 1734 fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag; 1735 ffs_clusteracct(fs, cgp, fragno, 1); 1736 ufs_add32(cgp->cg_cs.cs_nbfree, 1, needswap); 1737 fs->fs_cstotal.cs_nbfree++; 1738 fs->fs_cs(fs, cg).cs_nbfree++; 1739 if ((fs->fs_magic == FS_UFS1_MAGIC) && 1740 ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) { 1741 i = old_cbtocylno(fs, bbase); 1742 KASSERT(i >= 0); 1743 KASSERT(i < fs->fs_old_ncyl); 1744 KASSERT(old_cbtorpos(fs, bbase) >= 0); 1745 KASSERT(fs->fs_old_nrpos == 0 || old_cbtorpos(fs, bbase) < fs->fs_old_nrpos); 1746 ufs_add16(old_cg_blks(fs, cgp, i, needswap)[old_cbtorpos(fs, 1747 bbase)], 1, needswap); 1748 ufs_add32(old_cg_blktot(cgp, needswap)[i], 1, needswap); 1749 } 1750 } 1751 } 1752 fs->fs_fmod = 1; 1753 ACTIVECG_CLR(fs, cg); 1754 mutex_exit(&ump->um_lock); 1755 } 1756 1757 /* 1758 * Free an inode. 1759 */ 1760 int 1761 ffs_vfree(struct vnode *vp, ino_t ino, int mode) 1762 { 1763 1764 return ffs_freefile(vp->v_mount, ino, mode); 1765 } 1766 1767 /* 1768 * Do the actual free operation. 1769 * The specified inode is placed back in the free map. 1770 * 1771 * => um_lock not held on entry or exit 1772 */ 1773 int 1774 ffs_freefile(struct mount *mp, ino_t ino, int mode) 1775 { 1776 struct ufsmount *ump = VFSTOUFS(mp); 1777 struct fs *fs = ump->um_fs; 1778 struct vnode *devvp; 1779 struct cg *cgp; 1780 struct buf *bp; 1781 int error, cg; 1782 daddr_t cgbno; 1783 dev_t dev; 1784 #ifdef FFS_EI 1785 const int needswap = UFS_FSNEEDSWAP(fs); 1786 #endif 1787 1788 cg = ino_to_cg(fs, ino); 1789 devvp = ump->um_devvp; 1790 dev = devvp->v_rdev; 1791 cgbno = fsbtodb(fs, cgtod(fs, cg)); 1792 1793 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg) 1794 panic("ifree: range: dev = 0x%llx, ino = %llu, fs = %s", 1795 (long long)dev, (unsigned long long)ino, fs->fs_fsmnt); 1796 error = bread(devvp, cgbno, (int)fs->fs_cgsize, 1797 NOCRED, B_MODIFY, &bp); 1798 if (error) { 1799 brelse(bp, 0); 1800 return (error); 1801 } 1802 cgp = (struct cg *)bp->b_data; 1803 if (!cg_chkmagic(cgp, needswap)) { 1804 brelse(bp, 0); 1805 return (0); 1806 } 1807 1808 ffs_freefile_common(ump, fs, dev, bp, ino, mode, false); 1809 1810 bdwrite(bp); 1811 1812 return 0; 1813 } 1814 1815 int 1816 ffs_freefile_snap(struct fs *fs, struct vnode *devvp, ino_t ino, int mode) 1817 { 1818 struct ufsmount *ump; 1819 struct cg *cgp; 1820 struct buf *bp; 1821 int error, cg; 1822 daddr_t cgbno; 1823 dev_t dev; 1824 #ifdef FFS_EI 1825 const int needswap = UFS_FSNEEDSWAP(fs); 1826 #endif 1827 1828 KASSERT(devvp->v_type != VBLK); 1829 1830 cg = ino_to_cg(fs, ino); 1831 dev = VTOI(devvp)->i_devvp->v_rdev; 1832 ump = VFSTOUFS(devvp->v_mount); 1833 cgbno = fragstoblks(fs, cgtod(fs, cg)); 1834 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg) 1835 panic("ifree: range: dev = 0x%llx, ino = %llu, fs = %s", 1836 (unsigned long long)dev, (unsigned long long)ino, 1837 fs->fs_fsmnt); 1838 error = bread(devvp, cgbno, (int)fs->fs_cgsize, 1839 NOCRED, B_MODIFY, &bp); 1840 if (error) { 1841 brelse(bp, 0); 1842 return (error); 1843 } 1844 cgp = (struct cg *)bp->b_data; 1845 if (!cg_chkmagic(cgp, needswap)) { 1846 
brelse(bp, 0); 1847 return (0); 1848 } 1849 ffs_freefile_common(ump, fs, dev, bp, ino, mode, true); 1850 1851 bdwrite(bp); 1852 1853 return 0; 1854 } 1855 1856 static void 1857 ffs_freefile_common(struct ufsmount *ump, struct fs *fs, dev_t dev, 1858 struct buf *bp, ino_t ino, int mode, bool devvp_is_snapshot) 1859 { 1860 int cg; 1861 struct cg *cgp; 1862 u_int8_t *inosused; 1863 #ifdef FFS_EI 1864 const int needswap = UFS_FSNEEDSWAP(fs); 1865 #endif 1866 1867 cg = ino_to_cg(fs, ino); 1868 cgp = (struct cg *)bp->b_data; 1869 cgp->cg_old_time = ufs_rw32(time_second, needswap); 1870 if ((fs->fs_magic != FS_UFS1_MAGIC) || 1871 (fs->fs_old_flags & FS_FLAGS_UPDATED)) 1872 cgp->cg_time = ufs_rw64(time_second, needswap); 1873 inosused = cg_inosused(cgp, needswap); 1874 ino %= fs->fs_ipg; 1875 if (isclr(inosused, ino)) { 1876 printf("ifree: dev = 0x%llx, ino = %llu, fs = %s\n", 1877 (unsigned long long)dev, (unsigned long long)ino + 1878 cg * fs->fs_ipg, fs->fs_fsmnt); 1879 if (fs->fs_ronly == 0) 1880 panic("ifree: freeing free inode"); 1881 } 1882 clrbit(inosused, ino); 1883 if (!devvp_is_snapshot) 1884 UFS_WAPBL_UNREGISTER_INODE(ump->um_mountp, 1885 ino + cg * fs->fs_ipg, mode); 1886 if (ino < ufs_rw32(cgp->cg_irotor, needswap)) 1887 cgp->cg_irotor = ufs_rw32(ino, needswap); 1888 ufs_add32(cgp->cg_cs.cs_nifree, 1, needswap); 1889 mutex_enter(&ump->um_lock); 1890 fs->fs_cstotal.cs_nifree++; 1891 fs->fs_cs(fs, cg).cs_nifree++; 1892 if ((mode & IFMT) == IFDIR) { 1893 ufs_add32(cgp->cg_cs.cs_ndir, -1, needswap); 1894 fs->fs_cstotal.cs_ndir--; 1895 fs->fs_cs(fs, cg).cs_ndir--; 1896 } 1897 fs->fs_fmod = 1; 1898 ACTIVECG_CLR(fs, cg); 1899 mutex_exit(&ump->um_lock); 1900 } 1901 1902 /* 1903 * Check to see if a file is free. 1904 */ 1905 int 1906 ffs_checkfreefile(struct fs *fs, struct vnode *devvp, ino_t ino) 1907 { 1908 struct cg *cgp; 1909 struct buf *bp; 1910 daddr_t cgbno; 1911 int ret, cg; 1912 u_int8_t *inosused; 1913 const bool devvp_is_snapshot = (devvp->v_type != VBLK); 1914 1915 KASSERT(devvp_is_snapshot); 1916 1917 cg = ino_to_cg(fs, ino); 1918 if (devvp_is_snapshot) 1919 cgbno = fragstoblks(fs, cgtod(fs, cg)); 1920 else 1921 cgbno = fsbtodb(fs, cgtod(fs, cg)); 1922 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg) 1923 return 1; 1924 if (bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, 0, &bp)) { 1925 brelse(bp, 0); 1926 return 1; 1927 } 1928 cgp = (struct cg *)bp->b_data; 1929 if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs))) { 1930 brelse(bp, 0); 1931 return 1; 1932 } 1933 inosused = cg_inosused(cgp, UFS_FSNEEDSWAP(fs)); 1934 ino %= fs->fs_ipg; 1935 ret = isclr(inosused, ino); 1936 brelse(bp, 0); 1937 return ret; 1938 } 1939 1940 /* 1941 * Find a block of the specified size in the specified cylinder group. 1942 * 1943 * It is a panic if a request is made to find a block if none are 1944 * available. 
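 * The search below runs scanc() over the free-fragment map with the
 * fragtbl[] pattern tables: one pass from the rotor (or the preferred
 * fragment) to the end of the map, then a wrap-around pass from the
 * start; the around[]/inside[] masks then locate the exact run of
 * allocsiz free fragments within the matching byte.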
1945 */ 1946 static int32_t 1947 ffs_mapsearch(struct fs *fs, struct cg *cgp, daddr_t bpref, int allocsiz) 1948 { 1949 int32_t bno; 1950 int start, len, loc, i; 1951 int blk, field, subfield, pos; 1952 int ostart, olen; 1953 u_int8_t *blksfree; 1954 #ifdef FFS_EI 1955 const int needswap = UFS_FSNEEDSWAP(fs); 1956 #endif 1957 1958 /* KASSERT(mutex_owned(&ump->um_lock)); */ 1959 1960 /* 1961 * find the fragment by searching through the free block 1962 * map for an appropriate bit pattern 1963 */ 1964 if (bpref) 1965 start = dtogd(fs, bpref) / NBBY; 1966 else 1967 start = ufs_rw32(cgp->cg_frotor, needswap) / NBBY; 1968 blksfree = cg_blksfree(cgp, needswap); 1969 len = howmany(fs->fs_fpg, NBBY) - start; 1970 ostart = start; 1971 olen = len; 1972 loc = scanc((u_int)len, 1973 (const u_char *)&blksfree[start], 1974 (const u_char *)fragtbl[fs->fs_frag], 1975 (1 << (allocsiz - 1 + (fs->fs_frag & (NBBY - 1))))); 1976 if (loc == 0) { 1977 len = start + 1; 1978 start = 0; 1979 loc = scanc((u_int)len, 1980 (const u_char *)&blksfree[0], 1981 (const u_char *)fragtbl[fs->fs_frag], 1982 (1 << (allocsiz - 1 + (fs->fs_frag & (NBBY - 1))))); 1983 if (loc == 0) { 1984 printf("start = %d, len = %d, fs = %s\n", 1985 ostart, olen, fs->fs_fsmnt); 1986 printf("offset=%d %ld\n", 1987 ufs_rw32(cgp->cg_freeoff, needswap), 1988 (long)blksfree - (long)cgp); 1989 printf("cg %d\n", cgp->cg_cgx); 1990 panic("ffs_alloccg: map corrupted"); 1991 /* NOTREACHED */ 1992 } 1993 } 1994 bno = (start + len - loc) * NBBY; 1995 cgp->cg_frotor = ufs_rw32(bno, needswap); 1996 /* 1997 * found the byte in the map 1998 * sift through the bits to find the selected frag 1999 */ 2000 for (i = bno + NBBY; bno < i; bno += fs->fs_frag) { 2001 blk = blkmap(fs, blksfree, bno); 2002 blk <<= 1; 2003 field = around[allocsiz]; 2004 subfield = inside[allocsiz]; 2005 for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) { 2006 if ((blk & field) == subfield) 2007 return (bno + pos); 2008 field <<= 1; 2009 subfield <<= 1; 2010 } 2011 } 2012 printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt); 2013 panic("ffs_alloccg: block not in map"); 2014 /* return (-1); */ 2015 } 2016 2017 /* 2018 * Fserr prints the name of a file system with an error diagnostic. 2019 * 2020 * The form of the error message is: 2021 * fs: error message 2022 */ 2023 static void 2024 ffs_fserr(struct fs *fs, u_int uid, const char *cp) 2025 { 2026 2027 log(LOG_ERR, "uid %d, pid %d, command %s, on %s: %s\n", 2028 uid, curproc->p_pid, curproc->p_comm, fs->fs_fsmnt, cp); 2029 } 2030