1 /* $NetBSD: lfs_alloc.c,v 1.86 2005/12/11 12:25:26 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Konrad E. Schroder <perseant@hhhh.org>. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 /* 39 * Copyright (c) 1991, 1993 40 * The Regents of the University of California. All rights reserved. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * @(#)lfs_alloc.c 8.4 (Berkeley) 1/4/94 67 */ 68 69 #include <sys/cdefs.h> 70 __KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.86 2005/12/11 12:25:26 christos Exp $"); 71 72 #if defined(_KERNEL_OPT) 73 #include "opt_quota.h" 74 #endif 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/kernel.h> 79 #include <sys/buf.h> 80 #include <sys/lock.h> 81 #include <sys/vnode.h> 82 #include <sys/syslog.h> 83 #include <sys/mount.h> 84 #include <sys/pool.h> 85 #include <sys/proc.h> 86 #include <sys/tree.h> 87 88 #include <ufs/ufs/quota.h> 89 #include <ufs/ufs/inode.h> 90 #include <ufs/ufs/ufsmount.h> 91 #include <ufs/ufs/ufs_extern.h> 92 93 #include <ufs/lfs/lfs.h> 94 #include <ufs/lfs/lfs_extern.h> 95 96 extern struct lock ufs_hashlock; 97 98 static int extend_ifile(struct lfs *, struct ucred *); 99 static int lfs_ialloc(struct lfs *, struct vnode *, ino_t, int, 100 struct vnode **); 101 102 /* 103 * Allocate a particular inode with a particular version number, freeing 104 * any previous versions of this inode that may have gone before. 105 * Used by the roll-forward code. 106 * 107 * XXX this function does not have appropriate locking to be used on a live fs; 108 * XXX but something similar could probably be used for an "undelete" call. 109 * 110 * Called with the Ifile inode locked. 111 */ 112 int 113 lfs_rf_valloc(struct lfs *fs, ino_t ino, int vers, struct lwp *l, 114 struct vnode **vpp) 115 { 116 IFILE *ifp; 117 struct buf *bp, *cbp; 118 struct vnode *vp; 119 struct inode *ip; 120 ino_t tino, oldnext; 121 int error; 122 CLEANERINFO *cip; 123 124 ASSERT_SEGLOCK(fs); /* XXX it doesn't, really */ 125 126 /* 127 * First, just try a vget. If the version number is the one we want, 128 * we don't have to do anything else. If the version number is wrong, 129 * take appropriate action. 130 */ 131 error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, &vp); 132 if (error == 0) { 133 DLOG((DLOG_RF, "lfs_rf_valloc[1]: ino %d vp %p\n", ino, vp)); 134 135 *vpp = vp; 136 ip = VTOI(vp); 137 if (ip->i_gen == vers) 138 return 0; 139 else if (ip->i_gen < vers) { 140 lfs_truncate(vp, (off_t)0, 0, NOCRED, l); 141 ip->i_gen = ip->i_ffs1_gen = vers; 142 LFS_SET_UINO(ip, IN_CHANGE | IN_UPDATE); 143 return 0; 144 } else { 145 DLOG((DLOG_RF, "ino %d: sought version %d, got %d\n", 146 ino, vers, ip->i_ffs1_gen)); 147 vput(vp); 148 *vpp = NULLVP; 149 return EEXIST; 150 } 151 } 152 153 /* 154 * The inode is not in use. Find it on the free list. 155 */ 156 /* If the Ifile is too short to contain this inum, extend it */ 157 while (VTOI(fs->lfs_ivnode)->i_size <= (ino / 158 fs->lfs_ifpb + fs->lfs_cleansz + fs->lfs_segtabsz) 159 << fs->lfs_bshift) { 160 extend_ifile(fs, NOCRED); 161 } 162 163 LFS_IENTRY(ifp, fs, ino, bp); 164 oldnext = ifp->if_nextfree; 165 ifp->if_version = vers; 166 brelse(bp); 167 168 LFS_GET_HEADFREE(fs, cip, cbp, &ino); 169 if (ino) { 170 LFS_PUT_HEADFREE(fs, cip, cbp, oldnext); 171 } else { 172 tino = ino; 173 while (1) { 174 LFS_IENTRY(ifp, fs, tino, bp); 175 if (ifp->if_nextfree == ino || 176 ifp->if_nextfree == LFS_UNUSED_INUM) 177 break; 178 tino = ifp->if_nextfree; 179 brelse(bp); 180 } 181 if (ifp->if_nextfree == LFS_UNUSED_INUM) { 182 brelse(bp); 183 return ENOENT; 184 } 185 ifp->if_nextfree = oldnext; 186 LFS_BWRITE_LOG(bp); 187 } 188 189 error = lfs_ialloc(fs, fs->lfs_ivnode, ino, vers, &vp); 190 if (error == 0) { 191 /* 192 * Make it VREG so we can put blocks on it. We will change 193 * this later if it turns out to be some other kind of file. 194 */ 195 ip = VTOI(vp); 196 ip->i_mode = ip->i_ffs1_mode = IFREG; 197 ip->i_nlink = ip->i_ffs1_nlink = 1; 198 ip->i_ffs_effnlink = 1; 199 ufs_vinit(vp->v_mount, lfs_specop_p, lfs_fifoop_p, &vp); 200 ip = VTOI(vp); 201 202 DLOG((DLOG_RF, "lfs_rf_valloc: ino %d vp %p\n", ino, vp)); 203 204 /* The dirop-nature of this vnode is past */ 205 lfs_unmark_vnode(vp); 206 (void)lfs_vunref(vp); 207 vp->v_flag &= ~VDIROP; 208 simple_lock(&fs->lfs_interlock); 209 simple_lock(&lfs_subsys_lock); 210 --lfs_dirvcount; 211 simple_unlock(&lfs_subsys_lock); 212 TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain); 213 simple_unlock(&fs->lfs_interlock); 214 } 215 *vpp = vp; 216 return error; 217 } 218 219 /* 220 * Add a new block to the Ifile, to accommodate future file creations. 221 * Called with the segment lock held. 222 */ 223 static int 224 extend_ifile(struct lfs *fs, struct ucred *cred) 225 { 226 struct vnode *vp; 227 struct inode *ip; 228 IFILE *ifp; 229 IFILE_V1 *ifp_v1; 230 struct buf *bp, *cbp; 231 int error; 232 daddr_t i, blkno, xmax; 233 ino_t oldlast; 234 CLEANERINFO *cip; 235 236 ASSERT_SEGLOCK(fs); 237 238 vp = fs->lfs_ivnode; 239 ip = VTOI(vp); 240 blkno = lblkno(fs, ip->i_size); 241 if ((error = lfs_balloc(vp, ip->i_size, fs->lfs_bsize, cred, 0, 242 &bp)) != 0) { 243 return (error); 244 } 245 ip->i_size += fs->lfs_bsize; 246 ip->i_ffs1_size = ip->i_size; 247 uvm_vnp_setsize(vp, ip->i_size); 248 249 i = (blkno - fs->lfs_segtabsz - fs->lfs_cleansz) * 250 fs->lfs_ifpb; 251 LFS_GET_HEADFREE(fs, cip, cbp, &oldlast); 252 LFS_PUT_HEADFREE(fs, cip, cbp, i); 253 #ifdef DIAGNOSTIC 254 if (fs->lfs_freehd == LFS_UNUSED_INUM) 255 panic("inode 0 allocated [2]"); 256 #endif /* DIAGNOSTIC */ 257 xmax = i + fs->lfs_ifpb; 258 259 if (fs->lfs_version == 1) { 260 for (ifp_v1 = (IFILE_V1 *)bp->b_data; i < xmax; ++ifp_v1) { 261 ifp_v1->if_version = 1; 262 ifp_v1->if_daddr = LFS_UNUSED_DADDR; 263 ifp_v1->if_nextfree = ++i; 264 } 265 ifp_v1--; 266 ifp_v1->if_nextfree = oldlast; 267 } else { 268 for (ifp = (IFILE *)bp->b_data; i < xmax; ++ifp) { 269 ifp->if_version = 1; 270 ifp->if_daddr = LFS_UNUSED_DADDR; 271 ifp->if_nextfree = ++i; 272 } 273 ifp--; 274 ifp->if_nextfree = oldlast; 275 } 276 LFS_PUT_TAILFREE(fs, cip, cbp, xmax - 1); 277 278 (void) LFS_BWRITE_LOG(bp); /* Ifile */ 279 280 return 0; 281 } 282 283 /* Allocate a new inode. */ 284 /* ARGSUSED */ 285 /* VOP_BWRITE 2i times */ 286 int 287 lfs_valloc(struct vnode *pvp, int mode, struct ucred *cred, struct vnode **vpp) 288 { 289 struct lfs *fs; 290 struct buf *bp, *cbp; 291 struct ifile *ifp; 292 ino_t new_ino; 293 int error; 294 int new_gen; 295 CLEANERINFO *cip; 296 297 fs = VTOI(pvp)->i_lfs; 298 if (fs->lfs_ronly) 299 return EROFS; 300 301 ASSERT_NO_SEGLOCK(fs); 302 303 lfs_seglock(fs, SEGM_PROT); 304 vn_lock(fs->lfs_ivnode, LK_EXCLUSIVE); 305 306 /* Get the head of the freelist. */ 307 LFS_GET_HEADFREE(fs, cip, cbp, &new_ino); 308 309 #ifdef DIAGNOSTIC 310 if (new_ino == LFS_UNUSED_INUM) { 311 #ifdef DEBUG 312 lfs_dump_super(fs); 313 #endif /* DEBUG */ 314 panic("inode 0 allocated [1]"); 315 } 316 #endif /* DIAGNOSTIC */ 317 DLOG((DLOG_ALLOC, "lfs_valloc: allocate inode %d\n", new_ino)); 318 319 /* 320 * Remove the inode from the free list and write the new start 321 * of the free list into the superblock. 322 */ 323 LFS_IENTRY(ifp, fs, new_ino, bp); 324 if (ifp->if_daddr != LFS_UNUSED_DADDR) 325 panic("lfs_valloc: inuse inode %llu on the free list", 326 (unsigned long long)new_ino); 327 LFS_PUT_HEADFREE(fs, cip, cbp, ifp->if_nextfree); 328 DLOG((DLOG_ALLOC, "lfs_valloc: headfree %d -> %d\n", new_ino, 329 ifp->if_nextfree)); 330 331 new_gen = ifp->if_version; /* version was updated by vfree */ 332 brelse(bp); 333 334 /* Extend IFILE so that the next lfs_valloc will succeed. */ 335 if (fs->lfs_freehd == LFS_UNUSED_INUM) { 336 if ((error = extend_ifile(fs, cred)) != 0) { 337 LFS_PUT_HEADFREE(fs, cip, cbp, new_ino); 338 VOP_UNLOCK(fs->lfs_ivnode, 0); 339 lfs_segunlock(fs); 340 return error; 341 } 342 } 343 #ifdef DIAGNOSTIC 344 if (fs->lfs_freehd == LFS_UNUSED_INUM) 345 panic("inode 0 allocated [3]"); 346 #endif /* DIAGNOSTIC */ 347 348 /* Set superblock modified bit and increment file count. */ 349 simple_lock(&fs->lfs_interlock); 350 fs->lfs_fmod = 1; 351 simple_unlock(&fs->lfs_interlock); 352 ++fs->lfs_nfiles; 353 354 VOP_UNLOCK(fs->lfs_ivnode, 0); 355 lfs_segunlock(fs); 356 357 return lfs_ialloc(fs, pvp, new_ino, new_gen, vpp); 358 } 359 360 /* 361 * Finish allocating a new inode, given an inode and generation number. 362 */ 363 static int 364 lfs_ialloc(struct lfs *fs, struct vnode *pvp, ino_t new_ino, int new_gen, 365 struct vnode **vpp) 366 { 367 struct inode *ip; 368 struct vnode *vp; 369 370 ASSERT_NO_SEGLOCK(fs); 371 372 vp = *vpp; 373 lockmgr(&ufs_hashlock, LK_EXCLUSIVE, 0); 374 /* Create an inode to associate with the vnode. */ 375 lfs_vcreate(pvp->v_mount, new_ino, vp); 376 377 ip = VTOI(vp); 378 LFS_SET_UINO(ip, IN_CHANGE); 379 /* on-disk structure has been zeroed out by lfs_vcreate */ 380 ip->i_din.ffs1_din->di_inumber = new_ino; 381 382 /* Note no blocks yet */ 383 ip->i_lfs_hiblk = -1; 384 385 /* Set a new generation number for this inode. */ 386 if (new_gen) { 387 ip->i_gen = new_gen; 388 ip->i_ffs1_gen = new_gen; 389 } 390 391 /* Insert into the inode hash table. */ 392 ufs_ihashins(ip); 393 lockmgr(&ufs_hashlock, LK_RELEASE, 0); 394 395 ufs_vinit(vp->v_mount, lfs_specop_p, lfs_fifoop_p, vpp); 396 vp = *vpp; 397 ip = VTOI(vp); 398 399 memset(ip->i_lfs_fragsize, 0, NDADDR * sizeof(*ip->i_lfs_fragsize)); 400 401 uvm_vnp_setsize(vp, 0); 402 lfs_mark_vnode(vp); 403 genfs_node_init(vp, &lfs_genfsops); 404 VREF(ip->i_devvp); 405 return (0); 406 } 407 408 /* Create a new vnode/inode pair and initialize what fields we can. */ 409 void 410 lfs_vcreate(struct mount *mp, ino_t ino, struct vnode *vp) 411 { 412 struct inode *ip; 413 struct ufs1_dinode *dp; 414 struct ufsmount *ump; 415 #ifdef QUOTA 416 int i; 417 #endif 418 419 /* Get a pointer to the private mount structure. */ 420 ump = VFSTOUFS(mp); 421 422 ASSERT_NO_SEGLOCK(ump->um_lfs); 423 424 /* Initialize the inode. */ 425 ip = pool_get(&lfs_inode_pool, PR_WAITOK); 426 memset(ip, 0, sizeof(*ip)); 427 dp = pool_get(&lfs_dinode_pool, PR_WAITOK); 428 memset(dp, 0, sizeof(*dp)); 429 ip->inode_ext.lfs = pool_get(&lfs_inoext_pool, PR_WAITOK); 430 memset(ip->inode_ext.lfs, 0, sizeof(*ip->inode_ext.lfs)); 431 vp->v_data = ip; 432 ip->i_din.ffs1_din = dp; 433 ip->i_ump = ump; 434 ip->i_vnode = vp; 435 ip->i_devvp = ump->um_devvp; 436 ip->i_dev = ump->um_dev; 437 ip->i_number = dp->di_inumber = ino; 438 ip->i_lfs = ump->um_lfs; 439 ip->i_lfs_effnblks = 0; 440 SPLAY_INIT(&ip->i_lfs_lbtree); 441 ip->i_lfs_nbtree = 0; 442 #ifdef QUOTA 443 for (i = 0; i < MAXQUOTAS; i++) 444 ip->i_dquot[i] = NODQUOT; 445 #endif 446 #ifdef DEBUG 447 if (ino == LFS_IFILE_INUM) 448 vp->v_vnlock->lk_wmesg = "inlock"; 449 #endif 450 } 451 452 /* Free an inode. */ 453 /* ARGUSED */ 454 /* VOP_BWRITE 2i times */ 455 int 456 lfs_vfree(struct vnode *vp, ino_t ino, int mode) 457 { 458 SEGUSE *sup; 459 CLEANERINFO *cip; 460 struct buf *cbp, *bp; 461 struct ifile *ifp; 462 struct inode *ip; 463 struct lfs *fs; 464 daddr_t old_iaddr; 465 ino_t otail; 466 int s; 467 468 /* Get the inode number and file system. */ 469 ip = VTOI(vp); 470 fs = ip->i_lfs; 471 ino = ip->i_number; 472 473 ASSERT_NO_SEGLOCK(fs); 474 475 /* Drain of pending writes */ 476 simple_lock(&vp->v_interlock); 477 s = splbio(); 478 if (fs->lfs_version > 1 && WRITEINPROG(vp)) 479 ltsleep(vp, (PRIBIO+1), "lfs_vfree", 0, &vp->v_interlock); 480 splx(s); 481 simple_unlock(&vp->v_interlock); 482 483 lfs_seglock(fs, SEGM_PROT); 484 vn_lock(fs->lfs_ivnode, LK_EXCLUSIVE); 485 486 lfs_unmark_vnode(vp); 487 if (vp->v_flag & VDIROP) { 488 vp->v_flag &= ~VDIROP; 489 simple_lock(&fs->lfs_interlock); 490 simple_lock(&lfs_subsys_lock); 491 --lfs_dirvcount; 492 simple_unlock(&lfs_subsys_lock); 493 TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain); 494 simple_unlock(&fs->lfs_interlock); 495 wakeup(&lfs_dirvcount); 496 lfs_vunref(vp); 497 } 498 499 LFS_CLR_UINO(ip, IN_ACCESSED|IN_CLEANING|IN_MODIFIED); 500 ip->i_flag &= ~IN_ALLMOD; 501 502 /* 503 * Set the ifile's inode entry to unused, increment its version number 504 * and link it onto the free chain. 505 */ 506 LFS_IENTRY(ifp, fs, ino, bp); 507 old_iaddr = ifp->if_daddr; 508 ifp->if_daddr = LFS_UNUSED_DADDR; 509 ++ifp->if_version; 510 if (fs->lfs_version == 1) { 511 LFS_GET_HEADFREE(fs, cip, cbp, &(ifp->if_nextfree)); 512 LFS_PUT_HEADFREE(fs, cip, cbp, ino); 513 (void) LFS_BWRITE_LOG(bp); /* Ifile */ 514 } else { 515 ifp->if_nextfree = LFS_UNUSED_INUM; 516 /* 517 * XXX Writing the freed node here means that it might not 518 * XXX make it into the free list in the event of a crash 519 * XXX (the ifile could be written before the rest of this 520 * XXX completes). 521 */ 522 (void) LFS_BWRITE_LOG(bp); /* Ifile */ 523 LFS_GET_TAILFREE(fs, cip, cbp, &otail); 524 LFS_IENTRY(ifp, fs, otail, bp); 525 ifp->if_nextfree = ino; 526 LFS_BWRITE_LOG(bp); 527 LFS_PUT_TAILFREE(fs, cip, cbp, ino); 528 DLOG((DLOG_ALLOC, "lfs_vfree: tailfree %d -> %d\n", otail, 529 ino)); 530 } 531 #ifdef DIAGNOSTIC 532 if (ino == LFS_UNUSED_INUM) { 533 panic("inode 0 freed"); 534 } 535 #endif /* DIAGNOSTIC */ 536 if (old_iaddr != LFS_UNUSED_DADDR) { 537 LFS_SEGENTRY(sup, fs, dtosn(fs, old_iaddr), bp); 538 #ifdef DIAGNOSTIC 539 if (sup->su_nbytes < sizeof (struct ufs1_dinode)) { 540 printf("lfs_vfree: negative byte count" 541 " (segment %" PRIu32 " short by %d)\n", 542 dtosn(fs, old_iaddr), 543 (int)sizeof (struct ufs1_dinode) - 544 sup->su_nbytes); 545 panic("lfs_vfree: negative byte count"); 546 sup->su_nbytes = sizeof (struct ufs1_dinode); 547 } 548 #endif 549 sup->su_nbytes -= sizeof (struct ufs1_dinode); 550 LFS_WRITESEGENTRY(sup, fs, dtosn(fs, old_iaddr), bp); /* Ifile */ 551 } 552 553 /* Set superblock modified bit and decrement file count. */ 554 simple_lock(&fs->lfs_interlock); 555 fs->lfs_fmod = 1; 556 simple_unlock(&fs->lfs_interlock); 557 --fs->lfs_nfiles; 558 559 VOP_UNLOCK(fs->lfs_ivnode, 0); 560 lfs_segunlock(fs); 561 562 return (0); 563 } 564