/*	$NetBSD: lfs_segment.c,v 1.48 2000/05/31 03:37:35 fredb Exp $	*/

/*-
 * Copyright (c) 1999 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant@hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)lfs_segment.c	8.10 (Berkeley) 6/10/95
 */

#define ivndebug(vp,str) printf("ino %d: %s\n",VTOI(vp)->i_number,(str))

#include "opt_ddb.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/kernel.h>
#include <sys/resourcevar.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/mount.h>

#include <miscfs/specfs/specdev.h>
#include <miscfs/fifofs/fifo.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/dir.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>

extern int count_lock_queue __P((void));
extern struct simplelock vnode_free_list_slock;		/* XXX */

/*
 * Determine if it's OK to start a partial in this segment, or if we need
 * to go on to a new segment.
 */
#define	LFS_PARTIAL_FITS(fs) \
	((fs)->lfs_dbpseg - ((fs)->lfs_offset - (fs)->lfs_curseg) > \
	1 << (fs)->lfs_fsbtodb)
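/*
 * In the expression above, lfs_offset - lfs_curseg is the number of
 * disk blocks already consumed in the current segment, so the left-hand
 * side is the number of disk blocks still free there; 1 << lfs_fsbtodb
 * is one file-system block's worth of disk blocks.  A partial segment
 * "fits" only if more than one fsb of room remains.
 */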
void	 lfs_callback __P((struct buf *));
int	 lfs_gather __P((struct lfs *, struct segment *,
	     struct vnode *, int (*) __P((struct lfs *, struct buf *))));
int	 lfs_gatherblock __P((struct segment *, struct buf *, int *));
void	 lfs_iset __P((struct inode *, ufs_daddr_t, time_t));
int	 lfs_match_fake __P((struct lfs *, struct buf *));
int	 lfs_match_data __P((struct lfs *, struct buf *));
int	 lfs_match_dindir __P((struct lfs *, struct buf *));
int	 lfs_match_indir __P((struct lfs *, struct buf *));
int	 lfs_match_tindir __P((struct lfs *, struct buf *));
void	 lfs_newseg __P((struct lfs *));
void	 lfs_shellsort __P((struct buf **, ufs_daddr_t *, int));
void	 lfs_supercallback __P((struct buf *));
void	 lfs_updatemeta __P((struct segment *));
int	 lfs_vref __P((struct vnode *));
void	 lfs_vunref __P((struct vnode *));
void	 lfs_writefile __P((struct lfs *, struct segment *, struct vnode *));
int	 lfs_writeinode __P((struct lfs *, struct segment *, struct inode *));
int	 lfs_writeseg __P((struct lfs *, struct segment *));
void	 lfs_writesuper __P((struct lfs *, daddr_t));
int	 lfs_writevnodes __P((struct lfs *fs, struct mount *mp,
	    struct segment *sp, int dirops));

int	lfs_allclean_wakeup;	/* Cleaner wakeup address. */
int	lfs_writeindir = 1;	/* whether to flush indir on non-ckp */
int	lfs_clean_vnhead = 0;	/* Allow freeing to head of vn list */
int	lfs_dirvcount = 0;	/* # active dirops */

/* Statistics Counters */
int lfs_dostats = 1;
struct lfs_stats lfs_stats;

/* op values to lfs_writevnodes */
#define	VN_REG		0
#define	VN_DIROP	1
#define	VN_EMPTY	2
#define	VN_CLEAN	3

#define LFS_MAX_ACTIVE	10

/*
 * XXX KS - Set modification time on the Ifile, so the cleaner can
 * read the fs mod time off of it.  We don't set IN_UPDATE here,
 * since we don't really need this to be flushed to disk (and in any
 * case that wouldn't happen to the Ifile until we checkpoint).
 */
void
lfs_imtime(fs)
	struct lfs *fs;
{
	struct timespec ts;
	struct inode *ip;

	TIMEVAL_TO_TIMESPEC(&time, &ts);
	ip = VTOI(fs->lfs_ivnode);
	ip->i_ffs_mtime = ts.tv_sec;
	ip->i_ffs_mtimensec = ts.tv_nsec;
}

/*
 * Ifile and meta data blocks are not marked busy, so segment writes MUST be
 * single threaded.  Currently, there are two paths into lfs_segwrite, sync()
 * and getnewbuf().  They both mark the file system busy.  Lfs_vflush()
 * explicitly marks the file system busy.  So lfs_segwrite is safe.  I think.
 */

#define SET_FLUSHING(fs,vp) (fs)->lfs_flushvp = (vp)
#define IS_FLUSHING(fs,vp)  ((fs)->lfs_flushvp == (vp))
#define CLR_FLUSHING(fs,vp) (fs)->lfs_flushvp = NULL
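/*
 * Write everything this vnode has dirty -- cleaning buffers, data
 * blocks, and the inode itself -- as a single partial segment, so that
 * the caller (e.g. vinvalbuf) can then safely invalidate the vnode's
 * buffers.
 */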
int
lfs_vflush(vp)
	struct vnode *vp;
{
	struct inode *ip;
	struct lfs *fs;
	struct segment *sp;
	struct buf *bp, *nbp, *tbp, *tnbp;
	int error, s;

	ip = VTOI(vp);
	fs = VFSTOUFS(vp->v_mount)->um_lfs;

	if (ip->i_flag & IN_CLEANING) {
#ifdef DEBUG_LFS
		ivndebug(vp,"vflush/in_cleaning");
#endif
		ip->i_flag &= ~IN_CLEANING;
		if (ip->i_flag & (IN_MODIFIED | IN_ACCESSED)) {
			fs->lfs_uinodes--;
		} else
			ip->i_flag |= IN_MODIFIED;
		/*
		 * Toss any cleaning buffers that have real counterparts
		 * to avoid losing new data.
		 */
		s = splbio();
		for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if (bp->b_flags & B_CALL) {
				for (tbp = vp->v_dirtyblkhd.lh_first; tbp;
				    tbp = tnbp)
				{
					tnbp = tbp->b_vnbufs.le_next;
					if (tbp->b_vp == bp->b_vp
					   && tbp->b_lblkno == bp->b_lblkno
					   && tbp != bp)
					{
						lfs_freebuf(bp);
					}
				}
			}
		}
		splx(s);
	}

	/* If the node is being written, wait until that is done */
	if (WRITEINPROG(vp)) {
#ifdef DEBUG_LFS
		ivndebug(vp,"vflush/writeinprog");
#endif
		tsleep(vp, PRIBIO+1, "lfs_vw", 0);
	}

	/* Protect against VXLOCK deadlock in vinvalbuf() */
	lfs_seglock(fs, SEGM_SYNC);

	/* If we're supposed to flush a freed inode, just toss it */
	/* XXX - seglock, so these buffers can't be gathered, right? */
	if (ip->i_ffs_mode == 0) {
		printf("lfs_vflush: ino %d is freed, not flushing\n",
			ip->i_number);
		s = splbio();
		for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			/* Copied from lfs_writeseg */
			if (bp->b_flags & B_CALL) {
				/* if B_CALL, it was created with newbuf */
				lfs_freebuf(bp);
			} else {
				bremfree(bp);
				bp->b_flags &= ~(B_ERROR | B_READ | B_DELWRI |
						 B_LOCKED | B_GATHERED);
				bp->b_flags |= B_DONE;
				reassignbuf(bp, vp);
				brelse(bp);
			}
		}
		splx(s);
		if (ip->i_flag & IN_CLEANING)
			fs->lfs_uinodes--;
		if (ip->i_flag & (IN_MODIFIED | IN_ACCESSED))
			fs->lfs_uinodes--;
		ip->i_flag &= ~IN_ALLMOD;
		printf("lfs_vflush: done not flushing ino %d\n",
			ip->i_number);
		lfs_segunlock(fs);
		return 0;
	}

	SET_FLUSHING(fs,vp);
	if (fs->lfs_nactive > LFS_MAX_ACTIVE) {
		error = lfs_segwrite(vp->v_mount, SEGM_SYNC|SEGM_CKP);
		CLR_FLUSHING(fs,vp);
		lfs_segunlock(fs);
		return error;
	}
	sp = fs->lfs_sp;

	if (vp->v_dirtyblkhd.lh_first == NULL) {
		lfs_writevnodes(fs, vp->v_mount, sp, VN_EMPTY);
	} else if ((ip->i_flag & IN_CLEANING) &&
		  (fs->lfs_sp->seg_flags & SEGM_CLEAN)) {
#ifdef DEBUG_LFS
		ivndebug(vp,"vflush/clean");
#endif
		lfs_writevnodes(fs, vp->v_mount, sp, VN_CLEAN);
	} else if (lfs_dostats) {
		if (vp->v_dirtyblkhd.lh_first || (VTOI(vp)->i_flag & IN_ALLMOD))
			++lfs_stats.vflush_invoked;
#ifdef DEBUG_LFS
		ivndebug(vp,"vflush");
#endif
	}

#ifdef DIAGNOSTIC
	/* XXX KS This actually can happen right now, though it shouldn't(?) */
	if (vp->v_flag & VDIROP) {
		printf("lfs_vflush: flushing VDIROP, this shouldn\'t be\n");
		/* panic("VDIROP being flushed...this can\'t happen"); */
	}
	if (vp->v_usecount < 0) {
		printf("usecount=%ld\n", vp->v_usecount);
		panic("lfs_vflush: usecount<0");
	}
#endif

	do {
		do {
			if (vp->v_dirtyblkhd.lh_first != NULL)
				lfs_writefile(fs, sp, vp);
		} while (lfs_writeinode(fs, sp, ip));
	} while (lfs_writeseg(fs, sp) && ip->i_number == LFS_IFILE_INUM);

	if (lfs_dostats) {
		++lfs_stats.nwrites;
		if (sp->seg_flags & SEGM_SYNC)
			++lfs_stats.nsync_writes;
		if (sp->seg_flags & SEGM_CKP)
			++lfs_stats.ncheckpoints;
	}
	lfs_segunlock(fs);

	CLR_FLUSHING(fs,vp);
	return (0);
}
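/*
 * Write the dirty vnodes of the given class (op: VN_REG, VN_DIROP,
 * VN_EMPTY or VN_CLEAN) belonging to mount point mp into the current
 * partial segment.  Returns the number of inodes written.
 */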
#ifdef DEBUG_LFS_VERBOSE
# define vndebug(vp,str) if(VTOI(vp)->i_flag & IN_CLEANING) printf("not writing ino %d because %s (op %d)\n",VTOI(vp)->i_number,(str),op)
#else
# define vndebug(vp,str)
#endif

int
lfs_writevnodes(fs, mp, sp, op)
	struct lfs *fs;
	struct mount *mp;
	struct segment *sp;
	int op;
{
	struct inode *ip;
	struct vnode *vp;
	int inodes_written = 0, only_cleaning;
	int needs_unlock;

#ifndef LFS_NO_BACKVP_HACK
	/* BEGIN HACK */
#define	VN_OFFSET	(((caddr_t)&vp->v_mntvnodes.le_next) - (caddr_t)vp)
#define	BACK_VP(VP)	((struct vnode *)(((caddr_t)VP->v_mntvnodes.le_prev) - VN_OFFSET))
#define	BEG_OF_VLIST	((struct vnode *)(((caddr_t)&mp->mnt_vnodelist.lh_first) - VN_OFFSET))

	/* Find last vnode. */
 loop:	for (vp = mp->mnt_vnodelist.lh_first;
	     vp && vp->v_mntvnodes.le_next != NULL;
	     vp = vp->v_mntvnodes.le_next);
	for (; vp && vp != BEG_OF_VLIST; vp = BACK_VP(vp)) {
#else
 loop:
	for (vp = mp->mnt_vnodelist.lh_first;
	     vp != NULL;
	     vp = vp->v_mntvnodes.le_next) {
#endif
		/*
		 * If the vnode that we are about to sync is no longer
		 * associated with this mount point, start over.
		 */
		if (vp->v_mount != mp)
			goto loop;

		ip = VTOI(vp);
		if ((op == VN_DIROP && !(vp->v_flag & VDIROP)) ||
		   (op != VN_DIROP && op != VN_CLEAN && (vp->v_flag & VDIROP))) {
			vndebug(vp,"dirop");
			continue;
		}

		if (op == VN_EMPTY && vp->v_dirtyblkhd.lh_first) {
			vndebug(vp,"empty");
			continue;
		}

		if (vp->v_type == VNON) {
			continue;
		}

		if (op == VN_CLEAN && ip->i_number != LFS_IFILE_INUM
		   && vp != fs->lfs_flushvp
		   && !(ip->i_flag & IN_CLEANING)) {
			vndebug(vp,"cleaning");
			continue;
		}

		if (lfs_vref(vp)) {
			vndebug(vp,"vref");
			continue;
		}

#if 0	/* XXX KS - if we skip the ifile, things could go badly for us. */
		if (WRITEINPROG(vp)) {
			lfs_vunref(vp);
#ifdef DEBUG_LFS
			ivndebug(vp,"writevnodes/writeinprog");
#endif
			continue;
		}
#endif

		needs_unlock = 0;
		if (VOP_ISLOCKED(vp)) {
			if (vp != fs->lfs_ivnode &&
			    vp->v_lock.lk_lockholder != curproc->p_pid) {
#ifdef DEBUG_LFS
				printf("lfs_writevnodes: not writing ino %d, locked by pid %d\n",
				       VTOI(vp)->i_number,
				       vp->v_lock.lk_lockholder);
#endif
				lfs_vunref(vp);
				continue;
			}
		} else if (vp != fs->lfs_ivnode) {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			needs_unlock = 1;
		}

		only_cleaning = 0;
		/*
		 * Write the inode/file if dirty and it's not the IFILE.
		 */
		if ((ip->i_flag & IN_ALLMOD) ||
		    (vp->v_dirtyblkhd.lh_first != NULL))
		{
			only_cleaning = ((ip->i_flag & IN_ALLMOD) == IN_CLEANING);

			if (ip->i_number != LFS_IFILE_INUM
			   && vp->v_dirtyblkhd.lh_first != NULL)
			{
				lfs_writefile(fs, sp, vp);
			}
			if (vp->v_dirtyblkhd.lh_first != NULL) {
				if (WRITEINPROG(vp)) {
#ifdef DEBUG_LFS
					ivndebug(vp,"writevnodes/write2");
#endif
				} else if (!(ip->i_flag & IN_ALLMOD)) {
#ifdef DEBUG_LFS
					printf("<%d>", ip->i_number);
#endif
					ip->i_flag |= IN_MODIFIED;
					++fs->lfs_uinodes;
				}
			}
			(void) lfs_writeinode(fs, sp, ip);
			inodes_written++;
		}

		if (needs_unlock)
			VOP_UNLOCK(vp, 0);

		if (lfs_clean_vnhead && only_cleaning)
			lfs_vunref_head(vp);
		else
			lfs_vunref(vp);
	}
	return inodes_written;
}
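/*
 * Write dirty vnodes and the Ifile to disk.  The flags argument selects
 * a checkpoint (SEGM_CKP), a synchronous write (SEGM_SYNC), or a write
 * on behalf of the cleaner (SEGM_CLEAN); a checkpoint is also forced
 * once more than LFS_MAX_ACTIVE segments are active.
 */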
int
lfs_segwrite(mp, flags)
	struct mount *mp;
	int flags;			/* Do a checkpoint. */
{
	struct buf *bp;
	struct inode *ip;
	struct lfs *fs;
	struct segment *sp;
	struct vnode *vp;
	SEGUSE *segusep;
	ufs_daddr_t ibno;
	int do_ckp, error, i;
	int writer_set = 0;
	int need_unlock = 0;

	fs = VFSTOUFS(mp)->um_lfs;

	lfs_imtime(fs);

	/*
	 * If we are not the cleaner, and we have fewer than MIN_FREE_SEGS
	 * clean segments, wait until the cleaner writes.
	 */
	if (!(flags & SEGM_CLEAN)
	   && (!fs->lfs_seglock || !(fs->lfs_sp->seg_flags & SEGM_CLEAN)))
	{
		do {
			if (fs->lfs_nclean <= MIN_FREE_SEGS
			   || fs->lfs_avail <= 0)
			{
				wakeup(&lfs_allclean_wakeup);
				wakeup(&fs->lfs_nextseg);
				error = tsleep(&fs->lfs_avail, PRIBIO + 1,
					       "lfs_avail", 0);
				if (error) {
					return (error);
				}
			}
		} while (fs->lfs_nclean <= MIN_FREE_SEGS || fs->lfs_avail <= 0);
	}

	/*
	 * Allocate a segment structure and enough space to hold pointers to
	 * the maximum possible number of buffers which can be described in a
	 * single summary block.
	 */
	do_ckp = (flags & SEGM_CKP) || fs->lfs_nactive > LFS_MAX_ACTIVE;
	lfs_seglock(fs, flags | (do_ckp ? SEGM_CKP : 0));
	sp = fs->lfs_sp;

	/*
	 * If lfs_flushvp is non-NULL, we are called from lfs_vflush,
	 * in which case we have to flush *all* buffers off of this vnode.
	 * We don't care about other nodes, but write any non-dirop nodes
	 * anyway in anticipation of another getnewvnode().
	 *
	 * If we're cleaning we only write cleaning and ifile blocks, and
	 * no dirops, since otherwise we'd risk corruption in a crash.
	 */
	if (sp->seg_flags & SEGM_CLEAN)
		lfs_writevnodes(fs, mp, sp, VN_CLEAN);
	else {
		lfs_writevnodes(fs, mp, sp, VN_REG);
		if (!fs->lfs_dirops || !fs->lfs_flushvp) {
			while (fs->lfs_dirops)
				if ((error = tsleep(&fs->lfs_writer,
						    PRIBIO + 1, "lfs writer", 0)))
				{
					free(sp->bpp, M_SEGMENT);
					free(sp, M_SEGMENT);
					return (error);
				}
			fs->lfs_writer++;
			writer_set = 1;
			lfs_writevnodes(fs, mp, sp, VN_DIROP);
			((SEGSUM *)(sp->segsum))->ss_flags &= ~(SS_CONT);
		}
	}

	/*
	 * If we are doing a checkpoint, mark everything since the
	 * last checkpoint as no longer ACTIVE.
	 */
	if (do_ckp) {
		for (ibno = fs->lfs_cleansz + fs->lfs_segtabsz;
		     --ibno >= fs->lfs_cleansz; ) {
			if (bread(fs->lfs_ivnode, ibno, fs->lfs_bsize,
			    NOCRED, &bp))
				panic("lfs_segwrite: ifile read");
			segusep = (SEGUSE *)bp->b_data;
			for (i = fs->lfs_sepb; i--; segusep++)
				segusep->su_flags &= ~SEGUSE_ACTIVE;

			/* But the current segment is still ACTIVE */
			if (fs->lfs_curseg / fs->lfs_sepb ==
			    (ibno - fs->lfs_cleansz))
				((SEGUSE *)(bp->b_data))[fs->lfs_curseg %
				    fs->lfs_sepb].su_flags |= SEGUSE_ACTIVE;
			error = VOP_BWRITE(bp);
		}
	}

	if (do_ckp || fs->lfs_doifile) {
	redo:
		vp = fs->lfs_ivnode;
		/*
		 * Depending on the circumstances of our calling, the ifile
		 * inode might be locked.  If it is, and if it is locked by
		 * us, we should VREF instead of vget here.
		 */
		need_unlock = 0;
		if (VOP_ISLOCKED(vp)
		   && vp->v_lock.lk_lockholder == curproc->p_pid) {
			VREF(vp);
		} else {
			while (vget(vp, LK_EXCLUSIVE))
				continue;
			need_unlock = 1;
		}
		ip = VTOI(vp);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			lfs_writefile(fs, sp, vp);
		(void)lfs_writeinode(fs, sp, ip);

		/* Only vput if we used vget() above. */
		if (need_unlock)
			vput(vp);
		else
			vrele(vp);

		if (lfs_writeseg(fs, sp) && do_ckp)
			goto redo;
	} else {
		(void) lfs_writeseg(fs, sp);
	}

	/*
	 * If the I/O count is non-zero, sleep until it reaches zero.
	 * At the moment, the user's process hangs around so we can
	 * sleep.
	 */
	fs->lfs_doifile = 0;
	if (writer_set && --fs->lfs_writer == 0)
		wakeup(&fs->lfs_dirops);

	if (lfs_dostats) {
		++lfs_stats.nwrites;
		if (sp->seg_flags & SEGM_SYNC)
			++lfs_stats.nsync_writes;
		if (sp->seg_flags & SEGM_CKP)
			++lfs_stats.ncheckpoints;
	}
	lfs_segunlock(fs);
	return (0);
}
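/*
 * Each file written into a partial segment is described by a FINFO
 * record in the segment summary.  sum_bytes_left tracks the space that
 * remains in the summary block and seg_bytes_left the data space that
 * remains in the segment; when either runs out, the partial segment is
 * written and a new one begun.
 */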
/*
 * Write the dirty blocks associated with a vnode.
 */
void
lfs_writefile(fs, sp, vp)
	struct lfs *fs;
	struct segment *sp;
	struct vnode *vp;
{
	struct buf *bp;
	struct finfo *fip;
	IFILE *ifp;

	if (sp->seg_bytes_left < fs->lfs_bsize ||
	    sp->sum_bytes_left < sizeof(struct finfo))
		(void) lfs_writeseg(fs, sp);

	sp->sum_bytes_left -= sizeof(struct finfo) - sizeof(ufs_daddr_t);
	++((SEGSUM *)(sp->segsum))->ss_nfinfo;

	if (vp->v_flag & VDIROP)
		((SEGSUM *)(sp->segsum))->ss_flags |= (SS_DIROP|SS_CONT);

	fip = sp->fip;
	fip->fi_nblocks = 0;
	fip->fi_ino = VTOI(vp)->i_number;
	LFS_IENTRY(ifp, fs, fip->fi_ino, bp);
	fip->fi_version = ifp->if_version;
	brelse(bp);

	if (sp->seg_flags & SEGM_CLEAN) {
		lfs_gather(fs, sp, vp, lfs_match_fake);
		/*
		 * For a file being flushed, we need to write *all* blocks.
		 * This means writing the cleaning blocks first, and then
		 * immediately following with any non-cleaning blocks.
		 * The same is true of the Ifile since checkpoints assume
		 * that all valid Ifile blocks are written.
		 */
		if (IS_FLUSHING(fs,vp) ||
		    VTOI(vp)->i_number == LFS_IFILE_INUM)
			lfs_gather(fs, sp, vp, lfs_match_data);
	} else
		lfs_gather(fs, sp, vp, lfs_match_data);

	/*
	 * It may not be necessary to write the meta-data blocks at this point,
	 * as the roll-forward recovery code should be able to reconstruct the
	 * list.
	 *
	 * We have to write them anyway, though, under two conditions: (1) the
	 * vnode is being flushed (for reuse by vinvalbuf); or (2) we are
	 * checkpointing.
	 */
	if (lfs_writeindir
	   || IS_FLUSHING(fs,vp)
	   || (sp->seg_flags & SEGM_CKP))
	{
		lfs_gather(fs, sp, vp, lfs_match_indir);
		lfs_gather(fs, sp, vp, lfs_match_dindir);
		lfs_gather(fs, sp, vp, lfs_match_tindir);
	}
	fip = sp->fip;
	if (fip->fi_nblocks != 0) {
		sp->fip = (FINFO*)((caddr_t)fip + sizeof(struct finfo) +
				   sizeof(ufs_daddr_t) * (fip->fi_nblocks - 1));
		sp->start_lbp = &sp->fip->fi_blocks[0];
	} else {
		/* No blocks were gathered; back this FINFO out of the summary. */
		sp->sum_bytes_left += sizeof(FINFO) - sizeof(ufs_daddr_t);
		--((SEGSUM *)(sp->segsum))->ss_nfinfo;
	}
}
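/*
 * Write the inode into the current inode block, allocating a new inode
 * block first if necessary.  Returns nonzero if writing the Ifile's
 * inode dirtied an Ifile block that was not gathered into this partial
 * segment, in which case the caller must write the Ifile again.
 */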
int
lfs_writeinode(fs, sp, ip)
	struct lfs *fs;
	struct segment *sp;
	struct inode *ip;
{
	struct buf *bp, *ibp;
	IFILE *ifp;
	SEGUSE *sup;
	ufs_daddr_t daddr;
	ino_t ino;
	int error, i, ndx;
	int redo_ifile = 0;
	struct timespec ts;
	int gotblk = 0;

	if (!(ip->i_flag & IN_ALLMOD))
		return (0);

	/* Allocate a new inode block if necessary. */
	if ((ip->i_number != LFS_IFILE_INUM || sp->idp == NULL) &&
	    sp->ibp == NULL) {
		/* Allocate a new segment if necessary. */
		if (sp->seg_bytes_left < fs->lfs_bsize ||
		    sp->sum_bytes_left < sizeof(ufs_daddr_t))
			(void) lfs_writeseg(fs, sp);

		/* Get next inode block. */
		daddr = fs->lfs_offset;
		fs->lfs_offset += fsbtodb(fs, 1);
		sp->ibp = *sp->cbpp++ =
			getblk(VTOI(fs->lfs_ivnode)->i_devvp, daddr,
			       fs->lfs_bsize, 0, 0);
		gotblk++;

		/* Zero out inode numbers */
		for (i = 0; i < INOPB(fs); ++i)
			((struct dinode *)sp->ibp->b_data)[i].di_inumber = 0;

		++sp->start_bpp;
		fs->lfs_avail -= fsbtodb(fs, 1);
		/* Set remaining space counters. */
		sp->seg_bytes_left -= fs->lfs_bsize;
		sp->sum_bytes_left -= sizeof(ufs_daddr_t);
		ndx = LFS_SUMMARY_SIZE / sizeof(ufs_daddr_t) -
			sp->ninodes / INOPB(fs) - 1;
		((ufs_daddr_t *)(sp->segsum))[ndx] = daddr;
	}

	/* Update the inode times and copy the inode onto the inode page. */
	if (ip->i_flag & (IN_CLEANING | IN_MODIFIED | IN_ACCESSED))
		--fs->lfs_uinodes;
	TIMEVAL_TO_TIMESPEC(&time, &ts);
	LFS_ITIMES(ip, &ts, &ts, &ts);

	/* XXX IN_ALLMOD */
	if (ip->i_flag & IN_CLEANING)
		ip->i_flag &= ~IN_CLEANING;
	else
		ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED |
				IN_UPDATE | IN_ACCESSED);

	/*
	 * If this is the Ifile, and we've already written the Ifile in this
	 * partial segment, just overwrite it (it's not on disk yet) and
	 * continue.
	 *
	 * XXX we know that the bp that we get the second time around has
	 * already been gathered.
	 */
	if (ip->i_number == LFS_IFILE_INUM && sp->idp) {
		*(sp->idp) = ip->i_din.ffs_din;
		return 0;
	}

	bp = sp->ibp;
	((struct dinode *)bp->b_data)[sp->ninodes % INOPB(fs)] =
		ip->i_din.ffs_din;

	if (ip->i_number == LFS_IFILE_INUM)	/* We know sp->idp == NULL */
		sp->idp = ((struct dinode *)bp->b_data) +
			(sp->ninodes % INOPB(fs));
	if (gotblk) {
		bp->b_flags |= B_LOCKED;
		brelse(bp);
	}

	/* Increment inode count in segment summary block. */
	++((SEGSUM *)(sp->segsum))->ss_ninos;

	/* If this page is full, set flag to allocate a new page. */
	if (++sp->ninodes % INOPB(fs) == 0)
		sp->ibp = NULL;

	/*
	 * If updating the ifile, update the super-block.  Update the disk
	 * address and access times for this inode in the ifile.
	 */
	ino = ip->i_number;
	if (ino == LFS_IFILE_INUM) {
		daddr = fs->lfs_idaddr;
		fs->lfs_idaddr = bp->b_blkno;
	} else {
		LFS_IENTRY(ifp, fs, ino, ibp);
		daddr = ifp->if_daddr;
		ifp->if_daddr = bp->b_blkno;
#ifdef LFS_DEBUG_NEXTFREE
		if (ino > 3 && ifp->if_nextfree) {
			vprint("lfs_writeinode", ITOV(ip));
			printf("lfs_writeinode: updating free ino %d\n",
				ip->i_number);
		}
#endif
		error = VOP_BWRITE(ibp);
	}

	/*
	 * No need to update segment usage if there was no former inode address
	 * or if the last inode address is in the current partial segment.
	 */
	if (daddr != LFS_UNUSED_DADDR &&
	    !(daddr >= fs->lfs_lastpseg && daddr <= bp->b_blkno)) {
		LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
#ifdef DIAGNOSTIC
		if (sup->su_nbytes < DINODE_SIZE) {
			/* XXX -- Change to a panic. */
			printf("lfs_writeinode: negative bytes (segment %d short by %d)\n",
			       datosn(fs, daddr),
			       (int)DINODE_SIZE - sup->su_nbytes);
			panic("lfs_writeinode: negative bytes");
			sup->su_nbytes = DINODE_SIZE;
		}
#endif
		sup->su_nbytes -= DINODE_SIZE;
		redo_ifile =
			(ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED));
		error = VOP_BWRITE(bp);
	}
	return (redo_ifile);
}
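/*
 * Add a single buffer to the list gathered for the current partial
 * segment, writing the segment out first if the buffer will not fit.
 * Returns nonzero in that case, so the caller restarts its traversal
 * of the dirty block list.
 */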
int
lfs_gatherblock(sp, bp, sptr)
	struct segment *sp;
	struct buf *bp;
	int *sptr;
{
	struct lfs *fs;
	int version;

	/*
	 * If full, finish this segment.  We may be doing I/O, so
	 * release and reacquire the splbio().
	 */
#ifdef DIAGNOSTIC
	if (sp->vp == NULL)
		panic("lfs_gatherblock: Null vp in segment");
#endif
	fs = sp->fs;
	if (sp->sum_bytes_left < sizeof(ufs_daddr_t) ||
	    sp->seg_bytes_left < bp->b_bcount) {
		if (sptr)
			splx(*sptr);
		lfs_updatemeta(sp);

		version = sp->fip->fi_version;
		(void) lfs_writeseg(fs, sp);

		sp->fip->fi_version = version;
		sp->fip->fi_ino = VTOI(sp->vp)->i_number;
		/* Add the current file to the segment summary. */
		++((SEGSUM *)(sp->segsum))->ss_nfinfo;
		sp->sum_bytes_left -=
			sizeof(struct finfo) - sizeof(ufs_daddr_t);

		if (sptr)
			*sptr = splbio();
		return (1);
	}

#ifdef DEBUG
	if (bp->b_flags & B_GATHERED) {
		printf("lfs_gatherblock: already gathered! Ino %d, lbn %d\n",
			sp->fip->fi_ino, bp->b_lblkno);
		return (0);
	}
#endif
	/* Insert into the buffer list, update the FINFO block. */
	bp->b_flags |= B_GATHERED;
	*sp->cbpp++ = bp;
	sp->fip->fi_blocks[sp->fip->fi_nblocks++] = bp->b_lblkno;

	sp->sum_bytes_left -= sizeof(ufs_daddr_t);
	sp->seg_bytes_left -= bp->b_bcount;
	return (0);
}
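/*
 * Walk the dirty block list of the given vnode, gathering every buffer
 * accepted by the match function into the current partial segment.
 * Returns the number of matching buffers processed.
 */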
int
lfs_gather(fs, sp, vp, match)
	struct lfs *fs;
	struct segment *sp;
	struct vnode *vp;
	int (*match) __P((struct lfs *, struct buf *));
{
	struct buf *bp;
	int s, count = 0;

	sp->vp = vp;
	s = splbio();

#ifndef LFS_NO_BACKBUF_HACK
 loop:	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = bp->b_vnbufs.le_next) {
#else /* LFS_NO_BACKBUF_HACK */
/* This is a hack to see if ordering the blocks in LFS makes a difference. */
# define	BUF_OFFSET	(((void *)&bp->b_vnbufs.le_next) - (void *)bp)
# define	BACK_BUF(BP)	((struct buf *)(((void *)BP->b_vnbufs.le_prev) - BUF_OFFSET))
# define	BEG_OF_LIST	((struct buf *)(((void *)&vp->v_dirtyblkhd.lh_first) - BUF_OFFSET))
	/* Find last buffer. */
 loop:	for (bp = vp->v_dirtyblkhd.lh_first;
	     bp && bp->b_vnbufs.le_next != NULL;
	     bp = bp->b_vnbufs.le_next);
	for (; bp && bp != BEG_OF_LIST; bp = BACK_BUF(bp)) {
#endif /* LFS_NO_BACKBUF_HACK */
		if ((bp->b_flags & (B_BUSY|B_GATHERED)) || !match(fs, bp))
			continue;
		if (vp->v_type == VBLK) {
			/* For block devices, just write the blocks. */
			/* XXX Do we really need to even do this? */
#ifdef DEBUG_LFS
			if (count == 0)
				printf("BLK(");
			printf(".");
#endif
			/* Get the block before bwrite, so we don't corrupt the free list */
			bp->b_flags |= B_BUSY;
			bremfree(bp);
			bwrite(bp);
		} else {
#ifdef DIAGNOSTIC
			if ((bp->b_flags & (B_CALL|B_INVAL)) == B_INVAL) {
				printf("lfs_gather: lbn %d is B_INVAL\n",
					bp->b_lblkno);
				VOP_PRINT(bp->b_vp);
			}
			if (!(bp->b_flags & B_DELWRI))
				panic("lfs_gather: bp not B_DELWRI");
			if (!(bp->b_flags & B_LOCKED)) {
				printf("lfs_gather: lbn %d blk %d not B_LOCKED\n", bp->b_lblkno, bp->b_blkno);
				VOP_PRINT(bp->b_vp);
				panic("lfs_gather: bp not B_LOCKED");
			}
#endif
			if (lfs_gatherblock(sp, bp, &s)) {
				goto loop;
			}
		}
		count++;
	}
	splx(s);
#ifdef DEBUG_LFS
	if (vp->v_type == VBLK && count)
		printf(")\n");
#endif
	lfs_updatemeta(sp);
	sp->vp = NULL;
	return count;
}
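/*
 * sp->start_bpp and sp->start_lbp are parallel arrays: the buffers
 * gathered for the current FINFO and their logical block numbers.
 * lfs_updatemeta sorts both together (see lfs_shellsort) and then
 * assigns each block its disk address in write order.
 */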
/*
 * Update the metadata that points to the blocks listed in the FINFO
 * array.
 */
void
lfs_updatemeta(sp)
	struct segment *sp;
{
	SEGUSE *sup;
	struct buf *bp, *ibp;
	struct lfs *fs;
	struct vnode *vp;
	struct indir a[NIADDR + 2], *ap;
	struct inode *ip;
	ufs_daddr_t daddr, lbn, off;
	daddr_t ooff;
	int error, i, nblocks, num;

	vp = sp->vp;
	nblocks = &sp->fip->fi_blocks[sp->fip->fi_nblocks] - sp->start_lbp;
	if (nblocks < 0)
		panic("This is a bad thing\n");
	if (vp == NULL || nblocks == 0)
		return;

	/* Sort the blocks. */
	/*
	 * XXX KS - We have to sort even if the blocks come from the
	 * cleaner, because there might be other pending blocks on the
	 * same inode...and if we don't sort, and there are fragments
	 * present, blocks may be written in the wrong place.
	 */
	/* if (!(sp->seg_flags & SEGM_CLEAN)) */
	lfs_shellsort(sp->start_bpp, sp->start_lbp, nblocks);

	/*
	 * Record the length of the last block in case it's a fragment.
	 * If there are indirect blocks present, they sort last.  An
	 * indirect block will be lfs_bsize and its presence indicates
	 * that you cannot have fragments.
	 */
	sp->fip->fi_lastlength = sp->start_bpp[nblocks - 1]->b_bcount;

	/*
	 * Assign disk addresses, and update references to the logical
	 * block and the segment usage information.
	 */
	fs = sp->fs;
	for (i = nblocks; i--; ++sp->start_bpp) {
		lbn = *sp->start_lbp++;

		(*sp->start_bpp)->b_blkno = off = fs->lfs_offset;
		if ((*sp->start_bpp)->b_blkno == (*sp->start_bpp)->b_lblkno) {
			printf("lfs_updatemeta: ino %d blk %d has same lbn and daddr\n", VTOI(vp)->i_number, off);
		}
		fs->lfs_offset +=
			fragstodb(fs, numfrags(fs, (*sp->start_bpp)->b_bcount));
		error = ufs_bmaparray(vp, lbn, &daddr, a, &num, NULL);
		if (error)
			panic("lfs_updatemeta: ufs_bmaparray %d", error);
		ip = VTOI(vp);
		switch (num) {
		case 0:
			ooff = ip->i_ffs_db[lbn];
			if (vp != fs->lfs_ivnode &&
			   (ooff == 0 || ooff == UNASSIGNED)) {
#ifdef DEBUG_LFS
				printf("lfs_updatemeta[1]: warning: writing ino %d lbn %d at 0x%x, was 0x%x\n", ip->i_number, lbn, off, ooff);
#endif
			} else
				ip->i_ffs_db[lbn] = off;
			break;
		case 1:
			ooff = ip->i_ffs_ib[a[0].in_off];
			if (vp != fs->lfs_ivnode &&
			   (ooff == 0 || ooff == UNASSIGNED)) {
#ifdef DEBUG_LFS
				printf("lfs_updatemeta[2]: warning: writing ino %d lbn %d at 0x%x, was 0x%x\n", ip->i_number, lbn, off, ooff);
#endif
			} else
				ip->i_ffs_ib[a[0].in_off] = off;
			break;
		default:
			ap = &a[num - 1];
			if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp))
				panic("lfs_updatemeta: bread bno %d",
				      ap->in_lbn);
			/*
			 * Bread may create a new (indirect) block which needs
			 * to get counted for the inode.
			 *
			 * XXX - why would it ever do this (except possibly
			 * for the Ifile)?  lfs_balloc is supposed to take
			 * care of this.
			 */
			if (bp->b_blkno == UNASSIGNED) {
				ip->i_ffs_blocks += fsbtodb(fs, 1);
				fs->lfs_bfree -= fragstodb(fs, fs->lfs_frag);

				/* Note the new address */
				bp->b_blkno = UNWRITTEN;

				if (num == 2) {
					ip->i_ffs_ib[a[0].in_off] = UNWRITTEN;
				} else {
					ap = &a[num - 2];
					if (bread(vp, ap->in_lbn,
					    fs->lfs_bsize, NOCRED, &ibp))
						panic("lfs_updatemeta: bread bno %d",
						      ap->in_lbn);
					((ufs_daddr_t *)ibp->b_data)[ap->in_off] = UNWRITTEN;
					VOP_BWRITE(ibp);
				}
			}
#ifdef DEBUG
			else if (!(bp->b_flags & (B_DONE|B_DELWRI)))
				printf("lfs_updatemeta: unaccounted indirect block ino %d block %d\n", ip->i_number, ap->in_lbn);
#endif
			ooff = ((ufs_daddr_t *)bp->b_data)[ap->in_off];
			if (vp != fs->lfs_ivnode &&
			   (ooff == 0 || ooff == UNASSIGNED)) {
#ifdef DEBUG_LFS
				printf("lfs_updatemeta[3]: warning: writing ino %d lbn %d at 0x%x, was 0x%x\n", ip->i_number, lbn, off, ooff);
#endif
				brelse(bp);
			} else {
				((ufs_daddr_t *)bp->b_data)[ap->in_off] = off;
				VOP_BWRITE(bp);
			}
		}
		/* Update segment usage information. */
		if (daddr > 0 &&
		    !(daddr >= fs->lfs_lastpseg && daddr <= off)) {
			LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
#ifdef DIAGNOSTIC
			if (sup->su_nbytes < (*sp->start_bpp)->b_bcount) {
				/* XXX -- Change to a panic. */
				printf("lfs_updatemeta: negative bytes (segment %d short by %ld)\n",
				       datosn(fs, daddr),
				       (*sp->start_bpp)->b_bcount - sup->su_nbytes);
				printf("lfs_updatemeta: ino %d, lbn %d, addr = %x\n",
				       VTOI(sp->vp)->i_number,
				       (*sp->start_bpp)->b_lblkno, daddr);
				panic("lfs_updatemeta: negative bytes");
				sup->su_nbytes = (*sp->start_bpp)->b_bcount;
			}
#endif
			sup->su_nbytes -= (*sp->start_bpp)->b_bcount;
			error = VOP_BWRITE(bp);
		}
	}
}

/*
 * Start a new segment.
 */
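/*
 * A new partial segment is set up here: if the remaining space fails
 * the LFS_PARTIAL_FITS test, advance to a brand-new segment first (in
 * which case lfs_initseg returns nonzero), then lay out the summary
 * block and the first FINFO.
 */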
int
lfs_initseg(fs)
	struct lfs *fs;
{
	struct segment *sp;
	SEGUSE *sup;
	SEGSUM *ssp;
	struct buf *bp;
	int repeat;

	sp = fs->lfs_sp;

	repeat = 0;
	/* Advance to the next segment. */
	if (!LFS_PARTIAL_FITS(fs)) {
		/* Wake up any cleaning procs waiting on this file system. */
		wakeup(&lfs_allclean_wakeup);
		wakeup(&fs->lfs_nextseg);
		lfs_newseg(fs);
		repeat = 1;
		fs->lfs_offset = fs->lfs_curseg;
		sp->seg_number = datosn(fs, fs->lfs_curseg);
		sp->seg_bytes_left = fs->lfs_dbpseg * DEV_BSIZE;
		/*
		 * If the segment contains a superblock, update the offset
		 * and summary address to skip over it.
		 */
		LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
		if (sup->su_flags & SEGUSE_SUPERBLOCK) {
			fs->lfs_offset += LFS_SBPAD / DEV_BSIZE;
			sp->seg_bytes_left -= LFS_SBPAD;
		}
		brelse(bp);
	} else {
		sp->seg_number = datosn(fs, fs->lfs_curseg);
		sp->seg_bytes_left = (fs->lfs_dbpseg -
				      (fs->lfs_offset - fs->lfs_curseg)) * DEV_BSIZE;
	}
	fs->lfs_lastpseg = fs->lfs_offset;

	sp->fs = fs;
	sp->ibp = NULL;
	sp->idp = NULL;
	sp->ninodes = 0;

	/* Get a new buffer for SEGSUM and enter it into the buffer list. */
	sp->cbpp = sp->bpp;
	*sp->cbpp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp,
			       fs->lfs_offset, LFS_SUMMARY_SIZE);
	sp->segsum = (*sp->cbpp)->b_data;
	bzero(sp->segsum, LFS_SUMMARY_SIZE);
	sp->start_bpp = ++sp->cbpp;
	fs->lfs_offset += LFS_SUMMARY_SIZE / DEV_BSIZE;

	/* Set pointer to SEGSUM, initialize it. */
	ssp = sp->segsum;
	ssp->ss_next = fs->lfs_nextseg;
	ssp->ss_nfinfo = ssp->ss_ninos = 0;
	ssp->ss_magic = SS_MAGIC;

	/* Set pointer to first FINFO, initialize it. */
	sp->fip = (struct finfo *)((caddr_t)sp->segsum + sizeof(SEGSUM));
	sp->fip->fi_nblocks = 0;
	sp->start_lbp = &sp->fip->fi_blocks[0];
	sp->fip->fi_lastlength = 0;

	sp->seg_bytes_left -= LFS_SUMMARY_SIZE;
	sp->sum_bytes_left = LFS_SUMMARY_SIZE - sizeof(SEGSUM);

	return (repeat);
}
/*
 * Return the next segment to write.
 */
void
lfs_newseg(fs)
	struct lfs *fs;
{
	CLEANERINFO *cip;
	SEGUSE *sup;
	struct buf *bp;
	int curseg, isdirty, sn;

	LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_nextseg), bp);
	sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE;
	sup->su_nbytes = 0;
	sup->su_nsums = 0;
	sup->su_ninos = 0;
	(void) VOP_BWRITE(bp);

	LFS_CLEANERINFO(cip, fs, bp);
	--cip->clean;
	++cip->dirty;
	fs->lfs_nclean = cip->clean;
	(void) VOP_BWRITE(bp);

	fs->lfs_lastseg = fs->lfs_curseg;
	fs->lfs_curseg = fs->lfs_nextseg;
	for (sn = curseg = datosn(fs, fs->lfs_curseg);;) {
		sn = (sn + 1) % fs->lfs_nseg;
		if (sn == curseg)
			panic("lfs_nextseg: no clean segments");
		LFS_SEGENTRY(sup, fs, sn, bp);
		isdirty = sup->su_flags & SEGUSE_DIRTY;
		brelse(bp);
		if (!isdirty)
			break;
	}

	++fs->lfs_nactive;
	fs->lfs_nextseg = sntoda(fs, sn);
	if (lfs_dostats) {
		++lfs_stats.segsused;
	}
}
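/*
 * Write the current partial segment to disk: charge the gathered blocks
 * to the segment usage table, checksum the data and the summary, then
 * copy the buffers into CHUNKSIZE chunks and issue the writes.  Returns
 * nonzero if the caller must write a further partial segment.
 */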
int
lfs_writeseg(fs, sp)
	struct lfs *fs;
	struct segment *sp;
{
	extern int locked_queue_count;
	extern long locked_queue_bytes;
	struct buf **bpp, *bp, *cbp;
	SEGUSE *sup;
	SEGSUM *ssp;
	dev_t i_dev;
	u_long *datap, *dp;
	int do_again, i, nblocks, s;
#ifdef LFS_TRACK_IOS
	int j;
#endif
	int (*strategy)__P((void *));
	struct vop_strategy_args vop_strategy_a;
	u_short ninos;
	struct vnode *devvp;
	char *p;
	struct vnode *vn;
	struct inode *ip;
#if defined(DEBUG) && defined(LFS_PROPELLER)
	static int propeller;
	char propstring[4] = "-\\|/";

	printf("%c\b", propstring[propeller++]);
	if (propeller == 4)
		propeller = 0;
#endif

	/*
	 * If there are no buffers other than the segment summary to write
	 * and it is not a checkpoint, don't do anything.  On a checkpoint,
	 * even if there aren't any buffers, you need to write the superblock.
	 */
	if ((nblocks = sp->cbpp - sp->bpp) == 1)
		return (0);

	i_dev = VTOI(fs->lfs_ivnode)->i_dev;
	devvp = VTOI(fs->lfs_ivnode)->i_devvp;

	/* Update the segment usage information. */
	LFS_SEGENTRY(sup, fs, sp->seg_number, bp);

	/* Loop through all blocks, except the segment summary. */
	for (bpp = sp->bpp; ++bpp < sp->cbpp; ) {
		if ((*bpp)->b_vp != devvp)
			sup->su_nbytes += (*bpp)->b_bcount;
	}

	ssp = (SEGSUM *)sp->segsum;

	ninos = (ssp->ss_ninos + INOPB(fs) - 1) / INOPB(fs);
	sup->su_nbytes += ssp->ss_ninos * DINODE_SIZE;
	/* sup->su_nbytes += LFS_SUMMARY_SIZE; */
	sup->su_lastmod = time.tv_sec;
	sup->su_ninos += ninos;
	++sup->su_nsums;

	do_again = !(bp->b_flags & B_GATHERED);
	(void)VOP_BWRITE(bp);
	/*
	 * Compute checksum across data and then across summary; the first
	 * block (the summary block) is skipped.  Set the create time here
	 * so that it's guaranteed to be later than the inode mod times.
	 *
	 * XXX
	 * Fix this to do it inline, instead of malloc/copy.
	 */
	datap = dp = malloc(nblocks * sizeof(u_long), M_SEGMENT, M_WAITOK);
	for (bpp = sp->bpp, i = nblocks - 1; i--;) {
		if (((*++bpp)->b_flags & (B_CALL|B_INVAL)) == (B_CALL|B_INVAL)) {
			if (copyin((*bpp)->b_saveaddr, dp++, sizeof(u_long)))
				panic("lfs_writeseg: copyin failed [1]: ino %d blk %d", VTOI((*bpp)->b_vp)->i_number, (*bpp)->b_lblkno);
		} else {
			if (!((*bpp)->b_flags & B_CALL)) {
				/*
				 * Before we record data for a checksum,
				 * make sure the data won't change in between
				 * the checksum calculation and the write,
				 * by marking the buffer B_BUSY.  It will
				 * be freed later by brelse().
				 */
			again:
				s = splbio();
				if ((*bpp)->b_flags & B_BUSY) {
#ifdef DEBUG
					printf("lfs_writeseg: avoiding potential data summary corruption for ino %d, lbn %d\n",
					       VTOI((*bpp)->b_vp)->i_number,
					       bp->b_lblkno);
#endif
					(*bpp)->b_flags |= B_WANTED;
					tsleep((*bpp), (PRIBIO + 1),
					       "lfs_writeseg", 0);
					splx(s);
					goto again;
				}
				(*bpp)->b_flags |= B_BUSY;
				splx(s);
			}
			*dp++ = ((u_long *)(*bpp)->b_data)[0];
		}
	}
	ssp->ss_create = time.tv_sec;
	ssp->ss_datasum = cksum(datap, (nblocks - 1) * sizeof(u_long));
	ssp->ss_sumsum =
		cksum(&ssp->ss_datasum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_sumsum));
	free(datap, M_SEGMENT);
#ifdef DIAGNOSTIC
	if (fs->lfs_bfree < fsbtodb(fs, ninos) + LFS_SUMMARY_SIZE / DEV_BSIZE)
		panic("lfs_writeseg: No diskspace for summary");
#endif
	fs->lfs_bfree -= (fsbtodb(fs, ninos) + LFS_SUMMARY_SIZE / DEV_BSIZE);

	strategy = devvp->v_op[VOFFSET(vop_strategy)];

	/*
	 * When we simply write the blocks we lose a rotation for every block
	 * written.  To avoid this problem, we allocate memory in chunks, copy
	 * the buffers into the chunk and write the chunk.  CHUNKSIZE is the
	 * largest size I/O devices can handle.
	 * When the data is copied to the chunk, turn off the B_LOCKED bit
	 * and brelse the buffer (which will move them to the LRU list).  Add
	 * the B_CALL flag to the buffer header so we can count I/O's for the
	 * checkpoints and so we can release the allocated memory.
	 *
	 * XXX
	 * This should be removed if the new virtual memory system allows us to
	 * easily make the buffers contiguous in kernel memory and if that's
	 * fast enough.
	 */

#define	CHUNKSIZE MAXPHYS

	if (devvp == NULL)
		panic("devvp is NULL");
	for (bpp = sp->bpp, i = nblocks; i;) {
		cbp = lfs_newbuf(devvp, (*bpp)->b_blkno, CHUNKSIZE);
		cbp->b_dev = i_dev;
		cbp->b_flags |= B_ASYNC | B_BUSY;
		cbp->b_bcount = 0;

#ifdef DIAGNOSTIC
		if (datosn(fs, (*bpp)->b_blkno +
		    ((*bpp)->b_bcount - 1) / DEV_BSIZE) !=
		    datosn(fs, cbp->b_blkno)) {
			panic("lfs_writeseg: Segment overwrite");
		}
#endif

		s = splbio();
		if (fs->lfs_iocount >= LFS_THROTTLE) {
			tsleep(&fs->lfs_iocount, PRIBIO+1, "lfs throttle", 0);
		}
		++fs->lfs_iocount;
#ifdef LFS_TRACK_IOS
		for (j = 0; j < LFS_THROTTLE; j++) {
			if (fs->lfs_pending[j] == LFS_UNUSED_DADDR) {
				fs->lfs_pending[j] = cbp->b_blkno;
				break;
			}
		}
#endif /* LFS_TRACK_IOS */
		for (p = cbp->b_data; i && cbp->b_bcount < CHUNKSIZE; i--) {
			bp = *bpp;

			if (bp->b_bcount > (CHUNKSIZE - cbp->b_bcount))
				break;
			/*
			 * Fake buffers from the cleaner are marked as B_INVAL.
			 * We need to copy the data from user space rather than
			 * from the buffer indicated.
			 * XXX == what do I do on an error?
			 */
			if ((bp->b_flags & (B_CALL|B_INVAL)) == (B_CALL|B_INVAL)) {
				if (copyin(bp->b_saveaddr, p, bp->b_bcount))
					panic("lfs_writeseg: copyin failed [2]");
			} else
				bcopy(bp->b_data, p, bp->b_bcount);
			p += bp->b_bcount;
			cbp->b_bcount += bp->b_bcount;
			if (bp->b_flags & B_LOCKED) {
				--locked_queue_count;
				locked_queue_bytes -= bp->b_bufsize;
			}
			bp->b_flags &= ~(B_ERROR | B_READ | B_DELWRI |
					 B_LOCKED | B_GATHERED);
			vn = bp->b_vp;
			if (bp->b_flags & B_CALL) {
				/* if B_CALL, it was created with newbuf */
				lfs_freebuf(bp);
			} else {
				bremfree(bp);
				bp->b_flags |= B_DONE;
				if (vn)
					reassignbuf(bp, vn);
				brelse(bp);
			}
			if (bp->b_flags & B_NEEDCOMMIT) { /* XXX */
				bp->b_flags &= ~B_NEEDCOMMIT;
				wakeup(bp);
			}

			bpp++;

			/*
			 * If this is the last block for this vnode, but
			 * there are other blocks on its dirty list,
			 * set IN_MODIFIED/IN_CLEANING depending on what
			 * sort of block.  Only do this for our mount point,
			 * not for, e.g., inode blocks that are attached to
			 * the devvp.
			 */
			if (i > 1 && vn && *bpp && (*bpp)->b_vp != vn
			   && (*bpp)->b_vp &&
			   (bp = vn->v_dirtyblkhd.lh_first) != NULL &&
			   vn->v_mount == fs->lfs_ivnode->v_mount)
			{
				ip = VTOI(vn);
#ifdef DEBUG_LFS
				printf("lfs_writeseg: marking ino %d\n",
				       ip->i_number);
#endif
				if (!(ip->i_flag & (IN_CLEANING | IN_MODIFIED |
						    IN_ACCESSED))) {
					fs->lfs_uinodes++;
					if (bp->b_flags & B_CALL)
						ip->i_flag |= IN_CLEANING;
					else
						ip->i_flag |= IN_MODIFIED;
				}
			}
			/* if (vn->v_dirtyblkhd.lh_first == NULL) */
			wakeup(vn);
		}
		++cbp->b_vp->v_numoutput;
		splx(s);
		/*
		 * XXXX This is a gross and disgusting hack.  Since these
		 * buffers are physically addressed, they hang off the
		 * device vnode (devvp).  As a result, they have no way
		 * of getting to the LFS superblock or lfs structure to
		 * keep track of the number of I/O's pending.  So, I am
		 * going to stuff the fs into the saveaddr field of
		 * the buffer (yuk).
		 */
		cbp->b_saveaddr = (caddr_t)fs;
		vop_strategy_a.a_desc = VDESC(vop_strategy);
		vop_strategy_a.a_bp = cbp;
		(strategy)(&vop_strategy_a);
	}
	/*
	 * XXX
	 * Vinvalbuf can move locked buffers off the locked queue
	 * and we have no way of knowing about this.  So, after
	 * doing a big write, we recalculate how many buffers are
	 * really still left on the locked queue.
	 */
	lfs_countlocked(&locked_queue_count, &locked_queue_bytes);
	wakeup(&locked_queue_count);
	if (lfs_dostats) {
		++lfs_stats.psegwrites;
		lfs_stats.blocktot += nblocks - 1;
		if (fs->lfs_sp->seg_flags & SEGM_SYNC)
			++lfs_stats.psyncwrites;
		if (fs->lfs_sp->seg_flags & SEGM_CLEAN) {
			++lfs_stats.pcleanwrites;
			lfs_stats.cleanblocks += nblocks - 1;
		}
	}
	return (lfs_initseg(fs) || do_again);
}
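/*
 * Write one copy of the superblock, asynchronously, at the given disk
 * address; lfs_supercallback releases the buffer when the write
 * completes.
 */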
void
lfs_writesuper(fs, daddr)
	struct lfs *fs;
	daddr_t daddr;
{
	struct buf *bp;
	dev_t i_dev;
	int (*strategy) __P((void *));
	int s;
	struct vop_strategy_args vop_strategy_a;

#ifdef LFS_CANNOT_ROLLFW
	/*
	 * If we can write one superblock while another is in
	 * progress, we risk not having a complete checkpoint if we crash.
	 * So, block here if a superblock write is in progress.
	 */
	s = splbio();
	while (fs->lfs_sbactive) {
		tsleep(&fs->lfs_sbactive, PRIBIO+1, "lfs sb", 0);
	}
	fs->lfs_sbactive = daddr;
	splx(s);
#endif
	i_dev = VTOI(fs->lfs_ivnode)->i_dev;
	strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)];

	/* Set timestamp of this version of the superblock */
	fs->lfs_tstamp = time.tv_sec;

	/* Checksum the superblock and copy it into a buffer. */
	fs->lfs_cksum = lfs_sb_cksum(&(fs->lfs_dlfs));
	bp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, daddr, LFS_SBPAD);
	*(struct dlfs *)bp->b_data = fs->lfs_dlfs;

	bp->b_dev = i_dev;
	bp->b_flags |= B_BUSY | B_CALL | B_ASYNC;
	bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI);
	bp->b_iodone = lfs_supercallback;
	/* XXX KS - same nasty hack as above */
	bp->b_saveaddr = (caddr_t)fs;

	vop_strategy_a.a_desc = VDESC(vop_strategy);
	vop_strategy_a.a_bp = bp;
	s = splbio();
	++bp->b_vp->v_numoutput;
	splx(s);
	(strategy)(&vop_strategy_a);
}

/*
 * Logical block number match routines used when traversing the dirty block
 * chain.
 */
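/*
 * Indirect blocks in LFS carry negative logical block numbers; in the
 * routines below, the remainder of (-lbn - NDADDR) modulo NINDIR(fs)
 * is 0 for a first-level indirect block, 1 for a double indirect, and
 * 2 for a triple indirect.
 */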
int
lfs_match_fake(fs, bp)
	struct lfs *fs;
	struct buf *bp;
{
	return (bp->b_flags & B_CALL);
}

int
lfs_match_data(fs, bp)
	struct lfs *fs;
	struct buf *bp;
{
	return (bp->b_lblkno >= 0);
}

int
lfs_match_indir(fs, bp)
	struct lfs *fs;
	struct buf *bp;
{
	int lbn;

	lbn = bp->b_lblkno;
	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0);
}

int
lfs_match_dindir(fs, bp)
	struct lfs *fs;
	struct buf *bp;
{
	int lbn;

	lbn = bp->b_lblkno;
	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1);
}

int
lfs_match_tindir(fs, bp)
	struct lfs *fs;
	struct buf *bp;
{
	int lbn;

	lbn = bp->b_lblkno;
	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2);
}

/*
 * XXX - The only buffers that are going to hit these functions are the
 * segment write blocks, or the segment summaries, or the superblocks.
 *
 * All of the above are created by lfs_newbuf, and so do not need to be
 * released via brelse.
 */
void
lfs_callback(bp)
	struct buf *bp;
{
	struct lfs *fs;
#ifdef LFS_TRACK_IOS
	int j;
#endif

	fs = (struct lfs *)bp->b_saveaddr;
#ifdef DIAGNOSTIC
	if (fs->lfs_iocount == 0)
		panic("lfs_callback: zero iocount\n");
#endif
	if (--fs->lfs_iocount < LFS_THROTTLE)
		wakeup(&fs->lfs_iocount);
#ifdef LFS_TRACK_IOS
	for (j = 0; j < LFS_THROTTLE; j++) {
		if (fs->lfs_pending[j] == bp->b_blkno) {
			fs->lfs_pending[j] = LFS_UNUSED_DADDR;
			wakeup(&(fs->lfs_pending[j]));
			break;
		}
	}
#endif /* LFS_TRACK_IOS */

	lfs_freebuf(bp);
}

void
lfs_supercallback(bp)
	struct buf *bp;
{
#ifdef LFS_CANNOT_ROLLFW
	struct lfs *fs;

	fs = (struct lfs *)bp->b_saveaddr;
	fs->lfs_sbactive = 0;
	wakeup(&fs->lfs_sbactive);
#endif
	lfs_freebuf(bp);
}

/*
 * Shellsort (diminishing increment sort) from Data Structures and
 * Algorithms, Aho, Hopcroft and Ullman, 1983 Edition, page 290;
 * see also Knuth Vol. 3, page 84.  The increments are selected from
 * formula (8), page 95.  Roughly O(N^3/2).
 */
/*
 * This is our own private copy of shellsort because we want to sort
 * two parallel arrays (the array of buffer pointers and the array of
 * logical block numbers) simultaneously.  Note that we cast the array
 * of logical block numbers to unsigned in this routine so that the
 * negative block numbers (meta data blocks) sort AFTER the data blocks.
 */

void
lfs_shellsort(bp_array, lb_array, nmemb)
	struct buf **bp_array;
	ufs_daddr_t *lb_array;
	int nmemb;
{
	static int __rsshell_increments[] = { 4, 1, 0 };
	int incr, *incrp, t1, t2;
	struct buf *bp_temp;
	u_long lb_temp;

	for (incrp = __rsshell_increments; (incr = *incrp++) != 0;)
		for (t1 = incr; t1 < nmemb; ++t1)
			for (t2 = t1 - incr; t2 >= 0;)
				if (lb_array[t2] > lb_array[t2 + incr]) {
					lb_temp = lb_array[t2];
					lb_array[t2] = lb_array[t2 + incr];
					lb_array[t2 + incr] = lb_temp;
					bp_temp = bp_array[t2];
					bp_array[t2] = bp_array[t2 + incr];
					bp_array[t2 + incr] = bp_temp;
					t2 -= incr;
				} else
					break;
}

/*
 * Check VXLOCK.  Return 1 if the vnode is locked.  Otherwise, vget it.
 */
int
lfs_vref(vp)
	struct vnode *vp;
{
	/*
	 * If we return 1 here during a flush, we risk vinvalbuf() not
	 * being able to flush all of the pages from this vnode, which
	 * will cause it to panic.  So, return 0 if a flush is in progress.
	 */
	if (vp->v_flag & VXLOCK) {
		if (IS_FLUSHING(VTOI(vp)->i_lfs, vp)) {
			return 0;
		}
		return (1);
	}
	return (vget(vp, 0));
}

/*
 * This is vrele except that we do not want to VOP_INACTIVE this vnode.  We
 * inline vrele here to avoid the vn_lock and VOP_INACTIVE call at the end.
 */
void
lfs_vunref(vp)
	struct vnode *vp;
{
	/*
	 * Analogous to lfs_vref, if the node is flushing, fake it.
	 */
	if ((vp->v_flag & VXLOCK) && IS_FLUSHING(VTOI(vp)->i_lfs, vp)) {
		return;
	}

	simple_lock(&vp->v_interlock);
#ifdef DIAGNOSTIC
	if (vp->v_usecount <= 0) {
		printf("lfs_vunref: flags are 0x%lx\n", vp->v_flag);
		printf("lfs_vunref: usecount = %ld\n", vp->v_usecount);
		panic("lfs_vunref: v_usecount<0");
	}
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}
#ifdef DIAGNOSTIC
	if (VOP_ISLOCKED(vp))
		panic("lfs_vunref: vnode locked");
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	simple_unlock(&vp->v_interlock);
}
/*
 * We use this when we have vnodes that were loaded in solely for cleaning.
 * There is no reason to believe that these vnodes will be referenced again
 * soon, since the cleaning process is unrelated to normal filesystem
 * activity.  Putting cleaned vnodes at the tail of the list has the effect
 * of flushing the vnode LRU.  So, put vnodes that were loaded only for
 * cleaning at the head of the list, instead.
 */
void
lfs_vunref_head(vp)
	struct vnode *vp;
{
	simple_lock(&vp->v_interlock);
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		panic("lfs_vunref_head: v_usecount == 0");
	}
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}
#ifdef DIAGNOSTIC
	if (VOP_ISLOCKED(vp))
		panic("lfs_vunref_head: vnode locked");
#endif
	/*
	 * Insert at head of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	simple_unlock(&vp->v_interlock);
}